@stencila/schema
Version:
Stencila schema and other specifications
502 lines (461 loc) • 16.8 kB
JavaScript
const { src, parallel, series, watch } = require('gulp')
const Ajv = require('ajv')
const betterAjvErrors = require('better-ajv-errors')
const del = require('del')
const fs = require('fs-extra')
const globby = require('globby')
const jls = require('vscode-json-languageservice')
const jstt = require('json-schema-to-typescript')
const path = require('path')
const refParser = require('json-schema-ref-parser')
const through2 = require('through2')
const yaml = require('js-yaml')
/**
 * Process a schema object, in place.
 *
 * This function processes hand written YAML or JSON schema definitions with the aim
 * of making schema authoring less tedious and error prone. It makes the following
 * modifications:
 *
 * - `$schema`: set to `http://json-schema.org/draft-07/schema#` if not specified
 * - `$id`: set to `https://stencila.github.com/schema/${schema.title}.schema.json` if not specified
 * - `category`: set to the directory part of `schema.source`
 * - `children`: a list of the titles of the schemas that directly extend this one
 * - `descendants`: a list of the titles of all schemas that extend this one,
 *   directly or indirectly
 *
 * If the schema has `properties` then in addition:
 *
 * - `type`: set to `object`
 * - `properties.*.from`: set to `schema.title`
 * - `properties.*.aliases`: for `array` properties whose name ends with `s`, the
 *   name without the trailing `s` is added as an alias
 * - `aliases`: set to `true` if any property has an alias; the alias-to-property
 *   map itself is stored in the `aliases` argument under `schema.title`
 * - `additionalProperties`: set to `false` if not specified
 *
 * If the schema defines the `$extends` keyword then in addition:
 *
 * - the base schema is processed first (recursively)
 * - `parent`: set to the title of the parent schema (we don't use `extends` because that affects Typescript generation)
 * - `properties`: are merged from ancestors and the current schema (the current schema wins)
 * - `required`: are merged from ancestors and the current schema, without duplicates
 *
 * If the source file is YAML, every string `$ref` in the schema is replaced by the
 * base name of the referenced file with `.yaml` replaced by `.json`.
 *
 * Schemas whose `$id` does not include `stencila` only get the `$schema` default
 * and are otherwise left untouched. Note that `properties.type.enum` is not set
 * here; it is derived from `descendants` in `jsonschema()` when the files are written.
 *
 * @param {Map<string, object>} schemas All schemas, keyed by their `source` path
 * @param {object} aliases Accumulator of property aliases, keyed by schema title
 * @param {object} schema The schema to process; must have a `source` property
 * @throws {Error} If processing fails; the message names `schema.source` and
 *   includes the stack of the underlying error
 */
function processSchema(schemas, aliases, schema) {
  if (schema.$processed) return

  // Look up the schema named by `$extends`, resolved relative to the
  // directory of the extending schema's source file.
  function parentOf(child) {
    if (!child.$extends) return null
    const parentPath = path.join(path.dirname(child.source), child.$extends)
    const found = schemas.get(parentPath)
    if (!found)
      throw new Error(`Schema in "$extends" not found for: "${parentPath}"`)
    return found
  }

  // Recursively replace any `$ref` string with a reference to the
  // JSON file that is generated for the referenced schema.
  function rewriteRefs(node) {
    // `typeof null` is 'object', so null values (e.g. `default: null`)
    // have to be excluded explicitly before calling `Object.entries`.
    if (node === null || typeof node !== 'object') return
    for (const [key, child] of Object.entries(node)) {
      if (key === '$ref' && typeof child === 'string')
        node[key] = path.basename(child).replace('.yaml', '.json')
      rewriteRefs(child)
    }
  }

  try {
    if (!schema.$schema)
      schema.$schema = `http://json-schema.org/draft-07/schema#`
    if (!schema.$id)
      schema.$id = `https://stencila.github.com/schema/${
        schema.title
      }.schema.json`

    // Don't modify any other schema
    if (!schema.$id.includes('stencila')) return

    schema.category = path.dirname(schema.source)
    schema.children = []
    schema.descendants = []

    if (schema.properties) {
      schema.type = 'object'

      const typesAliases = {}
      for (const [name, property] of Object.entries(schema.properties)) {
        property.from = schema.title

        // Register declared aliases
        if (property.aliases) {
          for (const alias of property.aliases) typesAliases[alias] = name
        }

        // Add and register aliases for array properties
        if (property.type === 'array' && name.endsWith('s')) {
          const alias = name.slice(0, -1)
          if (!property.aliases) property.aliases = []
          if (!property.aliases.includes(alias)) property.aliases.push(alias)
          typesAliases[alias] = name
        }
      }

      if (Object.keys(typesAliases).length) {
        aliases[schema.title] = typesAliases
        schema.aliases = true
      }

      if (schema.additionalProperties === undefined) {
        schema.additionalProperties = false
      }
    }

    if (schema.$extends) {
      // Get the base schema and ensure that it
      // has been processed (to collect properties)
      const base = parentOf(schema)
      processSchema(schemas, aliases, base)

      // Do extension of properties from base
      if (base.properties)
        schema.properties = { ...base.properties, ...(schema.properties || {}) }

      // JSON Schema requires the items of `required` to be unique, so a
      // property re-listed by the extending schema must not appear twice.
      if (base.required)
        schema.required = [
          ...new Set([...base.required, ...(schema.required || [])])
        ]

      // For all ancestors add this schema to the descendants list
      schema.parent = base.title
      base.children.push(schema.title)
      let ancestor = base
      while (ancestor) {
        ancestor.descendants.push(schema.title)
        ancestor = parentOf(ancestor)
      }
    }

    // Replace any `$ref`s to YAML with a ref to the generated JSON
    if (path.extname(schema.source) === '.yaml') rewriteRefs(schema)

    schema.$processed = true
  } catch (error) {
    throw new Error(
      `Error when processing "${schema.source}": "${error.stack}"`
    )
  }
}
/**
 * Generate `built/*.schema.json` files from `schema/*.schema.{yaml,json}` files.
 *
 * This function does not use Gulp file streams because it needs to load all the
 * schemas into memory at once (extension is resolved across schemas). Besides the
 * individual schema files it also writes `built/aliases.json` and
 * `built/types.schema.json`, and then copies everything under `built` into `dist`.
 */
async function jsonschema() {
  // Read one definition file into a `[source, schema]` map entry, where
  // `source` is the file path relative to the `schema` directory.
  const readDefinition = async filePath => {
    const source = path.relative('schema', filePath)
    const definition = yaml.safeLoad(await fs.readFile(filePath))
    return [source, { ...definition, source }]
  }

  // Asynchronously read all the schema definition files into a map of objects
  const definitionPaths = await globby('schema/**/*.schema.{yaml,json}')
  const entries = await Promise.all(definitionPaths.map(readDefinition))
  const schemas = new Map(entries)

  // Process each of the schemas collecting aliases along the way
  const aliases = {}
  for (const schema of schemas.values()) {
    processSchema(schemas, aliases, schema)
  }

  // Do final processing and write schema objects to file
  await fs.ensureDir('built')
  for (const schema of schemas.values()) {
    // The destination is derived from the source before the source is
    // rewritten into a URL usable for an "Edit this schema" link in docs.
    const builtName = path.basename(schema.source).replace('.yaml', '.json')
    const destPath = path.join('built', builtName)
    schema.source = `https://github.com/stencila/schema/blob/master/schema/${
      schema.source
    }`

    // Create the final `properties.type.enum` from the schema's own title
    // followed by its (sorted) descendants, and default `type` to the title.
    const typeProperty =
      schema.$id.includes('stencila') &&
      schema.properties &&
      schema.properties.type
    if (typeProperty) {
      typeProperty.enum = schema.descendants
        ? [schema.title, ...schema.descendants.sort()]
        : [schema.title]
      typeProperty.default = schema.title
    }

    // Remove keywords that were only needed during processing
    delete schema.$extends
    delete schema.$processed

    await fs.writeJSON(destPath, schema, { spaces: 2 })
  }

  // Output `aliases.json`
  await fs.writeJSON(path.join('built', 'aliases.json'), aliases, { spaces: 2 })

  // Output `types.schema.json`
  // This 'meta' schema provides a list of type schemas as:
  //  - an entry point for the generation of Typescript type definitions
  //  - a lookup for all types for use in `util.ts` functions
  const typeSchemas = [...schemas.values()].filter(
    schema =>
      schema.title && schema.$id && schema.$id.startsWith('https://stencila')
  )
  const properties = {}
  for (const { title } of typeSchemas) {
    properties[title] = {
      allOf: [{ $ref: `${title}.schema.json` }]
    }
  }
  const required = typeSchemas.map(({ title }) => title)
  await fs.writeJSON(
    'built/types.schema.json',
    {
      $schema: 'http://json-schema.org/draft-07/schema#',
      title: 'Types',
      properties,
      required
    },
    { spaces: 2 }
  )

  // Copy the built files into `dist` for publishing package
  await fs.ensureDir('dist')
  const builtFiles = await globby('built/**/*')
  await Promise.all(
    builtFiles.map(async file => fs.copy(file, path.join('dist', file)))
  )
}
/**
 * Generate `built/stencila.jsonld` from the `built/*.schema.json` files
 *
 * Generates a `@context` similar to https://github.com/codemeta/codemeta/blob/master/codemeta.jsonld
 * but with any extension class or properties defined using `Class` or `Property` (see https://meta.schema.org/).
 *
 * @returns The Gulp stream over the schema files; the JSON-LD file is written
 *   when that stream emits `end`.
 */
function jsonld() {
  const types = {}
  const properties = {}

  // Record a schema.org [`Class`](https://meta.schema.org/Class) for a type
  // defined by this schema (an `@id` starting with `stencila:`), otherwise
  // just link to its source `@id`. Warns if there is no top level `@id`.
  const collectType = (schema, cid, file) => {
    if (!cid) {
      console.error(
        `Warning: @id is not defined at the top level in ${file.path}`
      )
      return
    }
    types[schema.title] = cid.startsWith('stencila:')
      ? {
          '@id': cid,
          '@type': 'schema:Class',
          'schema:name': schema.title,
          'schema:description': schema.description
        }
      : { '@id': cid }
  }

  // Record a [`Property`](https://meta.schema.org/Property) for each property
  // that has an `@id`.
  // TODO: Implement schema:rangeIncludes property - requires the
  // resolving `$refs`. See https://github.com/epoberezkin/ajv/issues/125#issuecomment-408960384
  // for an approach to that.
  const collectProperties = (schema, cid) => {
    const typeProperties =
      schema.properties ||
      (schema.allOf && schema.allOf[1] && schema.allOf[1].properties)
    if (!typeProperties) return

    for (const [name, property] of Object.entries(typeProperties)) {
      const pid = property['@id']
      if (!pid) continue

      // Properties defined elsewhere are only linked to
      if (!pid.startsWith('stencila:')) {
        properties[name] = { '@id': pid }
        continue
      }

      const known = properties[name]
      if (!known) {
        properties[name] = {
          '@id': pid,
          '@type': 'schema:Property',
          'schema:name': name,
          'schema:description': property.description,
          'schema:domainIncludes': [{ '@id': cid }]
        }
        continue
      }

      // The same property name must always map to the same `@id`
      if (known['@id'] !== pid) {
        throw new Error(
          `Property "${name}" has more than one @id "${known['@id']}" and "${pid}"`
        )
      }
      known['schema:domainIncludes'].push({ '@id': cid })
    }
  }

  // Process each JSON file, passing it through unchanged
  const collector = through2.obj((file, enc, cb) => {
    const schema = JSON.parse(file.contents)
    const cid = schema['@id']
    collectType(schema, cid, file)
    collectProperties(schema, cid)
    cb(null, file)
  })

  // Write the context once all the files have been processed
  const writeContext = () => {
    const jsonld = {
      '@context': {
        // Contexts referred to, including this one
        schema: 'https://schema.org/',
        bioschemas: 'http://bioschemas.org',
        codemeta: 'https://doi.org/10.5063/schema/codemeta-2.0',
        stencila: 'https://stencila.github.io/schema/01-draft',

        // Alias `@type` and `@id`
        // See "Addressing the “@” issue" at https://datalanguage.com/news/publishing-json-ld-for-developers
        // for why this is useful.
        type: '@type',
        id: '@id'
      }
    }

    // Add types, then properties, each group sorted
    const sortedEntries = [
      ...Object.entries(types).sort(),
      ...Object.entries(properties).sort()
    ]
    for (const [key, value] of sortedEntries) jsonld[key] = value

    fs.ensureDirSync('built')
    fs.writeJSONSync(path.join('built', 'stencila.jsonld'), jsonld, {
      spaces: 2
    })
  }

  return (
    src('built/*.schema.json')
      .pipe(collector)
      // Sigh. A `data` listener is needed so that the stream flows and 'end' fires
      // https://github.com/gulpjs/gulp/issues/1637#issuecomment-216958503
      .on('data', () => {})
      .on('end', writeContext)
  )
}
/**
 * Generate `types.ts` from `built/types.schema.json`
 *
 * The generated file starts with a banner telling readers not to edit it by hand.
 */
async function ts() {
  const bannerComment = `/* tslint:disable */
/**
* This file was automatically generated.
* Do not modify it by hand. Instead, modify the source \`.schema.yaml\` file
* in the \`schema\` directory and run \`npm run build\` to regenerate this file.
*/
`
  const generated = await jstt.compileFromFile('built/types.schema.json', {
    bannerComment
  })
  fs.writeFileSync('types.ts', generated)
}
/**
 * Check that the generated `built/types.schema.json` is a valid JSON Schema
 * by validating it against the draft-07 meta-schema bundled with Ajv.
 *
 * @throws {Error} If the schema is invalid (after logging the formatted errors)
 */
async function check() {
  const typesSchema = await fs.readJSON('built/types.schema.json')
  const draft07 = require('ajv/lib/refs/json-schema-draft-07.json')

  const isValid = new Ajv({ jsonPointers: true }).compile(draft07)
  if (isValid(typesSchema)) return

  const report = betterAjvErrors(draft07, typesSchema, isValid.errors, {
    format: 'cli',
    indent: 2
  })
  console.log(report)
  throw new Error('💣 Oh, oh, the schema is invalid')
}
/**
 * Test that the examples are valid
 *
 * Each file in `examples` is validated against the built schema named by its
 * `type` property. Files with `invalid` in their name are expected to fail
 * validation, and must contain the message of every validation error found.
 *
 * @returns The Gulp stream over the example files
 */
function test() {
  // Resolve a schema URI to the corresponding file in `built`
  const loadSchema = uri => {
    const match = uri.match(/https:\/\/stencila.github.com\/schema\/(.+)$/)
    if (!match) throw new Error(`Not able to get schema from URI "${uri}"`)
    return fs.readJSON(path.join('built', match[1]))
  }
  const ajv = new Ajv({ jsonPointers: true, allErrors: true, loadSchema })

  // Get the validator for a type, compiling it from the built schema
  // the first time that type is encountered.
  const validatorFor = async type => {
    const cached = ajv.getSchema(
      `https://stencila.github.com/schema/${type}.schema.json`
    )
    if (cached) return cached
    const schema = await fs.readJSON(path.join('built', `${type}.schema.json`))
    return ajv.compileAsync(schema)
  }

  // Validate a single example file, throwing if it does not behave as expected
  const checkExample = async file => {
    const example = yaml.safeLoad(file.contents)
    if (!example) throw new Error(`Example seems to be empty: ${file.path}`)

    const { type } = example
    if (!type) {
      throw new Error(`Example does not have a "type" property: ${file.path}`)
    }

    const validator = await validatorFor(type)
    const valid = validator(example)
    const relativePath = path.relative(process.cwd(), file.path)

    // Ordinary examples simply have to be valid
    if (!file.basename.includes('invalid')) {
      if (valid) return
      const message = betterAjvErrors(
        validator.schema,
        example,
        validator.errors,
        { format: 'cli', indent: 2 }
      )
      console.log(message)
      throw new Error(`💣 Oh, oh, "${relativePath}" is invalid`)
    }

    // Examples named as invalid have to fail validation...
    if (valid) {
      throw new Error(
        `? Woops. "${relativePath}" is supposed to be invalid, but it's not.`
      )
    }

    // ...and every validation error should have a corresponding comment in the file.
    // Previously we used:
    //   const errors = betterAjvErrors(schema, example, validator.errors, { format: 'json'})
    // to generate the list of errors. However, despite the use of `allErrors: true` above, Ajv
    // does not always return all errors (because it is optimised for speed?). So instead, here
    // we use the vscode JSON language service to generate the list of errors.
    const contents = file.contents.toString()
    const textDoc = jls.TextDocument.create(
      'foo://bar/file.json',
      'json',
      0,
      JSON.stringify(yaml.safeLoad(contents))
    )
    const jsonDoc = jls.getLanguageService({}).parseJSONDocument(textDoc)
    for (const error of jsonDoc.validate(textDoc, validator.schema)) {
      if (contents.includes(error.message)) continue
      throw new Error(
        `💣 Oh, oh, "${relativePath}" is expected to contain the comment "${
          error.message
        }".`
      )
    }
  }

  return src('examples/**/*.{yaml,json}').pipe(
    through2.obj((file, enc, cb) => {
      checkExample(file)
        .then(() => cb(null))
        .catch(error => cb(error))
    })
  )
}
/**
 * Clean up! Deletes the generated `dist` and `built` directories
 * and the generated `types.ts` file.
 *
 * @returns The promise returned by `del`
 */
function clean() {
  const generated = ['dist', 'built', 'types.ts']
  return del(generated)
}
// Gulp tasks exported by this file.

// Generate the JSON Schema files; every task below (except `clean`) starts with this one.
exports.jsonschema = jsonschema
// Individual tasks, each composed with `series` so that `jsonschema` comes first
exports.check = series(jsonschema, check)
exports.jsonld = series(jsonschema, jsonld)
exports.ts = series(jsonschema, ts)
exports.test = series(jsonschema, test)
// Full build: clean, generate the schemas, then the `check`/`test` pair
// followed by the `jsonld`/`ts` pair (each pair composed with `parallel`)
exports.build = series(
clean,
jsonschema,
parallel(check, test),
parallel(jsonld, ts)
)
// Run the full build on changes to the `schema` and `examples` directories.
// `exports.build` is looked up when the task is invoked.
exports.watch = () =>
watch(['schema', 'examples'], { ignoreInitial: false }, exports.build)
// Delete generated files
exports.clean = clean