/**
 * jsonld-streaming-parser
 * A fast and lightweight streaming JSON-LD parser.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.JsonLdParser = void 0;
// tslint:disable-next-line:no-var-requires
const Parser = require('@bergos/jsonparse');
const jsonld_context_parser_1 = require("jsonld-context-parser");
const readable_stream_1 = require("readable-stream");
const EntryHandlerArrayValue_1 = require("./entryhandler/EntryHandlerArrayValue");
const EntryHandlerContainer_1 = require("./entryhandler/EntryHandlerContainer");
const EntryHandlerInvalidFallback_1 = require("./entryhandler/EntryHandlerInvalidFallback");
const EntryHandlerPredicate_1 = require("./entryhandler/EntryHandlerPredicate");
const EntryHandlerKeywordContext_1 = require("./entryhandler/keyword/EntryHandlerKeywordContext");
const EntryHandlerKeywordGraph_1 = require("./entryhandler/keyword/EntryHandlerKeywordGraph");
const EntryHandlerKeywordId_1 = require("./entryhandler/keyword/EntryHandlerKeywordId");
const EntryHandlerKeywordIncluded_1 = require("./entryhandler/keyword/EntryHandlerKeywordIncluded");
const EntryHandlerKeywordNest_1 = require("./entryhandler/keyword/EntryHandlerKeywordNest");
const EntryHandlerKeywordType_1 = require("./entryhandler/keyword/EntryHandlerKeywordType");
const EntryHandlerKeywordUnknownFallback_1 = require("./entryhandler/keyword/EntryHandlerKeywordUnknownFallback");
const EntryHandlerKeywordValue_1 = require("./entryhandler/keyword/EntryHandlerKeywordValue");
const ParsingContext_1 = require("./ParsingContext");
const Util_1 = require("./Util");
const http_link_header_1 = require("http-link-header");
const EntryHandlerKeywordAnnotation_1 = require("./entryhandler/keyword/EntryHandlerKeywordAnnotation");
/**
* A stream transformer that parses JSON-LD (text) streams to an {@link RDF.Stream}.
*/
class JsonLdParser extends readable_stream_1.Transform {
constructor(options) {
super({ readableObjectMode: true });
options = options || {};
this.options = options;
this.parsingContext = new ParsingContext_1.ParsingContext(Object.assign({ parser: this }, options));
this.util = new Util_1.Util({ dataFactory: options.dataFactory, parsingContext: this.parsingContext });
this.jsonParser = new Parser();
this.contextJobs = [];
this.typeJobs = [];
this.contextAwaitingJobs = [];
this.lastDepth = 0;
this.lastKeys = [];
this.lastOnValueJob = Promise.resolve();
this.attachJsonParserListeners();
this.on('end', () => {
if (typeof this.jsonParser.mode !== 'undefined') {
this.emit('error', new Error('Unclosed document'));
}
});
}
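// Basic usage sketch (the file name and baseIRI below are illustrative):
//
//   const { JsonLdParser } = require('jsonld-streaming-parser');
//   const fs = require('fs');
//   const parser = new JsonLdParser({ baseIRI: 'https://example.org/' });
//   fs.createReadStream('document.jsonld')
//     .pipe(parser)                                  // text chunks in, RDF quads out
//     .on('data', (quad) => console.log(quad))
//     .on('error', console.error)
//     .on('end', () => console.log('All quads were parsed.'));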
/**
* Construct a JsonLdParser from the given HTTP response.
*
* This will throw an error if the response does not have a valid JSON media type
* (application/ld+json, application/json, or any media type ending in +json).
*
* For raw JSON responses, exactly one link header pointing to a JSON-LD context is required.
*
* This method is not responsible for handling redirects.
*
* @param baseIRI The URI of the received response.
* @param mediaType The received content type.
* @param headers Optional HTTP headers.
* @param options Optional parser options.
*/
static fromHttpResponse(baseIRI, mediaType, headers, options) {
let context;
let wellKnownMediaTypes = ['application/activity+json'];
if (options && options.wellKnownMediaTypes) {
wellKnownMediaTypes = options.wellKnownMediaTypes;
}
// Special cases when receiving something other than the JSON-LD media type or one of the well-known media types
if (mediaType !== 'application/ld+json' && !wellKnownMediaTypes.includes(mediaType)) {
// Only accept JSON or JSON extension types
if (mediaType !== 'application/json' && !mediaType.endsWith('+json')) {
throw new jsonld_context_parser_1.ErrorCoded(`Unsupported JSON-LD media type ${mediaType}`, jsonld_context_parser_1.ERROR_CODES.LOADING_DOCUMENT_FAILED);
}
// We need exactly one JSON-LD context in the link header
if (headers && headers.has('Link')) {
headers.forEach((value, key) => {
if (key === 'link') {
const linkHeader = (0, http_link_header_1.parse)(value);
for (const link of linkHeader.get('rel', 'http://www.w3.org/ns/json-ld#context')) {
if (context) {
throw new jsonld_context_parser_1.ErrorCoded('Multiple JSON-LD context link headers were found on ' + baseIRI, jsonld_context_parser_1.ERROR_CODES.MULTIPLE_CONTEXT_LINK_HEADERS);
}
context = link.uri;
}
}
});
}
if (!context && !(options === null || options === void 0 ? void 0 : options.ignoreMissingContextLinkHeader)) {
throw new jsonld_context_parser_1.ErrorCoded(`Missing context link header for media type ${mediaType} on ${baseIRI}`, jsonld_context_parser_1.ERROR_CODES.LOADING_DOCUMENT_FAILED);
}
}
// Check if the streaming profile is present
let streamingProfile;
if (headers && headers.has('Content-Type')) {
const contentType = headers.get('Content-Type');
const match = /; *profile=([^"]*)/.exec(contentType);
if (match && match[1] === 'http://www.w3.org/ns/json-ld#streaming') {
streamingProfile = true;
}
}
return new JsonLdParser(Object.assign({ baseIRI, context, streamingProfile }, options || {}));
}
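// Usage sketch (illustrative; assumes Node 18+ with a global fetch, a
// parameter-free content-type header, and a hypothetical URL):
//
//   const { Readable } = require('stream');
//   const response = await fetch('https://example.org/resource.jsonld');
//   const parser = JsonLdParser.fromHttpResponse(
//     response.url,                          // baseIRI
//     response.headers.get('content-type'),  // mediaType
//     response.headers,                      // checked for context/profile info
//   );
//   Readable.fromWeb(response.body).pipe(parser);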
/**
* Parses the given text stream into a quad stream.
* @param {NodeJS.EventEmitter} stream A text stream.
* @return {RDF.Stream} A quad stream.
*/
import(stream) {
if ('pipe' in stream) {
const parsed = stream.pipe(new JsonLdParser(this.options));
stream.on('error', (error) => parsed.emit('error', error));
return parsed;
}
else {
const output = new readable_stream_1.PassThrough({ readableObjectMode: true });
stream.on('data', (data) => output.push(data));
stream.on('end', () => output.push(null));
const parsed = output.pipe(new JsonLdParser(this.options));
stream.on('error', (error) => parsed.emit('error', error));
return parsed;
}
}
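// Usage sketch (assumes `textStream` is any event emitter producing JSON-LD text):
//
//   const quadStream = new JsonLdParser().import(textStream);
//   quadStream.on('data', (quad) => console.log(quad));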
_transform(chunk, encoding, callback) {
this.jsonParser.write(chunk);
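// Only signal completion once the value jobs spawned by this chunk have
// settled, so that stream backpressure throttles the JSON parser.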
this.lastOnValueJob
.then(() => callback(), (error) => callback(error));
}
/**
* Start a new job for parsing the given value.
*
* This will let the first valid {@link IEntryHandler} handle the entry.
*
* @param {any[]} keys The stack of keys.
* @param value The value to parse.
* @param {number} depth The depth to parse at.
* @param {boolean} lastDepthCheck If the lastDepth check should be done for buffer draining.
* @return {Promise<void>} A promise resolving when the job is done.
*/
async newOnValueJob(keys, value, depth, lastDepthCheck) {
let flushStacks = true;
// When we go up the stack, emit all unidentified values
// We need to do this before the new job, because the new job may require determined values from the flushed jobs.
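// (Illustrative) for { "a": { "b": 1 }, "c": 2 }: the inner object completes at
// depth 1 right after its leaf was handled at depth 2 (lastDepth), so the
// depth-2 buffers are flushed here before "c" is processed.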
if (lastDepthCheck && depth < this.lastDepth) {
// Check if we had any RDF lists that need to be terminated with an rdf:nil
const listPointer = this.parsingContext.listPointerStack[this.lastDepth];
if (listPointer) {
// Terminate the list if it had at least one value
if (listPointer.value) {
this.push(this.util.dataFactory.quad(listPointer.value, this.util.rdfRest, this.util.rdfNil, this.util.getDefaultGraph()));
}
// Add the list id to the id stack, so it can be used higher up in the stack
listPointer.listId.listHead = true;
this.parsingContext.idStack[listPointer.listRootDepth + 1] = [listPointer.listId];
this.parsingContext.listPointerStack.splice(this.lastDepth, 1);
}
// Flush the buffer for lastDepth
// If the parent key is a special type of container, postpone flushing until that parent is handled.
if (await EntryHandlerContainer_1.EntryHandlerContainer.isBufferableContainerHandler(this.parsingContext, this.lastKeys, this.lastDepth)) {
this.parsingContext.pendingContainerFlushBuffers
.push({ depth: this.lastDepth, keys: this.lastKeys.slice(0, this.lastKeys.length) });
flushStacks = false;
}
else {
await this.flushBuffer(this.lastDepth, this.lastKeys);
}
}
const key = await this.util.unaliasKeyword(keys[depth], keys, depth);
const parentKey = await this.util.unaliasKeywordParent(keys, depth);
this.parsingContext.emittedStack[depth] = true;
let handleKey = true;
// Keywords inside @reverse are not allowed, apart from @context
if (jsonld_context_parser_1.Util.isValidKeyword(key) && parentKey === '@reverse' && key !== '@context') {
this.emit('error', new jsonld_context_parser_1.ErrorCoded(`Found the @id '${value}' inside an @reverse property`, jsonld_context_parser_1.ERROR_CODES.INVALID_REVERSE_PROPERTY_MAP));
}
// Skip further processing if one of the parent nodes is invalid.
// We use the validationStack to reuse validation results that were produced before with common key stacks.
let inProperty = false;
if (this.parsingContext.validationStack.length > 1) {
inProperty = this.parsingContext.validationStack[this.parsingContext.validationStack.length - 1].property;
}
for (let i = Math.max(1, this.parsingContext.validationStack.length - 1); i < keys.length - 1; i++) {
const validationResult = this.parsingContext.validationStack[i]
|| (this.parsingContext.validationStack[i] = await this.validateKey(keys.slice(0, i + 1), i, inProperty));
if (!validationResult.valid) {
this.parsingContext.emittedStack[depth] = false;
handleKey = false;
break;
}
else if (!inProperty && validationResult.property) {
inProperty = true;
}
}
// Skip further processing if this node is part of a literal
if (await this.util.isLiteral(keys, depth)) {
handleKey = false;
}
// Get handler
if (handleKey) {
for (const entryHandler of JsonLdParser.ENTRY_HANDLERS) {
const testResult = await entryHandler.test(this.parsingContext, this.util, key, keys, depth);
if (testResult) {
// Pass processing over to the handler
await entryHandler.handle(this.parsingContext, this.util, key, keys, value, depth, testResult);
// Flag that this depth is processed
if (entryHandler.isStackProcessor()) {
this.parsingContext.processingStack[depth] = true;
}
break;
}
}
}
// Validate value indexes on the root.
if (depth === 0 && Array.isArray(value)) {
await this.util.validateValueIndexes(value);
}
// When we go up the stack, flush the old stack
if (flushStacks && depth < this.lastDepth) {
// Reset our stacks
this.flushStacks(this.lastDepth);
}
this.lastDepth = depth;
this.lastKeys = keys;
// Clear the keyword cache at this depth, and everything underneath.
this.parsingContext.unaliasedKeywordCacheStack.splice(depth - 1);
}
/**
* Flush the processing stacks at the given depth.
* @param {number} depth A depth.
*/
flushStacks(depth) {
this.parsingContext.processingStack.splice(depth, 1);
this.parsingContext.processingType.splice(depth, 1);
this.parsingContext.emittedStack.splice(depth, 1);
this.parsingContext.idStack.splice(depth, 1);
this.parsingContext.graphStack.splice(depth + 1, 1);
this.parsingContext.graphContainerTermStack.splice(depth, 1);
this.parsingContext.jsonLiteralStack.splice(depth, 1);
this.parsingContext.validationStack.splice(depth - 1, 2);
this.parsingContext.literalStack.splice(depth, this.parsingContext.literalStack.length - depth);
this.parsingContext.annotationsBuffer.splice(depth, 1);
// TODO: just like the literal stack, splice all other stack until the end as well?
}
/**
* Flush buffers for the given depth.
*
* This should be called after the last entry at a given depth was processed.
*
* @param {number} depth A depth.
* @param {any[]} keys A stack of keys.
* @return {Promise<void>} A promise resolving if flushing is done.
*/
async flushBuffer(depth, keys) {
let subjects = this.parsingContext.idStack[depth];
const subjectsWasDefined = !!subjects;
if (!subjectsWasDefined) {
subjects = this.parsingContext.idStack[depth] = [this.util.dataFactory.blankNode()];
}
// Flush values at this level
const valueBuffer = this.parsingContext.unidentifiedValuesBuffer[depth];
if (valueBuffer) {
for (const subject of subjects) {
const depthOffsetGraph = await this.util.getDepthOffsetGraph(depth, keys);
const graphs = (this.parsingContext.graphStack[depth] || depthOffsetGraph >= 0)
? this.parsingContext.idStack[depth - depthOffsetGraph - 1]
: [await this.util.getGraphContainerValue(keys, depth)];
if (graphs) {
for (const graph of graphs) {
// Flush values to stream if the graph @id is known
this.parsingContext.emittedStack[depth] = true;
for (const bufferedValue of valueBuffer) {
this.util.emitQuadChecked(depth, subject, bufferedValue.predicate, bufferedValue.object, graph, bufferedValue.reverse, bufferedValue.isEmbedded);
}
}
}
else {
// Place the values in the graphs buffer if the graph @id is not yet known
const subGraphBuffer = this.parsingContext.getUnidentifiedGraphBufferSafe(depth - await this.util.getDepthOffsetGraph(depth, keys) - 1);
for (const bufferedValue of valueBuffer) {
if (bufferedValue.reverse) {
subGraphBuffer.push({
object: subject,
predicate: bufferedValue.predicate,
subject: bufferedValue.object,
isEmbedded: bufferedValue.isEmbedded,
});
}
else {
subGraphBuffer.push({
object: bufferedValue.object,
predicate: bufferedValue.predicate,
subject,
isEmbedded: bufferedValue.isEmbedded,
});
}
}
}
}
this.parsingContext.unidentifiedValuesBuffer.splice(depth, 1);
this.parsingContext.literalStack.splice(depth, 1);
this.parsingContext.jsonLiteralStack.splice(depth, 1);
}
// Flush graphs at this level
const graphBuffer = this.parsingContext.unidentifiedGraphsBuffer[depth];
if (graphBuffer) {
for (const subject of subjects) {
// A @graph statement at the root without an @id relates to the default graph,
// unless there are top-level properties;
// all other @graph statements relate to their subject node.
const graph = depth === 1 && subject.termType === 'BlankNode'
&& !this.parsingContext.topLevelProperties ? this.util.getDefaultGraph() : subject;
this.parsingContext.emittedStack[depth] = true;
for (const bufferedValue of graphBuffer) {
this.parsingContext.emitQuad(depth, this.util.dataFactory.quad(bufferedValue.subject, bufferedValue.predicate, bufferedValue.object, graph));
}
}
this.parsingContext.unidentifiedGraphsBuffer.splice(depth, 1);
}
// Push unhandled annotations up the stack as nested annotations
const annotationsBuffer = this.parsingContext.annotationsBuffer[depth];
if (annotationsBuffer) {
// Throw an error if we reach the top, and still have annotations
if (annotationsBuffer.length > 0 && depth === 1) {
this.parsingContext.emitError(new jsonld_context_parser_1.ErrorCoded(`Annotations can not be made on top-level nodes`, jsonld_context_parser_1.ERROR_CODES.INVALID_ANNOTATION));
}
// Pass the annotations buffer up one level in the stack
const annotationsBufferParent = this.parsingContext.getAnnotationsBufferSafe(depth - 1);
for (const annotation of annotationsBuffer) {
annotationsBufferParent.push(annotation);
}
delete this.parsingContext.annotationsBuffer[depth];
}
}
/**
* Check if at least one {@link IEntryHandler} considers the entry valid.
* @param {any[]} keys A stack of keys.
* @param {number} depth A depth.
* @param {boolean} inProperty If the current depth is part of a valid property node.
* @return {Promise<{ valid: boolean, property: boolean }>} A promise resolving to the validation result.
*/
async validateKey(keys, depth, inProperty) {
for (const entryHandler of JsonLdParser.ENTRY_HANDLERS) {
if (await entryHandler.validate(this.parsingContext, this.util, keys, depth, inProperty)) {
return { valid: true, property: inProperty || entryHandler.isPropertyHandler() };
}
}
return { valid: false, property: false };
}
/**
* Attach all required listeners to the JSON parser.
*
* This should only be called once.
*/
attachJsonParserListeners() {
// Listen to json parser events
this.jsonParser.onValue = (value) => {
const depth = this.jsonParser.stack.length;
const keys = (new Array(depth + 1).fill(0)).map((v, i) => {
return i === depth ? this.jsonParser.key : this.jsonParser.stack[i].key;
});
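// The `keys` array built above mirrors the JSON path to the current value.
// Illustrative example: for { "ex:p": { "@id": "ex:o" } }, the leaf value
// 'ex:o' arrives with depth 2 and keys [undefined, 'ex:p', '@id'].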
if (!this.isParsingContextInner(depth)) { // Don't parse inner nodes inside @context
const valueJobCb = () => this.newOnValueJob(keys, value, depth, true);
if (!this.parsingContext.streamingProfile
&& !this.parsingContext.contextTree.getContext(keys.slice(0, -1))) {
// If an out-of-order context is allowed,
// we have to buffer everything.
// We store jobs for @context's and @type's separately,
// because at the end, we have to process them first.
// We also handle @type because these *could* introduce a type-scoped context.
if (keys[depth] === '@context') {
let jobs = this.contextJobs[depth];
if (!jobs) {
jobs = this.contextJobs[depth] = [];
}
jobs.push(valueJobCb);
}
else {
this.contextAwaitingJobs.push({ job: valueJobCb, keys, depth });
}
}
else {
// Make sure that our value jobs are chained synchronously
this.lastOnValueJob = this.lastOnValueJob.then(valueJobCb);
}
// Execute all buffered jobs on deeper levels
if (!this.parsingContext.streamingProfile && depth === 0) {
this.lastOnValueJob = this.lastOnValueJob
.then(() => this.executeBufferedJobs());
}
}
};
this.jsonParser.onError = (error) => {
this.emit('error', error);
};
}
/**
* Check if the parser is currently parsing an element that is part of an @context entry.
* @param {number} depth A depth.
* @return {boolean} A boolean.
*/
isParsingContextInner(depth) {
for (let i = depth; i > 0; i--) {
if (this.jsonParser.stack[i - 1].key === '@context') {
return true;
}
}
return false;
}
/**
* Execute all buffered jobs.
* @return {Promise<void>} A promise resolving if all jobs are finished.
*/
async executeBufferedJobs() {
// Handle context jobs
for (const jobs of this.contextJobs) {
if (jobs) {
for (const job of jobs) {
await job();
}
}
}
// Clear the keyword cache.
this.parsingContext.unaliasedKeywordCacheStack.splice(0);
const contextAwaitingJobs = [];
for (const job of this.contextAwaitingJobs) {
if ((await this.util.unaliasKeyword(job.keys[job.depth], job.keys, job.depth, true)) === '@type'
|| typeof job.keys[job.depth] === 'number' && (await this.util.unaliasKeyword(job.keys[job.depth - 1], job.keys, job.depth - 1, true)) === '@type') { // Also capture @type with array values
// Remove @type from keys, because we want it to apply to parent later on
this.typeJobs.push({ job: job.job, keys: job.keys.slice(0, job.keys.length - 1) });
}
else {
contextAwaitingJobs.push(job);
}
}
// Handle non-context jobs
for (const job of contextAwaitingJobs) {
// Check if we have a type (with possible type-scoped context) that should be handled before.
// We check all possible parent nodes for the current job, from root to leaves.
if (this.typeJobs.length > 0) {
// First collect all applicable type jobs
const applicableTypeJobs = [];
const applicableTypeJobIds = [];
for (let i = 0; i < this.typeJobs.length; i++) {
const typeJob = this.typeJobs[i];
if (Util_1.Util.isPrefixArray(typeJob.keys, job.keys)) {
applicableTypeJobs.push(typeJob);
applicableTypeJobIds.push(i);
}
}
// Next, sort the jobs from short to long key length (so that types higher up in the tree are handled first)
const sortedTypeJobs = applicableTypeJobs.sort((job1, job2) => job1.keys.length - job2.keys.length);
// Finally, execute the jobs in order
for (const typeJob of sortedTypeJobs) {
await typeJob.job();
}
// Remove the executed type jobs
// Sort numerically in descending order, so higher indexes are spliced first and lower ids stay valid
// (the default sort is lexicographic, which would misorder ids of 10 or more)
const sortedApplicableTypeJobIds = applicableTypeJobIds.sort((idA, idB) => idB - idA);
for (const jobId of sortedApplicableTypeJobIds) {
this.typeJobs.splice(jobId, 1);
}
}
await job.job();
}
}
}
exports.JsonLdParser = JsonLdParser;
JsonLdParser.DEFAULT_PROCESSING_MODE = '1.1';
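// Entry handlers are tested in array order by newOnValueJob; the first handler
// whose test() passes handles the entry, so the keyword and container handlers
// must precede the generic predicate handler and the fallbacks.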
JsonLdParser.ENTRY_HANDLERS = [
new EntryHandlerArrayValue_1.EntryHandlerArrayValue(),
new EntryHandlerKeywordContext_1.EntryHandlerKeywordContext(),
new EntryHandlerKeywordId_1.EntryHandlerKeywordId(),
new EntryHandlerKeywordIncluded_1.EntryHandlerKeywordIncluded(),
new EntryHandlerKeywordGraph_1.EntryHandlerKeywordGraph(),
new EntryHandlerKeywordNest_1.EntryHandlerKeywordNest(),
new EntryHandlerKeywordType_1.EntryHandlerKeywordType(),
new EntryHandlerKeywordValue_1.EntryHandlerKeywordValue(),
new EntryHandlerKeywordAnnotation_1.EntryHandlerKeywordAnnotation(),
new EntryHandlerContainer_1.EntryHandlerContainer(),
new EntryHandlerKeywordUnknownFallback_1.EntryHandlerKeywordUnknownFallback(),
new EntryHandlerPredicate_1.EntryHandlerPredicate(),
new EntryHandlerInvalidFallback_1.EntryHandlerInvalidFallback(),
];
//# sourceMappingURL=JsonLdParser.js.map