/*
 * jsonld-streaming-parser
 * Version:
 * A fast and lightweight streaming JSON-LD parser
 * 486 lines • 25.3 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.JsonLdParser = void 0;
// tslint:disable-next-line:no-var-requires
const Parser = require('@bergos/jsonparse');
const jsonld_context_parser_1 = require("jsonld-context-parser");
const readable_stream_1 = require("readable-stream");
const EntryHandlerArrayValue_1 = require("./entryhandler/EntryHandlerArrayValue");
const EntryHandlerContainer_1 = require("./entryhandler/EntryHandlerContainer");
const EntryHandlerInvalidFallback_1 = require("./entryhandler/EntryHandlerInvalidFallback");
const EntryHandlerPredicate_1 = require("./entryhandler/EntryHandlerPredicate");
const EntryHandlerKeywordContext_1 = require("./entryhandler/keyword/EntryHandlerKeywordContext");
const EntryHandlerKeywordGraph_1 = require("./entryhandler/keyword/EntryHandlerKeywordGraph");
const EntryHandlerKeywordId_1 = require("./entryhandler/keyword/EntryHandlerKeywordId");
const EntryHandlerKeywordIncluded_1 = require("./entryhandler/keyword/EntryHandlerKeywordIncluded");
const EntryHandlerKeywordNest_1 = require("./entryhandler/keyword/EntryHandlerKeywordNest");
const EntryHandlerKeywordType_1 = require("./entryhandler/keyword/EntryHandlerKeywordType");
const EntryHandlerKeywordUnknownFallback_1 = require("./entryhandler/keyword/EntryHandlerKeywordUnknownFallback");
const EntryHandlerKeywordValue_1 = require("./entryhandler/keyword/EntryHandlerKeywordValue");
const ParsingContext_1 = require("./ParsingContext");
const Util_1 = require("./Util");
const http_link_header_1 = require("http-link-header");
const EntryHandlerKeywordAnnotation_1 = require("./entryhandler/keyword/EntryHandlerKeywordAnnotation");
/**
 * A stream transformer that parses JSON-LD (text) streams to an {@link RDF.Stream}.
 */
class JsonLdParser extends readable_stream_1.Transform {
    constructor(options) {
        super({ readableObjectMode: true });
        options = options || {};
        this.options = options;
        this.parsingContext = new ParsingContext_1.ParsingContext(Object.assign({ parser: this }, options));
        this.util = new Util_1.Util({ dataFactory: options.dataFactory, parsingContext: this.parsingContext });
        this.jsonParser = new Parser();
        this.contextJobs = [];
        this.typeJobs = [];
        this.contextAwaitingJobs = [];
        this.lastDepth = 0;
        this.lastKeys = [];
        this.lastOnValueJob = Promise.resolve();
        this.attachJsonParserListeners();
        this.on('end', () => {
            if (typeof this.jsonParser.mode !== 'undefined') {
                this.emit('error', new Error('Unclosed document'));
            }
        });
    }
    /**
     * Construct a JsonLdParser from the given HTTP response.
     *
     * This will throw an error if no valid JSON response is received
     * (application/ld+json, application/json, or something+json).
     *
     * For raw JSON responses, exactly one link header pointing to a JSON-LD context is required.
     *
     * This method is not responsible for handling redirects.
     *
     * @param baseIRI The URI of the received response.
     * @param mediaType The received content type.
     * @param headers Optional HTTP headers.
     * @param options Optional parser options.
     */
    static fromHttpResponse(baseIRI, mediaType, headers, options) {
        let context;
        let wellKnownMediaTypes = ['application/activity+json'];
        if (options && options.wellKnownMediaTypes) {
            wellKnownMediaTypes = options.wellKnownMediaTypes;
        }
        // Special cases when receiving something else than the JSON-LD media type or the wellKnownMediaTypes
        if (mediaType !== 'application/ld+json' && !wellKnownMediaTypes.includes(mediaType)) {
            // Only accept JSON or JSON extension types
            if (mediaType !== 'application/json' && !mediaType.endsWith('+json')) {
                throw new jsonld_context_parser_1.ErrorCoded(`Unsupported JSON-LD media type ${mediaType}`, jsonld_context_parser_1.ERROR_CODES.LOADING_DOCUMENT_FAILED);
            }
            // We need exactly one JSON-LD context in the link header
            if (headers && headers.has('Link')) {
                headers.forEach((value, key) => {
                    if (key === 'link') {
                        const linkHeader = (0, http_link_header_1.parse)(value);
                        for (const link of linkHeader.get('rel', 'http://www.w3.org/ns/json-ld#context')) {
                            if (context) {
                                throw new jsonld_context_parser_1.ErrorCoded('Multiple JSON-LD context link headers were found on ' + baseIRI, jsonld_context_parser_1.ERROR_CODES.MULTIPLE_CONTEXT_LINK_HEADERS);
                            }
                            context = link.uri;
                        }
                    }
                });
            }
            if (!context && !(options === null || options === void 0 ? void 0 : options.ignoreMissingContextLinkHeader)) {
                throw new jsonld_context_parser_1.ErrorCoded(`Missing context link header for media type ${mediaType} on ${baseIRI}`, jsonld_context_parser_1.ERROR_CODES.LOADING_DOCUMENT_FAILED);
            }
        }
        // Check if the streaming profile is present
        let streamingProfile;
        if (headers && headers.has('Content-Type')) {
            const contentType = headers.get('Content-Type');
            const match = /; *profile=([^"]*)/.exec(contentType);
            if (match && match[1] === 'http://www.w3.org/ns/json-ld#streaming') {
                streamingProfile = true;
            }
        }
        return new JsonLdParser(Object.assign({ baseIRI,
            context,
            streamingProfile }, options ? options : {}));
    }
    /**
     * Parses the given text stream into a quad stream.
     * @param {NodeJS.EventEmitter} stream A text stream.
     * @return {RDF.Stream} A quad stream.
     */
    import(stream) {
        if ('pipe' in stream) {
            stream.on('error', (error) => parsed.emit('error', error));
            const parsed = stream.pipe(new JsonLdParser(this.options));
            return parsed;
        }
        else {
            const output = new readable_stream_1.PassThrough({ readableObjectMode: true });
            stream.on('error', (error) => parsed.emit('error', error));
            stream.on('data', (data) => output.push(data));
            stream.on('end', () => output.push(null));
            const parsed = output.pipe(new JsonLdParser(this.options));
            return parsed;
        }
    }
    _transform(chunk, encoding, callback) {
        this.jsonParser.write(chunk);
        this.lastOnValueJob
            .then(() => callback(), (error) => callback(error));
    }
    /**
     * Start a new job for parsing the given value.
     *
     * This will let the first valid {@link IEntryHandler} handle the entry.
     *
     * @param {any[]} keys The stack of keys.
     * @param value The value to parse.
     * @param {number} depth The depth to parse at.
     * @param {boolean} lastDepthCheck If the lastDepth check should be done for buffer draining.
     * @return {Promise<void>} A promise resolving when the job is done.
     */
    async newOnValueJob(keys, value, depth, lastDepthCheck) {
        // Whether the processing stacks of the previous depth may be reset at the end of this job.
        // This is disabled below when flushing is postponed for a bufferable container.
        let flushStacks = true;
        // When we go up the stack, emit all unidentified values.
        // We need to do this before the new job, because the new job may require determined values from the flushed jobs.
        if (lastDepthCheck && depth < this.lastDepth) {
            // Check if we had any RDF lists that need to be terminated with an rdf:nil
            const listPointer = this.parsingContext.listPointerStack[this.lastDepth];
            if (listPointer) {
                // Terminate the list if it had at least one value
                if (listPointer.value) {
                    this.push(this.util.dataFactory.quad(listPointer.value, this.util.rdfRest, this.util.rdfNil, this.util.getDefaultGraph()));
                }
                // Add the list id to the id stack, so it can be used higher up in the stack
                listPointer.listId.listHead = true;
                this.parsingContext.idStack[listPointer.listRootDepth + 1] = [listPointer.listId];
                this.parsingContext.listPointerStack.splice(this.lastDepth, 1);
            }
            // Flush the buffer for lastDepth
            // If the parent key is a special type of container, postpone flushing until that parent is handled.
            if (await EntryHandlerContainer_1.EntryHandlerContainer.isBufferableContainerHandler(this.parsingContext, this.lastKeys, this.lastDepth)) {
                // Remember a copy of the keys, so the flush can be done later on
                this.parsingContext.pendingContainerFlushBuffers
                    .push({ depth: this.lastDepth, keys: this.lastKeys.slice(0, this.lastKeys.length) });
                flushStacks = false;
            }
            else {
                await this.flushBuffer(this.lastDepth, this.lastKeys);
            }
        }
        // Resolve keyword aliases for the current key and its parent
        const key = await this.util.unaliasKeyword(keys[depth], keys, depth);
        const parentKey = await this.util.unaliasKeywordParent(keys, depth);
        // Optimistically mark this depth as emitted; this is reverted below if a parent turns out to be invalid
        this.parsingContext.emittedStack[depth] = true;
        let handleKey = true;
        // Keywords inside @reverse are not allowed, apart from @context
        // NOTE(review): the message always mentions @id, even when another keyword triggered it,
        // and processing continues after the error is emitted — confirm both are intended.
        if (jsonld_context_parser_1.Util.isValidKeyword(key) && parentKey === '@reverse' && key !== '@context') {
            this.emit('error', new jsonld_context_parser_1.ErrorCoded(`Found the @id '${value}' inside an @reverse property`, jsonld_context_parser_1.ERROR_CODES.INVALID_REVERSE_PROPERTY_MAP));
        }
        // Skip further processing if one of the parent nodes is invalid.
        // We use the validationStack to reuse validation results that were produced before with common key stacks.
        let inProperty = false;
        if (this.parsingContext.validationStack.length > 1) {
            inProperty = this.parsingContext.validationStack[this.parsingContext.validationStack.length - 1].property;
        }
        // Only parents (indexes below keys.length - 1) are validated here, starting at the last cached result
        for (let i = Math.max(1, this.parsingContext.validationStack.length - 1); i < keys.length - 1; i++) {
            const validationResult = this.parsingContext.validationStack[i]
                || (this.parsingContext.validationStack[i] = await this.validateKey(keys.slice(0, i + 1), i, inProperty));
            if (!validationResult.valid) {
                this.parsingContext.emittedStack[depth] = false;
                handleKey = false;
                break;
            }
            else if (!inProperty && validationResult.property) {
                inProperty = true;
            }
        }
        // Skip further processing if this node is part of a literal
        if (await this.util.isLiteral(keys, depth)) {
            handleKey = false;
        }
        // Get handler: the first entry handler whose test succeeds handles this entry
        if (handleKey) {
            for (const entryHandler of JsonLdParser.ENTRY_HANDLERS) {
                const testResult = await entryHandler.test(this.parsingContext, this.util, key, keys, depth);
                if (testResult) {
                    // Pass processing over to the handler
                    await entryHandler.handle(this.parsingContext, this.util, key, keys, value, depth, testResult);
                    // Flag that this depth is processed
                    if (entryHandler.isStackProcessor()) {
                        this.parsingContext.processingStack[depth] = true;
                    }
                    break;
                }
            }
        }
        // Validate value indexes on the root.
        if (depth === 0 && Array.isArray(value)) {
            await this.util.validateValueIndexes(value);
        }
        // When we go up the stack, flush the old stack
        if (flushStacks && depth < this.lastDepth) {
            // Reset our stacks
            this.flushStacks(this.lastDepth);
        }
        // Remember the position of this job, so the next job can detect when it goes up the stack
        this.lastDepth = depth;
        this.lastKeys = keys;
        // Clear the keyword cache at this depth, and everything underneath.
        // NOTE(review): the splice starts at depth - 1, i.e. one level above this depth;
        // for depth 0 this becomes splice(-1), which only removes the last entry — confirm this is intended.
        this.parsingContext.unaliasedKeywordCacheStack.splice(depth - 1);
    }
    /**
     * Flush the processing stacks at the given depth.
     * @param {number} depth A depth.
     */
    flushStacks(depth) {
        this.parsingContext.processingStack.splice(depth, 1);
        this.parsingContext.processingType.splice(depth, 1);
        this.parsingContext.emittedStack.splice(depth, 1);
        this.parsingContext.idStack.splice(depth, 1);
        this.parsingContext.graphStack.splice(depth + 1, 1);
        this.parsingContext.graphContainerTermStack.splice(depth, 1);
        this.parsingContext.jsonLiteralStack.splice(depth, 1);
        this.parsingContext.validationStack.splice(depth - 1, 2);
        this.parsingContext.literalStack.splice(depth, this.parsingContext.literalStack.length - depth);
        this.parsingContext.annotationsBuffer.splice(depth, 1);
        // TODO: just like the literal stack, splice all other stack until the end as well?
    }
    /**
     * Flush buffers for the given depth.
     *
     * This should be called after the last entry at a given depth was processed.
     *
     * @param {number} depth A depth.
     * @param {any[]} keys A stack of keys.
     * @return {Promise<void>} A promise resolving if flushing is done.
     */
    async flushBuffer(depth, keys) {
        // Determine the subjects at this depth; a fresh blank node is minted (and stored) if none were identified
        let subjects = this.parsingContext.idStack[depth];
        const subjectsWasDefined = !!subjects;
        if (!subjectsWasDefined) {
            subjects = this.parsingContext.idStack[depth] = [this.util.dataFactory.blankNode()];
        }
        // Flush values at this level
        const valueBuffer = this.parsingContext.unidentifiedValuesBuffer[depth];
        if (valueBuffer) {
            for (const subject of subjects) {
                // Determine the graphs to emit in: either the ids of the enclosing graph node,
                // or the value of the graph container.
                const depthOffsetGraph = await this.util.getDepthOffsetGraph(depth, keys);
                const graphs = (this.parsingContext.graphStack[depth] || depthOffsetGraph >= 0)
                    ? this.parsingContext.idStack[depth - depthOffsetGraph - 1]
                    : [await this.util.getGraphContainerValue(keys, depth)];
                if (graphs) {
                    for (const graph of graphs) {
                        // Flush values to stream if the graph @id is known
                        this.parsingContext.emittedStack[depth] = true;
                        for (const bufferedValue of valueBuffer) {
                            this.util.emitQuadChecked(depth, subject, bufferedValue.predicate, bufferedValue.object, graph, bufferedValue.reverse, bufferedValue.isEmbedded);
                        }
                    }
                }
                else {
                    // Place the values in the graphs buffer if the graph @id is not yet known
                    const subGraphBuffer = this.parsingContext.getUnidentifiedGraphBufferSafe(depth - await this.util.getDepthOffsetGraph(depth, keys) - 1);
                    for (const bufferedValue of valueBuffer) {
                        if (bufferedValue.reverse) {
                            // Reversed values swap the subject and object positions
                            subGraphBuffer.push({
                                object: subject,
                                predicate: bufferedValue.predicate,
                                subject: bufferedValue.object,
                                isEmbedded: bufferedValue.isEmbedded,
                            });
                        }
                        else {
                            subGraphBuffer.push({
                                object: bufferedValue.object,
                                predicate: bufferedValue.predicate,
                                subject,
                                isEmbedded: bufferedValue.isEmbedded,
                            });
                        }
                    }
                }
            }
            // The values at this depth have been handled, so drop the buffer and the literal flags for this depth
            this.parsingContext.unidentifiedValuesBuffer.splice(depth, 1);
            this.parsingContext.literalStack.splice(depth, 1);
            this.parsingContext.jsonLiteralStack.splice(depth, 1);
        }
        // Flush graphs at this level
        const graphBuffer = this.parsingContext.unidentifiedGraphsBuffer[depth];
        if (graphBuffer) {
            for (const subject of subjects) {
                // A @graph statement at the root without @id relates to the default graph,
                // unless there are top-level properties,
                // others relate to blank nodes.
                const graph = depth === 1 && subject.termType === 'BlankNode'
                    && !this.parsingContext.topLevelProperties ? this.util.getDefaultGraph() : subject;
                this.parsingContext.emittedStack[depth] = true;
                for (const bufferedValue of graphBuffer) {
                    this.parsingContext.emitQuad(depth, this.util.dataFactory.quad(bufferedValue.subject, bufferedValue.predicate, bufferedValue.object, graph));
                }
            }
            this.parsingContext.unidentifiedGraphsBuffer.splice(depth, 1);
        }
        // Push unhandled annotations up the stack as nested annotations
        const annotationsBuffer = this.parsingContext.annotationsBuffer[depth];
        if (annotationsBuffer) {
            // Emit an error if we reach the top, and still have annotations
            if (annotationsBuffer.length > 0 && depth === 1) {
                this.parsingContext.emitError(new jsonld_context_parser_1.ErrorCoded(`Annotations can not be made on top-level nodes`, jsonld_context_parser_1.ERROR_CODES.INVALID_ANNOTATION));
            }
            // Pass the annotations buffer up one level in the stack
            const annotationsBufferParent = this.parsingContext.getAnnotationsBufferSafe(depth - 1);
            for (const annotation of annotationsBuffer) {
                annotationsBufferParent.push(annotation);
            }
            // Unlike the splices above, delete leaves a hole at this depth instead of shifting deeper entries
            delete this.parsingContext.annotationsBuffer[depth];
        }
    }
    /**
     * Check if at least one {@link IEntryHandler} validates the entry to true.
     * @param {any[]} keys A stack of keys.
     * @param {number} depth A depth.
     * @param {boolean} inProperty If the current depth is part of a valid property node.
     * @return {Promise<{ valid: boolean, property: boolean }>} A promise resolving to true or false.
     */
    async validateKey(keys, depth, inProperty) {
        for (const entryHandler of JsonLdParser.ENTRY_HANDLERS) {
            if (await entryHandler.validate(this.parsingContext, this.util, keys, depth, inProperty)) {
                return { valid: true, property: inProperty || entryHandler.isPropertyHandler() };
            }
        }
        return { valid: false, property: false };
    }
    /**
     * Attach all required listeners to the JSON parser.
     *
     * This should only be called once.
     */
    attachJsonParserListeners() {
        // Listen to json parser events
        this.jsonParser.onValue = (value) => {
            const depth = this.jsonParser.stack.length;
            const keys = (new Array(depth + 1).fill(0)).map((v, i) => {
                return i === depth ? this.jsonParser.key : this.jsonParser.stack[i].key;
            });
            if (!this.isParsingContextInner(depth)) { // Don't parse inner nodes inside @context
                const valueJobCb = () => this.newOnValueJob(keys, value, depth, true);
                if (!this.parsingContext.streamingProfile
                    && !this.parsingContext.contextTree.getContext(keys.slice(0, -1))) {
                    // If an out-of-order context is allowed,
                    // we have to buffer everything.
                    // We store jobs for @context's and @type's separately,
                    // because at the end, we have to process them first.
                    // We also handle @type because these *could* introduce a type-scoped context.
                    if (keys[depth] === '@context') {
                        let jobs = this.contextJobs[depth];
                        if (!jobs) {
                            jobs = this.contextJobs[depth] = [];
                        }
                        jobs.push(valueJobCb);
                    }
                    else {
                        this.contextAwaitingJobs.push({ job: valueJobCb, keys, depth });
                    }
                }
                else {
                    // Make sure that our value jobs are chained synchronously
                    this.lastOnValueJob = this.lastOnValueJob.then(valueJobCb);
                }
                // Execute all buffered jobs on deeper levels
                if (!this.parsingContext.streamingProfile && depth === 0) {
                    this.lastOnValueJob = this.lastOnValueJob
                        .then(() => this.executeBufferedJobs());
                }
            }
        };
        this.jsonParser.onError = (error) => {
            this.emit('error', error);
        };
    }
    /**
     * Check if the parser is currently parsing an element that is part of an @context entry.
     * @param {number} depth A depth.
     * @return {boolean} A boolean.
     */
    isParsingContextInner(depth) {
        for (let i = depth; i > 0; i--) {
            if (this.jsonParser.stack[i - 1].key === '@context') {
                return true;
            }
        }
        return false;
    }
    /**
     * Execute all buffered jobs.
     * @return {Promise<void>} A promise resolving if all jobs are finished.
     */
    async executeBufferedJobs() {
        // Handle context jobs
        for (const jobs of this.contextJobs) {
            if (jobs) {
                for (const job of jobs) {
                    await job();
                }
            }
        }
        // Clear the keyword cache.
        this.parsingContext.unaliasedKeywordCacheStack.splice(0);
        const contextAwaitingJobs = [];
        for (const job of this.contextAwaitingJobs) {
            if ((await this.util.unaliasKeyword(job.keys[job.depth], job.keys, job.depth, true)) === '@type'
                || typeof job.keys[job.depth] === 'number' && (await this.util.unaliasKeyword(job.keys[job.depth - 1], job.keys, job.depth - 1, true)) === '@type') { // Also capture @type with array values
                // Remove @type from keys, because we want it to apply to parent later on
                this.typeJobs.push({ job: job.job, keys: job.keys.slice(0, job.keys.length - 1) });
            }
            else {
                contextAwaitingJobs.push(job);
            }
        }
        // Handle non-context jobs
        for (const job of contextAwaitingJobs) {
            // Check if we have a type (with possible type-scoped context) that should be handled before.
            // We check all possible parent nodes for the current job, from root to leaves.
            if (this.typeJobs.length > 0) {
                // First collect all applicable type jobs
                const applicableTypeJobs = [];
                const applicableTypeJobIds = [];
                for (let i = 0; i < this.typeJobs.length; i++) {
                    const typeJob = this.typeJobs[i];
                    if (Util_1.Util.isPrefixArray(typeJob.keys, job.keys)) {
                        applicableTypeJobs.push(typeJob);
                        applicableTypeJobIds.push(i);
                    }
                }
                // Next, sort the jobs from short to long key length (to ensure types higher up in the tree to be handled first)
                const sortedTypeJobs = applicableTypeJobs.sort((job1, job2) => job1.keys.length - job2.keys.length);
                // Finally, execute the jobs in order
                for (const typeJob of sortedTypeJobs) {
                    await typeJob.job();
                }
                // Remove the executed type jobs
                // Sort first, so we can efficiently splice
                const sortedApplicableTypeJobIds = applicableTypeJobIds.sort().reverse();
                for (const jobId of sortedApplicableTypeJobIds) {
                    this.typeJobs.splice(jobId, 1);
                }
            }
            await job.job();
        }
    }
}
exports.JsonLdParser = JsonLdParser;
// NOTE(review): presumably the JSON-LD processing mode that applies when none is configured;
// it is not referenced within this file — confirm against its consumers.
JsonLdParser.DEFAULT_PROCESSING_MODE = '1.1';
// The entry handlers that are consulted for every parsed entry.
// The order is significant: newOnValueJob and validateKey iterate this list from top to bottom
// and stop at the first handler that accepts the entry, so earlier handlers take precedence.
JsonLdParser.ENTRY_HANDLERS = [
    new EntryHandlerArrayValue_1.EntryHandlerArrayValue(),
    new EntryHandlerKeywordContext_1.EntryHandlerKeywordContext(),
    new EntryHandlerKeywordId_1.EntryHandlerKeywordId(),
    new EntryHandlerKeywordIncluded_1.EntryHandlerKeywordIncluded(),
    new EntryHandlerKeywordGraph_1.EntryHandlerKeywordGraph(),
    new EntryHandlerKeywordNest_1.EntryHandlerKeywordNest(),
    new EntryHandlerKeywordType_1.EntryHandlerKeywordType(),
    new EntryHandlerKeywordValue_1.EntryHandlerKeywordValue(),
    new EntryHandlerKeywordAnnotation_1.EntryHandlerKeywordAnnotation(),
    new EntryHandlerContainer_1.EntryHandlerContainer(),
    new EntryHandlerKeywordUnknownFallback_1.EntryHandlerKeywordUnknownFallback(),
    new EntryHandlerPredicate_1.EntryHandlerPredicate(),
    new EntryHandlerInvalidFallback_1.EntryHandlerInvalidFallback(),
];
//# sourceMappingURL=JsonLdParser.js.map