/**
 * jsonld-streaming-parser
 * A fast and lightweight streaming JSON-LD parser.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.JsonLdParser = void 0;
// tslint:disable-next-line:no-var-requires
const Parser = require('@bergos/jsonparse');
const jsonld_context_parser_1 = require("jsonld-context-parser");
const readable_stream_1 = require("readable-stream");
const EntryHandlerArrayValue_1 = require("./entryhandler/EntryHandlerArrayValue");
const EntryHandlerContainer_1 = require("./entryhandler/EntryHandlerContainer");
const EntryHandlerInvalidFallback_1 = require("./entryhandler/EntryHandlerInvalidFallback");
const EntryHandlerPredicate_1 = require("./entryhandler/EntryHandlerPredicate");
const EntryHandlerKeywordContext_1 = require("./entryhandler/keyword/EntryHandlerKeywordContext");
const EntryHandlerKeywordGraph_1 = require("./entryhandler/keyword/EntryHandlerKeywordGraph");
const EntryHandlerKeywordId_1 = require("./entryhandler/keyword/EntryHandlerKeywordId");
const EntryHandlerKeywordIncluded_1 = require("./entryhandler/keyword/EntryHandlerKeywordIncluded");
const EntryHandlerKeywordNest_1 = require("./entryhandler/keyword/EntryHandlerKeywordNest");
const EntryHandlerKeywordType_1 = require("./entryhandler/keyword/EntryHandlerKeywordType");
const EntryHandlerKeywordUnknownFallback_1 = require("./entryhandler/keyword/EntryHandlerKeywordUnknownFallback");
const EntryHandlerKeywordValue_1 = require("./entryhandler/keyword/EntryHandlerKeywordValue");
const ParsingContext_1 = require("./ParsingContext");
const Util_1 = require("./Util");
const http_link_header_1 = require("http-link-header");
const EntryHandlerKeywordAnnotation_1 = require("./entryhandler/keyword/EntryHandlerKeywordAnnotation");
/**
* A stream transformer that parses JSON-LD (text) streams to an {@link RDF.Stream}.
*/
class JsonLdParser extends readable_stream_1.Transform {
constructor(options) {
super({ readableObjectMode: true });
options = options || {};
this.options = options;
this.parsingContext = new ParsingContext_1.ParsingContext(Object.assign({ parser: this }, options));
this.util = new Util_1.Util({ dataFactory: options.dataFactory, parsingContext: this.parsingContext });
this.jsonParser = new Parser();
this.contextJobs = [];
this.typeJobs = [];
this.contextAwaitingJobs = [];
this.lastDepth = 0;
this.lastKeys = [];
this.lastOnValueJob = Promise.resolve();
this.attachJsonParserListeners();
this.on('end', () => {
if (typeof this.jsonParser.mode !== 'undefined') {
this.emit('error', new Error('Unclosed document'));
}
});
}
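// Basic usage sketch (the file name and baseIRI below are illustrative):
//
//   const { JsonLdParser } = require('jsonld-streaming-parser');
//   const fs = require('fs');
//   const parser = new JsonLdParser({ baseIRI: 'https://example.org/' });
//   fs.createReadStream('document.jsonld')
//     .pipe(parser)                                  // text chunks in, RDF quads out
//     .on('data', (quad) => console.log(quad))
//     .on('error', console.error)
//     .on('end', () => console.log('All quads were parsed.'));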
/**
* Construct a JsonLdParser from the given HTTP response.
*
* This will throw an error if the response does not have a valid JSON media type
* (application/ld+json, application/json, or any media type ending in +json).
*
* For raw JSON responses, exactly one link header pointing to a JSON-LD context is required.
*
* This method is not responsible for handling redirects.
*
* @param baseIRI The URI of the received response.
* @param mediaType The received content type.
* @param headers Optional HTTP headers.
* @param options Optional parser options.
*/
static fromHttpResponse(baseIRI, mediaType, headers, options) {
let context;
let wellKnownMediaTypes = ['application/activity+json'];
if (options && options.wellKnownMediaTypes) {
wellKnownMediaTypes = options.wellKnownMediaTypes;
}
// Special cases when receiving something other than the JSON-LD media type or one of the well-known media types
if (mediaType !== 'application/ld+json' && !wellKnownMediaTypes.includes(mediaType)) {
// Only accept JSON or JSON extension types
if (mediaType !== 'application/json' && !mediaType.endsWith('+json')) {
throw new jsonld_context_parser_1.ErrorCoded(`Unsupported JSON-LD media type ${mediaType}`, jsonld_context_parser_1.ERROR_CODES.LOADING_DOCUMENT_FAILED);
}
// We need exactly one JSON-LD context in the link header
if (headers && headers.has('Link')) {
headers.forEach((value, key) => {
if (key === 'link') {
const linkHeader = (0, http_link_header_1.parse)(value);
for (const link of linkHeader.get('rel', 'http://www.w3.org/ns/json-ld#context')) {
if (context) {
throw new jsonld_context_parser_1.ErrorCoded('Multiple JSON-LD context link headers were found on ' + baseIRI, jsonld_context_parser_1.ERROR_CODES.MULTIPLE_CONTEXT_LINK_HEADERS);
}
context = link.uri;
}
}
});
}
if (!context && !(options === null || options === void 0 ? void 0 : options.ignoreMissingContextLinkHeader)) {
throw new jsonld_context_parser_1.ErrorCoded(`Missing context link header for media type ${mediaType} on ${baseIRI}`, jsonld_context_parser_1.ERROR_CODES.LOADING_DOCUMENT_FAILED);
}
}
// Check if the streaming profile is present
let streamingProfile;
if (headers && headers.has('Content-Type')) {
const contentType = headers.get('Content-Type');
const match = /; *profile=([^"]*)/.exec(contentType);
if (match && match[1] === 'http://www.w3.org/ns/json-ld#streaming') {
streamingProfile = true;
}
}
return new JsonLdParser(Object.assign({ baseIRI, context, streamingProfile }, options || {}));
}
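// Usage sketch (illustrative; assumes Node 18+ with a global fetch, a
// parameter-free content-type header, and a hypothetical URL):
//
//   const { Readable } = require('stream');
//   const response = await fetch('https://example.org/resource.jsonld');
//   const parser = JsonLdParser.fromHttpResponse(
//     response.url,                          // baseIRI
//     response.headers.get('content-type'),  // mediaType
//     response.headers,                      // checked for context/profile info
//   );
//   Readable.fromWeb(response.body).pipe(parser);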
/**
* Parses the given text stream into a quad stream.
* @param {NodeJS.EventEmitter} stream A text stream.
* @return {RDF.Stream} A quad stream.
*/
import(stream) {
if ('pipe' in stream) {
const parsed = stream.pipe(new JsonLdParser(this.options));
stream.on('error', (error) => parsed.emit('error', error));
return parsed;
}
else {
const output = new readable_stream_1.PassThrough({ readableObjectMode: true });
stream.on('data', (data) => output.push(data));
stream.on('end', () => output.push(null));
const parsed = output.pipe(new JsonLdParser(this.options));
stream.on('error', (error) => parsed.emit('error', error));
return parsed;
}
}
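// Usage sketch (assumes `textStream` is any event emitter producing JSON-LD text):
//
//   const quadStream = new JsonLdParser().import(textStream);
//   quadStream.on('data', (quad) => console.log(quad));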
_transform(chunk, encoding, callback) {
this.jsonParser.write(chunk);
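// Only signal completion once the value jobs spawned by this chunk have
// settled, so that stream backpressure throttles the JSON parser.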
this.lastOnValueJob
.then(() => callback(), (error) => callback(error));
}
/**
* Start a new job for parsing the given value.
*
* This will let the first valid {@link IEntryHandler} handle the entry.
*
* @param {any[]} keys The stack of keys.
* @param value The value to parse.
* @param {number} depth The depth to parse at.
* @param {boolean} lastDepthCheck If the lastDepth check should be done for buffer draining.
* @return {Promise<void>} A promise resolving when the job is done.
*/
async newOnValueJob(keys, value, depth, lastDepthCheck) {
let flushStacks = true;
// When we go up the stack, emit all unidentified values
// We need to do this before the new job, because the new job may require determined values from the flushed jobs.
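// (Illustrative) for { "a": { "b": 1 }, "c": 2 }: the inner object completes at
// depth 1 right after its leaf was handled at depth 2 (lastDepth), so the
// depth-2 buffers are flushed here before "c" is processed.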
if (lastDepthCheck && depth < this.lastDepth) {
// Check if we had any RDF lists that need to be terminated with an rdf:nil
const listPointer = this.parsingContext.listPointerStack[this.lastDepth];
if (listPointer) {
// Terminate the list if it had at least one value
if (listPointer.value) {
this.push(this.util.dataFactory.quad(listPointer.value, this.util.rdfRest, this.util.rdfNil, this.util.getDefaultGraph()));
}
// Add the list id to the id stack, so it can be used higher up in the stack
listPointer.listId.listHead = true;
this.parsingContext.idStack[listPointer.listRootDepth + 1] = [listPointer.listId];
this.parsingContext.listPointerStack.splice(this.lastDepth, 1);
}
// Flush the buffer for lastDepth
// If the parent key is a special type of container, postpone flushing until that parent is handled.
if (await EntryHandlerContainer_1.EntryHandlerContainer.isBufferableContainerHandler(this.parsingContext, this.lastKeys, this.lastDepth)) {
this.parsingContext.pendingContainerFlushBuffers
.push({ depth: this.lastDepth, keys: this.lastKeys.slice(0, this.lastKeys.length) });
flushStacks = false;
}
else {
await this.flushBuffer(this.lastDepth, this.lastKeys);
}
}
const key = await this.util.unaliasKeyword(keys[depth], keys, depth);
const parentKey = await this.util.unaliasKeywordParent(keys, depth);
this.parsingContext.emittedStack[depth] = true;
let handleKey = true;
// Keywords inside @reverse are not allowed, apart from @context
if (jsonld_context_parser_1.Util.isValidKeyword(key) && parentKey === '@reverse' && key !== '@context') {
this.emit('error', new jsonld_context_parser_1.ErrorCoded(`Found the @id '${value}' inside an @reverse property`, jsonld_context_parser_1.ERROR_CODES.INVALID_REVERSE_PROPERTY_MAP));
}
// Skip further processing if one of the parent nodes is invalid.
// We use the validationStack to reuse validation results that were produced before with common key stacks.
let inProperty = false;
if (this.parsingContext.validationStack.length > 1) {
inProperty = this.parsingContext.validationStack[this.parsingContext.validationStack.length - 1].property;
}
for (let i = Math.max(1, this.parsingContext.validationStack.length - 1); i < keys.length - 1; i++) {
const validationResult = this.parsingContext.validationStack[i]
|| (this.parsingContext.validationStack[i] = await this.validateKey(keys.slice(0, i + 1), i, inProperty));
if (!validationResult.valid) {
this.parsingContext.emittedStack[depth] = false;
handleKey = false;
break;
}
else if (!inProperty && validationResult.property) {
inProperty = true;
}
}
// Skip further processing if this node is part of a literal
if (await this.util.isLiteral(keys, depth)) {
handleKey = false;
}
// Get handler
if (handleKey) {
for (const entryHandler of JsonLdParser.ENTRY_HANDLERS) {
const testResult = await entryHandler.test(this.parsingContext, this.util, key, keys, depth);
if (testResult) {
// Pass processing over to the handler
await entryHandler.handle(this.parsingContext, this.util, key, keys, value, depth, testResult);
// Flag that this depth is processed
if (entryHandler.isStackProcessor()) {
this.parsingContext.processingStack[depth] = true;
}
break;
}
}
}
// Validate value indexes on the root.
if (depth === 0 && Array.isArray(value)) {
await this.util.validateValueIndexes(value);
}
// When we go up the stack, flush the old stack
if (flushStacks && depth < this.lastDepth) {
// Reset our stacks
this.flushStacks(this.lastDepth);
}
this.lastDepth = depth;
this.lastKeys = keys;
// Clear the keyword cache at this depth, and everything underneath.
this.parsingContext.unaliasedKeywordCacheStack.splice(depth - 1);
}
/**
* Flush the processing stacks at the given depth.
* @param {number} depth A depth.
*/
flushStacks(depth) {
this.parsingContext.processingStack.splice(depth, 1);
this.parsingContext.processingType.splice(depth, 1);
this.parsingContext.emittedStack.splice(depth, 1);
this.parsingContext.idStack.splice(depth, 1);
this.parsingContext.graphStack.splice(depth + 1, 1);
this.parsingContext.graphContainerTermStack.splice(depth, 1);
this.parsingContext.jsonLiteralStack.splice(depth, 1);
this.parsingContext.validationStack.splice(depth - 1, 2);
this.parsingContext.literalStack.splice(depth, this.parsingContext.literalStack.length - depth);
this.parsingContext.annotationsBuffer.splice(depth, 1);
// TODO: just like the literal stack, splice all other stack until the end as well?
}
/**
* Flush buffers for the given depth.
*
* This should be called after the last entry at a given depth was processed.
*
* @param {number} depth A depth.
* @param {any[]} keys A stack of keys.
* @return {Promise<void>} A promise resolving if flushing is done.
*/
async flushBuffer(depth, keys) {
let subjects = this.parsingContext.idStack[depth];
const subjectsWasDefined = !!subjects;
if (!subjectsWasDefined) {
subjects = this.parsingContext.idStack[depth] = [this.util.dataFactory.blankNode()];
}
// Flush values at this level
const valueBuffer = this.parsingContext.unidentifiedValuesBuffer[depth];
if (valueBuffer) {
for (const subject of subjects) {
const depthOffsetGraph = await this.util.getDepthOffsetGraph(depth, keys);
const graphs = (this.parsingContext.graphStack[depth] || depthOffsetGraph >= 0)
? this.parsingContext.idStack[depth - depthOffsetGraph - 1]
: [await this.util.getGraphContainerValue(keys, depth)];
if (graphs) {
for (const graph of graphs) {
// Flush values to stream if the graph @id is known
this.parsingContext.emittedStack[depth] = true;
for (const bufferedValue of valueBuffer) {
this.util.emitQuadChecked(depth, subject, bufferedValue.predicate, bufferedValue.object, graph, bufferedValue.reverse, bufferedValue.isEmbedded);
}
}
}
else {
// Place the values in the graphs buffer if the graph @id is not yet known
const subGraphBuffer = this.parsingContext.getUnidentifiedGraphBufferSafe(depth - await this.util.getDepthOffsetGraph(depth, keys) - 1);
for (const bufferedValue of valueBuffer) {
if (bufferedValue.reverse) {
subGraphBuffer.push({
object: subject,
predicate: bufferedValue.predicate,
subject: bufferedValue.object,
isEmbedded: bufferedValue.isEmbedded,
});
}
else {
subGraphBuffer.push({
object: bufferedValue.object,
predicate: bufferedValue.predicate,
subject,
isEmbedded: bufferedValue.isEmbedded,
});
}
}
}
}
this.parsingContext.unidentifiedValuesBuffer.splice(depth, 1);
this.parsingContext.literalStack.splice(depth, 1);
this.parsingContext.jsonLiteralStack.splice(depth, 1);
}
// Flush graphs at this level
const graphBuffer = this.parsingContext.unidentifiedGraphsBuffer[depth];
if (graphBuffer) {
for (const subject of subjects) {
// A @graph statement at the root without an @id relates to the default graph,
// unless there are top-level properties;
// all other @graph statements relate to their subject node.
const graph = depth === 1 && subject.termType === 'BlankNode'
&& !this.parsingContext.topLevelProperties ? this.util.getDefaultGraph() : subject;
this.parsingContext.emittedStack[depth] = true;
for (const bufferedValue of graphBuffer) {
this.parsingContext.emitQuad(depth, this.util.dataFactory.quad(bufferedValue.subject, bufferedValue.predicate, bufferedValue.object, graph));
}
}
this.parsingContext.unidentifiedGraphsBuffer.splice(depth, 1);
}
// Push unhandled annotations up the stack as nested annotations
const annotationsBuffer = this.parsingContext.annotationsBuffer[depth];
if (annotationsBuffer) {
// Throw an error if we reach the top, and still have annotations
if (annotationsBuffer.length > 0 && depth === 1) {
this.parsingContext.emitError(new jsonld_context_parser_1.ErrorCoded(`Annotations can not be made on top-level nodes`, jsonld_context_parser_1.ERROR_CODES.INVALID_ANNOTATION));
}
// Pass the annotations buffer up one level in the stack
const annotationsBufferParent = this.parsingContext.getAnnotationsBufferSafe(depth - 1);
for (const annotation of annotationsBuffer) {
annotationsBufferParent.push(annotation);
}
delete this.parsingContext.annotationsBuffer[depth];
}
}
/**
* Check if at least one {@link IEntryHandler} considers the entry valid.
* @param {any[]} keys A stack of keys.
* @param {number} depth A depth.
* @param {boolean} inProperty If the current depth is part of a valid property node.
* @return {Promise<{ valid: boolean, property: boolean }>} A promise resolving to the validation result.
*/
async validateKey(keys, depth, inProperty) {
for (const entryHandler of JsonLdParser.ENTRY_HANDLERS) {
if (await entryHandler.validate(this.parsingContext, this.util, keys, depth, inProperty)) {
return { valid: true, property: inProperty || entryHandler.isPropertyHandler() };
}
}
return { valid: false, property: false };
}
/**
* Attach all required listeners to the JSON parser.
*
* This should only be called once.
*/
attachJsonParserListeners() {
// Listen to json parser events
this.jsonParser.onValue = (value) => {
const depth = this.jsonParser.stack.length;
const keys = (new Array(depth + 1).fill(0)).map((v, i) => {
return i === depth ? this.jsonParser.key : this.jsonParser.stack[i].key;
});
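// The `keys` array built above mirrors the JSON path to the current value.
// Illustrative example: for { "ex:p": { "@id": "ex:o" } }, the leaf value
// 'ex:o' arrives with depth 2 and keys [undefined, 'ex:p', '@id'].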
if (!this.isParsingContextInner(depth)) { // Don't parse inner nodes inside @context
const valueJobCb = () => this.newOnValueJob(keys, value, depth, true);
if (!this.parsingContext.streamingProfile
&& !this.parsingContext.contextTree.getContext(keys.slice(0, -1))) {
// If an out-of-order context is allowed,
// we have to buffer everything.
// We store jobs for @context's and @type's separately,
// because at the end, we have to process them first.
// We also handle @type because these *could* introduce a type-scoped context.
if (keys[depth] === '@context') {
let jobs = this.contextJobs[depth];
if (!jobs) {
jobs = this.contextJobs[depth] = [];
}
jobs.push(valueJobCb);
}
else {
this.contextAwaitingJobs.push({ job: valueJobCb, keys, depth });
}
}
else {
// Make sure that our value jobs are chained synchronously
this.lastOnValueJob = this.lastOnValueJob.then(valueJobCb);
}
// Execute all buffered jobs on deeper levels
if (!this.parsingContext.streamingProfile && depth === 0) {
this.lastOnValueJob = this.lastOnValueJob
.then(() => this.executeBufferedJobs());
}
}
};
this.jsonParser.onError = (error) => {
this.emit('error', error);
};
}
/**
* Check if the parser is currently parsing an element that is part of an @context entry.
* @param {number} depth A depth.
* @return {boolean} A boolean.
*/
isParsingContextInner(depth) {
for (let i = depth; i > 0; i--) {
if (this.jsonParser.stack[i - 1].key === '@context') {
return true;
}
}
return false;
}
/**
* Execute all buffered jobs.
* @return {Promise<void>} A promise resolving if all jobs are finished.
*/
async executeBufferedJobs() {
// Handle context jobs
for (const jobs of this.contextJobs) {
if (jobs) {
for (const job of jobs) {
await job();
}
}
}
// Clear the keyword cache.
this.parsingContext.unaliasedKeywordCacheStack.splice(0);
const contextAwaitingJobs = [];
for (const job of this.contextAwaitingJobs) {
if ((await this.util.unaliasKeyword(job.keys[job.depth], job.keys, job.depth, true)) === '@type'
|| typeof job.keys[job.depth] === 'number' && (await this.util.unaliasKeyword(job.keys[job.depth - 1], job.keys, job.depth - 1, true)) === '@type') { // Also capture @type with array values
// Remove @type from keys, because we want it to apply to parent later on
this.typeJobs.push({ job: job.job, keys: job.keys.slice(0, job.keys.length - 1) });
}
else {
contextAwaitingJobs.push(job);
}
}
// Handle non-context jobs
for (const job of contextAwaitingJobs) {
// Check if we have a type (with possible type-scoped context) that should be handled before.
// We check all possible parent nodes for the current job, from root to leaves.
if (this.typeJobs.length > 0) {
// First collect all applicable type jobs
const applicableTypeJobs = [];
const applicableTypeJobIds = [];
for (let i = 0; i < this.typeJobs.length; i++) {
const typeJob = this.typeJobs[i];
if (Util_1.Util.isPrefixArray(typeJob.keys, job.keys)) {
applicableTypeJobs.push(typeJob);
applicableTypeJobIds.push(i);
}
}
// Next, sort the jobs from short to long key length (so that types higher up in the tree are handled first)
const sortedTypeJobs = applicableTypeJobs.sort((job1, job2) => job1.keys.length - job2.keys.length);
// Finally, execute the jobs in order
for (const typeJob of sortedTypeJobs) {
await typeJob.job();
}
// Remove the executed type jobs
// Sort numerically in descending order, so higher indexes are spliced first and lower ids stay valid
// (the default sort is lexicographic, which would misorder ids of 10 or more)
const sortedApplicableTypeJobIds = applicableTypeJobIds.sort((idA, idB) => idB - idA);
for (const jobId of sortedApplicableTypeJobIds) {
this.typeJobs.splice(jobId, 1);
}
}
await job.job();
}
}
}
exports.JsonLdParser = JsonLdParser;
JsonLdParser.DEFAULT_PROCESSING_MODE = '1.1';
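// Entry handlers are tested in array order by newOnValueJob; the first handler
// whose test() passes handles the entry, so the keyword and container handlers
// must precede the generic predicate handler and the fallbacks.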
JsonLdParser.ENTRY_HANDLERS = [
new EntryHandlerArrayValue_1.EntryHandlerArrayValue(),
new EntryHandlerKeywordContext_1.EntryHandlerKeywordContext(),
new EntryHandlerKeywordId_1.EntryHandlerKeywordId(),
new EntryHandlerKeywordIncluded_1.EntryHandlerKeywordIncluded(),
new EntryHandlerKeywordGraph_1.EntryHandlerKeywordGraph(),
new EntryHandlerKeywordNest_1.EntryHandlerKeywordNest(),
new EntryHandlerKeywordType_1.EntryHandlerKeywordType(),
new EntryHandlerKeywordValue_1.EntryHandlerKeywordValue(),
new EntryHandlerKeywordAnnotation_1.EntryHandlerKeywordAnnotation(),
new EntryHandlerContainer_1.EntryHandlerContainer(),
new EntryHandlerKeywordUnknownFallback_1.EntryHandlerKeywordUnknownFallback(),
new EntryHandlerPredicate_1.EntryHandlerPredicate(),
new EntryHandlerInvalidFallback_1.EntryHandlerInvalidFallback(),
];
//# sourceMappingURL=JsonLdParser.js.map