// UNPKG
//
// @graphy/content.nq.read
//
// Version:
//
// Single-threaded RDF N-Quads content reader
//
// 1,338 lines (1,112 loc) 35.7 kB
// queueMicrotask shim { // not defined or not a function if('function' !== typeof queueMicrotask) { // create resolved promise let dp_resolve = Promise.resolve(); // try to redefine try { // eslint-disable-next-line no-global-assign queueMicrotask = fk => dp_resolve.then(fk) .catch(e_callback => setTimeout(() => { throw e_callback; }, 0)); } // oh well, at least we tried catch(e_define) {} } } const stream = require('@graphy/core.iso.stream'); const factory = require('@graphy/core.data.factory'); const RT_ABSOLUTE_IRI_VALID = /^[a-z][a-z0-9+\-.]*:(?:[^\0-\x20<>"{}|^`\\]|\\u[A-Fa-f0-9]{4}|\\U[A-Fa-f0-9]{8})*$/; const RT_ABSOLUTE_IRI_ESCAPELESS_VALID = /^[a-z][a-z0-9+\-.]*:[^\0-\x20<>"{}|^`]*$/; const RT_NAMED_NODE_VALID = /^([^\0-\x20<>"{}|^`\\]|\\u[A-Fa-f0-9]{4}|\\U[A-Fa-f0-9]{8})*$/; const RT_NAMED_NODE_ESCAPELESS_VALID = /^([^\0-\x20<>"{}|^`])*$/; const R_UNICODE_ANY = /\\u([0-9A-Fa-f]{4})|\\U([0-9A-Fa-f]{8})/g; const F_REPLACE_UNICODE_ANY = (s_, s_4, s_8) => String.fromCodePoint(parseInt(s_4 || s_8, 16)); const R_CLEAN = /\s*(?:#[^\n]*\n\s*)*\s*/y; const R_CLEAN_COMMENTS = /\s*(#[^\n]*\n\s*)*\s*/y; const RT_HAS_ESCAPES = /[\\]/; const R_EOL = /[^\n]+\n/y; // eslint-disable-next-line no-misleading-character-class const RT_BLANK_NODE_LABEL_VALID = /^(?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u{02ff}\u{0370}-\u{037d}\u{037f}-\u{1fff}\u{200c}-\u{200d}\u{2070}-\u{218f}\u{2c00}-\u{2fef}\u{3001}-\u{d7ff}\u{f900}-\u{fdcf}\u{fdf0}-\u{fffd}\u{10000}-\u{effff}_0-9])(?:(?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u{02ff}\u{0370}-\u{037d}\u{037f}-\u{1fff}\u{200c}-\u{200d}\u{2070}-\u{218f}\u{2c00}-\u{2fef}\u{3001}-\u{d7ff}\u{f900}-\u{fdcf}\u{fdf0}-\u{fffd}\u{10000}-\u{effff}_\-0-9\xb7\u{0300}-\u{036f}\u{203f}-\u{2040}.])*[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u{02ff}\u{0370}-\u{037d}\u{037f}-\u{1fff}\u{200c}-\u{200d}\u{2070}-\u{218f}\u{2c00}-\u{2fef}\u{3001}-\u{d7ff}\u{f900}-\u{fdcf}\u{fdf0}-\u{fffd}\u{10000}-\u{effff}_\-0-9\xb7\u{0300}-\u{036f}\u{203f}-\u{2040}])?$/u; const RT_LANGUAGE_VALID = 
/^[a-z]+(-[a-z0-9]+)*$/; const R_WS = /\s*/y; const R_HWS = /[ \t]*/y; const R_LANGTAG = /@([A-Za-z]+(?:-[A-Za-z0-9-]+)*)(?:\s+|(?=[.,;\])#]))/y; const R_IRIREF = /<([^>]*)>\s*/y; const F_REPLACE_STRLIT_CONTENTS = (s_, s_whitespace, s_auto, s_4, s_8, s_invalid) => { if(s_whitespace) { switch(s_whitespace) { case 't': return '\t'; case 'n': return '\n'; case 'r': return '\r'; case 'f': return '\f'; case 'b': return '\b'; default: { console.assert(`bad regex escape char mapping: '${s_whitespace}'`); } } } else if(s_auto) { return s_auto; } else if(s_4) { return String.fromCodePoint(parseInt(s_4, 16)); } else if(s_8) { return String.fromCodePoint(parseInt(s_8, 16)); } else if(s_invalid) { // pointless escape if('\\' === s_invalid[0]) { // // relaxed // return s_invalid[1]; // if relaxed then return s_invalid, otherwise throw: throw new Error(`expected string_literal but invalid escape sequence within contents: '${s_invalid}'. failed to parse a valid token`); } // bad character else { throw new Error(`expected string_literal but invalid whitespace character within contents: ${JSON.stringify(s_invalid)}. 
failed to parse a valid token`); } } else { console.assert(`unexpected no match branch in escape sequence replace callback`); } }; const R_STRLIT_SHORT_CONTENTS_ESCAPES_HARD = /(?:\\(?:([tnrfb])|([\\"'])|u([0-9A-Fa-f]{4})|U([0-9A-Fa-f]{8}))|([\r\n]|\\.))/g; const R_STRLIT_SHORT_CONTENTS_ESCAPES_SOFT = /(?:\\(?:([tnrfb])|([\\"'])|u([0-9A-Fa-f]{4})|U([0-9A-Fa-f]{8}))|([\r\n]|\\[^uU]|\\u[^]{4}|\\U[^]{8}))/g; const unescape_literal_short_hard = s_literal => s_literal .replace(R_STRLIT_SHORT_CONTENTS_ESCAPES_HARD, F_REPLACE_STRLIT_CONTENTS); const unescape_literal_short_soft = (s_literal) => { let m_incomplete = R_STRLIT_ESCAPE_INCOMPLETE.exec(s_literal); // incomplete escape if(m_incomplete) { let i_safe = m_incomplete.index; // rewind return [ s_literal.slice(0, i_safe) .replace(R_STRLIT_SHORT_CONTENTS_ESCAPES_SOFT, F_REPLACE_STRLIT_CONTENTS), s_literal.slice(i_safe), ]; } // done else { return [ s_literal .replace(R_STRLIT_SHORT_CONTENTS_ESCAPES_SOFT, F_REPLACE_STRLIT_CONTENTS), '', ]; } }; // lookbehind regexes const [ R_STRLIT_ESCAPE_INCOMPLETE, R_STRLIT_SHORT_DOUBLE_TERM, ] = (() => { function RegExp_$lookbehind_polyfill(s_input) { let m_match = RegExp.prototype.exec.call(this, s_input); if(m_match) { let i_start = m_match[0].length - m_match[1].length; m_match.index += i_start; m_match[0] = m_match[0].slice(i_start); } return m_match; } let mk_lookbehind_regex = (() => { try { new RegExp('(?<!h)i'); // eslint-disable-line no-new } catch(e_compile) { return (f_lookbehind, r_polyfill, f_polyfill) => { r_polyfill.exec = f_polyfill; return r_polyfill; }; } return f_lookbehind => f_lookbehind(); })(); return [ // R_STRLIT_ESCAPE_INCOMPLETE mk_lookbehind_regex( () => new RegExp('(?<!(?:[^\\\\]|^)(?:\\\\\\\\)*\\\\)\\\\(|u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})$'), /^(?:(?:[^\\]|\\.)*)(\\(?:|u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7}))$/, function RegExp_$lookbehind_polyfill_n(s_input) { let m_match = RegExp.prototype.exec.call(this, s_input); if(m_match) { m_match.index += 
m_match[0].length - m_match[1].length; } return m_match; }, ), // R_STRLIT_SHORT_DOUBLE_TERM mk_lookbehind_regex( () => new RegExp('(?<!(?:[^\\\\]|^)(?:\\\\\\\\)*\\\\)"\\s*', 'g'), /(?:[^\\"]|\\.)*("\s*)/y, RegExp_$lookbehind_polyfill, ), ]; })(); const R_QUAD_ESCAPELESS_SP = /(?:<([^\\>]*)>|_:([^\x20\t<]+))[\x20\t]*<([^\\>]*)>[\x20\t]*(?:(?:(<[^\\>]*)>|_:([^\x20\t<]+))[\x20\t]*(?:<([^\\>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+|"([^"\\]*)(?:(")(?:\^\^<([^\\>]*)>|@([^\x20\t.]+)|)[\x20\t]*(?:<([^\\>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+)?)/y; const R_QUAD = /(?:<([^>]*)>|_:([^\x20\t<]+))[\x20\t]*<([^>]*)>[\x20\t]*(?:(?:(<[^>]*)>|_:([^\x20\t<]+))[\x20\t]*(?:<([^>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+|"((?:[^"\\]|\\.)*)(?:(")(?:\^\^<([^>]*)>|@([^\x20\t.]+)|)[\x20\t]*(?:<([^>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+)?)/y; const R_BLANK_NODE = /_:([^\x20\t<]+)/y; class NQuads_Reader extends stream.Transform { constructor(g_impls) { super({ // do not decode strings into buffers decodeStrings: false, // accept strings as input on writable side writableObjectMode: false, // output quad objects on readable side readableObjectMode: true, // implementations flush: g_impls.flush, transform: g_impls.transform, }); // when the writable side is piped into this.on('pipe', (ds_input) => { this._ds_input = ds_input; // input stream has encoding option; ensure stream encoding is utf8 if('function' === typeof ds_input.setEncoding) { ds_input.setEncoding('utf8'); } }); } // intercept pipe pipe(ds_out) { let ds_dst = ds_out; // non-object mode if(!ds_dst._writableState.objectMode) { // transform to JSON ds_out = stream.quads_to_json(); } // yet object mode and graphy writable else if(ds_out.isGraphyWritable) { // transform to quad-stream ds_out = stream.quads_to_writable(); } // interim stream created if(ds_out !== ds_dst) { // forward output to super super.pipe(ds_out); // pipe outpu to destination return 
ds_out.pipe(ds_dst); } // forward as-is to super else { return super.pipe(ds_dst); } } } class Reader { constructor(g_config) { let { // input medium input: g_input=null, // relax validation relax: b_relax=false, // debug debug: b_debug=false, } = g_config; // allow relative iris flag let b_allow_relative_iris = g_config.allow_relative_iris || g_config.allowRelativeIRIs || g_config.allowRelativeIris || false; // adopt factory let dc_factory = this._dc_factory = factory.adopt(g_config.dataFactory || g_config.data_factory || factory.unfiltered); let f_quad = this._f_quad = dc_factory.quad; // fields Object.assign(this, { // string buffer, accept left-over string from previous data chunk s: g_config.prepend || '', // string buffer length n: 0, _b_debug: b_debug, _b_relax: b_relax, _b_destroyed: false, _b_trim_start: true, _f_state: this.statement, _kt_subject: null, _kt_predicate: null, _kt_object: null, _s_literal: '', }); this._kt_default_graph = dc_factory.defaultGraph(); this._kt_rdfs_lang_string = dc_factory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString'); // clean regex let r_clean = this._r_clean = R_CLEAN; if(g_config.relaxed) { console.warn((new Error(`no such option 'relaxed'; did you mean 'relax' ?`)).stack.replace(/^Error:/, 'Warning:')); } if('validate' in g_config) { console.warn((new Error(`option 'validate' has been removed and validation is now on by default. Use 'relax' option if you wish to disable validation.`)).stack.replace(/^Error:/, 'Warning:')); } let namedNode = dc_factory.namedNode; let blankNode = dc_factory.blankNode; let languagedLiteral = dc_factory.languagedLiteral; // test for valid named node let rt_named_node_valid = b_allow_relative_iris? RT_NAMED_NODE_VALID: RT_ABSOLUTE_IRI_VALID; // test for valid named node escapeless let rt_named_node_valid_escapeless = b_allow_relative_iris? RT_NAMED_NODE_ESCAPELESS_VALID: RT_ABSOLUTE_IRI_ESCAPELESS_VALID; // validation let k_self = this; Object.assign(this, !b_relax ? 
{ create_named_node(p_iri) { if(!rt_named_node_valid.test(p_iri)) return k_self._error(`invalid IRI: "${p_iri}"`); return namedNode(p_iri); }, create_named_node_escapeless(p_iri) { if(!rt_named_node_valid_escapeless.test(p_iri)) return k_self._error(`invalid IRI: "${p_iri}"`); return namedNode(p_iri); }, create_blank_node(s_label) { if(!RT_BLANK_NODE_LABEL_VALID.test(s_label)) return k_self._error(`Invalid blank node label: "${s_label}"`); return blankNode(s_label); }, create_languaged_literal(s_contents, s_language) { if(!RT_LANGUAGE_VALID.test(s_language)) { return k_self._error(`Invalid literal language tag: ${s_language}`); } return languagedLiteral(s_contents, s_language); }, } : { create_named_node: namedNode, create_named_node_escapeless: namedNode, create_blank_node: blankNode, create_languaged_literal: languagedLiteral, }); // transform stream let ds_transform; // whether or not data has been received before let b_init = false; // create transform ds_transform = this.transform = new NQuads_Reader({ // on data event transform: (s_chunk, s_encoding, fk_chunk) => { // first transform if(!b_init) { // notify that data will begin ds_transform.emit('ready'); // do not emit 'ready' event again b_init = false; } // concatenate current chunk to previous chunk let s = this.s += s_chunk; // remove whitespace & comments from beginning if(this._b_trim_start) { r_clean.lastIndex = 0; let m_clean = r_clean.exec(s); if(this.emit_comments) { this.emit_comments(m_clean[1]); } // update index and prepare to match statement this.i = r_clean.lastIndex; } // do not remove whitespace; reset index else { this.i = 0; } // cache chunk length this.n = s.length; // resume parsing try { this.parse(true); } // read error occurred; emit and destroy stream catch(e_read) { return ds_transform.destroy(e_read); } // emit progress event updates ds_transform.emit('progress', s_chunk.length); // done transforming this chunk fk_chunk(); }, // once there's no more data to consume, invoke eof 
flush: (fk_flush) => { // there is still unparsed data if(this.s.length) { // append newline to end so we can match token this.s += '\n'; // remove whitespace & comments from beginning if(this._b_trim_start) { r_clean.lastIndex = 0; let m_clean = r_clean.exec(this.s); if(this.emit_comments) { this.emit_comments(m_clean[1]); } // update index and prepare to match statement this.i = r_clean.lastIndex; } // do not remove whitespace; reset index else { this.i = 0; } // parse try { this.parse(); } // read error occurred; pass to flush errback and exit method catch(e_read) { // destroying during flush means overriding push return ds_transform.demolish(e_read); } // still unparsed characters; pass to flush errback and exit method if(this.s.length) { return ds_transform.demolish(new Error(`parsing error occurred in state: statement\n ${this.s.substr(0, 50)}\n ^ starting here`)); } } // invalid state if(this._f_state !== this.statement) { return ds_transform.demolish(new Error(`parsing error occurred in state: ${this._f_state.name}\n ${this.s.substr(0, 50)}\n ^ starting here`)); } // make buffer's alloc eligible for gc this.s = null; // final progress update: no additional bytes were read ds_transform.emit('progress', 0); // call end event listener ds_transform.emit('eof'); // done flushing, close read stream fk_flush(); }, }); // destroy ds_transform._destroy = (...a_args) => { this.destroy(...a_args); }; // data quad this._f_data_quad = (kt_subject, kt_predicate, kt_object, kt_graph) => ds_transform.push(f_quad(kt_subject, kt_predicate, kt_object, kt_graph)); // new listener added ds_transform.on('newListener', (s_event) => { // comment if('comment' === s_event) { r_clean = R_CLEAN_COMMENTS; this.emit_comments = (s_captured) => { if(!s_captured) return; let a_comments = s_captured.slice(1).replace(/\n\s+$/, '').split(/\n+\s*#/g); for(let s_comment of a_comments) { ds_transform.emit('comment', s_comment); } }; } }); // bind events to transform stream this.bind(g_config); 
// input given if(g_input) { // input is stream if(g_input.stream) { let ds_input = g_input.stream; // go async so caller has chance to bind event listeners queueMicrotask(() => { ds_input.pipe(ds_transform); }); } // string else if('string' === typeof g_input.string) { let s_input = g_input.string; // go async so caller has chance to bind event listeners queueMicrotask(() => { ds_transform.end(s_input, 'utf8'); }); } // invalid arg else { throw new TypeError(`Invalid argument for input parameter: ${'object' === typeof g_input? JSON.stringify(g_input): g_input}`); } } ds_transform._graphy_reader = this; } _error(s_message) { this._b_destroyed = true; throw new Error(s_message); } bind(g_config) { let ds_transform = this.transform; if(g_config.error) ds_transform.on('error', g_config.error); if(g_config.comment) ds_transform.on('comment', g_config.comment); if(g_config.read) ds_transform.once('read', g_config.read); if(g_config.progress) ds_transform.on('progress', g_config.progress); if(g_config.eof) ds_transform.once('eof', g_config.eof); if(g_config.end) ds_transform.once('end', g_config.end); if(g_config.finish) ds_transform.once('finish', g_config.finish); if(g_config.data) ds_transform.on('data', g_config.data); } // begin parsing, keep applying until no more stack bail-outs parse() { let f_sync = this._f_state(); while('function' === typeof f_sync) { f_sync = f_sync.apply(this); } } statement() { let s = this.s; let n = this.n; let i = this.i; let f_data_quad = this._f_data_quad; let create_named_node = this.create_named_node; let create_named_node_escapeless = this.create_named_node_escapeless; let create_languaged_literal = this.create_languaged_literal; let create_blank_node = this.create_blank_node; let simpleLiteral = this._dc_factory.simpleLiteral; let datatypedLiteral = this._dc_factory.datatypedLiteral; let kt_default_graph = this._kt_default_graph; // match triples/quads for(;;) { // prepare sticky regex index R_QUAD_ESCAPELESS_SP.lastIndex = i; // 
execute regex let m_statement_e_sp = R_QUAD_ESCAPELESS_SP.exec(s); // regex was a match if(m_statement_e_sp) { // advance index i = R_QUAD_ESCAPELESS_SP.lastIndex; // prep object term let kt_object; // where to find the graph component let b_graph_late = false; // object term type is named node if(m_statement_e_sp[4]) { let p_object = m_statement_e_sp[4].slice(1); kt_object = create_named_node_escapeless(p_object); } // object term type is blank node else if(m_statement_e_sp[5]) { kt_object = create_blank_node(m_statement_e_sp[5]); } // object term type is literal else { // graph is in late capture group b_graph_late = true; // contents let s_contents = m_statement_e_sp[9]; // string terminator if(m_statement_e_sp[10]) { // datatype is present if(m_statement_e_sp[11]) { // create datatype term let kt_datatype = this.create_named_node_escapeless(m_statement_e_sp[11]); // create object term kt_object = datatypedLiteral(s_contents, kt_datatype); } // language tag is present else if(m_statement_e_sp[12]) { // normalize language let s_language = m_statement_e_sp[12].toLowerCase(); // create object term kt_object = create_languaged_literal(s_contents, s_language); } // simple literal else { kt_object = simpleLiteral(s_contents); } } // no string terminator else { // save contents this._s_literal = s_contents; // update index this.i = i; // save subject { let s_subject = m_statement_e_sp[1]; // named node if(s_subject || 'string' === typeof s_subject) { this._kt_subject = create_named_node_escapeless(s_subject); } // blank node else { this._kt_subject = create_blank_node(m_statement_e_sp[2]); } } // save predicate this._kt_predicate = create_named_node_escapeless(m_statement_e_sp[3]); // parse contents let z_bail = this.strlit_contents(); // bail out of stack if(z_bail && this.statement !== z_bail) { return z_bail; } // statement completed else { // clean let r_clean = this._r_clean; r_clean.lastIndex = this.i; let m_clean = r_clean.exec(s); if(this.emit_comments) { 
this.emit_comments(m_clean[1]); } // update local index and prepare to match next statement i = r_clean.lastIndex; // resume continue; } } } let kt_graph = kt_default_graph; // graph after literal if(b_graph_late) { // ref capture group let s_graph = m_statement_e_sp[13]; // named node if(s_graph || 'string' === typeof s_graph) { kt_graph = create_named_node_escapeless(s_graph); } // blank node else if(m_statement_e_sp[14]) { kt_graph = create_blank_node(m_statement_e_sp[14]); } } // graph after node else { // ref capture group let s_graph = m_statement_e_sp[6]; // named node if(s_graph || 'string' === typeof s_graph) { kt_graph = create_named_node_escapeless(s_graph); } // blank node else if(m_statement_e_sp[7]) { kt_graph = create_blank_node(m_statement_e_sp[7]); } } let kt_subject; { let s_subject = m_statement_e_sp[1]; // named node if(s_subject || 'string' === typeof s_subject) { kt_subject = create_named_node_escapeless(s_subject); } // blank node else { kt_subject = create_blank_node(m_statement_e_sp[2]); } } let s_predicate = m_statement_e_sp[3]; // emit data event f_data_quad( kt_subject, create_named_node_escapeless(s_predicate), kt_object, kt_graph, ); // comments if(this.emit_comments) { this.emit_comments(m_statement_e_sp[8] || m_statement_e_sp[15]); } } else { // prepare sticky regex index R_QUAD.lastIndex = i; // execute regex let m_statement = R_QUAD.exec(s); // regex was a match if(m_statement) { // advance index i = R_QUAD.lastIndex; // prep object term let kt_object; // where to find the graph component let b_graph_late = false; // object term type is named node if(m_statement[4]) { let p_object = m_statement[4].slice(1); kt_object = create_named_node(RT_HAS_ESCAPES.test(p_object)? p_object.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): p_object); } // object term type is blank node else if(m_statement[5]) { kt_object = create_blank_node(RT_HAS_ESCAPES.test(m_statement[5])? 
m_statement[5].replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): m_statement[5]); } // object term type is literal else { // graph is in late capture group b_graph_late = true; // contents let s_contents = m_statement[9]; // string terminator if(m_statement[10]) { // unescape contents s_contents = unescape_literal_short_hard(s_contents); // datatype is present if(m_statement[11]) { // create datatype term let kt_datatype = this.create_named_node(m_statement[11]); // create object term kt_object = datatypedLiteral(s_contents, kt_datatype); } // language tag is present else if(m_statement[12]) { // normalize language let s_language = m_statement[12].toLowerCase(); // create object term kt_object = create_languaged_literal(s_contents, s_language); } // simple literal else { kt_object = simpleLiteral(s_contents); } } // no string terminator else { // save contents this._s_literal = s_contents; // update index this.i = i; // save subject { let s_subject = m_statement[1]; // named node if(s_subject || 'string' === typeof s_subject) { this._kt_subject = create_named_node(RT_HAS_ESCAPES.test(s_subject)? s_subject.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_subject); } // blank node else { this._kt_subject = create_blank_node(m_statement[2]); } } // save predicate this._kt_predicate = create_named_node(RT_HAS_ESCAPES.test(m_statement[3])? 
m_statement[3].replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): m_statement[3]); // parse contents let z_bail = this.strlit_contents(); // bail out of stack if(z_bail && this.statement !== z_bail) { return z_bail; } // statement completed else { // clean let r_clean = this._r_clean; r_clean.lastIndex = this.i; let m_clean = r_clean.exec(s); if(this.emit_comments) { this.emit_comments(m_clean[1]); } // update local index and prepare to match next statement i = r_clean.lastIndex; // resume continue; } } } let kt_graph = kt_default_graph; // graph after literal if(b_graph_late) { // ref capture group let s_graph = m_statement[13]; // named node if(s_graph || 'string' === typeof s_graph) { kt_graph = create_named_node(RT_HAS_ESCAPES.test(s_graph)? s_graph.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_graph); } // blank node else if(m_statement[14]) { kt_graph = create_blank_node(m_statement[14]); } } // graph after node else { // ref capture group let s_graph = m_statement[6]; // named node if(s_graph || 'string' === typeof s_graph) { kt_graph = create_named_node(RT_HAS_ESCAPES.test(s_graph)? s_graph.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_graph); } // blank node else if(m_statement[7]) { kt_graph = create_blank_node(m_statement[7]); } } let kt_subject; { let s_subject = m_statement[1]; // named node if(s_subject || 'string' === typeof s_subject) { kt_subject = create_named_node(RT_HAS_ESCAPES.test(s_subject)? s_subject.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_subject); } // blank node else { kt_subject = create_blank_node(m_statement[2]); } } let s_predicate = m_statement[3]; // emit data event f_data_quad( kt_subject, create_named_node(RT_HAS_ESCAPES.test(s_predicate)? 
s_predicate.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_predicate), kt_object, kt_graph, ); // comments if(this.emit_comments) { this.emit_comments(m_statement[8] || m_statement[15]); } } else { // prepare sticky regex index R_EOL.lastIndex = i; if(R_EOL.exec(s)) { // advance index i = R_EOL.lastIndex; this._error(`Failed to read statement:\n\`${s.substr(i, 80).replace(/\n/g, '\u23CE')} [...]\`\n ^ starting here`); // match counter: 2 } else { // break loop to retry on next chunk if eos break; } } // brace #2 } // brace #1 } // end of while // update unparsed data string this.s = s.substr(i); // resume here this._f_state = this.statement; // exit return 1; } strlit_contents() { let {s, n, i} = this; // try to find end R_STRLIT_SHORT_DOUBLE_TERM.lastIndex = i; let m_term = R_STRLIT_SHORT_DOUBLE_TERM.exec(s); // end is in this chunk if(m_term) { // index of terminator let i_term = m_term.index; // extract dirty potion let s_dirty = s.slice(i, i_term); // clean and save this._s_literal += unescape_literal_short_hard(s_dirty); // advance index beyond terminator this.i = i_term + m_term[0].length; // resume eating whitespace at start of next chunk this._b_trim_start = true; // proceed with datatype_or_lang, then bail out of stack or resume parsing return this.datatype_or_langtag() || this.statement; } // end is not in this chunk else { // extract whole portion let s_dirty = s.slice(i); // unescape to clean part let [s_clean, s_incomplete] = unescape_literal_short_soft(s_dirty); // save this._s_literal += s_clean; // set unparsed index this.i = i = n - s_incomplete.length; // do not eat whitespace at start of next chunk this._b_trim_start = false; } // not yet eos if(i < this.n) { // expected token was not found if(0 === i) { // we've exceeded the maximum token length if(this.n > this.max_token_length) { return this.parse_error('strlit_contents'); } } } // resume here this._f_state = this.strlit_contents; // store what is unparsed this.s = s.slice(i); // if we're 
not parsing a stream, then this is an error if(this.eos) this.eos(); return 1; } // parse state for datatype_or_langtag datatype_or_langtag() { // destruct chunk, length, and index let {s, n, i} = this; // ref character let x = s[i]; while(i < n) { // eslint-disable-line no-unmodified-loop-condition // datatype if('^' === x) { // enough to speculate datatype if((i+2) < n) { // correct token if('^' === s[i+1]) { // advance index beyond token R_IRIREF.lastIndex = i + 2; // execute regex let m_iriref = R_IRIREF.exec(s); // regex was a match if(m_iriref) { // advance index this.i = R_IRIREF.lastIndex; // prepare iri let p_datatype = m_iriref[1].replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY); // create datatype term let kt_datatype = this.create_named_node(p_datatype); // create object term this._kt_object = this._dc_factory.datatypedLiteral(this._s_literal, kt_datatype); // free literal string this._s_literal = ''; // graph state return this.post_object(); } // failed to match; try again next chunk else { break; } } // invalid else { this._error(`Failed to read token after literal:\n\`${s.substr(i+1, 80).replace(/\n/g, '\u23CE')} [...]\`\n ^ starting here`); } } // not enough to speculate; try again next chunk else { break; } } // language tag else if('@' === x) { // prepare sticky regex index R_LANGTAG.lastIndex = i; // execute regex let m_langtag = R_LANGTAG.exec(s); // regex was a match if(m_langtag) { // advance index this.i = R_LANGTAG.lastIndex; // use direct factory method since regex is validation this._kt_object = this._dc_factory.languagedLiteral(this._s_literal, m_langtag[1]); // free literal string this._s_literal = ''; // graph state return this.post_object(); } // interrupted by eos; try again next chunk else { break; } } // graph component else if('<' === x || '_' === x) { // save simple literal this._kt_object = this._dc_factory.simpleLiteral(this._s_literal); // free literal string this._s_literal = ''; // continue parsing graph component return 
this.graph(); } // triple terminator else if('.' === x) { // save simple literal let kt_object = this._dc_factory.simpleLiteral(this._s_literal); // free literal string this._s_literal = ''; // advance index beyond terminator this.i = i + 1; // emit data event this._f_data_quad(this._kt_subject, this._kt_predicate, kt_object, this._kt_default_graph); // reset state return this.statement; // // consume whitespace (and incidentally reset index) // R_WS.lastIndex = i + 1; // R_WS.exec(s); // this.i = R_WS.lastIndex; // // done // return; } // other else { break; } } // ran out of characters // update index value this.i = i; // not yet eos if(i < this.n) { // expected token was not found if(0 === i) { // we've exceeded the maximum token length if(this.n > this.max_token_length) { return this.parse_error('datatype_or_langtag'); } } } // resume here this._f_state = this.datatype_or_langtag; // store what is unparsed this.s = s.slice(i); // if we're not parsing a stream, then this is an error if(this.eos) this.eos(); return 1; } statement_term() { let {s, n, i} = this; // find full stop let i_stop = s.indexOf('.', i); // found if(i_stop > -1) { // consume whitespace again this._b_trim_start = true; // advance beyond token this.i = i_stop + 1; // reset state return this.statement; } // anything other than whitespace else if(!/^\s*$/.test(s.slice(i))) { this.parse_error('statement_term'); } // do not consume whitespace this._b_trim_start = false; // resume here this._f_state = this.statement_term; // store what is unparsed this.s = s.slice(i); // if we're not parsing a stream, then this is an error if(this.eos) this.eos(); return 1; } post_object() { let {s, n, i} = this; // eat horizontal whitespace R_HWS.lastIndex = i; R_HWS.exec(s); i = R_HWS.lastIndex; // ran out of characters if(i >= n) { // resume here this._f_state = this.post_object; // store what is unparsed this.s = s.slice(i); // if we're not parsing a stream, then this is an error if(this.eos) this.eos(); return 
1; } // depending on char switch(s[i]) { // statement term case '.': { // advance index beyond terminator this.i = i + 1; // emit data event this._f_data_quad(this._kt_subject, this._kt_predicate, this._kt_object, this._kt_default_graph); // reset state return this.statement; } // graph case '<': case '_': { // save index this.i = i; // consume graph component return this.graph(); } // invalid default: { // save index this.i = i; // emit parsing error this.parse_error('post_object'); } } } graph() { let {s, n, i} = this; // prepare sticky regex index R_IRIREF.lastIndex = i; // execute regex let m_iriref = R_IRIREF.exec(s); // regex was a match if(m_iriref) { // advance index this.i = R_IRIREF.lastIndex; // create graph term let kt_graph = this.create_named_node(m_iriref[1]); // emit data event this._f_data_quad(this._kt_subject, this._kt_predicate, this._kt_object, kt_graph); // complete with statement_term return this.statement_term(); } else { // prepare sticky regex index R_BLANK_NODE.lastIndex = i; // execute regex let m_blank = R_BLANK_NODE.exec(s); // regex was a match if(m_blank) { // advance index this.i = R_BLANK_NODE.lastIndex; // create graph term let kt_graph = this._dc_factory.blankNode(m_blank[1]); // emit data event this._f_data_quad(this._kt_subject, this._kt_predicate, this._kt_object, kt_graph); // complete with statement_term return this.statement_term(); } } // brace #1 // resume here this._f_state = this.graph; // store what is unparsed this.s = s.slice(i); // if we're not parsing a stream, then this is an error if(this.eos) this.eos(); return 1; } parse_error(s_state) { return this._error(`Failed to read ${s_state}:\n\`${this.s.substr(this.i, 80).replace(/\n/g, '\u23CE')} [...]\`\n ^ starting here`); } destroy(e_destroy) { this._f_data_quad = () => {}; if(!e_destroy && this._ds_input) { this._ds_input.destroy(e_destroy); } this.transform.demolish(e_destroy); } } module.exports = function(...a_args) { let g_config = {}; // at least one argument 
if(a_args.length) { let z_arg_0 = a_args[0]; // input given unspecified if(z_arg_0 && z_arg_0.input && 'undefined' === typeof z_arg_0.input.string && !z_arg_0.input.stream) { z_arg_0 = z_arg_0.input; } // string if('string' === typeof z_arg_0) { g_config.input = {string:z_arg_0}; } // null else if(null === z_arg_0) { g_config.input = null; } // node stream else if('function' === typeof z_arg_0.setEncoding) { g_config.input = {stream:z_arg_0}; } // whatwg stream else if('function' === typeof z_arg_0.pipeTo) { throw new TypeError(`Sorry, WHATWG streams are currently not supported :(`); // g_config.input = {stream:z_arg_0}; } // config struct else if(z_arg_0 && 'object' === typeof z_arg_0 && '[object Object]' === Object.prototype.toString.call(z_arg_0)) { g_config = z_arg_0; // more args; invalid if(a_args.length > 1) { throw new TypeError(`unexpected argument(s) after config struct: ${a_args.slice(1)}`); } } // unknown else { throw new TypeError(`unexpected input type: ${z_arg_0}`); } // more args if(a_args.length > 1) { // copy onto struct Object.assign(g_config, a_args[1]); // more args if(a_args.length > 2) { throw new TypeError(`unexpected argument(s) after input and config struct: ${a_args.slice(2)}`); } } } // create reader, return transform stream return (new Reader(g_config)).transform; };