@graphy/content.nq.read
Version:
Single-threaded RDF N-Quads content reader
1,338 lines (1,112 loc) • 35.7 kB
JavaScript
// queueMicrotask shim: installs a promise-based fallback when the host does
// not provide a callable `queueMicrotask`
{
	// only act when the global is absent or not a function
	const b_missing = 'function' !== typeof queueMicrotask;

	if(b_missing) {
		// an already-settled promise; its reactions are scheduled as microtasks
		const dp_settled = Promise.resolve();

		// re-throw a callback's error from a timer so that it is not swallowed
		// by the promise chain
		const f_rethrow_async = e_callback => setTimeout(() => {
			throw e_callback;
		}, 0);

		// attempt to (re)define the global binding
		try {
			// eslint-disable-next-line no-global-assign
			queueMicrotask = fk => dp_settled.then(fk).catch(f_rethrow_async);
		}
		// the binding cannot be assigned in this environment; deliberately ignored
		catch(e_define) {}
	}
}
const stream = require('@graphy/core.iso.stream');
const factory = require('@graphy/core.data.factory');
// Naming used by the constants below: `RT_` regexes are only used with `.test()`,
// `R_` regexes carry the `g` or `y` flag and are stateful through `lastIndex`,
// and `F_` values are replacement callbacks for `String.prototype.replace`.

// whole-string test: a lowercase scheme followed by ':' and then any run of
// permitted IRI characters or \uXXXX / \UXXXXXXXX escape sequences
const RT_ABSOLUTE_IRI_VALID = /^[a-z][a-z0-9+\-.]*:(?:[^\0-\x20<>"{}|^`\\]|\\u[A-Fa-f0-9]{4}|\\U[A-Fa-f0-9]{8})*$/;
// same as above, but with no alternative for escape sequences
const RT_ABSOLUTE_IRI_ESCAPELESS_VALID = /^[a-z][a-z0-9+\-.]*:[^\0-\x20<>"{}|^`]*$/;
// whole-string test with no scheme requirement (used when relative IRIs are allowed)
const RT_NAMED_NODE_VALID = /^([^\0-\x20<>"{}|^`\\]|\\u[A-Fa-f0-9]{4}|\\U[A-Fa-f0-9]{8})*$/;
// same as above, but with no alternative for escape sequences
const RT_NAMED_NODE_ESCAPELESS_VALID = /^([^\0-\x20<>"{}|^`])*$/;
// global match for \uXXXX (capture 1) or \UXXXXXXXX (capture 2)
const R_UNICODE_ANY = /\\u([0-9A-Fa-f]{4})|\\U([0-9A-Fa-f]{8})/g;
// replacement callback for R_UNICODE_ANY: converts whichever hex capture matched
// into the corresponding code point
const F_REPLACE_UNICODE_ANY = (s_, s_4, s_8) => String.fromCodePoint(parseInt(s_4 || s_8, 16));
// sticky: skips whitespace and '#' line comments; has no capture groups
const R_CLEAN = /\s*(?:#[^\n]*\n\s*)*\s*/y;
// sticky: matches the same text as R_CLEAN, but with a capture group around the
// comment portion (a repeated group only retains its final iteration)
const R_CLEAN_COMMENTS = /\s*(#[^\n]*\n\s*)*\s*/y;
// true when the string contains a backslash anywhere
const RT_HAS_ESCAPES = /[\\]/;
// sticky: one or more non-newline characters followed by a newline
const R_EOL = /[^\n]+\n/y;
// whole-string test for a blank node label; uses the `u` flag for \u{...} ranges
// eslint-disable-next-line no-misleading-character-class
const RT_BLANK_NODE_LABEL_VALID = /^(?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u{02ff}\u{0370}-\u{037d}\u{037f}-\u{1fff}\u{200c}-\u{200d}\u{2070}-\u{218f}\u{2c00}-\u{2fef}\u{3001}-\u{d7ff}\u{f900}-\u{fdcf}\u{fdf0}-\u{fffd}\u{10000}-\u{effff}_0-9])(?:(?:[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u{02ff}\u{0370}-\u{037d}\u{037f}-\u{1fff}\u{200c}-\u{200d}\u{2070}-\u{218f}\u{2c00}-\u{2fef}\u{3001}-\u{d7ff}\u{f900}-\u{fdcf}\u{fdf0}-\u{fffd}\u{10000}-\u{effff}_\-0-9\xb7\u{0300}-\u{036f}\u{203f}-\u{2040}.])*[A-Za-z\xc0-\xd6\xd8-\xf6\xf8-\u{02ff}\u{0370}-\u{037d}\u{037f}-\u{1fff}\u{200c}-\u{200d}\u{2070}-\u{218f}\u{2c00}-\u{2fef}\u{3001}-\u{d7ff}\u{f900}-\u{fdcf}\u{fdf0}-\u{fffd}\u{10000}-\u{effff}_\-0-9\xb7\u{0300}-\u{036f}\u{203f}-\u{2040}])?$/u;
// whole-string test for a language tag; lowercase only, so callers must
// lowercase the tag before testing
const RT_LANGUAGE_VALID = /^[a-z]+(-[a-z0-9]+)*$/;
// sticky: any run of whitespace (possibly empty)
const R_WS = /\s*/y;
// sticky: any run of spaces and tabs (possibly empty)
const R_HWS = /[ \t]*/y;
// sticky: '@' + language tag (capture 1), which must be followed by whitespace
// (consumed) or by one of `. , ; ] ) #` (not consumed)
const R_LANGTAG = /@([A-Za-z]+(?:-[A-Za-z0-9-]+)*)(?:\s+|(?=[.,;\])#]))/y;
// sticky: '<' ... '>' with the enclosed text in capture 1, plus trailing whitespace
const R_IRIREF = /<([^>]*)>\s*/y;
/**
 * Replacement callback for the string-literal escape regexes. Exactly one of
 * the capture arguments is expected to be set for any given match.
 *
 * @param {string} s_ - the full match (unused)
 * @param {string} [s_whitespace] - one of `t n r f b` from a control escape
 * @param {string} [s_auto] - a character that escapes to itself (`\`, `"`, `'`)
 * @param {string} [s_4] - four hex digits from a `\u` escape
 * @param {string} [s_8] - eight hex digits from a `\U` escape
 * @param {string} [s_invalid] - an invalid escape sequence or a raw CR/LF
 * @returns {string} the unescaped replacement text
 * @throws {Error} when `s_invalid` is set
 */
const F_REPLACE_STRLIT_CONTENTS = (s_, s_whitespace, s_auto, s_4, s_8, s_invalid) => {
	if(s_whitespace) {
		switch(s_whitespace) {
			case 't': return '\t';
			case 'n': return '\n';
			case 'r': return '\r';
			case 'f': return '\f';
			case 'b': return '\b';
			default: {
				// the first argument must be falsy for the assertion to report;
				// passing only the message (a truthy string) never reports
				console.assert(false, `bad regex escape char mapping: '${s_whitespace}'`);
			}
		}
	}
	else if(s_auto) {
		return s_auto;
	}
	else if(s_4) {
		return String.fromCodePoint(parseInt(s_4, 16));
	}
	else if(s_8) {
		return String.fromCodePoint(parseInt(s_8, 16));
	}
	else if(s_invalid) {
		// pointless escape
		if('\\' === s_invalid[0]) {
			// a relaxed mode could return s_invalid here instead of throwing
			throw new Error(`expected string_literal but invalid escape sequence within contents: '${s_invalid}'. failed to parse a valid token`);
		}
		// bad character
		else {
			throw new Error(`expected string_literal but invalid whitespace character within contents: ${JSON.stringify(s_invalid)}. failed to parse a valid token`);
		}
	}
	else {
		// no capture was set; report rather than pass silently
		console.assert(false, `unexpected no match branch in escape sequence replace callback`);
	}
};
// Global regexes for use with F_REPLACE_STRLIT_CONTENTS. Captures, in order:
// 1 = control escape letter, 2 = self-escaping character, 3 = four hex digits,
// 4 = eight hex digits, 5 = invalid content (a raw CR/LF or an unrecognised escape).

// "hard": any backslash sequence that is not a valid escape lands in capture 5
const R_STRLIT_SHORT_CONTENTS_ESCAPES_HARD = /(?:\\(?:([tnrfb])|([\\"'])|u([0-9A-Fa-f]{4})|U([0-9A-Fa-f]{8}))|([\r\n]|\\.))/g;
// "soft": a \u or \U sequence lands in capture 5 only when followed by the full
// number of characters (4 or 8), so a truncated one at the end is left unmatched
const R_STRLIT_SHORT_CONTENTS_ESCAPES_SOFT = /(?:\\(?:([tnrfb])|([\\"'])|u([0-9A-Fa-f]{4})|U([0-9A-Fa-f]{8}))|([\r\n]|\\[^uU]|\\u[^]{4}|\\U[^]{8}))/g;
/**
 * Unescapes the complete contents of a short string literal.
 *
 * @param {string} s_literal - raw literal contents (without the quotes)
 * @returns {string} the contents with every escape sequence resolved
 * @throws {Error} from the replacement callback on an invalid escape or raw CR/LF
 */
const unescape_literal_short_hard = (s_literal) => {
	return s_literal.replace(R_STRLIT_SHORT_CONTENTS_ESCAPES_HARD, F_REPLACE_STRLIT_CONTENTS);
};
/**
 * Unescapes as much of a partial string literal as is safe, holding back a
 * trailing escape sequence that may be completed by later input.
 *
 * @param {string} s_literal - raw literal contents seen so far
 * @returns {[string, string]} the unescaped safe prefix, followed by the raw
 *   held-back tail (empty string when nothing is held back)
 */
const unescape_literal_short_soft = (s_literal) => {
	// look for an unfinished escape sequence at the very end
	const m_incomplete = R_STRLIT_ESCAPE_INCOMPLETE.exec(s_literal);

	// everything before this index is safe to unescape now
	const i_cut = m_incomplete? m_incomplete.index: s_literal.length;

	const s_unescaped = s_literal.slice(0, i_cut)
		.replace(R_STRLIT_SHORT_CONTENTS_ESCAPES_SOFT, F_REPLACE_STRLIT_CONTENTS);

	return [s_unescaped, s_literal.slice(i_cut)];
};
// Regexes that depend on lookbehind assertions. When the engine cannot compile
// lookbehinds, an equivalent regex with a patched `exec` method is used instead.
const [
	R_STRLIT_ESCAPE_INCOMPLETE,
	R_STRLIT_SHORT_DOUBLE_TERM,
] = (() => {
	// feature-detect lookbehind support once
	let b_native_lookbehind = true;
	try {
		new RegExp('(?<!h)i'); // eslint-disable-line no-new
	}
	catch(e_compile) {
		b_native_lookbehind = false;
	}

	// choose the native regex when supported; otherwise patch and use the fallback
	const select_regex = (f_native, r_fallback, f_fallback_exec) => {
		if(b_native_lookbehind) return f_native();
		r_fallback.exec = f_fallback_exec;
		return r_fallback;
	};

	// fallback exec: shift `index` and trim match[0] so both describe capture 1 only
	function RegExp_$lookbehind_polyfill(s_input) {
		const m_found = RegExp.prototype.exec.call(this, s_input);
		if(m_found) {
			const n_skip = m_found[0].length - m_found[1].length;
			m_found.index += n_skip;
			m_found[0] = m_found[0].slice(n_skip);
		}
		return m_found;
	}

	// fallback exec: shift `index` to the start of capture 1, leaving match[0] as is
	function RegExp_$lookbehind_polyfill_n(s_input) {
		const m_found = RegExp.prototype.exec.call(this, s_input);
		if(m_found) {
			m_found.index += m_found[0].length - m_found[1].length;
		}
		return m_found;
	}

	// a backslash not itself escaped, followed by nothing or by a truncated
	// \u / \U sequence, at the end of the string
	const r_escape_incomplete = select_regex(
		() => new RegExp('(?<!(?:[^\\\\]|^)(?:\\\\\\\\)*\\\\)\\\\(|u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})$'),
		/^(?:(?:[^\\]|\\.)*)(\\(?:|u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7}))$/,
		RegExp_$lookbehind_polyfill_n,
	);

	// an unescaped double quote followed by optional whitespace
	const r_short_double_term = select_regex(
		() => new RegExp('(?<!(?:[^\\\\]|^)(?:\\\\\\\\)*\\\\)"\\s*', 'g'),
		/(?:[^\\"]|\\.)*("\s*)/y,
		RegExp_$lookbehind_polyfill,
	);

	return [
		r_escape_incomplete,
		r_short_double_term,
	];
})();
// Sticky whole-statement regexes. Both share this capture layout:
//    1 = subject IRI            2 = subject blank node label
//    3 = predicate IRI
//    4 = object IRI, including its leading '<'
//    5 = object blank node label
//    6 = graph IRI (after a node object)      7 = graph blank node label
//    8 = trailing comment/newline run (after a node object)
//    9 = literal contents      10 = closing quote (absent when the literal is unterminated)
//   11 = datatype IRI          12 = language tag
//   13 = graph IRI (after a literal)         14 = graph blank node label
//   15 = trailing comment/newline run (after a literal)

// fast path: IRIs and literal contents may not contain a backslash
const R_QUAD_ESCAPELESS_SP = /(?:<([^\\>]*)>|_:([^\x20\t<]+))[\x20\t]*<([^\\>]*)>[\x20\t]*(?:(?:(<[^\\>]*)>|_:([^\x20\t<]+))[\x20\t]*(?:<([^\\>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+|"([^"\\]*)(?:(")(?:\^\^<([^\\>]*)>|@([^\x20\t.]+)|)[\x20\t]*(?:<([^\\>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+)?)/y;
// general path: backslashes are allowed in IRIs and literal contents
const R_QUAD = /(?:<([^>]*)>|_:([^\x20\t<]+))[\x20\t]*<([^>]*)>[\x20\t]*(?:(?:(<[^>]*)>|_:([^\x20\t<]+))[\x20\t]*(?:<([^>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+|"((?:[^"\\]|\\.)*)(?:(")(?:\^\^<([^>]*)>|@([^\x20\t.]+)|)[\x20\t]*(?:<([^>]*)>|_:([^\x20\t<]+)|)[\x20\t]*\.\s*(#[^\n]*\n\s*|\n\s*)+)?)/y;
// sticky: '_:' followed by a label (capture 1) that runs up to a space, tab or '<'
const R_BLANK_NODE = /_:([^\x20\t<]+)/y;
/**
 * Transform stream whose writable side accepts strings and whose readable side
 * emits quad objects. The actual `transform` and `flush` implementations are
 * supplied by the caller.
 */
class NQuads_Reader extends stream.Transform {
	/**
	 * @param {object} g_impls - implementations to install on the stream
	 * @param {Function} g_impls.flush - the stream's `flush` implementation
	 * @param {Function} g_impls.transform - the stream's `transform` implementation
	 */
	constructor(g_impls) {
		const g_options = {
			// do not decode strings into buffers
			decodeStrings: false,

			// writable side takes strings
			writableObjectMode: false,

			// readable side outputs quad objects
			readableObjectMode: true,

			// caller-supplied implementations
			flush: g_impls.flush,
			transform: g_impls.transform,
		};

		super(g_options);

		// whenever a source is piped into this stream's writable side
		this.on('pipe', (ds_source) => {
			// remember the source
			this._ds_input = ds_source;

			// the source supports an encoding option; make it deliver utf8 strings
			if('function' === typeof ds_source.setEncoding) {
				ds_source.setEncoding('utf8');
			}
		});
	}

	/**
	 * Intercepts `pipe` in order to insert an adapter stream when the
	 * destination needs one.
	 *
	 * @param {object} ds_out - destination stream
	 * @returns {object} the value returned by the final `.pipe()` call
	 */
	pipe(ds_out) {
		const ds_dst = ds_out;

		// stream that this reader pipes into directly; the destination by default
		let ds_via = ds_dst;

		// destination is not in object mode; serialize quads to JSON first
		if(!ds_dst._writableState.objectMode) {
			ds_via = stream.quads_to_json();
		}
		// destination is in object mode and is a graphy writable; adapt to it
		else if(ds_dst.isGraphyWritable) {
			ds_via = stream.quads_to_writable();
		}

		// no adapter was created; forward as-is to super
		if(ds_via === ds_dst) {
			return super.pipe(ds_dst);
		}

		// route this reader's output through the adapter
		super.pipe(ds_via);

		// pipe the adapter's output to the destination
		return ds_via.pipe(ds_dst);
	}
}
class Reader {
constructor(g_config) {
let {
// input medium
input: g_input=null,
// relax validation
relax: b_relax=false,
// debug
debug: b_debug=false,
} = g_config;
// allow relative iris flag
let b_allow_relative_iris = g_config.allow_relative_iris || g_config.allowRelativeIRIs || g_config.allowRelativeIris || false;
// adopt factory
let dc_factory = this._dc_factory = factory.adopt(g_config.dataFactory || g_config.data_factory || factory.unfiltered);
let f_quad = this._f_quad = dc_factory.quad;
// fields
Object.assign(this, {
// string buffer, accept left-over string from previous data chunk
s: g_config.prepend || '',
// string buffer length
n: 0,
_b_debug: b_debug,
_b_relax: b_relax,
_b_destroyed: false,
_b_trim_start: true,
_f_state: this.statement,
_kt_subject: null,
_kt_predicate: null,
_kt_object: null,
_s_literal: '',
});
this._kt_default_graph = dc_factory.defaultGraph();
this._kt_rdfs_lang_string = dc_factory.namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#langString');
// clean regex
let r_clean = this._r_clean = R_CLEAN;
if(g_config.relaxed) {
console.warn((new Error(`no such option 'relaxed'; did you mean 'relax' ?`)).stack.replace(/^Error:/, 'Warning:'));
}
if('validate' in g_config) {
console.warn((new Error(`option 'validate' has been removed and validation is now on by default. Use 'relax' option if you wish to disable validation.`)).stack.replace(/^Error:/, 'Warning:'));
}
let namedNode = dc_factory.namedNode;
let blankNode = dc_factory.blankNode;
let languagedLiteral = dc_factory.languagedLiteral;
// test for valid named node
let rt_named_node_valid = b_allow_relative_iris? RT_NAMED_NODE_VALID: RT_ABSOLUTE_IRI_VALID;
// test for valid named node escapeless
let rt_named_node_valid_escapeless = b_allow_relative_iris? RT_NAMED_NODE_ESCAPELESS_VALID: RT_ABSOLUTE_IRI_ESCAPELESS_VALID;
// validation
let k_self = this;
Object.assign(this, !b_relax
? {
create_named_node(p_iri) {
if(!rt_named_node_valid.test(p_iri)) return k_self._error(`invalid IRI: "${p_iri}"`);
return namedNode(p_iri);
},
create_named_node_escapeless(p_iri) {
if(!rt_named_node_valid_escapeless.test(p_iri)) return k_self._error(`invalid IRI: "${p_iri}"`);
return namedNode(p_iri);
},
create_blank_node(s_label) {
if(!RT_BLANK_NODE_LABEL_VALID.test(s_label)) return k_self._error(`Invalid blank node label: "${s_label}"`);
return blankNode(s_label);
},
create_languaged_literal(s_contents, s_language) {
if(!RT_LANGUAGE_VALID.test(s_language)) {
return k_self._error(`Invalid literal language tag: ${s_language}`);
}
return languagedLiteral(s_contents, s_language);
},
}
: {
create_named_node: namedNode,
create_named_node_escapeless: namedNode,
create_blank_node: blankNode,
create_languaged_literal: languagedLiteral,
});
// transform stream
let ds_transform;
// whether or not data has been received before
let b_init = false;
// create transform
ds_transform = this.transform = new NQuads_Reader({
// on data event
transform: (s_chunk, s_encoding, fk_chunk) => {
// first transform
if(!b_init) {
// notify that data will begin
ds_transform.emit('ready');
// do not emit 'ready' event again
b_init = false;
}
// concatenate current chunk to previous chunk
let s = this.s += s_chunk;
// remove whitespace & comments from beginning
if(this._b_trim_start) {
r_clean.lastIndex = 0;
let m_clean = r_clean.exec(s);
if(this.emit_comments) {
this.emit_comments(m_clean[1]);
}
// update index and prepare to match statement
this.i = r_clean.lastIndex;
}
// do not remove whitespace; reset index
else {
this.i = 0;
}
// cache chunk length
this.n = s.length;
// resume parsing
try {
this.parse(true);
}
// read error occurred; emit and destroy stream
catch(e_read) {
return ds_transform.destroy(e_read);
}
// emit progress event updates
ds_transform.emit('progress', s_chunk.length);
// done transforming this chunk
fk_chunk();
},
// once there's no more data to consume, invoke eof
flush: (fk_flush) => {
// there is still unparsed data
if(this.s.length) {
// append newline to end so we can match token
this.s += '\n';
// remove whitespace & comments from beginning
if(this._b_trim_start) {
r_clean.lastIndex = 0;
let m_clean = r_clean.exec(this.s);
if(this.emit_comments) {
this.emit_comments(m_clean[1]);
}
// update index and prepare to match statement
this.i = r_clean.lastIndex;
}
// do not remove whitespace; reset index
else {
this.i = 0;
}
// parse
try {
this.parse();
}
// read error occurred; pass to flush errback and exit method
catch(e_read) {
// destroying during flush means overriding push
return ds_transform.demolish(e_read);
}
// still unparsed characters; pass to flush errback and exit method
if(this.s.length) {
return ds_transform.demolish(new Error(`parsing error occurred in state: statement\n ${this.s.substr(0, 50)}\n ^ starting here`));
}
}
// invalid state
if(this._f_state !== this.statement) {
return ds_transform.demolish(new Error(`parsing error occurred in state: ${this._f_state.name}\n ${this.s.substr(0, 50)}\n ^ starting here`));
}
// make buffer's alloc eligible for gc
this.s = null;
// final progress update: no additional bytes were read
ds_transform.emit('progress', 0);
// call end event listener
ds_transform.emit('eof');
// done flushing, close read stream
fk_flush();
},
});
// destroy
ds_transform._destroy = (...a_args) => {
this.destroy(...a_args);
};
// data quad
this._f_data_quad = (kt_subject, kt_predicate, kt_object, kt_graph) => ds_transform.push(f_quad(kt_subject, kt_predicate, kt_object, kt_graph));
// new listener added
ds_transform.on('newListener', (s_event) => {
// comment
if('comment' === s_event) {
r_clean = R_CLEAN_COMMENTS;
this.emit_comments = (s_captured) => {
if(!s_captured) return;
let a_comments = s_captured.slice(1).replace(/\n\s+$/, '').split(/\n+\s*#/g);
for(let s_comment of a_comments) {
ds_transform.emit('comment', s_comment);
}
};
}
});
// bind events to transform stream
this.bind(g_config);
// input given
if(g_input) {
// input is stream
if(g_input.stream) {
let ds_input = g_input.stream;
// go async so caller has chance to bind event listeners
queueMicrotask(() => {
ds_input.pipe(ds_transform);
});
}
// string
else if('string' === typeof g_input.string) {
let s_input = g_input.string;
// go async so caller has chance to bind event listeners
queueMicrotask(() => {
ds_transform.end(s_input, 'utf8');
});
}
// invalid arg
else {
throw new TypeError(`Invalid argument for input parameter: ${'object' === typeof g_input? JSON.stringify(g_input): g_input}`);
}
}
ds_transform._graphy_reader = this;
}
_error(s_message) {
this._b_destroyed = true;
throw new Error(s_message);
}
bind(g_config) {
let ds_transform = this.transform;
if(g_config.error) ds_transform.on('error', g_config.error);
if(g_config.comment) ds_transform.on('comment', g_config.comment);
if(g_config.read) ds_transform.once('read', g_config.read);
if(g_config.progress) ds_transform.on('progress', g_config.progress);
if(g_config.eof) ds_transform.once('eof', g_config.eof);
if(g_config.end) ds_transform.once('end', g_config.end);
if(g_config.finish) ds_transform.once('finish', g_config.finish);
if(g_config.data) ds_transform.on('data', g_config.data);
}
// begin parsing, keep applying until no more stack bail-outs
parse() {
let f_sync = this._f_state();
while('function' === typeof f_sync) {
f_sync = f_sync.apply(this);
}
}
statement() {
let s = this.s;
let n = this.n;
let i = this.i;
let f_data_quad = this._f_data_quad;
let create_named_node = this.create_named_node;
let create_named_node_escapeless = this.create_named_node_escapeless;
let create_languaged_literal = this.create_languaged_literal;
let create_blank_node = this.create_blank_node;
let simpleLiteral = this._dc_factory.simpleLiteral;
let datatypedLiteral = this._dc_factory.datatypedLiteral;
let kt_default_graph = this._kt_default_graph;
// match triples/quads
for(;;) {
// prepare sticky regex index
R_QUAD_ESCAPELESS_SP.lastIndex = i;
// execute regex
let m_statement_e_sp = R_QUAD_ESCAPELESS_SP.exec(s);
// regex was a match
if(m_statement_e_sp) {
// advance index
i = R_QUAD_ESCAPELESS_SP.lastIndex;
// prep object term
let kt_object;
// where to find the graph component
let b_graph_late = false;
// object term type is named node
if(m_statement_e_sp[4]) {
let p_object = m_statement_e_sp[4].slice(1);
kt_object = create_named_node_escapeless(p_object);
}
// object term type is blank node
else if(m_statement_e_sp[5]) {
kt_object = create_blank_node(m_statement_e_sp[5]);
}
// object term type is literal
else {
// graph is in late capture group
b_graph_late = true;
// contents
let s_contents = m_statement_e_sp[9];
// string terminator
if(m_statement_e_sp[10]) {
// datatype is present
if(m_statement_e_sp[11]) {
// create datatype term
let kt_datatype = this.create_named_node_escapeless(m_statement_e_sp[11]);
// create object term
kt_object = datatypedLiteral(s_contents, kt_datatype);
}
// language tag is present
else if(m_statement_e_sp[12]) {
// normalize language
let s_language = m_statement_e_sp[12].toLowerCase();
// create object term
kt_object = create_languaged_literal(s_contents, s_language);
}
// simple literal
else {
kt_object = simpleLiteral(s_contents);
}
}
// no string terminator
else {
// save contents
this._s_literal = s_contents;
// update index
this.i = i;
// save subject
{
let s_subject = m_statement_e_sp[1];
// named node
if(s_subject || 'string' === typeof s_subject) {
this._kt_subject = create_named_node_escapeless(s_subject);
}
// blank node
else {
this._kt_subject = create_blank_node(m_statement_e_sp[2]);
}
}
// save predicate
this._kt_predicate = create_named_node_escapeless(m_statement_e_sp[3]);
// parse contents
let z_bail = this.strlit_contents();
// bail out of stack
if(z_bail && this.statement !== z_bail) {
return z_bail;
}
// statement completed
else {
// clean
let r_clean = this._r_clean;
r_clean.lastIndex = this.i;
let m_clean = r_clean.exec(s);
if(this.emit_comments) {
this.emit_comments(m_clean[1]);
}
// update local index and prepare to match next statement
i = r_clean.lastIndex;
// resume
continue;
}
}
}
let kt_graph = kt_default_graph;
// graph after literal
if(b_graph_late) {
// ref capture group
let s_graph = m_statement_e_sp[13];
// named node
if(s_graph || 'string' === typeof s_graph) {
kt_graph = create_named_node_escapeless(s_graph);
}
// blank node
else if(m_statement_e_sp[14]) {
kt_graph = create_blank_node(m_statement_e_sp[14]);
}
}
// graph after node
else {
// ref capture group
let s_graph = m_statement_e_sp[6];
// named node
if(s_graph || 'string' === typeof s_graph) {
kt_graph = create_named_node_escapeless(s_graph);
}
// blank node
else if(m_statement_e_sp[7]) {
kt_graph = create_blank_node(m_statement_e_sp[7]);
}
}
let kt_subject;
{
let s_subject = m_statement_e_sp[1];
// named node
if(s_subject || 'string' === typeof s_subject) {
kt_subject = create_named_node_escapeless(s_subject);
}
// blank node
else {
kt_subject = create_blank_node(m_statement_e_sp[2]);
}
}
let s_predicate = m_statement_e_sp[3];
// emit data event
f_data_quad(
kt_subject,
create_named_node_escapeless(s_predicate),
kt_object,
kt_graph,
);
// comments
if(this.emit_comments) {
this.emit_comments(m_statement_e_sp[8] || m_statement_e_sp[15]);
}
}
else {
// prepare sticky regex index
R_QUAD.lastIndex = i;
// execute regex
let m_statement = R_QUAD.exec(s);
// regex was a match
if(m_statement) {
// advance index
i = R_QUAD.lastIndex;
// prep object term
let kt_object;
// where to find the graph component
let b_graph_late = false;
// object term type is named node
if(m_statement[4]) {
let p_object = m_statement[4].slice(1);
kt_object = create_named_node(RT_HAS_ESCAPES.test(p_object)? p_object.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): p_object);
}
// object term type is blank node
else if(m_statement[5]) {
kt_object = create_blank_node(RT_HAS_ESCAPES.test(m_statement[5])? m_statement[5].replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): m_statement[5]);
}
// object term type is literal
else {
// graph is in late capture group
b_graph_late = true;
// contents
let s_contents = m_statement[9];
// string terminator
if(m_statement[10]) {
// unescape contents
s_contents = unescape_literal_short_hard(s_contents);
// datatype is present
if(m_statement[11]) {
// create datatype term
let kt_datatype = this.create_named_node(m_statement[11]);
// create object term
kt_object = datatypedLiteral(s_contents, kt_datatype);
}
// language tag is present
else if(m_statement[12]) {
// normalize language
let s_language = m_statement[12].toLowerCase();
// create object term
kt_object = create_languaged_literal(s_contents, s_language);
}
// simple literal
else {
kt_object = simpleLiteral(s_contents);
}
}
// no string terminator
else {
// save contents
this._s_literal = s_contents;
// update index
this.i = i;
// save subject
{
let s_subject = m_statement[1];
// named node
if(s_subject || 'string' === typeof s_subject) {
this._kt_subject = create_named_node(RT_HAS_ESCAPES.test(s_subject)? s_subject.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_subject);
}
// blank node
else {
this._kt_subject = create_blank_node(m_statement[2]);
}
}
// save predicate
this._kt_predicate = create_named_node(RT_HAS_ESCAPES.test(m_statement[3])? m_statement[3].replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): m_statement[3]);
// parse contents
let z_bail = this.strlit_contents();
// bail out of stack
if(z_bail && this.statement !== z_bail) {
return z_bail;
}
// statement completed
else {
// clean
let r_clean = this._r_clean;
r_clean.lastIndex = this.i;
let m_clean = r_clean.exec(s);
if(this.emit_comments) {
this.emit_comments(m_clean[1]);
}
// update local index and prepare to match next statement
i = r_clean.lastIndex;
// resume
continue;
}
}
}
let kt_graph = kt_default_graph;
// graph after literal
if(b_graph_late) {
// ref capture group
let s_graph = m_statement[13];
// named node
if(s_graph || 'string' === typeof s_graph) {
kt_graph = create_named_node(RT_HAS_ESCAPES.test(s_graph)? s_graph.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_graph);
}
// blank node
else if(m_statement[14]) {
kt_graph = create_blank_node(m_statement[14]);
}
}
// graph after node
else {
// ref capture group
let s_graph = m_statement[6];
// named node
if(s_graph || 'string' === typeof s_graph) {
kt_graph = create_named_node(RT_HAS_ESCAPES.test(s_graph)? s_graph.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_graph);
}
// blank node
else if(m_statement[7]) {
kt_graph = create_blank_node(m_statement[7]);
}
}
let kt_subject;
{
let s_subject = m_statement[1];
// named node
if(s_subject || 'string' === typeof s_subject) {
kt_subject = create_named_node(RT_HAS_ESCAPES.test(s_subject)? s_subject.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_subject);
}
// blank node
else {
kt_subject = create_blank_node(m_statement[2]);
}
}
let s_predicate = m_statement[3];
// emit data event
f_data_quad(
kt_subject,
create_named_node(RT_HAS_ESCAPES.test(s_predicate)? s_predicate.replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY): s_predicate),
kt_object,
kt_graph,
);
// comments
if(this.emit_comments) {
this.emit_comments(m_statement[8] || m_statement[15]);
}
}
else {
// prepare sticky regex index
R_EOL.lastIndex = i;
if(R_EOL.exec(s)) {
// advance index
i = R_EOL.lastIndex;
this._error(`Failed to read statement:\n\`${s.substr(i, 80).replace(/\n/g, '\u23CE')} [...]\`\n ^ starting here`);
// match counter: 2
}
else {
// break loop to retry on next chunk if eos
break;
}
} // brace #2
} // brace #1
} // end of while
// update unparsed data string
this.s = s.substr(i);
// resume here
this._f_state = this.statement;
// exit
return 1;
}
strlit_contents() {
let {s, n, i} = this;
// try to find end
R_STRLIT_SHORT_DOUBLE_TERM.lastIndex = i;
let m_term = R_STRLIT_SHORT_DOUBLE_TERM.exec(s);
// end is in this chunk
if(m_term) {
// index of terminator
let i_term = m_term.index;
// extract dirty potion
let s_dirty = s.slice(i, i_term);
// clean and save
this._s_literal += unescape_literal_short_hard(s_dirty);
// advance index beyond terminator
this.i = i_term + m_term[0].length;
// resume eating whitespace at start of next chunk
this._b_trim_start = true;
// proceed with datatype_or_lang, then bail out of stack or resume parsing
return this.datatype_or_langtag() || this.statement;
}
// end is not in this chunk
else {
// extract whole portion
let s_dirty = s.slice(i);
// unescape to clean part
let [s_clean, s_incomplete] = unescape_literal_short_soft(s_dirty);
// save
this._s_literal += s_clean;
// set unparsed index
this.i = i = n - s_incomplete.length;
// do not eat whitespace at start of next chunk
this._b_trim_start = false;
}
// not yet eos
if(i < this.n) {
// expected token was not found
if(0 === i) {
// we've exceeded the maximum token length
if(this.n > this.max_token_length) {
return this.parse_error('strlit_contents');
}
}
}
// resume here
this._f_state = this.strlit_contents;
// store what is unparsed
this.s = s.slice(i);
// if we're not parsing a stream, then this is an error
if(this.eos) this.eos();
return 1;
}
// parse state for datatype_or_langtag
datatype_or_langtag() {
// destruct chunk, length, and index
let {s, n, i} = this;
// ref character
let x = s[i];
while(i < n) { // eslint-disable-line no-unmodified-loop-condition
// datatype
if('^' === x) {
// enough to speculate datatype
if((i+2) < n) {
// correct token
if('^' === s[i+1]) {
// advance index beyond token
R_IRIREF.lastIndex = i + 2;
// execute regex
let m_iriref = R_IRIREF.exec(s);
// regex was a match
if(m_iriref) {
// advance index
this.i = R_IRIREF.lastIndex;
// prepare iri
let p_datatype = m_iriref[1].replace(R_UNICODE_ANY, F_REPLACE_UNICODE_ANY);
// create datatype term
let kt_datatype = this.create_named_node(p_datatype);
// create object term
this._kt_object = this._dc_factory.datatypedLiteral(this._s_literal, kt_datatype);
// free literal string
this._s_literal = '';
// graph state
return this.post_object();
}
// failed to match; try again next chunk
else {
break;
}
}
// invalid
else {
this._error(`Failed to read token after literal:\n\`${s.substr(i+1, 80).replace(/\n/g, '\u23CE')} [...]\`\n ^ starting here`);
}
}
// not enough to speculate; try again next chunk
else {
break;
}
}
// language tag
else if('@' === x) {
// prepare sticky regex index
R_LANGTAG.lastIndex = i;
// execute regex
let m_langtag = R_LANGTAG.exec(s);
// regex was a match
if(m_langtag) {
// advance index
this.i = R_LANGTAG.lastIndex;
// use direct factory method since regex is validation
this._kt_object = this._dc_factory.languagedLiteral(this._s_literal, m_langtag[1]);
// free literal string
this._s_literal = '';
// graph state
return this.post_object();
}
// interrupted by eos; try again next chunk
else {
break;
}
}
// graph component
else if('<' === x || '_' === x) {
// save simple literal
this._kt_object = this._dc_factory.simpleLiteral(this._s_literal);
// free literal string
this._s_literal = '';
// continue parsing graph component
return this.graph();
}
// triple terminator
else if('.' === x) {
// save simple literal
let kt_object = this._dc_factory.simpleLiteral(this._s_literal);
// free literal string
this._s_literal = '';
// advance index beyond terminator
this.i = i + 1;
// emit data event
this._f_data_quad(this._kt_subject, this._kt_predicate, kt_object, this._kt_default_graph);
// reset state
return this.statement;
// // consume whitespace (and incidentally reset index)
// R_WS.lastIndex = i + 1;
// R_WS.exec(s);
// this.i = R_WS.lastIndex;
// // done
// return;
}
// other
else {
break;
}
}
// ran out of characters
// update index value
this.i = i;
// not yet eos
if(i < this.n) {
// expected token was not found
if(0 === i) {
// we've exceeded the maximum token length
if(this.n > this.max_token_length) {
return this.parse_error('datatype_or_langtag');
}
}
}
// resume here
this._f_state = this.datatype_or_langtag;
// store what is unparsed
this.s = s.slice(i);
// if we're not parsing a stream, then this is an error
if(this.eos) this.eos();
return 1;
}
statement_term() {
let {s, n, i} = this;
// find full stop
let i_stop = s.indexOf('.', i);
// found
if(i_stop > -1) {
// consume whitespace again
this._b_trim_start = true;
// advance beyond token
this.i = i_stop + 1;
// reset state
return this.statement;
}
// anything other than whitespace
else if(!/^\s*$/.test(s.slice(i))) {
this.parse_error('statement_term');
}
// do not consume whitespace
this._b_trim_start = false;
// resume here
this._f_state = this.statement_term;
// store what is unparsed
this.s = s.slice(i);
// if we're not parsing a stream, then this is an error
if(this.eos) this.eos();
return 1;
}
post_object() {
let {s, n, i} = this;
// eat horizontal whitespace
R_HWS.lastIndex = i;
R_HWS.exec(s);
i = R_HWS.lastIndex;
// ran out of characters
if(i >= n) {
// resume here
this._f_state = this.post_object;
// store what is unparsed
this.s = s.slice(i);
// if we're not parsing a stream, then this is an error
if(this.eos) this.eos();
return 1;
}
// depending on char
switch(s[i]) {
// statement term
case '.': {
// advance index beyond terminator
this.i = i + 1;
// emit data event
this._f_data_quad(this._kt_subject, this._kt_predicate, this._kt_object, this._kt_default_graph);
// reset state
return this.statement;
}
// graph
case '<':
case '_': {
// save index
this.i = i;
// consume graph component
return this.graph();
}
// invalid
default: {
// save index
this.i = i;
// emit parsing error
this.parse_error('post_object');
}
}
}
graph() {
let {s, n, i} = this;
// prepare sticky regex index
R_IRIREF.lastIndex = i;
// execute regex
let m_iriref = R_IRIREF.exec(s);
// regex was a match
if(m_iriref) {
// advance index
this.i = R_IRIREF.lastIndex;
// create graph term
let kt_graph = this.create_named_node(m_iriref[1]);
// emit data event
this._f_data_quad(this._kt_subject, this._kt_predicate, this._kt_object, kt_graph);
// complete with statement_term
return this.statement_term();
}
else {
// prepare sticky regex index
R_BLANK_NODE.lastIndex = i;
// execute regex
let m_blank = R_BLANK_NODE.exec(s);
// regex was a match
if(m_blank) {
// advance index
this.i = R_BLANK_NODE.lastIndex;
// create graph term
let kt_graph = this._dc_factory.blankNode(m_blank[1]);
// emit data event
this._f_data_quad(this._kt_subject, this._kt_predicate, this._kt_object, kt_graph);
// complete with statement_term
return this.statement_term();
}
} // brace #1
// resume here
this._f_state = this.graph;
// store what is unparsed
this.s = s.slice(i);
// if we're not parsing a stream, then this is an error
if(this.eos) this.eos();
return 1;
}
parse_error(s_state) {
return this._error(`Failed to read ${s_state}:\n\`${this.s.substr(this.i, 80).replace(/\n/g, '\u23CE')} [...]\`\n ^ starting here`);
}
destroy(e_destroy) {
this._f_data_quad = () => {};
if(!e_destroy && this._ds_input) {
this._ds_input.destroy(e_destroy);
}
this.transform.demolish(e_destroy);
}
}
module.exports = function(...a_args) {
let g_config = {};
// at least one argument
if(a_args.length) {
let z_arg_0 = a_args[0];
// input given unspecified
if(z_arg_0 && z_arg_0.input && 'undefined' === typeof z_arg_0.input.string && !z_arg_0.input.stream) {
z_arg_0 = z_arg_0.input;
}
// string
if('string' === typeof z_arg_0) {
g_config.input = {string:z_arg_0};
}
// null
else if(null === z_arg_0) {
g_config.input = null;
}
// node stream
else if('function' === typeof z_arg_0.setEncoding) {
g_config.input = {stream:z_arg_0};
}
// whatwg stream
else if('function' === typeof z_arg_0.pipeTo) {
throw new TypeError(`Sorry, WHATWG streams are currently not supported :(`);
// g_config.input = {stream:z_arg_0};
}
// config struct
else if(z_arg_0 && 'object' === typeof z_arg_0 && '[object Object]' === Object.prototype.toString.call(z_arg_0)) {
g_config = z_arg_0;
// more args; invalid
if(a_args.length > 1) {
throw new TypeError(`unexpected argument(s) after config struct: ${a_args.slice(1)}`);
}
}
// unknown
else {
throw new TypeError(`unexpected input type: ${z_arg_0}`);
}
// more args
if(a_args.length > 1) {
// copy onto struct
Object.assign(g_config, a_args[1]);
// more args
if(a_args.length > 2) {
throw new TypeError(`unexpected argument(s) after input and config struct: ${a_args.slice(2)}`);
}
}
}
// create reader, return transform stream
return (new Reader(g_config)).transform;
};