UNPKG

openrosa-xpath-evaluator

Version:

Wrapper for browsers' XPath evaluator with added support for OpenRosa extensions.

770 lines (731 loc) 28.8 kB
const { BlankDate, getTimezoneOffsetAsTime } = require('./date-extensions');
const { asGeopoints, area, distance } = require('./geo');
const digest = require('./digest');
const { randomToken } = require('./random-token');
const {
    DATE_STRING,
    dateStringToDays,
    dateToDays,
    isValidDate,
} = require('./utils/date');
const shuffle = require('./utils/shuffle');
const { asBoolean, asNumber, asString } = require('./utils/xpath-cast');
const sortByDocumentOrder = require('./utils/sort-by-document-order');
const XPR = require('./xpr');

// Matches an optionally-negative integer or decimal literal.
const RAW_NUMBER = /^-?[0-9]+(\.[0-9]+)?$/;

// Operator constants copied from extended-xpath.js
const EQ = 0b01000;
const GTE = 0b01111;
const PLUS = 0b10000;
const MINUS = 0b10001;

/**
 * Builds the OpenRosa extension set.
 *
 * @returns {{func: object, process: object, XPR: object, _now: function}}
 *   `func` maps XPath function names to their implementations, `process`
 *   holds the `toExternalResult`, `typefor` and `handleInfix` hooks, `XPR` is
 *   the result-wrapper module, and `_now` returns today's date at local
 *   midnight.
 */
const openrosaXPathExtensions = function () {
    const TOO_MANY_ARGS = new Error('too many args');
    const TOO_FEW_ARGS = new Error('too few args');

    /**
     * Rounds to the nearest integer, rounding halves away from zero for
     * negative input (Math.round alone rounds -2.5 to -2).
     */
    const _round = function (num) {
        if (num < 0) {
            return -Math.round(-num);
        }
        return Math.round(num);
    };

    /**
     * Formats a date according to a %-escaped format string.
     *
     * @param {object} date - value convertible by asDate()
     * @param {object} format - value convertible by asString()
     * @returns {string} formatted date, or '' when the date's string form is
     *   empty or the date is not a valid number
     * @throws {Error} when the format ends with a bare '%', or contains an
     *   unsupported (%Z, %A, %B) or unrecognized escape
     */
    const formatDate = function (date, format) {
        date = asDate(date);
        format = asString(format);
        if (date.toString() === '' || Number.isNaN(Number(date))) return '';
        let c;
        let i;
        let sb = '';
        // getFullYear() is the non-deprecated equivalent of 1900 + getYear().
        const year = date.getFullYear();
        const month = 1 + date.getMonth();
        const day = date.getDate();
        const hour = date.getHours();
        const locale = globalThis.window?.enketoFormLocale;
        for (i = 0; i < format.length; ++i) {
            c = format.charAt(i);
            if (c === '%') {
                if (++i >= format.length) {
                    throw new Error('date format string ends with %');
                }
                c = format.charAt(i);
                if (c === '%') {
                    // literal '%'
                    sb += '%';
                } else if (c === 'Y') {
                    // 4-digit year
                    sb += _zeroPad(year, 4);
                } else if (c === 'y') {
                    // 2-digit year
                    sb += _zeroPad(year, 4).substring(2);
                } else if (c === 'm') {
                    // 0-padded month
                    sb += _zeroPad(month, 2);
                } else if (c === 'n') {
                    // numeric month
                    sb += month;
                } else if (c === 'b') {
                    // short text month
                    sb += date.toLocaleDateString(locale, { month: 'short' });
                } else if (c === 'd') {
                    // 0-padded day of month
                    sb += _zeroPad(day, 2);
                } else if (c === 'e') {
                    // day of month
                    sb += day;
                } else if (c === 'H') {
                    // 0-padded hour (24-hr time)
                    sb += _zeroPad(hour, 2);
                } else if (c === 'h') {
                    // hour (24-hr time)
                    sb += hour;
                } else if (c === 'M') {
                    // 0-padded minute
                    sb += _zeroPad(date.getMinutes(), 2);
                } else if (c === 'S') {
                    // 0-padded second
                    sb += _zeroPad(date.getSeconds(), 2);
                } else if (c === '3') {
                    // 0-padded millisecond ticks (000-999)
                    sb += _zeroPad(date.getMilliseconds(), 3);
                } else if (c === 'a') {
                    // Three letter short text day
                    sb += date.toLocaleDateString(locale, {
                        weekday: 'short',
                    });
                } else if (c === 'Z' || c === 'A' || c === 'B') {
                    throw new Error(
                        `unsupported escape in date format string [%${c}]`
                    );
                } else {
                    throw new Error(
                        `unrecognized escape in date format string [%${c}]`
                    );
                }
            } else {
                sb += c;
            }
        }
        return sb;
    };

    const ret = {};

    // XPath function implementations, keyed by the XPath function name.
    const func = {
        abs(r) {
            return XPR.number(Math.abs(asNumber(r)));
        },
        acos(r) {
            return XPR.number(Math.acos(asNumber(r)));
        },
        asin(r) {
            return XPR.number(Math.asin(asNumber(r)));
        },
        atan(r) {
            return XPR.number(Math.atan(asNumber(r)));
        },
        atan2(r, ...rest) {
            if (rest.length > 0) {
                const y = asNumber(r);
                const x = asNumber(rest[0]);
                return XPR.number(Math.atan2(y, x));
            }
            return XPR.number(Math.atan2(asNumber(r)));
        },
        boolean(r) {
            if (arguments.length === 0) throw new Error('too few args');
            if (arguments.length > 1) throw new Error('too many args');
            return XPR.boolean(asBoolean(r));
        },
        'boolean-from-string': function (r) {
            if (r.t === 'num' && r.v > 0 && !r.decimal) {
                return XPR.boolean(true);
            }
            r = asString(r);
            return XPR.boolean(r === '1' || r === 'true');
        },
        area(r) {
            if (arguments.length === 0) throw TOO_FEW_ARGS;
            return XPR.number(area(asGeopoints(r)));
        },
        checklist(min, max, ...list) {
            min = asNumber(min);
            max = asNumber(max);
            const trues = mapFn(asBoolean, ...list).reduce(
                (acc, v) => (v ? acc + 1 : acc),
                0
            );
            // A negative bound disables that side of the range check.
            return XPR.boolean(
                (min < 0 || trues >= min) && (max < 0 || trues <= max)
            );
        },
        coalesce(a, b) {
            return XPR.string(asString(a) || asString(b));
        },
        concat(...args) {
            return XPR.string(mapFn(asString, ...args).join(''));
        },
        cos(r) {
            return XPR.number(Math.cos(asNumber(r)));
        },
        count(selecter) {
            // count() is part of XPath 1.0, but Chrome and Firefox disagree on how it should work.
            if (arguments.length === 0) throw new Error('too few args');
            if (arguments.length > 1) throw new Error('too many args');
            if (selecter.t !== 'arr')
                throw new Error(`Unexpected arg type: '${selecter.t}'`);
            return XPR.number(selecter.v.length);
        },
        'count-non-empty': function (r) {
            if (arguments.length === 0) throw new Error('too few args');
            if (arguments.length > 1) throw new Error('too many args');
            if (r.t !== 'arr')
                throw new Error(`wrong arg type:${JSON.stringify(r)}`);
            return XPR.number(
                mapFn(asString, r).reduce((acc, v) => (v ? acc + 1 : acc), 0)
            );
        },
        'count-selected': function (s) {
            // Counts the non-empty space-separated tokens.
            const parts = asString(s).split(' ');
            let i = parts.length;
            let count = 0;
            while (--i >= 0) if (parts[i].length) ++count;
            return XPR.number(count);
        },
        date(it) {
            return XPR.date(asDate(it));
        },
        'decimal-date-time': function (r) {
            if (arguments.length > 1) throw TOO_MANY_ARGS;

            const days =
                r.t === 'num' ? asNumber(r) : dateStringToDays(asString(r));

            return XPR.number(days);
        },
        'decimal-time': function (r) {
            if (arguments.length > 1) throw TOO_MANY_ARGS;
            if (r.t === 'num') return XPR.number(NaN);
            const time = asString(r);
            // There is no Time type, and so far we don't need it so we do all validation
            // and conversion here, manually.
            const m = time.match(
                /^(\d\d):(\d\d):(\d\d)(\.\d\d?\d?)?(\+|-)(\d\d):(\d\d)$/
            );
            let dec;
            if (
                m &&
                m[1] < 24 &&
                m[1] >= 0 &&
                m[2] < 60 &&
                m[2] >= 0 &&
                m[3] < 60 &&
                m[3] >= 0 &&
                m[6] < 24 &&
                m[6] >= 0 && // this could be tighter
                m[7] < 60 &&
                m[7] >= 0 // this is probably either 0 or 30
            ) {
                const today = new Date(); // use today to cater to daylight savings time.
                const d = new Date(
                    `${today.getFullYear()}-${_zeroPad(
                        today.getMonth() + 1
                    )}-${_zeroPad(today.getDate())}T${time}`
                );
                if (d.toString() === 'Invalid Date') {
                    dec = NaN;
                } else {
                    // Fraction of a day elapsed, in local time.
                    dec =
                        (d.getSeconds() / 3600 +
                            d.getMinutes() / 60 +
                            d.getHours()) /
                        24;
                }
            } else {
                dec = NaN;
            }
            return XPR.number(dec);
        },
        digest(msg, algo, encoding) {
            return XPR.string(digest(msg, algo, encoding));
        },
        distance(...r) {
            if (arguments.length === 0) throw TOO_FEW_ARGS;
            return XPR.number(distance(asGeopoints(...r)));
        },
        exp(r) {
            return XPR.number(Math.exp(asNumber(r)));
        },
        exp10(r) {
            return XPR.number(10 ** asNumber(r));
        },
        false() {
            if (arguments.length) throw TOO_MANY_ARGS;
            return XPR.boolean(false);
        },
        'format-date': function (date, format) {
            if (arguments.length < 2)
                throw new Error('format-date() :: not enough args');
            return XPR.string(formatDate(date, format));
        },
        if(con, a, b) {
            return asBoolean(con) ? a : b;
        },
        'ends-with': function (a, b) {
            if (arguments.length > 2) throw TOO_MANY_ARGS;
            if (arguments.length < 2) throw TOO_FEW_ARGS;
            return XPR.boolean(asString(a).endsWith(asString(b)));
        },
        int(v) {
            return XPR.number(asInteger(v));
        },
        join(delim, ...args) {
            return XPR.string(mapFn(asString, ...args).join(asString(delim)));
        },
        last() {
            if (arguments.length)
                throw new Error(`last() does not take arguments`);
            return XPR.number(this.contextSize);
        },
        'local-name': function (r) {
            // This is actually supported natively, but currently it's simpler to implement
            // ourselves than convert the supplied nodeset into a single node and pass this
            // somehow to the native implementation.
            //
            // See: https://www.w3.org/TR/1999/REC-xpath-19991116/#function-local-name
            const name = getNodeName(this, r);
            // Strip an optional "prefix:" from the node name.
            return XPR.string(name.match(/^(?:[^:]*:)?(.*)/)[1]);
        },
        name(r) {
            // This is actually supported natively, but currently it's simpler to implement
            // ourselves than convert the supplied nodeset into a single node and pass this
            // somehow to the native implementation.
            //
            // See: https://www.w3.org/TR/1999/REC-xpath-19991116/#function-name
            return XPR.string(getNodeName(this, r));
        },
        log(r) {
            return XPR.number(Math.log(asNumber(r)));
        },
        log10(r) {
            return XPR.number(Math.log10(asNumber(r)));
        },
        max(...args) {
            const nums = mapFn(asNumber, ...args);
            if (!nums.length || nums.some((v) => Number.isNaN(Number(v))))
                return XPR.number(NaN);
            return XPR.number(Math.max(...nums));
        },
        min(...args) {
            const nums = mapFn(asNumber, ...args);
            if (!nums.length || nums.some((v) => Number.isNaN(Number(v))))
                return XPR.number(NaN);
            return XPR.number(Math.min(...nums));
        },
        'namespace-uri': function (r) {
            // This is actually supported natively, but currently it's simpler to implement
            // ourselves than convert the supplied nodeset into a single node and pass this
            // somehow to the native implementation.
            //
            // See: https://www.w3.org/TR/1999/REC-xpath-19991116/#function-namespace-uri
            const node = getNode(this, r);
            return XPR.string((node && node.namespaceURI) || '');
        },
        'normalize-space': function (r) {
            // TODO this seems to do a lot more than the spec at https://www.w3.org/TR/1999/REC-xpath-19991116/#function-normalize-space
            // I think we should just be able to return: XPR.string(asString(r || this.cN).replace(/[\t\r\n ]+/g, ' ').trim());
            if (arguments.length > 1) throw new Error('too many args');

            let res = asString(r || this.cN);

            // Temporarily escape form-feed and vertical-tab so the \s
            // replacements below leave them in place, then restore them.
            res = res.replace(/\f/g, '\\f');
            res = res.replace(/\r\v/g, '\v');
            res = res.replace(/\v/g, '\\v');
            res = res.replace(/\s+/g, ' ');
            res = res.replace(/^\s+|\s+$/g, '');
            res = res.replace(/\\v/g, '\v');
            res = res.replace(/\\f/g, '\f');

            return XPR.string(res);
        },
        /*
         * As per https://github.com/alxndrsn/openrosa-xpath-evaluator/issues/15,
         * the pass-through to the wrapped implementation always requests
         * XPathResult.STRING_TYPE.  This seems to cause an issue with the response
         * from `not()` calls, which should ideally be handled by the built-in
         * XPath implementation.  The following method is supplied as a workaround,
         * and ideally would be unnecessary.
         */
        not(r) {
            if (arguments.length === 0) throw TOO_FEW_ARGS;
            if (arguments.length > 1) throw TOO_MANY_ARGS;
            return XPR.boolean(!asBoolean(r));
        },
        now() {
            return XPR.date(new Date());
        },
        number(r) {
            if (arguments.length > 1)
                throw new Error(
                    `number() passed wrong arg count (expected 0 or 1, but got ${arguments.length})`
                );
            const arg = arguments.length ? r : this.cN;
            const str = asString(arg);
            if (DATE_STRING.test(str)) {
                // TODO cleanup regex and splitting
                return XPR.number(dateStringToDays(str));
            }
            return XPR.number(asNumber(arg));
        },
        today() {
            return XPR.date(ret._now());
        },
        /**
         * The once function returns the value of the parameter if its own value
         * is not empty, NaN, [Infinity or -Infinity]. The naming is therefore misleading!
         * Also note that the parameter expr is always evaluated.
         * This function simply decides whether to return the new result or the old value.
         */
        once(r) {
            const current = asString(this.cN);
            return XPR.string(current || asString(r));
        },
        pi() {
            return XPR.number(Math.PI);
        },
        position(r) {
            // N.B.: I suspect there is a bug here - this will return position within the parent node, rather than the evaluation context.
            // I suspect this is contrary to the spec, which reads:
            // > The position function returns a number equal to the context position from the expression evaluation context.
            //   - https://www.w3.org/TR/1999/REC-xpath-19991116/#function-position
            // I'd have thought e.g. a union of all first-children in a doc would not all have position()=1 within that nodeset
            if (arguments.length > 1) throw new Error('too many args');
            if (r && r.t !== 'arr')
                throw new Error(
                    `wrong arg type for position() - expected nodeset, but got: ${r.t}`
                );
            if (r && !r.v.length)
                throw new Error('cannot call position() on an empty nodeset');

            if (!r) return XPR.number(this.contextPosition);

            // Count the run of immediately preceding siblings sharing the
            // first node's tag name.
            let position = 1;
            let node = r.v[0];
            const nodeName = node.tagName;
            while (
                node.previousElementSibling &&
                node.previousElementSibling.tagName === nodeName
            ) {
                node = node.previousElementSibling;
                position++;
            }

            return XPR.number(position);
        },
        pow(x, y) {
            return XPR.number(asNumber(x) ** asNumber(y));
        },
        random() {
            return XPR.number(parseFloat(Math.random().toFixed(15)));
        },
        randomize(r, seed) {
            if (!arguments.length) throw TOO_FEW_ARGS; // only rT passed
            if (arguments.length > 2) throw TOO_MANY_ARGS;
            if (!r || r.t !== 'arr')
                throw new Error('randomize() must be called on a nodeset');

            seed = seed && asNumber(seed);

            return { t: 'arr', v: shuffle(r.v, seed) };
        },
        regex(haystack, pattern) {
            return XPR.boolean(
                new RegExp(asString(pattern)).test(asString(haystack))
            );
        },
        round(number, numDigits) {
            if (arguments.length === 0) throw TOO_FEW_ARGS;
            if (arguments.length > 2) throw TOO_MANY_ARGS;
            number = asNumber(number);
            if (!numDigits) {
                return XPR.number(_round(number));
            }
            numDigits = asInteger(numDigits);
            const pow = 10 ** Math.abs(numDigits);
            if (numDigits > 0) {
                return XPR.number(_round(number * pow) / pow);
            }
            // Zero or negative digits: round to a multiple of 10^|numDigits|.
            return XPR.number(pow * _round(number / pow));
        },
        selected(haystack, needle) {
            return XPR.boolean(
                asString(haystack)
                    .split(' ')
                    .indexOf(asString(needle).trim()) !== -1
            );
        },
        'selected-at': function (list, index) {
            if (!index)
                throw new Error(
                    `No index provided for selected-at() [index=${index}; list=${JSON.stringify(
                        list
                    )}`
                );
            return XPR.string(
                asString(list).split(' ')[asInteger(index)] || ''
            );
        },
        sin(r) {
            return XPR.number(Math.sin(asNumber(r)));
        },
        sqrt(r) {
            return XPR.number(Math.sqrt(asNumber(r)));
        },
        string(r) {
            if (arguments.length > 1)
                throw new Error(
                    `string() passed wrong arg count (expected 0 or 1, but got ${arguments.length})`
                );
            return XPR.string(asString(r || this.cN));
        },
        // TODO this is not an extension - should be a "native" function
        'string-length': function (r) {
            if (arguments.length > 1) throw new Error('too many args');
            const str = asString(r || this.cN);
            // implemented as per https://www.w3.org/TR/1999/REC-xpath-19991116/#function-string-length, rather than the restricted ODK implementation
            return XPR.number(str.length);
        },
        substr(s, startIndex, endIndex) {
            return XPR.string(
                asString(s).slice(
                    asNumber(startIndex),
                    endIndex && asNumber(endIndex)
                )
            );
        },
        sum(r) {
            if (!r || r.t !== 'arr')
                throw new Error('sum() must be called on a nodeset');
            let sum = 0;
            let i = r.v.length;
            while (i--) sum += asNumber(r.v[i]);
            return XPR.number(sum);
        },
        tan(r) {
            return XPR.number(Math.tan(asNumber(r)));
        },
        true() {
            if (arguments.length) throw TOO_MANY_ARGS;
            return XPR.boolean(true);
        },
        uuid(r) {
            // With an argument, produce a random token of that length instead.
            if (r) return XPR.string(randomToken(asNumber(r)));
            return XPR.string(uuid());
        },
        'weighted-checklist': function (min, max, ...list) {
            min = asNumber(min);
            max = asNumber(max);
            let values = [];
            let weights = [];
            let weightedTrues = 0;
            // `list` is consumed as consecutive (value, weight) pairs.
            for (let i = 0; i < list.length; i += 2) {
                const v = list[i];
                const w = list[i + 1];
                if (v && w) {
                    // value or weight might be a nodeset
                    values = values.concat(mapFn(asBoolean, v));
                    weights = weights.concat(mapFn(asNumber, w));
                }
            }
            for (let i = 0; i < values.length; i++) {
                if (values[i]) {
                    weightedTrues += weights[i] || 0;
                }
            }
            return XPR.boolean(
                (min < 0 || weightedTrues >= min) &&
                    (max < 0 || weightedTrues <= max)
            );
        },
    };

    // function aliases
    func['date-time'] = func.date;
    func['format-date-time'] = func['format-date'];

    const process = {
        /**
         * Converts date results (and date-like nodesets requested as numbers)
         * into an external result object. Returns undefined for anything else.
         */
        toExternalResult(r, resultType) {
            if (r.t === 'arr' && resultType === XPathResult.NUMBER_TYPE) {
                const str = asString(r);
                if (DATE_STRING.test(str)) {
                    return { resultType, numberValue: dateStringToDays(str) };
                }
            }
            if (r.t === 'date') {
                switch (resultType) {
                    case XPathResult.BOOLEAN_TYPE:
                        return {
                            resultType,
                            booleanValue: !Number.isNaN(Number(r.v)),
                        };
                    case XPathResult.NUMBER_TYPE:
                        return { resultType, numberValue: asNumber(r) };
                    case XPathResult.ANY_TYPE:
                    case XPathResult.STRING_TYPE:
                        return { resultType, stringValue: asString(r) };
                    default:
                        throw new Error(
                            `toExternalResult() doesn't know how to convert a date to ${resultType}`
                        );
                }
            }
        },
        /** Returns 'date' for Date instances, undefined otherwise. */
        typefor(val) {
            if (val instanceof Date) return 'date';
        },
        /**
         * Date-aware handling of an infix operation. Returns undefined when
         * there is nothing date-specific to do, a `{ t: 'continue', ... }`
         * object carrying the converted operands, or (for +/- on date
         * strings) the computed number of days.
         */
        handleInfix(err, lhs, op, rhs) {
            if (lhs.t === 'date' || rhs.t === 'date') {
                if (lhs.t === 'bool' || rhs.t === 'bool') {
                    // date comparisons with booleans should be coerced to boolean
                    return;
                }

                // For comparisons and math, we must make sure that both values are numbers
                if (lhs.t === 'arr' || lhs.t === 'str')
                    lhs = XPR.date(asDate(lhs));
                if (rhs.t === 'arr' || rhs.t === 'str')
                    rhs = XPR.date(asDate(rhs));

                if (lhs.t === 'date') lhs = { t: 'num', v: dateToDays(lhs.v) };
                if (rhs.t === 'date') rhs = { t: 'num', v: dateToDays(rhs.v) };

                return { t: 'continue', lhs, op, rhs };
            }

            // try to coerce non-dates into dates :o
            if (op === PLUS || op === MINUS) {
                const lStr = asString(lhs);
                if (DATE_STRING.test(lStr)) {
                    const lDays = dateStringToDays(lStr);
                    const rDays = asNumber(rhs);
                    const delta = op === PLUS ? lDays + rDays : lDays - rDays;
                    return delta;
                }

                const rStr = asString(rhs);
                if (DATE_STRING.test(rStr)) {
                    const rDays = dateStringToDays(rStr);
                    const lDays = asNumber(lhs);
                    const delta = op === PLUS ? lDays + rDays : lDays - rDays;
                    return delta;
                }
            } else if (op >= EQ && op <= GTE) {
                const lStr = asString(lhs);
                if (DATE_STRING.test(lStr))
                    lhs = XPR.number(dateStringToDays(lStr));

                const rStr = asString(rhs);
                if (DATE_STRING.test(rStr))
                    rhs = XPR.number(dateStringToDays(rStr));

                return { t: 'continue', lhs, op, rhs };
            }
        },
    };

    ret.func = func;
    ret.process = process;
    ret.XPR = XPR;
    ret._now = function () {
        // This is exposed in ret to allow for unit testing, although this is not currently utilised.
        const t = new Date();
        return new Date(t.getFullYear(), t.getMonth(), t.getDate());
    };

    return ret;
};

module.exports = openrosaXPathExtensions;

/**
 * Applies fn to every argument, flattening 'arr' arguments so that fn is
 * applied to each of their members instead.
 *
 * @returns {Array} the results, in argument order
 */
function mapFn(fn, ...args) {
    const res = [];
    for (let i = 0; i < args.length; ++i) {
        if (args[i].t === 'arr') {
            for (let j = 0; j < args[i].v.length; ++j) {
                res.push(fn(args[i].v[j]));
            }
        } else res.push(fn(args[i]));
    }
    return res;
}

/** Casts r to a number and truncates it towards zero. */
function asInteger(r) {
    const num = asNumber(r);
    return num > 0 ? Math.floor(num) : Math.ceil(num);
}

/**
 * Converts a typed value to a Date.
 *
 * - 'bool': an invalid Date
 * - 'date': the wrapped Date itself
 * - 'num': the number is treated as days since the epoch
 * - 'arr' / 'str': the string form is parsed; an empty string yields a
 *   BlankDate, a raw number is treated as (integer) days since the epoch
 *
 * @throws {Error} for any other type
 */
function asDate(r) {
    let temp;
    let timeComponent;
    switch (r.t) {
        case 'bool':
            return new Date(NaN);
        case 'date':
            return r.v;
        case 'num':
            temp = new Date(0);
            temp.setTime(temp.getTime() + r.v * 24 * 60 * 60 * 1000);
            return temp;
        case 'arr':
        case 'str':
            r = asString(r);
            if (r.length === 0) return new BlankDate();
            if (RAW_NUMBER.test(r)) {
                temp = new Date(0);
                temp.setTime(
                    temp.getTime() + parseInt(r, 10) * 24 * 60 * 60 * 1000
                );
                return temp;
            }
            if (DATE_STRING.test(r)) {
                // Split off any time component so the date part can be
                // validated and zero-padded on its own.
                temp = r.indexOf('T');
                if (temp !== -1) {
                    timeComponent = r.substring(temp);
                    r = r.substring(0, temp);
                }
                temp = r.split('-');
                if (isValidDate(temp[0], temp[1], temp[2])) {
                    // Date-only input: use midnight with the offset that
                    // getTimezoneOffsetAsTime() reports for that date.
                    timeComponent =
                        timeComponent ||
                        `T00:00:00.000${getTimezoneOffsetAsTime(new Date(r))}`;
                    const time = `${_zeroPad(temp[0])}-${_zeroPad(
                        temp[1]
                    )}-${_zeroPad(temp[2])}${timeComponent}`;
                    return new Date(time);
                }
            }
            return new Date(r);
        default:
            throw new Error(`asDate() can't handle ${r.t}s yet :-(`);
    }
}

/** Left-pads n with zeros to at least len characters (default 2). */
function _zeroPad(n, len) {
    len = len || 2;
    n = n.toString();
    while (n.length < len) n = `0${n}`;
    return n;
}

/** Returns the nodeName of the node selected by getNode(), or ''. */
function getNodeName(ctx, r) {
    const node = getNode(ctx, r);
    return node ? node.nodeName : '';
}

/**
 * If r is supplied, returns the first Element or Attribute in r by document order.
 * If r is not supplied, returns the ctx iff it is an Element or Attribute.
 */
function getNode(ctx, r) {
    if (arguments.length > 2) throw new Error('too many args');
    if (!r) return isNodeish(ctx.cN) ? ctx.cN : null;
    if (r.t !== 'arr') throw new Error('wrong arg type');
    if (!r.v.length) return;
    // Work on a filtered copy so the caller's nodeset is left untouched, and
    // read the answer from that sorted copy rather than from the original.
    const candidates = { t: 'arr', v: r.v.filter(isNodeish) };
    // NOTE(review): sortByDocumentOrder is presumed to sort `candidates.v` in
    // place; its return value is used when it has a `v`, just in case.
    const sorted = sortByDocumentOrder(candidates);
    return (sorted?.v ?? candidates.v)[0];
}

/**
 * I can't decode what a QName is from the spec, but it seems like only the
 * following nodeTypes are considered by XPath:
 * @see https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType
 */
const NODEISH = {
    [Node.ELEMENT_NODE]: true,
    [Node.ATTRIBUTE_NODE]: true,
    [Node.PROCESSING_INSTRUCTION_NODE]: true,
    [Node.DOCUMENT_TYPE_NODE]: true,
};

/** Returns true when the node's nodeType is listed in NODEISH. */
function isNodeish({ nodeType }) {
    return NODEISH[nodeType];
}

/**
 * Implementation from https://stackoverflow.com/a/2117523, added in revision
 * https://stackoverflow.com/revisions/2117523/11, licensed under CC by SA 3.0
 * (https://creativecommons.org/licenses/by-sa/3.0/), see
 * https://stackoverflow.com/posts/2117523/timeline. Formatting may have been
 * changed.
 */
/* eslint-disable no-bitwise */
function uuid() {
    return ([1e7] + -1e3 + -4e3 + -8e3 + -1e11).replace(/[018]/g, (c) =>
        (
            c ^
            (crypto.getRandomValues(new Uint8Array(1))[0] & (15 >> (c / 4)))
        ).toString(16)
    );
}
/* eslint-enable no-bitwise */