UNPKG

fontoxpath

Version:

A minimalistic XPath 3.1 engine in JavaScript

530 lines (481 loc) 17.2 kB
import createSelectorFromXPath from './parsing/createSelectorFromXPath';
import adaptJavaScriptValueToXPathValue from './selectors/adaptJavaScriptValueToXPathValue';
import DynamicContext from './selectors/DynamicContext';
import DomFacade from './DomFacade';
import domBackedDomFacade from './domBackedDomFacade';
import atomize from './selectors/dataTypes/atomize';
import castToType from './selectors/dataTypes/castToType';
import Sequence from './selectors/dataTypes/Sequence';
import isSubtypeOf from './selectors/dataTypes/isSubtypeOf';
import { DONE_TOKEN, ready, notReady } from './selectors/util/iterators';

/**
 * Normalizes line endings of an XPath string.
 *
 * @param  {string}  xpathString
 * @return {string}  The input with every CR LF pair and every lone CR replaced by a single LF.
 */
function normalizeEndOfLines (xpathString) {
	// Replace all character sequences of 0xD followed by 0xA and all 0xD not followed by 0xA with 0xA.
	return xpathString.replace(/(\x0D\x0A)|(\x0D(?!\x0A))/g, String.fromCharCode(0xA));
}

/**
 * Prefix => namespace URI bindings that are resolved before the namespace resolver is consulted.
 */
const DEFAULT_NAMESPACES = {
	'xml': 'http://www.w3.org/XML/1998/namespace',
	'xs': 'http://www.w3.org/2001/XMLSchema',
	'fn': 'http://www.w3.org/2005/xpath-functions',
	'map': 'http://www.w3.org/2005/xpath-functions/map',
	'array': 'http://www.w3.org/2005/xpath-functions/array',
	'math': 'http://www.w3.org/2005/xpath-functions/math'
};

/**
 * Builds the error thrown when a result is requested synchronously but is not ready yet.
 *
 * @param  {string}  xpathSelector  The XPath that was being evaluated
 * @return {!Error}
 */
function createNotSynchronousError (xpathSelector) {
	return new Error(`The XPath ${xpathSelector} can not be resolved synchronously.`);
}

/**
 * Creates an iterator that converts an XPath map into a plain JavaScript object.
 *
 * Every call to next() resumes the conversion where it left off. It returns a not-ready
 * result while an entry value is still pending, the finished object once all entries are
 * converted, and DONE_TOKEN on any call after that.
 *
 * @param  {*}  map             The XPath map item (its keyValuePairs are read)
 * @param  {*}  dynamicContext  Passed on to the conversion of nested values
 * @return {{next: function():*}}
 */
function transformMapToObject (map, dynamicContext) {
	const mapObj = {};
	let i = 0;
	let done = false;
	// Kept across next() calls so a nested conversion that was not ready can be resumed
	let transformedValueIterator = null;
	return {
		next: () => {
			if (done) {
				return DONE_TOKEN;
			}
			while (i < map.keyValuePairs.length) {
				if (!transformedValueIterator) {
					const val = map.keyValuePairs[i].value
						.switchCases({
							default: seq => seq,
							multiple: () => {
								throw new Error('Serialization error: The value of an entry in a map is expected to be a singleton sequence.');
							}
						})
						.tryGetFirst();
					if (!val.ready) {
						return notReady(val.promise);
					}
					if (val.value === null) {
						// No first item for this entry: store null
						mapObj[map.keyValuePairs[i].key.value] = null;
						i++;
						continue;
					}
					transformedValueIterator = transformXPathItemToJavascriptObject(val.value, dynamicContext);
				}
				const transformedValue = transformedValueIterator.next();
				if (!transformedValue.ready) {
					return transformedValue;
				}
				transformedValueIterator = null;
				mapObj[map.keyValuePairs[i].key.value] = transformedValue.value;
				i++;
			}
			done = true;
			return ready(mapObj);
		}
	};
}

/**
 * Creates an iterator that converts an XPath array into a JavaScript array.
 *
 * Every call to next() resumes the conversion where it left off. It returns a not-ready
 * result while a member is still pending, the finished array once all members are
 * converted, and DONE_TOKEN on any call after that.
 *
 * @param  {*}  array           The XPath array item (its members are read)
 * @param  {*}  dynamicContext  Passed on to the conversion of nested values
 * @return {{next: function():*}}
 */
function transformArrayToArray (array, dynamicContext) {
	const arr = [];
	let i = 0;
	let done = false;
	// Kept across next() calls so a nested conversion that was not ready can be resumed
	let transformedMemberGenerator = null;
	return {
		next: () => {
			if (done) {
				return DONE_TOKEN;
			}
			while (i < array.members.length) {
				if (!transformedMemberGenerator) {
					const val = array.members[i]
						.switchCases({
							default: seq => seq,
							multiple: () => {
								throw new Error('Serialization error: The value of an entry in an array is expected to be a singleton sequence.');
							}
						})
						.tryGetFirst();
					if (!val.ready) {
						return notReady(val.promise);
					}
					if (val.value === null) {
						// No first item for this member: store null
						arr[i++] = null;
						continue;
					}
					transformedMemberGenerator = transformXPathItemToJavascriptObject(val.value, dynamicContext);
				}
				const transformedValue = transformedMemberGenerator.next();
				if (!transformedValue.ready) {
					return transformedValue;
				}
				transformedMemberGenerator = null;
				arr[i++] = transformedValue.value;
			}
			done = true;
			return ready(arr);
		}
	};
}

/**
 * Creates an iterator that converts a single XPath item into its JavaScript counterpart:
 * maps become objects, arrays become arrays, any other item yields its raw value.
 *
 * @param  {*}  value           The XPath item (its type and value are read)
 * @param  {*}  dynamicContext  Passed on to the map / array conversions
 * @return {{next: function():*}}
 */
function transformXPathItemToJavascriptObject (value, dynamicContext) {
	if (isSubtypeOf(value.type, 'map(*)')) {
		return transformMapToObject(value, dynamicContext);
	}
	if (isSubtypeOf(value.type, 'array(*)')) {
		return transformArrayToArray(value, dynamicContext);
	}
	return {
		next: () => ready(value.value)
	};
}

/**
 * Creates the namespace resolver used when the caller did not supply one. It delegates to
 * contextItem.lookupNamespaceURI when the context item is an object exposing that member,
 * and resolves every prefix to null otherwise.
 *
 * @param   {Node|*}  contextItem
 * @return  {function(string):?string}
 */
function createDefaultNamespaceResolver (contextItem) {
	if (!contextItem || typeof contextItem !== 'object' || !('lookupNamespaceURI' in contextItem)) {
		return (_prefix) => null;
	}
	return prefix => (/** @type {Node} */(contextItem)).lookupNamespaceURI(prefix || null);
}

/**
 * @typedef {{
 *   namespaceResolver: ?function(string):string?,
 *   nodesFactory: INodesFactory?,
 *   language: string
 * }}
 */
let Options;

/**
 * Evaluates an XPath on the given contextItem.
 * If the return type is ANY_TYPE, the returned value depends on the result of the XPath:
 *  * If the XPath evaluates to the empty sequence, an empty array is returned.
 *  * If the XPath evaluates to a singleton node, that node is returned.
 *  * If the XPath evaluates to a singleton value, that value is atomized and returned.
 *  * If the XPath evaluates to a sequence of nodes, those nodes are returned.
 *  * Else, the sequence is atomized and returned.
 *
 * @param  {!string}       xpathSelector  The selector to execute. Supports XPath 3.1.
 * @param  {Node|*|null}   contextItem    The node from which to run the XPath.
 * @param  {?IDomFacade=}  domFacade      The domFacade (or DomFacade like interface) for retrieving relations.
 * @param  {?Object=}      variables      Extra variables (name=>value). Values can be number, string, boolean, nodes or object literals and arrays.
 * @param  {?number=}      returnType     One of the return types, indicates the expected type of the XPath query.
 * @param  {?Options=}     options        Extra options for evaluating this XPath
 *
 * @return  {!Array<!Node>|Node|!Array<*>|*}
 *
 * @throws {TypeError} If xpathSelector is empty or not a string.
 * @throws {Error}     If the result does not match the requested return type, or if it can
 *                     not be resolved synchronously for a synchronous return type.
 */
function evaluateXPath (xpathSelector, contextItem, domFacade, variables = {}, returnType = evaluateXPath.ANY_TYPE, options = { namespaceResolver: null, nodesFactory: null, language: 'XPath3.1' }) {
	if (!xpathSelector || typeof xpathSelector !== 'string' ) {
		throw new TypeError('Failed to execute \'evaluateXPath\': xpathSelector must be a string.');
	}
	if (!domFacade) {
		domFacade = domBackedDomFacade;
	}
	// The options parameter is documented as nullable: an explicit null must behave like "no options"
	const evaluationOptions = options || {};

	xpathSelector = normalizeEndOfLines(xpathSelector);

	// Always wrap in an actual domFacade
	const wrappedDomFacade = new DomFacade(domFacade);

	const compilationOptions = { allowXQuery: evaluationOptions.language === 'XQuery3.1' };
	const compiledSelector = createSelectorFromXPath(xpathSelector, compilationOptions);

	const contextSequence = contextItem ? adaptJavaScriptValueToXPathValue(contextItem) : Sequence.empty();

	const untypedVariables = Object.assign(
		{
			'theBest': 'FontoXML is the best!'
		},
		variables);

	const namespaceResolver = evaluationOptions['namespaceResolver'] || createDefaultNamespaceResolver(contextItem);

	/**
	 * @type {!Object}
	 */
	const typedVariables = Object.keys(untypedVariables)
		.reduce(function (typedVariables, variableName) {
			if (untypedVariables[variableName] === undefined) {
				return typedVariables;
			}
			// Wrapped in a function so the conversion only happens when the variable is requested
			typedVariables[variableName] = () => adaptJavaScriptValueToXPathValue(untypedVariables[variableName]);
			return typedVariables;
		}, Object.create(null));

	/**
	 * @type {INodesFactory}
	 */
	let nodesFactory = evaluationOptions['nodesFactory'];
	if (!nodesFactory) {
		// The typeof guard is required: the `in` operator throws a TypeError on primitive context items
		if (contextItem && typeof contextItem === 'object' && 'nodeType' in contextItem) {
			const ownerDocument = contextItem.ownerDocument || contextItem;
			nodesFactory = {
				createElementNS: ownerDocument.createElementNS.bind(ownerDocument),
				createTextNode: ownerDocument.createTextNode.bind(ownerDocument),
				createComment: ownerDocument.createComment.bind(ownerDocument),
				createProcessingInstruction: ownerDocument.createProcessingInstruction.bind(ownerDocument)
			};
		}
		else {
			// No document to create nodes with: only fail when a node constructor is actually used
			const throwMissingNodesFactory = () => {
				throw new Error('Please pass a node factory if an XQuery script uses node constructors');
			};
			nodesFactory = {
				createElementNS: throwMissingNodesFactory,
				createTextNode: throwMissingNodesFactory,
				createComment: throwMissingNodesFactory,
				createProcessingInstruction: throwMissingNodesFactory
			};
		}
	}

	/**
	 * @type {!DynamicContext}
	 */
	const dynamicContext = new DynamicContext({
		contextItemIndex: 0,
		contextSequence: contextSequence,
		contextItem: contextSequence.first(),
		domFacade: wrappedDomFacade,
		variables: typedVariables,
		resolveNamespacePrefix: prefix => {
			// Own-property check, so prefixes like 'constructor' or 'toString' do not resolve to
			// members inherited from Object.prototype
			if (Object.prototype.hasOwnProperty.call(DEFAULT_NAMESPACES, prefix)) {
				return DEFAULT_NAMESPACES[prefix];
			}
			return namespaceResolver(prefix);
		},
		// propagate the compiler here
		createSelectorFromXPath: createSelectorFromXPath,
		nodesFactory: nodesFactory
	});

	/**
	 * @type {!./selectors/dataTypes/Sequence}
	 */
	const rawResults = compiledSelector.evaluateMaybeStatically(dynamicContext);

	switch (returnType) {
		case evaluateXPath.BOOLEAN_TYPE: {
			const ebv = rawResults.tryGetEffectiveBooleanValue();
			if (!ebv.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			return ebv.value;
		}

		case evaluateXPath.STRING_TYPE: {
			const allValues = rawResults.tryGetAllValues();
			if (!allValues.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			if (!allValues.value.length) {
				return '';
			}
			// Atomize to convert (attribute)nodes to be strings
			return allValues.value.map(value => castToType(atomize(value, dynamicContext), 'xs:string').value).join(' ');
		}

		case evaluateXPath.STRINGS_TYPE: {
			const allValues = rawResults.tryGetAllValues();
			if (!allValues.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			if (!allValues.value.length) {
				return [];
			}
			// Atomize all parts
			return allValues.value.map(function (value) {
				return atomize(value, dynamicContext).value + '';
			});
		}

		case evaluateXPath.NUMBER_TYPE: {
			const first = rawResults.tryGetFirst();
			if (!first.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			if (!first.value) {
				return NaN;
			}
			if (!isSubtypeOf(first.value.type, 'xs:numeric')) {
				return NaN;
			}
			return first.value.value;
		}

		case evaluateXPath.FIRST_NODE_TYPE: {
			const first = rawResults.tryGetFirst();
			if (!first.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			if (!first.value) {
				return null;
			}
			if (!(isSubtypeOf(first.value.type, 'node()'))) {
				// Report the type of the offending item so the message is actionable
				throw new Error('Expected XPath ' + xpathSelector + ' to resolve to Node. Got ' + first.value.type);
			}
			if (isSubtypeOf(first.value.type, 'attribute()')) {
				throw new Error('XPath can not resolve to attribute nodes');
			}
			return first.value.value;
		}

		case evaluateXPath.NODES_TYPE: {
			const allResults = rawResults.tryGetAllValues();
			if (!allResults.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			if (!allResults.value.every(function (value) {
				return isSubtypeOf(value.type, 'node()');
			})) {
				throw new Error('Expected XPath ' + xpathSelector + ' to resolve to a sequence of Nodes.');
			}
			if (allResults.value.some(function (value) {
				return isSubtypeOf(value.type, 'attribute()');
			})) {
				throw new Error('XPath ' + xpathSelector + ' should not resolve to attribute nodes');
			}
			return allResults.value.map(function (nodeValue) {
				return nodeValue.value;
			});
		}

		case evaluateXPath.MAP_TYPE: {
			const allValues = rawResults.tryGetAllValues();
			if (!allValues.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			if (allValues.value.length !== 1) {
				throw new Error('Expected XPath ' + xpathSelector + ' to resolve to a single map.');
			}
			const first = allValues.value[0];
			if (!(isSubtypeOf(first.type, 'map(*)'))) {
				throw new Error('Expected XPath ' + xpathSelector + ' to resolve to a map');
			}
			const transformedMap = transformMapToObject(first, dynamicContext).next();
			if (!transformedMap.ready) {
				throw new Error('Expected XPath ' + xpathSelector + ' to synchronously resolve to a map');
			}
			return transformedMap.value;
		}

		case evaluateXPath.ARRAY_TYPE: {
			const allValues = rawResults.tryGetAllValues();
			if (!allValues.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			if (allValues.value.length !== 1) {
				throw new Error('Expected XPath ' + xpathSelector + ' to resolve to a single array.');
			}
			const first = allValues.value[0];
			if (!(isSubtypeOf(first.type, 'array(*)'))) {
				throw new Error('Expected XPath ' + xpathSelector + ' to resolve to an array');
			}
			const transformedArray = transformArrayToArray(first, dynamicContext).next();
			if (!transformedArray.ready) {
				throw new Error('Expected XPath ' + xpathSelector + ' to synchronously resolve to an array');
			}
			return transformedArray.value;
		}

		case evaluateXPath.NUMBERS_TYPE: {
			const allValues = rawResults.tryGetAllValues();
			if (!allValues.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			return allValues.value.map(function (value) {
				if (!isSubtypeOf(value.type, 'xs:numeric')) {
					throw new Error('Expected XPath ' + xpathSelector + ' to resolve to numbers');
				}
				return value.value;
			});
		}

		case evaluateXPath.ASYNC_ITERATOR_TYPE: {
			/**
			 * @type {./selectors/util/iterators.AsyncIterator}
			 */
			const it = rawResults.value();
			// Kept across calls so a conversion that was not ready can be resumed
			let transformedValueGenerator = null;
			let done = false;
			const getNextResult = () => {
				while (!done) {
					if (!transformedValueGenerator) {
						const value = it.next();
						if (value.done) {
							done = true;
							break;
						}
						if (!value.ready) {
							// Retry once the pending value has been resolved
							return value.promise.then(getNextResult);
						}
						transformedValueGenerator = transformXPathItemToJavascriptObject(value.value, dynamicContext);
					}
					const transformedValue = transformedValueGenerator.next();
					if (!transformedValue.ready) {
						return transformedValue.promise.then(getNextResult);
					}
					transformedValueGenerator = null;
					return transformedValue;
				}
				return {
					done: true
				};
			};

			const asyncIterator = {
				// Wrapping in a promise turns synchronous throws of getNextResult into rejections
				next: () => new Promise(resolve => resolve(getNextResult()))
			};
			if ('asyncIterator' in Symbol) {
				// Make the result usable in `for await` loops where the runtime supports them
				asyncIterator[(/** @type {{asyncIterator:*}} */ (Symbol)).asyncIterator] = function () {
					return this;
				};
			}
			return asyncIterator;
		}

		default: {
			const allValues = rawResults.tryGetAllValues();
			if (!allValues.ready) {
				throw createNotSynchronousError(xpathSelector);
			}
			const allValuesAreNodes = allValues.value.every(function (value) {
				return isSubtypeOf(value.type, 'node()') &&
					!(isSubtypeOf(value.type, 'attribute()'));
			});
			if (allValuesAreNodes) {
				if (allValues.value.length === 1) {
					return allValues.value[0].value;
				}
				return allValues.value.map(function (nodeValue) {
					return nodeValue.value;
				});
			}
			if (allValues.value.length === 1) {
				return atomize(allValues.value[0], dynamicContext).value;
			}
			return rawResults.atomize(dynamicContext).getAllValues().map(function (atomizedValue) {
				return atomizedValue.value;
			});
		}
	}
}

/**
 * Returns the result of the query, can be anything depending on the query
 */
evaluateXPath['ANY_TYPE'] = evaluateXPath.ANY_TYPE = 0;

/**
 * Resolve to a number, like count((1,2,3)) resolves to 3.
 */
evaluateXPath['NUMBER_TYPE'] = evaluateXPath.NUMBER_TYPE = 1;

/**
 * Resolve to a string, like //someElement[1] resolves to the text content of the first someElement
 */
evaluateXPath['STRING_TYPE'] = evaluateXPath.STRING_TYPE = 2;

/**
 * Resolves to true or false, uses the effective boolean value to determine the result. count(1) resolves to true, count(()) resolves to false
 */
evaluateXPath['BOOLEAN_TYPE'] = evaluateXPath.BOOLEAN_TYPE = 3;

/**
 * Resolve to all nodes the XPath resolves to. Returns nodes in the order the XPath would. Meaning (//a, //b) resolves to all A nodes, followed by all B nodes. //*[self::a or self::b] resolves to A and B nodes in document order.
 */
evaluateXPath['NODES_TYPE'] = evaluateXPath.NODES_TYPE = 7;

/**
 * Resolves to the first node NODES_TYPE would have resolved to.
 */
evaluateXPath['FIRST_NODE_TYPE'] = evaluateXPath.FIRST_NODE_TYPE = 9;

/**
 * Resolve to an array of strings
 */
evaluateXPath['STRINGS_TYPE'] = evaluateXPath.STRINGS_TYPE = 10;

/**
 * Resolve to an object, as a map
 */
evaluateXPath['MAP_TYPE'] = evaluateXPath.MAP_TYPE = 11;

/**
 * Resolve to a JavaScript array, as an XPath array
 */
evaluateXPath['ARRAY_TYPE'] = evaluateXPath.ARRAY_TYPE = 12;

/**
 * Resolve to an object with a promise-returning next(), allowing results that are not available synchronously
 */
evaluateXPath['ASYNC_ITERATOR_TYPE'] = evaluateXPath.ASYNC_ITERATOR_TYPE = 99;

/**
 * Resolve to an array of numbers
 */
evaluateXPath['NUMBERS_TYPE'] = evaluateXPath.NUMBERS_TYPE = 13;

/**
 * Can be used to signal an XQuery program should be executed instead of an XPath
 */
evaluateXPath['XQUERY_3_1_LANGUAGE'] = evaluateXPath.XQUERY_3_1_LANGUAGE = 'XQuery3.1';

/**
 * Can be used to signal an XPath program should be executed
 */
evaluateXPath['XPATH_3_1_LANGUAGE'] = evaluateXPath.XPATH_3_1_LANGUAGE = 'XPath3.1';

export default evaluateXPath;