UNPKG

xml2js-xpath

Version:

Search xml2js JSON documents with XPath query strings.

308 lines (286 loc) 8.1 kB
const _ = require("lodash");

// Keys used in the searched documents: `$` holds an element's attributes and
// `_` holds its own character data.
const ATTRKEY = '$';
const CHARKEY = '_';

// Definition of an XML name tag: https://www.w3.org/TR/xml/#NT-Name
const NAME_START_CHAR = 'A-Za-z:_\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D' +
  '\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF' +
  '\uF900-\uFDCF\uFDF0-\uFFFD';
const NAME_CHAR = `${NAME_START_CHAR}.0-9·\u0300-\u036F\u203F-\u2040-`;
const TAG_NAME = `[${NAME_START_CHAR}][${NAME_CHAR}]*`;

// Regex sources for the predicate forms understood by find(). Each captures
// the tag name first, then the predicate's operands.
const TAG_AND_PROP = `(${TAG_NAME})\\[@([\\w:]+)='([^']+)'\\]`;   // Tag[@attr='value']
const TAG_AND_CHILD = `(${TAG_NAME})\\[([\\w:]+)='([^']+)'\\]`;   // Tag[Child='value']
const TAG_AND_POSITION = `(${TAG_NAME})\\[([\\d]+)\\]`;           // Tag[3]

// Given a JSON document returned by xml2js (with _ and $ keys), return the
// combined text value of the tags.
//
// Parameters:
// * json = a text leaf (string), an element object, or an array of either.
//
// Returns the node's own `_` text followed by the text of every child, in key
// order. Attributes are skipped. A null or undefined node yields ''.
function jsonText(json) {
  if (_.isString(json)) {
    return json;
  }
  // A missing child has no text. Without this guard the property access below
  // throws a TypeError.
  if (json === null || json === undefined) {
    return '';
  }

  let result = json[CHARKEY] || '';
  _.forEach(_.keys(json), (key) => {
    if (key === ATTRKEY || key === CHARKEY) {
      return;
    }
    const value = json[key];
    const children = _.isArray(value) ? value : [value];
    _.forEach(children, (child) => {
      result += jsonText(child);
    });
  });
  return result;
}

// Collect every value stored under `key` anywhere below `json`.
//
// Parameters:
// * json = the node to search; strings are treated as leaves.
// * key = the tag name to look for.
// * matches = array the hits are appended to (mutated in place).
//
// Returns `matches`. An array found under `key` contributes each of its
// entries; any other value is appended as-is. A value stored under `key` is
// not searched further for nested occurrences of the same key.
function findAllKeys(json, key, matches) {
  if (_.isString(json)) {
    return matches;
  }

  _.forEach(_.keys(json), (jsonKey) => {
    if (jsonKey === ATTRKEY || jsonKey === CHARKEY) {
      return;
    }
    if (jsonKey !== key) {
      findAllKeys(json[jsonKey], key, matches);
      return;
    }
    const found = json[jsonKey];
    if (_.isArray(found)) {
      _.forEach(found, (entry) => {
        matches.push(entry);
      });
    } else {
      matches.push(found);
    }
  });
  return matches;
}

// Collect every node below `json` (or `json` itself) whose attributes include
// `property`.
//
// Parameters:
// * json = the node to search.
// * property = the attribute name to look for.
// * matches = array the hits are appended to (mutated in place).
//
// Returns `matches`. Once a node matches, its children are not searched.
function findAllProperties(json, property, matches) {
  // Only objects and arrays can carry attributes or children. Stopping here
  // also prevents unbounded recursion on text leaves: lodash reports index
  // keys for a string, and 'a'[0] is again the string 'a'.
  if (!_.isObject(json)) {
    return matches;
  }

  if (_.includes(_.keys(json[ATTRKEY]), property)) {
    matches.push(json);
    return matches;
  }

  _.forEach(_.keys(json), (jsonKey) => {
    if (jsonKey === ATTRKEY || jsonKey === CHARKEY) {
      return;
    }
    findAllProperties(json[jsonKey], property, matches);
  });
  return matches;
}

// Perform simple
// search of a xml2js document with XPath.
//
// This function supports simple XPath queries like:
// * //Element/SubElement
// * /Element/Subelement/SubSubElement
// * //Element[@id='4']/SubElement
// * //Element[@id='4']/@property
// * /Element[SubElement='value']/SubElement
// * /Element[2]/SubElement
//
// Parameters:
// * json = the xml2js node to start from.
// * path = the XPath query. One leading "." is ignored; an empty path selects
//   `json` itself.
//
// Returns an array of matches, which is empty when nothing matches. Attribute
// steps (/@property and //@property) return the elements that carry the
// attribute, not the attribute values.
let find = function(json, path) {
  if (path === "") {
    return _.isArray(json) ? json : [json];
  }
  const query = path[0] === "." ? path.substring(1) : path;

  // Text leaves and missing nodes have neither children nor attributes, so no
  // further step can match. Returning here also keeps the `in` checks and
  // property reads below from throwing on strings, null and undefined.
  if (!_.isObject(json)) {
    return [];
  }

  // match /Element[@key='value']
  let match = query.match(`^\\/${TAG_AND_PROP}`);
  if (match) {
    const [step, node, key, value] = match;
    // Slice off exactly what the regex consumed instead of rebuilding it.
    const rest = query.slice(step.length);
    const target = json[node];
    if (_.isArray(target)) {
      const selected = _.filter(target, (item) => hasAttributeValue(item, key, value));
      return findInEach(selected, rest);
    }
    if (hasAttributeValue(target, key, value)) {
      return find(target, rest);
    }
  }

  // match //Element[@key='value']
  match = query.match(`^\\/\\/${TAG_AND_PROP}`);
  if (match) {
    // Gather every Element below this node, then keep those whose attribute
    // matches.
    const [step, node, key, value] = match;
    const candidates = findAllKeys(json, node, []);
    const selected = _.filter(candidates, (item) => hasAttributeValue(item, key, value));
    return findInEach(selected, query.slice(step.length));
  }

  // match /Element[SubElement1='value']
  match = query.match(`^\\/${TAG_AND_CHILD}`);
  if (match) {
    const [step, node, key, value] = match;
    const rest = query.slice(step.length);
    const target = json[node];
    if (_.isArray(target)) {
      const selected = _.filter(target, (item) => hasChildText(item, key, value));
      return findInEach(selected, rest);
    }
    // Single (non-array) elements use the same text comparison as array
    // entries, so a child stored as an array or object can match too.
    if (hasChildText(target, key, value)) {
      return find(target, rest);
    }
  }

  // match //Element
  match = extractAllKeys(json, query, new RegExp(`^//(${TAG_NAME})`));
  if (match) {
    return match;
  }

  // match intermediate /Element/
  match = query.match(`^\\/(${TAG_NAME})\\/`);
  if (match) {
    const node = match[1];
    // Keep the trailing "/" so the remainder is again a rooted step.
    const rest = query.slice(match[0].length - 1);
    if (_.isArray(json[node])) {
      return findInEach(json[node], rest);
    }
    return find(json[node], rest);
  }

  // match /Element[\d+] (with numeric positional predicate)
  match = query.match(`^\\/${TAG_AND_POSITION}`);
  if (match) {
    const node = match[1];
    const position = Number(match[2]); // xpath is one-based indexed, js-array is zero-based!
    const rest = query.slice(match[0].length);
    if (hasOwnKey(json, node)) {
      // A single element that is not wrapped in an array is its own position 1.
      const siblings = _.isArray(json[node]) ? json[node] : [json[node]];
      const selected = siblings[position - 1];
      return selected ? find(selected, rest) : [];
    }
  }

  // match leaf /Element
  match = query.match(`^\\/(${TAG_NAME})$`);
  if (match) {
    const node = match[1];
    // Plain own-key test: tag names may contain ".", which lodash's _.has
    // would interpret as a deep path.
    if (hasOwnKey(json, node)) {
      return _.isArray(json[node]) ? json[node] : [json[node]];
    }
  }

  // match //@property
  match = extractAllProperties(json, query, /^\/\/@([\w:]+)/);
  if (match) {
    return match;
  }

  // match /@property
  match = query.match(/^\/@([\w:]+)/);
  if (match) {
    const property = match[1];
    const matches = [];
    let selfAdded = false;
    _.forEach(_.keys(json), (key) => {
      if (_.isArray(json[key])) {
        _.forEach(json[key], (sub) => {
          if (hasAttribute(sub, property)) {
            matches.push(sub);
          }
        });
      } else if (!selfAdded && hasAttribute(json, property)) {
        // Report the current node once, not once per non-array key.
        matches.push(json);
        selfAdded = true;
      }
    });
    return matches;
  }

  return [];
};

// Shared implementation of the descendant steps (//Element and //@property).
//
// Parameters:
// * json = the node to search.
// * path = the remaining XPath query.
// * pattern = regex for the step; capture group 1 is the name to look up.
// * extractFn = collector called as extractFn(json, name, []).
//
// Returns false when `pattern` does not match `path`, so the caller can try
// another step. Otherwise returns the results of running find() with the rest
// of the path on every collected node.
let extractAll = function(json, path, pattern, extractFn) {
  const match = path.match(pattern);
  if (!match) {
    return false;
  }
  const rest = path.replace(pattern, '');
  return findInEach(extractFn(json, match[1], []), rest);
};

let extractAllKeys = _.partialRight(extractAll, findAllKeys);
let extractAllProperties = _.partialRight(extractAll, findAllProperties);

// True when `obj` has `key` as an own property; the name is never parsed as a
// path.
const hasOwnKey = function(obj, key) {
  return Object.prototype.hasOwnProperty.call(obj, key);
};

// True when `node` is an element whose attributes include `name`.
const hasAttribute = function(node, name) {
  return _.isObject(node) && _.isObject(node[ATTRKEY]) && hasOwnKey(node[ATTRKEY], name);
};

// True when `node` is an element whose attribute `name` equals `value`.
const hasAttributeValue = function(node, name, value) {
  return hasAttribute(node, name) && node[ATTRKEY][name] === value;
};

// True when `node` has a child `name` whose combined text equals `value`.
const hasChildText = function(node, name, value) {
  return _.isObject(node) &&
    node[name] !== null && node[name] !== undefined &&
    jsonText(node[name]) === value;
};

// Run find() with `path` on every node and concatenate the hits in order.
const findInEach = function(nodes, path) {
  let results = [];
  _.forEach(nodes, (node) => {
    results = results.concat(find(node, path));
  });
  return results;
};

// Use find to search a JSON object.
//
// Parameters:
// * json = the JSON document to search (generated by xml2js)
// * path = An XPath search.
// * fetch = If true, run jsonText() on the output. If a string, try to return
//   the tag property. If not supplied, return the node.
//
// Returns the first match (or the value derived from it via `fetch`), or
// undefined if there is no match.
// If more than one match is found, the first is returned.
// Evaluate `path` against `json` with find() and return only the first hit.
//
// Parameters:
// * json = the xml2js document (or node) to search.
// * path = the XPath query handed to find().
// * fetch = true to return the hit's combined text via jsonText(); a string
//   to return the hit's attribute of that name; omitted to return the node.
//
// Returns undefined when nothing matches, or when the requested attribute is
// not present on the first hit.
let evalFirst = function(json, path, fetch) {
  const matches = find(json, path);
  if (matches.length === 0) {
    return undefined;
  }

  // searches like //Something/SomethingElse return nested arrays
  let node = matches[0];
  if (_.isArray(node)) {
    node = node[0];
  }

  if (fetch === true) {
    // An empty nested array leaves no node to read text from.
    if (node === null || node === undefined) {
      return undefined;
    }
    return jsonText(node);
  }

  if (_.isString(fetch)) {
    // Text leaves are plain strings; the `in` operator would throw on them.
    const attributes = _.isObject(node) ? node[ATTRKEY] : undefined;
    if (_.isObject(attributes) && fetch in attributes) {
      return attributes[fetch];
    }
    return undefined;
  }

  return node;
};

module.exports.evalFirst = evalFirst;
module.exports.find = find;
module.exports.jsonText = jsonText;