@remotemerge/xpath-parser
Version:
JavaScript utility for extracting data from HTML and XML documents!
126 lines (124 loc) • 4.83 kB
JavaScript
/**
 * Small helper around the DOM XPath API that evaluates expressions against a
 * context node and returns trimmed text values.
 */
class XPathParser {
  /**
   * Public options.
   * `queryFirst` only selects the result type used by the public `evaluate()`
   * method; the query helpers below choose their own result type and never
   * modify this flag.
   */
  options = {
    queryFirst: false
  };

  /** Context node that expressions are evaluated against. */
  domContent;

  /**
   * Initialize Node from DOM Node or HTML string.
   *
   * @param {Node|string} content - A DOM Node or an HTML string. Anything that
   *   is not a Node is parsed as "text/html" with DOMParser.
   */
  constructor(content) {
    this.domContent = content instanceof Node ? content : new DOMParser().parseFromString(content, "text/html");
  }

  /**
   * Evaluate an XPath expression against a given context node with an explicit
   * result type.
   *
   * The evaluation is performed by the document that owns the context node (or
   * the context node itself when it is a document), so nodes produced by
   * DOMParser are not evaluated through an unrelated document. The global
   * `document` is used only when no owning document exposes `evaluate`.
   *
   * @param {string} expression - The XPath expression to evaluate.
   * @param {number} resultType - One of the XPathResult type constants.
   * @param {Node} contextNode - The node the expression is relative to.
   * @return {XPathResult} The result of evaluating the XPath expression.
   */
  #evaluateAs(expression, resultType, contextNode) {
    const owner = contextNode?.ownerDocument ?? contextNode;
    const evaluator = typeof owner?.evaluate === "function" ? owner : document;
    return evaluator.evaluate(expression, contextNode, null, resultType, null);
  }

  /**
   * Return the trimmed text of the first node matching the expression,
   * relative to the given context node.
   *
   * @param {string} expression - The XPath expression to evaluate.
   * @param {Node} contextNode - The node the expression is relative to.
   * @return {string} The text of the first match, or an empty string.
   */
  #firstValue(expression, contextNode) {
    const result = this.#evaluateAs(expression, XPathResult.FIRST_ORDERED_NODE_TYPE, contextNode);
    return this.getValue(result.singleNodeValue);
  }

  /**
   * Evaluate an XPath expression in the specified Node and return the result.
   *
   * The result type depends on `options.queryFirst`: a first-ordered-node
   * result when it is true, an ordered node iterator otherwise.
   *
   * @param {string} expression - The XPath expression to evaluate.
   * @return {XPathResult} The result of evaluating the XPath expression.
   */
  evaluate(expression) {
    const resultType = this.options.queryFirst ? XPathResult.FIRST_ORDERED_NODE_TYPE : XPathResult.ORDERED_NODE_ITERATOR_TYPE;
    return this.#evaluateAs(expression, resultType, this.domContent);
  }

  /**
   * Extract the text value from a given DOM Node.
   *
   * Attribute nodes yield their value; every other Node (elements of any
   * namespace, text nodes, ...) yields its `textContent`. Anything else,
   * including `null`, yields an empty string.
   *
   * @param {Node|null} node - The DOM Node to extract the text value from.
   * @return {string} The trimmed text value of the DOM Node.
   */
  getValue(node) {
    let text = "";
    if (node instanceof Attr) {
      text = node.value;
    } else if (node instanceof Node) {
      // textContent is null for some node kinds (e.g. documents); handled below.
      text = node.textContent;
    }
    return text?.trim() || "";
  }

  /**
   * Evaluate the expression and return the first matching result.
   *
   * Does not alter `options.queryFirst`, so it can be freely mixed with
   * `queryList()` and `subQuery()` on the same instance.
   *
   * @param {string} expression - The XPath expression to evaluate.
   * @return {string} The first matching result of evaluating the XPath expression.
   */
  queryFirst(expression) {
    return this.#firstValue(expression, this.domContent);
  }

  /**
   * Evaluate the expression and return all matching results.
   *
   * @param {string} expression - The XPath expression to evaluate.
   * @return {string[]} An array of all the matching results of evaluating the XPath expression.
   */
  queryList(expression) {
    // Always request an iterator, regardless of options.queryFirst.
    const matches = this.#evaluateAs(expression, XPathResult.ORDERED_NODE_ITERATOR_TYPE, this.domContent);
    const response = [];
    for (let node = matches.iterateNext(); node; node = matches.iterateNext()) {
      response.push(this.getValue(node));
    }
    return response;
  }

  /**
   * Evaluate the expressions and return the matching result in the associative format.
   *
   * @param {Record<string, string>} expressions - An object with XPath expressions as values.
   * @return {Record<string, string>} An object with the results of evaluating the XPath expressions as key-value pairs.
   */
  multiQuery(expressions) {
    return Object.fromEntries(
      Object.entries(expressions).map(([key, expression]) => [key, this.queryFirst(expression)])
    );
  }

  /**
   * This method selects all matching parent nodes and runs sub queries on child nodes and generate an associative array result.
   *
   * Sub queries are evaluated relative to each matched root node; the
   * pagination expression is evaluated relative to the parser's own context
   * node. The parser's context node is left unchanged.
   *
   * @param {{ root: string, queries: Record<string, string>, pagination?: string }} expression - An object that specifies the XPath expressions to use for the root node, child nodes, and pagination.
   * @return {{ results: Record<string, string>[], paginationUrl?: string }} An object holding one record per root match and, when a pagination expression is given, its first match as `paginationUrl`.
   */
  subQuery(expression) {
    const roots = this.#evaluateAs(expression.root, XPathResult.ORDERED_NODE_ITERATOR_TYPE, this.domContent);
    const queries = Object.entries(expression.queries);
    const results = [];
    for (let rootNode = roots.iterateNext(); rootNode; rootNode = roots.iterateNext()) {
      results.push(
        Object.fromEntries(queries.map(([key, value]) => [key, this.#firstValue(value, rootNode)]))
      );
    }
    const response = { results };
    if (expression.pagination) {
      response.paginationUrl = this.queryFirst(expression.pagination);
    }
    return response;
  }

  /**
   * This method tries to match a given expression every second up to a maximum number of seconds.
   *
   * The first attempt happens after one second. The polling interval is
   * always cleared before the returned promise settles.
   *
   * @param {string} expression - The XPath expression to match.
   * @param {number} maxSeconds - The maximum number of seconds to keep trying. Default is 10.
   * @return {Promise<{ found: boolean; message: string }>} A Promise that resolves with an object containing a boolean indicating whether the expression was found, and a message with the first match if it was found, or rejects with a TimeoutError if the expression was not found within the maximum number of seconds. It also rejects with the original error if evaluating the expression throws.
   */
  waitXPath(expression, maxSeconds = 10) {
    return new Promise((resolve, reject) => {
      let attempts = 0;
      const refreshId = setInterval(() => {
        let firstMatch;
        try {
          firstMatch = this.queryFirst(expression);
        } catch (error) {
          // An invalid expression will never match; stop polling instead of
          // throwing from the timer callback once per second forever.
          clearInterval(refreshId);
          reject(error);
          return;
        }
        if (firstMatch) {
          clearInterval(refreshId);
          resolve({ found: true, message: firstMatch });
          return;
        }
        attempts += 1;
        if (attempts >= maxSeconds) {
          clearInterval(refreshId);
          const error = new Error(`Timeout! Max ${maxSeconds} seconds are allowed.`);
          error.name = "TimeoutError";
          reject(error);
        }
      }, 1e3);
    });
  }
}
export { XPathParser as default };
//# sourceMappingURL=index.es.js.map