UNPKG

ayakashi

Version:

The next generation web scraping framework

82 lines (81 loc) 3.05 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.attachCoreExtractors = void 0;
/**
 * Registers the built-in extractors ("text", "integer", "number" and "float")
 * on the given instance by calling its `registerExtractor` method.
 *
 * Every extractor factory returns an object with three functions:
 *   - `extract(element)` computes the raw value from an element,
 *   - `isValid(result)`  reports whether that raw value is usable,
 *   - `useDefault()`     supplies the fallback value.
 *
 * NOTE(review): the numeric factories reach the text extractor through
 * `this.extractors.text()` and declare it as a dependency (`["text"]`)
 * instead of closing over a shared helper. Presumably the factories can be
 * evaluated outside this module's scope, so each factory is kept fully
 * self-contained here - confirm before hoisting anything to module level.
 *
 * @param {object} ayakashiInstance - object exposing
 *   `registerExtractor(name, factory, dependsOn)`.
 * @returns {void}
 */
function attachCoreExtractors(ayakashiInstance) {
    ayakashiInstance.registerExtractor("text", function () {
        return {
            /**
             * Reads the text of a node and collapses every whitespace run
             * into a single space.
             * @returns {string|null} normalized text, or null when the
             *   element exposes no non-empty text source.
             */
            extract: function (element) {
                let raw = null;
                if (element.nodeType === 3) {
                    // nodeType 3 is a text node: its content lives in `data`.
                    raw = element.data;
                }
                else if (element.text && element.text.length > 0) {
                    raw = element.text;
                }
                else if (element.textContent && element.textContent.length > 0) {
                    raw = element.textContent;
                }
                if (raw === null) {
                    return null;
                }
                // `\s` already matches \n and \r, so no wider class is needed.
                return raw.trim().replace(/\s+/g, " ");
            },
            isValid: function (result) {
                return !!result;
            },
            useDefault: function () {
                return "";
            }
        };
    });
    /**
     * Factory shared by the "integer" and "number" extractors: keeps only
     * the digit characters of the element's text and parses them as a
     * base-10 integer.
     */
    const integerExtractor = function () {
        const self = this;
        return {
            extract: function (element) {
                //@ts-ignore
                const textExtractor = self.extractors.text();
                let textResult = textExtractor.extract(element);
                if (!textExtractor.isValid(textResult)) {
                    textResult = textExtractor.useDefault();
                }
                const digits = textResult.match(/\d/g);
                if (digits) {
                    textResult = digits.join("");
                }
                // Explicit radix: the input is always meant to be decimal.
                // Text without any digit parses to NaN, which isValid rejects.
                return parseInt(textResult, 10);
            },
            isValid: function (result) {
                return Number.isInteger(result);
            },
            useDefault: function () {
                return 0;
            }
        };
    };
    ayakashiInstance.registerExtractor("integer", integerExtractor, ["text"]);
    ayakashiInstance.registerExtractor("number", integerExtractor, ["text"]);
    ayakashiInstance.registerExtractor("float", function () {
        const self = this;
        return {
            /**
             * Keeps the digits, commas and dots of the element's text and
             * parses the result as a float.
             */
            extract: function (element) {
                //@ts-ignore
                const textExtractor = self.extractors.text();
                let textResult = textExtractor.extract(element);
                if (!textExtractor.isValid(textResult)) {
                    textResult = textExtractor.useDefault();
                }
                const numericChars = textResult.match(/\d|,|\./g);
                if (numericChars) {
                    // Only the FIRST comma is turned into a decimal point
                    // (string patterns replace a single occurrence).
                    // NOTE(review): thousands separators are therefore not
                    // understood ("1,234.56" parses as 1.234) - confirm
                    // whether callers rely on this before changing it.
                    textResult = numericChars.join("").replace(",", ".");
                }
                return parseFloat(textResult);
            },
            isValid: function (result) {
                // Guard first: calling toString() on null/undefined would
                // throw, whereas the integer extractor's isValid simply
                // answers false for such input.
                if (result === null || result === undefined) {
                    return false;
                }
                // NaN and Infinity stringify to text that parseInt cannot
                // read, so they are rejected here.
                return Number.isInteger(parseInt(result.toString(), 10));
            },
            useDefault: function () {
                return 0;
            }
        };
    }, ["text"]);
}
// Publish the function on the CommonJS exports object; the property was
// pre-initialized to `void 0` at the top of the module.
exports.attachCoreExtractors = attachCoreExtractors;