ayakashi
Version:
The next generation web scraping framework
82 lines (81 loc) • 3.05 kB
JavaScript
// NOTE(review): this lone `;` is an empty statement; it may be what is left of a
// stripped "use strict" directive — confirm against the original build output.
;
// Flag the exports object with `__esModule: true` (the marker conventionally
// checked by transpiler interop helpers when this module is imported).
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the export as undefined; the real function is assigned to it at
// the end of the file.
exports.attachCoreExtractors = void 0;
/**
 * Registers the built-in extractors ("text", "integer", "number" and "float")
 * on the given instance via `ayakashiInstance.registerExtractor`.
 *
 * Every extractor factory returns an object with three functions:
 *   - `extract(element)`  pulls a value out of a DOM-like node,
 *   - `isValid(result)`   tells whether that value is usable,
 *   - `useDefault()`      supplies the fallback used when it is not.
 *
 * The numeric factories read `this.extractors.text`, so they assume that
 * `registerExtractor` invokes them with `this` bound to the instance
 * (NOTE(review): confirm against the registerExtractor implementation).
 * The third `registerExtractor` argument, `["text"]`, presumably declares that
 * dependency.
 *
 * @param {object} ayakashiInstance - Object exposing `registerExtractor(name, factory, deps)`.
 * @returns {void}
 */
function attachCoreExtractors(ayakashiInstance) {
    /** Trims the string and collapses every whitespace run into one space. */
    const normalizeWhitespace = function (raw) {
        return raw.trim().replace(/\s+/g, " ");
    };

    /**
     * Runs the registered "text" extractor on `element` and substitutes its
     * default value when the extracted text is not valid.
     */
    const readTextOrDefault = function (instance, element) {
        const textExtractor = instance.extractors.text();
        const text = textExtractor.extract(element);
        return textExtractor.isValid(text) ? text : textExtractor.useDefault();
    };

    ayakashiInstance.registerExtractor("text", function () {
        return {
            /**
             * Returns the whitespace-normalized text of a node, or null when
             * no non-empty text source is found.
             */
            extract: function (element) {
                // nodeType 3 is a DOM text node; its content lives in `data`.
                if (element.nodeType === 3) {
                    return normalizeWhitespace(element.data);
                }
                // `text` takes precedence over `textContent` when both are set.
                if (element.text && element.text.length > 0) {
                    return normalizeWhitespace(element.text);
                }
                if (element.textContent && element.textContent.length > 0) {
                    return normalizeWhitespace(element.textContent);
                }
                return null;
            },
            /** Any non-empty string is valid; null and "" are not. */
            isValid: function (result) {
                return !!result;
            },
            useDefault: function () {
                return "";
            }
        };
    });

    /**
     * Factory shared by the "integer" and "number" extractors.
     * Must stay a regular function: it relies on the caller-provided `this`.
     */
    const integerExtractor = function () {
        const self = this;
        return {
            /**
             * Joins every decimal digit found in the element's text and parses
             * the result as a base-10 integer. Yields NaN when the text has no
             * digits. Signs and separators are ignored.
             */
            extract: function (element) {
                const text = readTextOrDefault(self, element);
                const digits = text.match(/\d/g);
                // Explicit radix so the parse never depends on prefix sniffing.
                return Number.parseInt(digits ? digits.join("") : text, 10);
            },
            isValid: function (result) {
                return Number.isInteger(result);
            },
            useDefault: function () {
                return 0;
            }
        };
    };
    ayakashiInstance.registerExtractor("integer", integerExtractor, ["text"]);
    ayakashiInstance.registerExtractor("number", integerExtractor, ["text"]);

    ayakashiInstance.registerExtractor("float", function () {
        const self = this;
        return {
            /**
             * Keeps only digits, commas and dots from the element's text,
             * turns the first comma into a dot and parses the result as a
             * float. Yields NaN when nothing parseable is found.
             *
             * Only the first comma is converted and parsing stops at a second
             * separator, so grouping separators are not understood:
             * "1,234.56" parses as 1.234.
             */
            extract: function (element) {
                const text = readTextOrDefault(self, element);
                const numericChars = text.match(/[\d,.]/g);
                return parseFloat(numericChars ? numericChars.join("").replace(",", ".") : text);
            },
            /**
             * True for finite numbers only. NaN, +/-Infinity and non-number
             * values (including null/undefined) are rejected without throwing.
             */
            isValid: function (result) {
                return Number.isFinite(result);
            },
            useDefault: function () {
                return 0;
            }
        };
    }, ["text"]);
}
// Publish the function as this module's `attachCoreExtractors` export.
exports.attachCoreExtractors = attachCoreExtractors;