UNPKG

site-metadata-extractor

Version:
55 lines 6.41 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));

/**
 * Cache of loaded stopword sets, keyed by the `lang` value passed to
 * `stopwords`. A Map is used instead of a plain object so that keys such as
 * "__proto__" are stored like any other key.
 */
const cache = new Map();

/**
 * Characters stripped from the input before it is split into words.
 * This is the same character set as the previous pattern, whose accidental
 * `,-/` range expanded to `,`, `-`, `.` and `/`; every member is now listed
 * explicitly.
 */
const PUNCTUATION = /[|@<>\[\]"'.,\-\/#?!$%^&*+;:{}=_`~()]/g;

/**
 * Splits punctuation-free text into candidate words.
 *
 * NOTE(review): the split is on a single space character only, so consecutive
 * spaces yield empty tokens (which count towards `wordCount`) and words
 * separated only by tabs or newlines stay joined. Left unchanged on purpose,
 * since callers' scores may depend on the current counts - confirm before
 * switching to a whitespace regex.
 *
 * @param {string} strippedInput - Text with punctuation already removed.
 * @returns {string[]} The tokens between single spaces.
 */
const candidateWords = (strippedInput) => {
    return strippedInput.split(" ");
};

/**
 * Builds the absolute path of the stopwords file for a language.
 *
 * @param {string} lang - Language code used in the file name.
 * @returns {string} Path to `../data/stopwords/stopwords-<lang>.txt`, resolved
 *   relative to this module's directory.
 */
const getFilePath = (lang) => {
    return path_1.default.resolve(__dirname, `../data/stopwords/stopwords-${lang}.txt`);
};

/**
 * Removes every character matched by `PUNCTUATION` from the text.
 *
 * @param {string} content - Raw input text.
 * @returns {string} The text without punctuation characters.
 */
const removePunctuation = (content) => {
    return content.replace(PUNCTUATION, "");
};

/**
 * Returns the stopword set for a language, reading it from disk on first use.
 *
 * When no file exists for `lang`, a warning is logged and the English list is
 * loaded and cached under `lang`. Because the cache is consulted first, the
 * file system is touched (and the warning emitted) only once per language.
 *
 * @param {string} lang - Language code.
 * @returns {Set<string>} The non-empty lines of the stopwords file.
 * @throws {Error} Whatever `fs.readFileSync` throws if the file that was
 *   selected (including the English fallback) cannot be read.
 */
const loadStopWords = (lang) => {
    if (cache.has(lang)) {
        return cache.get(lang);
    }
    let filePath = getFilePath(lang);
    // `lang` is interpolated into a file name, so a value containing a path
    // separator could resolve outside the stopwords directory. Such a value
    // cannot name a bundled file; treat it as "not found".
    const escapesDataDir = /[\\\/]/.test(lang);
    if (escapesDataDir || !fs_1.default.existsSync(filePath)) {
        console.warn(`WARNING: No stopwords file found for '${lang}' - defaulting to English!`);
        filePath = getFilePath("en");
    }
    // Split on LF or CRLF so that a file with Windows line endings does not
    // leave a trailing "\r" on every entry.
    const entries = fs_1.default
        .readFileSync(filePath)
        .toString()
        .split(/\r?\n/)
        .filter((str) => {
            return str.length > 0;
        });
    const stopWordSet = new Set(entries);
    cache.set(lang, stopWordSet);
    return stopWordSet;
};

/**
 * Counts the stopwords that occur in a piece of text.
 *
 * Punctuation is removed, the text is split on single spaces, and each token
 * is lower-cased and looked up in the stopword list for `lang`.
 *
 * @param {string} content - Text to analyse.
 * @param {string} [lang="en"] - Language code of the stopword list to use.
 * @returns {{stopWordCount: number, stopWords: string[], wordCount: number}}
 *   `stopWords` holds the lower-cased matches in order of appearance
 *   (duplicates kept), `stopWordCount` is its length, and `wordCount` is the
 *   total number of tokens.
 */
const stopwords = (content, lang = "en") => {
    const stopWordSet = loadStopWords(lang);
    const words = candidateWords(removePunctuation(content));
    const overlappingStopwords = [];
    for (const word of words) {
        const lowered = word.toLowerCase();
        // Set lookup replaces a linear scan of the whole list per word.
        if (stopWordSet.has(lowered)) {
            overlappingStopwords.push(lowered);
        }
    }
    return {
        stopWordCount: overlappingStopwords.length,
        stopWords: overlappingStopwords,
        wordCount: words.length,
    };
};
exports.default = stopwords;
//# 
sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic3RvcHdvcmRzLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vc3JjL3N0b3B3b3Jkcy50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiOzs7OztBQUFBLDRDQUFvQjtBQUNwQixnREFBd0I7QUFReEIsTUFBTSxLQUFLLEdBQWdDLEVBQUUsQ0FBQztBQUU5QyxNQUFNLGFBQWEsR0FBRyxDQUFDLGFBQXFCLEVBQVksRUFBRTtJQUN4RCxPQUFPLGFBQWEsQ0FBQyxLQUFLLENBQUMsR0FBRyxDQUFDLENBQUM7QUFDbEMsQ0FBQyxDQUFDO0FBRUYsTUFBTSxXQUFXLEdBQUcsQ0FBQyxJQUFZLEVBQUUsRUFBRTtJQUNuQyxPQUFPLGNBQUksQ0FBQyxPQUFPLENBQUMsU0FBUyxFQUFFLCtCQUErQixJQUFJLE1BQU0sQ0FBQyxDQUFDO0FBQzVFLENBQUMsQ0FBQztBQUVGLE1BQU0saUJBQWlCLEdBQUcsQ0FBQyxPQUFlLEVBQUUsRUFBRTtJQUM1QyxPQUFPLE9BQU8sQ0FBQyxPQUFPLENBQUMsdUNBQXVDLEVBQUUsRUFBRSxDQUFDLENBQUM7QUFDdEUsQ0FBQyxDQUFDO0FBRUYsTUFBTSxTQUFTLEdBQUcsQ0FBQyxPQUFlLEVBQUUsSUFBSSxHQUFHLElBQUksRUFBYSxFQUFFO0lBQzVELElBQUksUUFBUSxHQUFHLFdBQVcsQ0FBQyxJQUFJLENBQUMsQ0FBQztJQUNqQyxJQUFJLFNBQW1CLENBQUM7SUFFeEIsSUFBSSxDQUFDLFlBQUUsQ0FBQyxVQUFVLENBQUMsUUFBUSxDQUFDLEVBQUUsQ0FBQztRQUM3QixPQUFPLENBQUMsSUFBSSxDQUNWLHlDQUF5QyxJQUFJLDRCQUE0QixDQUMxRSxDQUFDO1FBQ0YsUUFBUSxHQUFHLFdBQVcsQ0FBQyxJQUFJLENBQUMsQ0FBQztJQUMvQixDQUFDO0lBRUQsSUFBSSxNQUFNLENBQUMsU0FBUyxDQUFDLGNBQWMsQ0FBQyxJQUFJLENBQUMsS0FBSyxFQUFFLElBQUksQ0FBQyxFQUFFLENBQUM7UUFDdEQsU0FBUyxHQUFHLEtBQUssQ0FBQyxJQUFJLENBQUMsQ0FBQztJQUMxQixDQUFDO1NBQU0sQ0FBQztRQUNOLFNBQVMsR0FBRyxZQUFFO2FBQ1gsWUFBWSxDQUFDLFFBQVEsQ0FBQzthQUN0QixRQUFRLEVBQUU7YUFDVixLQUFLLENBQUMsSUFBSSxDQUFDO2FBQ1gsTUFBTSxDQUFDLENBQUMsR0FBRyxFQUFFLEVBQUU7WUFDZCxPQUFPLEdBQUcsQ0FBQyxNQUFNLEdBQUcsQ0FBQyxDQUFDO1FBQ3hCLENBQUMsQ0FBQyxDQUFDO1FBQ0wsS0FBSyxDQUFDLElBQUksQ0FBQyxHQUFHLFNBQVMsQ0FBQztJQUMxQixDQUFDO0lBRUQsTUFBTSxhQUFhLEdBQUcsaUJBQWlCLENBQUMsT0FBTyxDQUFDLENBQUM7SUFDakQsTUFBTSxLQUFLLEdBQUcsYUFBYSxDQUFDLGFBQWEsQ0FBQyxDQUFDO0lBQzNDLE1BQU0sb0JBQW9CLEdBQWEsRUFBRSxDQUFDO0lBQzFDLElBQUksS0FBSyxHQUFHLENBQUMsQ0FBQztJQUVkLEtBQUssQ0FBQyxPQUFPLENBQUMsQ0FBQyxJQUFJLEVBQUUsRUFBRTtRQUNyQixLQUFLLEVBQUUsQ0FBQztRQUNSLElBQUksU0FBUyxDQUFDLE9BQU8sQ0FBQyxJQUFJLENBQUMsV0FBVyxFQUFFLENBQUMsR0FBRyxDQU
FDLENBQUMsRUFBRSxDQUFDO1lBQy9DLG9CQUFvQixDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsV0FBVyxFQUFFLENBQUMsQ0FBQztRQUNoRCxDQUFDO0lBQ0gsQ0FBQyxDQUFDLENBQUM7SUFFSCxPQUFPO1FBQ0wsYUFBYSxFQUFFLG9CQUFvQixDQUFDLE1BQU07UUFDMUMsU0FBUyxFQUFFLG9CQUFvQjtRQUMvQixTQUFTLEVBQUUsS0FBSztLQUNqQixDQUFDO0FBQ0osQ0FBQyxDQUFDO0FBRUYsa0JBQWUsU0FBUyxDQUFDIiwic291cmNlc0NvbnRlbnQiOlsiaW1wb3J0IGZzIGZyb20gXCJmc1wiO1xuaW1wb3J0IHBhdGggZnJvbSBcInBhdGhcIjtcblxuaW50ZXJmYWNlIFN0b3BXb3JkcyB7XG4gIHN0b3BXb3JkQ291bnQ6IG51bWJlcjtcbiAgc3RvcFdvcmRzOiBzdHJpbmdbXTtcbiAgd29yZENvdW50OiBudW1iZXI7XG59XG5cbmNvbnN0IGNhY2hlOiB7IFtrZXk6IHN0cmluZ106IHN0cmluZ1tdIH0gPSB7fTtcblxuY29uc3QgY2FuZGlhdGVXb3JkcyA9IChzdHJpcHBlZElucHV0OiBzdHJpbmcpOiBzdHJpbmdbXSA9PiB7XG4gIHJldHVybiBzdHJpcHBlZElucHV0LnNwbGl0KFwiIFwiKTtcbn07XG5cbmNvbnN0IGdldEZpbGVQYXRoID0gKGxhbmc6IHN0cmluZykgPT4ge1xuICByZXR1cm4gcGF0aC5yZXNvbHZlKF9fZGlybmFtZSwgYC4uL2RhdGEvc3RvcHdvcmRzL3N0b3B3b3Jkcy0ke2xhbmd9LnR4dGApO1xufTtcblxuY29uc3QgcmVtb3ZlUHVuY3R1YXRpb24gPSAoY29udGVudDogc3RyaW5nKSA9PiB7XG4gIHJldHVybiBjb250ZW50LnJlcGxhY2UoL1t8QDw+W1xcXVwiJy4sLS8jPyEkJV4mKis7Ont9PVxcLV9gfigpXS9nLCBcIlwiKTtcbn07XG5cbmNvbnN0IHN0b3B3b3JkcyA9IChjb250ZW50OiBzdHJpbmcsIGxhbmcgPSBcImVuXCIpOiBTdG9wV29yZHMgPT4ge1xuICBsZXQgZmlsZVBhdGggPSBnZXRGaWxlUGF0aChsYW5nKTtcbiAgbGV0IHN0b3BXb3Jkczogc3RyaW5nW107XG5cbiAgaWYgKCFmcy5leGlzdHNTeW5jKGZpbGVQYXRoKSkge1xuICAgIGNvbnNvbGUud2FybihcbiAgICAgIGBXQVJOSU5HOiBObyBzdG9wd29yZHMgZmlsZSBmb3VuZCBmb3IgJyR7bGFuZ30nIC0gZGVmYXVsdGluZyB0byBFbmdsaXNoIWAsXG4gICAgKTtcbiAgICBmaWxlUGF0aCA9IGdldEZpbGVQYXRoKFwiZW5cIik7XG4gIH1cblxuICBpZiAoT2JqZWN0LnByb3RvdHlwZS5oYXNPd25Qcm9wZXJ0eS5jYWxsKGNhY2hlLCBsYW5nKSkge1xuICAgIHN0b3BXb3JkcyA9IGNhY2hlW2xhbmddO1xuICB9IGVsc2Uge1xuICAgIHN0b3BXb3JkcyA9IGZzXG4gICAgICAucmVhZEZpbGVTeW5jKGZpbGVQYXRoKVxuICAgICAgLnRvU3RyaW5nKClcbiAgICAgIC5zcGxpdChcIlxcblwiKVxuICAgICAgLmZpbHRlcigoc3RyKSA9PiB7XG4gICAgICAgIHJldHVybiBzdHIubGVuZ3RoID4gMDtcbiAgICAgIH0pO1xuICAgIGNhY2hlW2xhbmddID0gc3RvcFdvcmRzO1xuICB9XG5cbiAgY29uc3Qgc3RyaXBwZWRJbnB1dCA9IHJlbW92ZVB1bmN0dWF0aW9uKGNvbnRlbn
QpO1xuICBjb25zdCB3b3JkcyA9IGNhbmRpYXRlV29yZHMoc3RyaXBwZWRJbnB1dCk7XG4gIGNvbnN0IG92ZXJsYXBwaW5nU3RvcHdvcmRzOiBzdHJpbmdbXSA9IFtdO1xuICBsZXQgY291bnQgPSAwO1xuXG4gIHdvcmRzLmZvckVhY2goKHdvcmQpID0+IHtcbiAgICBjb3VudCsrO1xuICAgIGlmIChzdG9wV29yZHMuaW5kZXhPZih3b3JkLnRvTG93ZXJDYXNlKCkpID4gLTEpIHtcbiAgICAgIG92ZXJsYXBwaW5nU3RvcHdvcmRzLnB1c2god29yZC50b0xvd2VyQ2FzZSgpKTtcbiAgICB9XG4gIH0pO1xuXG4gIHJldHVybiB7XG4gICAgc3RvcFdvcmRDb3VudDogb3ZlcmxhcHBpbmdTdG9wd29yZHMubGVuZ3RoLFxuICAgIHN0b3BXb3Jkczogb3ZlcmxhcHBpbmdTdG9wd29yZHMsXG4gICAgd29yZENvdW50OiBjb3VudCxcbiAgfTtcbn07XG5cbmV4cG9ydCBkZWZhdWx0IHN0b3B3b3JkcztcbiJdfQ==