@botonic/plugin-contentful
Version:
## What Does This Plugin Do?
104 lines • 3.94 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.TokenStripper = exports.TokenRange = void 0;
const arrays_1 = require("../util/arrays");
const locales_1 = require("./locales");
const normalizer_1 = require("./normalizer");
const token_skipper_1 = require("./token-skipper");
const tokens_1 = require("./tokens");
/**
 * A pair of token indexes delimiting a run of tokens.
 *
 * TokenStripper.search passes `from` and `to` straight to
 * Array.prototype.slice, so in that usage `to` is exclusive.
 */
class TokenRange {
    /**
     * @param from index of the first token of the range
     * @param to index which closes the range
     */
    constructor(from, to) {
        this.from = from;
        this.to = to;
    }
    /**
     * @param other the range to compare with
     * @returns true only when `other` is defined and both bounds are
     * strictly equal (no type coercion); false otherwise
     */
    equals(other) {
        // `!= null` rejects both null and undefined instead of throwing on
        // the property access below
        return other != null && this.from === other.from && this.to === other.to;
    }
    /**
     * @returns a human readable description, used in log messages
     */
    toString() {
        return `from ${this.from} to ${this.to}`;
    }
}
exports.TokenRange = TokenRange;
/**
 * Removes a substring from a text when it matches one of a list of provided
 * strings ("needles") and occurs at the specified position (start or end).
 * It removes all separators between the stripped and the remaining parts.
 * It preprocesses the strings (separators, capitals, accents), but apart
 * from that the tokens must be identical.
 */
class TokenStripper {
    /**
     * @param needlesByPos maps a position (TokenStripper.START_POSITION or
     * TokenStripper.END_POSITION) to the array of strings which may be
     * stripped at that position. The arrays are not modified.
     * @param locale forwarded to the normalizer and to `preprocess`
     * @param normalizer used to split each needle into tokens
     */
    constructor(needlesByPos, locale, normalizer = new normalizer_1.Normalizer()) {
        this.locale = locale;
        this.normalizer = normalizer;
        this.needleTokensByPos = {};
        // we sort them to avoid stripping "buenas" before "buenas tardes"
        const firstLongest = (s1, s2) => s2.length - s1.length;
        for (const pos in needlesByPos) {
            // sort() works in place: sort a copy so that the caller's array
            // keeps its order
            this.needleTokensByPos[pos] = [...needlesByPos[pos]]
                .sort(firstLongest)
                .map(n => normalizer.normalize(locale, n).words.map(w => w.token))
                // A needle without tokens would match any input with an empty
                // range, hiding the shorter needles sorted after it
                .filter(needleTokens => needleTokens.length > 0);
        }
    }
    /**
     * Looks for the first needle (longest first) whose tokens are identical
     * to the tokens found at the start or at the end of `tokens`.
     *
     * NOTE(review): the original comment was ambiguous; presumably `tokens`
     * must still contain its stopwords, because they may occur in needles.
     * Confirm against the Normalizer configuration.
     *
     * @param tokens the tokens of the text being searched
     * @param pos TokenStripper.START_POSITION or TokenStripper.END_POSITION
     * @returns the TokenRange of the matching tokens, or undefined when no
     * needle matches (also when there are no needles for `pos`)
     * @throws Error if there are needles registered for `pos` but it is
     * neither of the two supported positions
     */
    search(tokens, pos) {
        for (const needle of this.needleTokensByPos[pos] || []) {
            let range;
            if (pos === TokenStripper.START_POSITION) {
                range = new TokenRange(0, needle.length);
            }
            else if (pos === TokenStripper.END_POSITION) {
                range = new TokenRange(tokens.length - needle.length, tokens.length);
            }
            else {
                throw new Error(`Invalid search position ${pos}`);
            }
            if (needle.length > tokens.length) {
                // It cannot match. Skipping it also avoids a negative `from`,
                // which slice() would interpret as relative to the end
                continue;
            }
            const chunk = tokens.slice(range.from, range.to);
            if ((0, arrays_1.equalArrays)(chunk, needle)) {
                return range;
            }
        }
        return undefined;
    }
    /**
     * Repeatedly removes from `haystack` the needles found at `pos`, until
     * none matches.
     *
     * @param haystack the text to strip
     * @param pos TokenStripper.START_POSITION or TokenStripper.END_POSITION
     * @returns the stripped text, or `haystack` itself when nothing matches
     * @throws Error if a needle is found for an unsupported `pos`
     */
    strip(haystack, pos) {
        const skipper = new token_skipper_1.TokenSkipper();
        let tokens = (0, locales_1.preprocess)(this.locale, haystack)
            .split(tokens_1.DEFAULT_SEPARATORS_REGEX)
            .filter(t => !!t);
        // eslint-disable-next-line no-constant-condition
        while (true) {
            const range = this.search(tokens, pos);
            if (!range) {
                return haystack;
            }
            const last = haystack;
            // NOTE(review): skipWords presumably returns the character index
            // of `haystack` reached after skipping the given number of words
            // (and the following separators when its third argument is
            // true). Confirm against TokenSkipper.
            switch (pos) {
                case TokenStripper.START_POSITION: {
                    const idx = skipper.skipWords(haystack, range.to, true);
                    haystack = haystack.slice(idx);
                    tokens = tokens.slice(range.to);
                    break;
                }
                case TokenStripper.END_POSITION: {
                    const idx = skipper.skipWords(haystack, range.from, false);
                    haystack = haystack.substring(0, idx);
                    tokens = tokens.slice(0, range.from);
                    break;
                }
                default:
                    throw new Error(`Invalid search position ${pos}`);
            }
            if (last === haystack) {
                // Nothing was removed although a needle matched: give up
                // rather than looping forever
                console.error(`Could not skip ${range.toString()} from ${haystack}`);
                return haystack;
            }
        }
    }
}
exports.TokenStripper = TokenStripper;
// Values accepted as the `pos` argument of TokenStripper.search and
// TokenStripper.strip, and as keys of the constructor's `needlesByPos`.
// START_POSITION: needles are compared with the first tokens of the text.
TokenStripper.START_POSITION = 0;
// END_POSITION: needles are compared with the last tokens of the text.
TokenStripper.END_POSITION = 1;
//# sourceMappingURL=token-stripper.js.map