/*
 * punkt — a port of NLTK's Punkt sentence tokenizer to JS.
 * Version: (not specified)
 * utils.js — JavaScript, 32 lines, 798 B
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.escapeRegExp = void 0;
exports.matchAll = matchAll;
exports.pairIter = pairIter;
/**
 * Escapes regular-expression metacharacters in a string so it can be embedded
 * in a pattern and matched literally.
 *
 * Every occurrence of `. * + ? ^ $ { } ( ) | [ ] \ -` is prefixed with a
 * backslash. The hyphen is included, so the result is also safe to place
 * inside a character class.
 *
 * NOTE: `\-` is not a valid escape outside a character class when the target
 * pattern is compiled with the `u` flag.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The text with each metacharacter backslash-escaped.
 */
const escapeRegExp = (str) => {
    const metaChars = /[.*+?^${}()|[\]\\-]/g;
    // "$&" in the replacement stands for the matched character itself.
    return str.replace(metaChars, "\\$&");
};
exports.escapeRegExp = escapeRegExp;
/**
 * Lazily yields every match of `regexp` in `str`, in order, as the arrays
 * returned by `RegExp.prototype.exec`.
 *
 * Scanning always starts at the beginning of the string: `lastIndex` is reset
 * to 0 when iteration begins. A global or sticky `regexp` is used directly, so
 * its `lastIndex` is mutated while iterating. A pattern with neither flag is
 * never mutated; a global copy of it is scanned instead.
 *
 * @param {string} str - Text to search.
 * @param {RegExp} regexp - Pattern to apply.
 * @yields {RegExpExecArray} One `exec` result per match.
 */
function* matchAll(str, regexp) {
    // exec() only advances lastIndex for global/sticky patterns; without one
    // of those flags it would return the first match forever. Scan a global
    // copy in that case so the loop terminates.
    const re = regexp.global || regexp.sticky
        ? regexp
        : new RegExp(regexp.source, `${regexp.flags}g`);
    const fullUnicode = re.unicode === true || re.unicodeSets === true;
    re.lastIndex = 0;
    let match;
    while ((match = re.exec(str)) !== null) {
        yield match;
        if (match[0] === "") {
            // An empty match leaves lastIndex where it was; step past it
            // manually. In Unicode mode step over a whole code point so that
            // lastIndex never lands between the halves of a surrogate pair.
            const at = re.lastIndex;
            const isAstral = fullUnicode &&
                at < str.length &&
                str.codePointAt(at) > 0xffff;
            re.lastIndex = at + (isAstral ? 2 : 1);
        }
    }
}
/**
 * Yields each element of `tokens` paired with the element that follows it.
 *
 * For input `[a, b, c]` the generator produces `[a, b]`, `[b, c]`, `[c, null]`:
 * the last element is paired with `null`. An empty input yields nothing.
 *
 * Whether an element has been seen is tracked with an explicit flag rather
 * than by the truthiness of the element, so falsy elements (`0`, `""`,
 * `false`, `null`) are paired like any other value.
 *
 * @param {Iterable<*>} tokens - Elements to pair up.
 * @yields {Array} `[current, next]`, where `next` is `null` for the last element.
 */
function* pairIter(tokens) {
    let hasPrev = false;
    let prev;
    for (const el of tokens) {
        if (hasPrev) {
            yield [prev, el];
        }
        prev = el;
        hasPrev = true;
    }
    if (hasPrev) {
        yield [prev, null];
    }
}
//# sourceMappingURL=utils.js.map