concepts-parser
Version:
Extracts concepts from text
54 lines (53 loc) • 2.4 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
const debug = require("debug")("concepts-parser:filters");
const concept_1 = require("../concept");
// Each entry is [opening characters, closing characters]; an entry may list
// several alternative characters in a single string.
const QuotationMarks = [
    ['"', '"'],
    ["„”“", "”“"],
    ["«", "»"],
    ["‘", "’"]
];
// Every opening character concatenated, then wrapped in a character class so
// the regex matches any single opening quotation mark.
const StartQuotationMark = QuotationMarks.map(([opening]) => opening).join("");
const StartQuotationMarkReg = new RegExp(`[${StartQuotationMark}]`);
// Same construction for the closing characters.
const EndQuotationMark = QuotationMarks.map(([, closing]) => closing).join("");
const EndQuotationMarkReg = new RegExp(`[${EndQuotationMark}]`);
/**
 * Attempts to build one concept out of a quoted concept and the concept that
 * precedes it in the list.
 *
 * The merge is only attempted when the quoted concept is immediately preceded
 * by an opening quotation mark, immediately followed by a closing quotation
 * mark, and separated from the end of the previous concept by exactly one
 * space character.
 *
 * @param {object} previous - The concept preceding `quoted` in the input list.
 * @param {object} quoted - The concept being examined.
 * @param {string} text - The text both concepts index into.
 * @param {object} context - Parsing context; its `lang` is given to the new concept.
 * @returns {object|null} The combined concept when it reports itself valid, otherwise `null`.
 */
function joinQuotedWithPrevious(previous, quoted, text, context) {
    if (!StartQuotationMarkReg.test(text[quoted.index - 1])) {
        return null;
    }
    debug(`starts with QM ${quoted.value}`);

    const quotedEnd = quoted.index + quoted.value.length;
    debug(`testing ${text[quotedEnd]} for end with QM`);
    if (!EndQuotationMarkReg.test(text[quotedEnd])) {
        return null;
    }
    debug(`ends with QM ${quoted.value}`);

    const previousEnd = previous.index + previous.value.length;
    // Position two characters before the quoted concept: the character just
    // ahead of the opening quotation mark.
    const gapIndex = quoted.index - 2;
    debug(`prevConceptEndIndex= ${previousEnd} > ${gapIndex}`);
    if (gapIndex !== previousEnd) {
        return null;
    }

    debug(`text beetwen= '${text[gapIndex]}'`);
    if (text[gapIndex] !== " ") {
        return null;
    }

    // The `+ 1` extends the span so the closing quotation mark is included.
    const candidate = new concept_1.Concept({
        value: text.substring(previous.index, quotedEnd + 1),
        index: previous.index,
        lang: context.lang
    });
    return candidate.isValid() ? candidate : null;
}

/**
 * Merges each quoted concept with the concept right before it when the two
 * are separated by a single space, e.g. `Name "Quoted"` becomes one concept.
 *
 * @param {Array<object>} concepts - Concepts carrying `index` and `value`.
 * @param {object} context - Provides `text` (the source text) and `lang`.
 * @returns {Array<object>} A new list in which merged pairs are replaced by one concept.
 */
function filter(concepts, context) {
    const { text } = context;
    const result = [];
    concepts.forEach((current, position) => {
        // The first concept has no predecessor to merge with.
        const fused = position === 0
            ? null
            : joinQuotedWithPrevious(concepts[position - 1], current, text, context);
        if (fused === null) {
            result.push(current);
            return;
        }
        // The merged concept takes the place of the previously pushed one.
        result.pop();
        result.push(fused);
    });
    return result;
}
// Expose `filter` on the CommonJS exports object.
exports.filter = filter;
;