entity-finder
Version:
Named entity finder
94 lines (93 loc) • 3.08 kB
JavaScript
;
// Set `__esModule: true` on the CommonJS exports object.
// NOTE(review): this looks like compiler-emitted interop boilerplate — confirm
// before hand-editing.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every exported name as `undefined`; each one is assigned its
// function right after the corresponding definition further down in the file.
exports.similarityScore = exports.firstPhrase = exports.removeNestedParentheses = exports.getDisambiguationName = exports.formatTitle = exports.isWikidataId = exports.countWords = void 0;
// Provides getDisambiguationNames(), which getDisambiguationName() indexes by language.
const wikiData = require("wikipedia-data");
/**
 * Counts the words in a title, treating runs of whitespace and/or hyphens
 * as word separators.
 *
 * Empty tokens produced by leading/trailing separators (or by an empty
 * string) are ignored, so `""` yields 0 and `" Jean-Paul "` yields 2.
 *
 * @param {string} title - Title to measure.
 * @returns {number} Number of non-empty words in `title`.
 */
function countWords(title) {
    // `split` returns "" entries when the string starts or ends with a
    // separator (and [""] for an empty string); those are not words.
    return title.split(/[\s-]+/).filter((word) => word.length > 0).length;
}
exports.countWords = countWords;
/**
 * Tells whether a value has the shape of a Wikidata item id: an uppercase
 * `Q` followed by one or more digits and nothing else.
 *
 * @param {string} id - Candidate identifier.
 * @returns {boolean} `true` when `id` matches `Q<digits>` exactly.
 */
function isWikidataId(id) {
    const wikidataIdPattern = /^Q\d+$/;
    return wikidataIdPattern.test(id);
}
exports.isWikidataId = isWikidataId;
/**
 * Splits a page title into its base name and a trailing parenthesised
 * qualifier, e.g. `"Mercury (planet)"` -> simple `"Mercury"`, special `"planet"`.
 *
 * @param {string} title - Full page title.
 * @returns {{title: string, simple?: string, special?: string}} Always carries
 *   the original `title`; `simple` and `special` are present only when the
 *   title ends with a non-empty `(...)` group.
 */
function formatTitle(title) {
    const suffixMatch = /\(([^)]+)\)$/i.exec(title);
    if (!suffixMatch) {
        // No trailing "(...)" qualifier: only the raw title is reported.
        return { title };
    }
    return {
        title,
        // Everything before the opening parenthesis, without surrounding spaces.
        simple: title.substring(0, suffixMatch.index).trim(),
        // Text captured between the parentheses.
        special: suffixMatch[1],
    };
}
exports.formatTitle = formatTitle;
/**
 * Looks up the disambiguation name registered for a language.
 *
 * @param {string} lang - Key into the object returned by
 *   `wikiData.getDisambiguationNames()`.
 * @returns {*} The entry stored under `lang`, or `undefined` when there is none.
 */
function getDisambiguationName(lang) {
    const namesByLanguage = wikiData.getDisambiguationNames();
    return namesByLanguage[lang];
}
exports.getDisambiguationName = getDisambiguationName;
/**
 * Removes every balanced parenthesised group (including nested ones) from a
 * text and tidies the whitespace left behind.
 *
 * Unbalanced parentheses are kept: a `)` with no pending `(` is left alone,
 * and a `(` that is never closed stays in the output.
 *
 * @param {string} text - Input text.
 * @returns {string} `text` without balanced `(...)` groups, with whitespace
 *   runs collapsed to one space, whitespace before `,`, `;` or `.` removed,
 *   and both ends trimmed.
 */
function removeNestedParentheses(text) {
    const pieces = text.split("");
    // Positions of "(" characters still waiting for their closing ")".
    const pendingOpens = [];

    pieces.forEach((piece, position) => {
        if (piece === "(") {
            pendingOpens.push(position);
            return;
        }
        if (piece !== ")" || pendingOpens.length === 0) {
            return;
        }
        // Blank out the whole group, from its "(" through this ")".
        pieces.fill("", pendingOpens.pop(), position + 1);
    });

    const withoutGroups = pieces.join("");
    const singleSpaced = withoutGroups.replace(/\s+/g, " ");
    // Removing a group can strand a space before punctuation ("Lima , Peru").
    const punctuationTightened = singleSpaced.replace(/\s+([,;.])/g, "$1");
    return punctuationTightened.trim();
}
exports.removeNestedParentheses = removeNestedParentheses;
/**
 * Returns the shortest leading portion of a text that ends on one of the
 * characters `. ! ? ¿ ¡ , ;` and whose trimmed length is at least `min`.
 *
 * @param {string} text - Text to cut.
 * @param {number} [min=50] - Minimum length (after trimming) of the result.
 * @returns {string} The trimmed prefix ending at the first qualifying
 *   boundary, or `text` unchanged when no boundary yields a long enough prefix.
 */
function firstPhrase(text, min = 50) {
    const boundaryPattern = /[.!?¿¡,;]/g;
    for (let hit = boundaryPattern.exec(text); hit !== null; hit = boundaryPattern.exec(text)) {
        // Include the boundary character itself in the candidate phrase.
        const candidate = text.slice(0, hit.index + 1).trim();
        if (candidate.length >= min) {
            return candidate;
        }
    }
    return text;
}
exports.firstPhrase = firstPhrase;
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other. Comparison is per UTF-16 code unit (`charAt`).
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Edit distance between `a` and `b`.
 */
function levenshteinDistance(a, b) {
    // Row 0 of the DP table: distance from the empty prefix of `b` to each prefix of `a`.
    let previousRow = Array.from({ length: a.length + 1 }, (_, column) => column);

    for (let row = 1; row <= b.length; row++) {
        const bChar = b.charAt(row - 1);
        // Column 0: turning the first `row` characters of `b` into "" takes `row` edits.
        const currentRow = [row];
        for (let column = 1; column <= a.length; column++) {
            currentRow[column] = bChar === a.charAt(column - 1)
                ? previousRow[column - 1]
                : 1 + Math.min(previousRow[column - 1], currentRow[column - 1], previousRow[column]);
        }
        previousRow = currentRow;
    }

    return previousRow[a.length];
}
/**
 * Scores how alike two strings are, ignoring case and surrounding whitespace.
 *
 * Both inputs are lower-cased and trimmed first; the score is then
 * `(maxLen - distance) / maxLen`, where `distance` is the Levenshtein distance
 * between the normalised strings and `maxLen` is the longer normalised length.
 * Measuring `maxLen` on the same normalised strings keeps the result within
 * [0, 1] and stops padding whitespace from inflating the score.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} 1 for identical (or both empty) normalised strings, down
 *   to 0 when every character has to change.
 */
function similarityScore(a, b) {
    const left = a.toLowerCase().trim();
    const right = b.toLowerCase().trim();
    const maxLen = Math.max(left.length, right.length);
    if (maxLen === 0) {
        // Nothing left to compare after normalisation: treat as identical.
        return 1;
    }
    const distance = levenshteinDistance(left, right);
    return (maxLen - distance) / maxLen;
}
exports.similarityScore = similarityScore;