UNPKG

entity-finder

Version:
94 lines (93 loc) 3.08 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.similarityScore = exports.firstPhrase = exports.removeNestedParentheses = exports.getDisambiguationName = exports.formatTitle = exports.isWikidataId = exports.countWords = void 0;
const wikiData = require("wikipedia-data");

/**
 * Counts the tokens obtained by splitting a title on runs of whitespace
 * and/or hyphens.
 *
 * Note: `split` yields an empty token for an empty string and for leading or
 * trailing separators, and those empty tokens are counted (e.g. `""` gives 1).
 *
 * @param {string} title - Text to count tokens in.
 * @returns {number} Number of tokens produced by the split.
 */
function countWords(title) {
    return title.split(/[\s-]+/g).length;
}
exports.countWords = countWords;

/**
 * Tests whether a string consists of an uppercase "Q" followed by one or
 * more digits and nothing else.
 *
 * @param {string} id - Candidate identifier.
 * @returns {boolean} True when `id` matches `Q<digits>` exactly.
 */
function isWikidataId(id) {
    return /^Q\d+$/.test(id);
}
exports.isWikidataId = isWikidataId;

/**
 * Splits a title that ends in a parenthesised suffix into its parts.
 *
 * For "Mercury (planet)" the result is
 * `{ title: "Mercury (planet)", simple: "Mercury", special: "planet" }`.
 * When the title does not end in `(...)`, only `title` is set.
 *
 * @param {string} title - Full page title.
 * @returns {{title: string, simple?: string, special?: string}} The original
 *   title plus, when a trailing parenthesised group exists, the trimmed text
 *   before it (`simple`) and the text inside it (`special`).
 */
function formatTitle(title) {
    const suffixMatch = /\(([^)]+)\)$/i.exec(title);
    const pageTitle = { title: title };
    if (suffixMatch) {
        pageTitle.simple = pageTitle.title.substring(0, suffixMatch.index).trim();
        pageTitle.special = suffixMatch[1];
    }
    return pageTitle;
}
exports.formatTitle = formatTitle;

/**
 * Looks up the entry for `lang` in the value returned by
 * `wikiData.getDisambiguationNames()`.
 *
 * @param {string} lang - Key to look up (presumably a language code; confirm
 *   against the wikipedia-data package).
 * @returns {*} Whatever is stored under `lang`.
 */
function getDisambiguationName(lang) {
    return wikiData.getDisambiguationNames()[lang];
}
exports.getDisambiguationName = getDisambiguationName;

/**
 * Removes every balanced parenthesised span (including nested ones) from the
 * text, then tidies the spacing: whitespace runs collapse to a single space,
 * whitespace directly before `,`, `;` or `.` is dropped, and the result is
 * trimmed. Unmatched parentheses are left in place.
 *
 * @param {string} text - Input text.
 * @returns {string} Text without balanced parenthesised spans.
 */
function removeNestedParentheses(text) {
    const openPositions = [];
    const chars = text.split("");
    for (let i = 0; i < chars.length; i++) {
        if (chars[i] === "(") {
            openPositions.push(i);
        }
        else if (chars[i] === ")" && openPositions.length > 0) {
            // Blank the whole span from the matching "(" through this ")".
            chars.fill("", openPositions.pop(), i + 1);
        }
    }
    return chars
        .join("")
        .replace(/\s+/g, " ")
        .replace(/\s+([,;.])/g, "$1")
        .trim();
}
exports.removeNestedParentheses = removeNestedParentheses;

/**
 * Returns the shortest leading part of `text` that ends at one of the
 * characters `. ! ? ¿ ¡ , ;` and whose trimmed length is at least `min`.
 * When no such prefix exists, `text` is returned unchanged.
 *
 * @param {string} text - Input text.
 * @param {number} [min=50] - Minimum length of the returned prefix.
 * @returns {string} The trimmed prefix, or the original text.
 */
function firstPhrase(text, min = 50) {
    // A fresh /g literal per call, so `lastIndex` always starts at 0.
    const phraseBoundary = /[.!?¿¡,;]/g;
    let match;
    while ((match = phraseBoundary.exec(text)) !== null) {
        const candidate = text.slice(0, match.index + 1).trim();
        if (candidate.length >= min) {
            return candidate;
        }
    }
    return text;
}
exports.firstPhrase = firstPhrase;

/**
 * Computes the Levenshtein edit distance (insertions, deletions and
 * substitutions, each costing 1) between two strings, compared per UTF-16
 * code unit.
 *
 * Only two rows of the dynamic-programming table are kept at a time, because
 * each cell depends solely on the current and the previous row.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Edit distance between `a` and `b`.
 */
function levenshteinDistance(a, b) {
    // Row 0: turning the empty prefix of `b` into a prefix of `a` of length j
    // costs j insertions.
    let previousRow = Array.from({ length: a.length + 1 }, (_, j) => j);
    for (let i = 1; i <= b.length; i++) {
        // Column 0: a prefix of `b` of length i versus the empty prefix of `a`.
        const currentRow = [i];
        for (let j = 1; j <= a.length; j++) {
            if (b.charAt(i - 1) === a.charAt(j - 1)) {
                currentRow[j] = previousRow[j - 1];
            }
            else {
                currentRow[j] = Math.min(previousRow[j - 1] + 1, currentRow[j - 1] + 1, previousRow[j] + 1);
            }
        }
        previousRow = currentRow;
    }
    return previousRow[a.length];
}

/**
 * Scores how similar two strings are, ignoring case and surrounding
 * whitespace: 1 means identical after normalisation, 0 means every position
 * differs.
 *
 * The divisor is the length of the longer *normalised* string, the same
 * strings the edit distance is measured on, so the score always lies in
 * [0, 1]. Using the raw lengths would let padding inflate the score.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Similarity in the range [0, 1]; 1 when both strings are
 *   empty after normalisation.
 */
function similarityScore(a, b) {
    const left = a.toLowerCase().trim();
    const right = b.toLowerCase().trim();
    const maxLen = Math.max(left.length, right.length);
    if (maxLen === 0) {
        return 1;
    }
    return (maxLen - levenshteinDistance(left, right)) / maxLen;
}
exports.similarityScore = similarityScore;