UNPKG

entity-finder

Version:
77 lines (76 loc) 2.84 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.findTitles = void 0;
const debug = require("debug")("entity-finder");
const wikiApi = require("./wikipedia/api");
const searchTitles_1 = require("./searchTitles");
const utils_1 = require("./utils");

/**
 * Compiled disambiguation matchers, keyed by the disambiguation name they were
 * built from, so the same pattern is not recompiled for every category checked.
 */
const disambiguationRegExpCache = new Map();

/**
 * Finds page titles matching `name`, dropping pages that carry a
 * disambiguation category.
 *
 * @param {string} name - Text to search for; surrounding whitespace is trimmed.
 * @param {string} [lang] - Language code; trimmed and lower-cased, defaults to "en".
 * @param {{limit?: number, tags?: *, timeout?: *}} [options] - `limit` is the
 *   maximum number of titles returned (a falsy value, including 0, falls back
 *   to 2); `tags` and `timeout` are forwarded untouched to `searchTitles`.
 * @returns {Promise<Array<Object>>} At most `limit` of the title objects
 *   produced by `searchTitles`, in search order, each with a `categories`
 *   property attached.
 */
function findTitles(name, lang, options) {
    const opts = options || {};
    const query = name.trim();
    const language = (lang && lang.trim().toLowerCase()) || "en";
    const limit = opts.limit || 2;
    // Ask the search for more than needed: candidates are discarded below.
    const searchOptions = { limit: limit + 50, tags: opts.tags, timeout: opts.timeout };
    return searchTitles_1.searchTitles(query, language, searchOptions)
        .then((titles) => {
            // Only the first few candidates are checked for disambiguation.
            const candidates = titles.slice(0, limit + 5);
            return filterDezambiguizationTitles(candidates, language).then((filteredTitles) => candidates
                // The filtered list comes back in API order; walk the
                // candidates instead so the search ranking is preserved.
                .map((item) => filteredTitles.find((it) => it.title === item.title))
                .filter((item) => !!item));
        })
        .then((list) => list.slice(0, limit));
}
exports.findTitles = findTitles;

/**
 * Fetches the non-hidden categories of the given pages and returns the pages
 * that have no disambiguation category.
 *
 * Each returned element is the original object from `pageTitles`, mutated to
 * carry a `categories` array of category titles (or `undefined` when the API
 * response has no categories for that page).
 *
 * @param {Array<{title: string}>} pageTitles - Pages to check.
 * @param {string} lang - Language code passed to the API and used to pick the
 *   disambiguation name.
 * @returns {Promise<Array<Object>>} The surviving entries of `pageTitles`, in
 *   the key order of the API's `pages` object. Rejects with an `Error` holding
 *   the serialized response when the response has no `query` member.
 */
function filterDezambiguizationTitles(pageTitles, lang) {
    if (pageTitles.length === 0) {
        return Promise.resolve([]);
    }
    // First occurrence wins, matching what Array.prototype.find would return.
    const requestedByTitle = new Map();
    for (const item of pageTitles) {
        if (!requestedByTitle.has(item.title)) {
            requestedByTitle.set(item.title, item);
        }
    }
    const titles = pageTitles.map((item) => item.title).join("|");
    return wikiApi
        .query(lang, { titles: titles, prop: "categories", clshow: "!hidden", cllimit: 50 })
        .then((data) => {
            if (!data.query) {
                return Promise.reject(new Error(JSON.stringify(data)));
            }
            const pages = data.query.pages || {};
            // MediaWiki reports title normalisation as {from, to} pairs in
            // query.normalized; use it to map a returned title back to the
            // title that was actually requested.
            const requestedTitleByNormalized = new Map();
            if (Array.isArray(data.query.normalized)) {
                for (const entry of data.query.normalized) {
                    if (!requestedTitleByNormalized.has(entry.to)) {
                        requestedTitleByNormalized.set(entry.to, entry.from);
                    }
                }
            }
            const keptPages = Object.keys(pages)
                .map((pageId) => ({
                    title: pages[pageId].title,
                    categories: pages[pageId].categories && pages[pageId].categories.map((item) => item.title)
                }))
                .filter((page) => !hasADezambiguizationCategory(page.categories, lang));
            const filteredTitles = [];
            for (const page of keptPages) {
                const requestedTitle = requestedByTitle.has(page.title)
                    ? page.title
                    : requestedTitleByNormalized.get(page.title);
                const requested = requestedByTitle.get(requestedTitle);
                if (!requested) {
                    // The API returned a page that cannot be matched to any
                    // requested title; skip it instead of dereferencing
                    // undefined and failing the whole lookup.
                    continue;
                }
                requested.categories = page.categories;
                filteredTitles.push(requested);
            }
            return filteredTitles;
        });
}

/**
 * Tells whether any of the category titles is a disambiguation category.
 *
 * @param {Array<string>|undefined} categories - Category titles of one page.
 * @param {string} lang - Language code used to pick the disambiguation name.
 * @returns {boolean} `false` when `categories` is missing or nothing matches.
 */
function hasADezambiguizationCategory(categories, lang) {
    return Array.isArray(categories)
        && categories.some((category) => isDezambiguizationCategory(category, lang));
}

/**
 * Tests one category title against the disambiguation name of `lang`,
 * case-insensitively and delimited by word boundaries or the string ends.
 *
 * @param {string} category - Category title to test.
 * @param {string} lang - Language code handed to `getDisambiguationName`.
 * @returns {boolean} Whether the category title contains the name.
 * @throws {Error} When no disambiguation name is available for `lang`.
 */
function isDezambiguizationCategory(category, lang) {
    const disName = utils_1.getDisambiguationName(lang);
    if (!disName) {
        throw new Error(`No Disambiguation Name for language ${lang}`);
    }
    let disNameReg = disambiguationRegExpCache.get(disName);
    if (!disNameReg) {
        // NOTE(review): \b only recognises ASCII word characters, so a name
        // written in a non-Latin script may only match at the very start or
        // end of the category title - confirm this is acceptable.
        // NOTE(review): disName is inserted unescaped; presumably it never
        // contains regex metacharacters - verify against the utils data.
        disNameReg = new RegExp("(^|\\b)" + disName + "(\\b|$)", "i");
        disambiguationRegExpCache.set(disName, disNameReg);
    }
    const isDis = disNameReg.test(category);
    if (isDis) {
        debug(`Category ${category} is a dizambiguization`);
    }
    return isDis;
}