UNPKG

geneea-nlp-client

Version:

The TypeScript Client for Geneea Interpretor G3 API.

461 lines (460 loc) 20.3 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.RequestBuilder = exports.Request = exports.Diacritization = exports.TextType = exports.Domain = exports.LanguageCode = exports.parseStrToAnalysisType = exports.AnalysisType = exports.ParaSpec = void 0; const common_1 = require("../../common/common"); const model_1 = require("../model"); /** * Object specifying the [text] and [type] of a single paragraph. */ class ParaSpec { /** * * @param type Type of the paragraphs, typically one of [Paragraph.TYPE_TILE], [Paragraph.TYPE_ABSTRACT], * [Paragraph.TYPE_BODY]; possibly [Paragraph.TYPE_SECTION_HEADING] * @param text Text of the paragraph. */ constructor(type, text) { this.type = type; this.text = text; } /** Paragraph representing a title of the whole document. It's equivalent to [subject]. */ static title(text = "") { return new ParaSpec(model_1.Paragraph.TYPE_TITLE, text !== null && text !== void 0 ? text : ""); } /** Paragraph representing a subject of the document or email. It's equivalent to [title]. */ static subject(text = "") { return this.title(text); } /** Paragraph representing an abstract of the document. It's equivalent to [lead] and [perex]. */ static abstract(text = "") { return new ParaSpec(model_1.Paragraph.TYPE_ABSTRACT, text !== null && text !== void 0 ? text : ""); } /** Paragraph representing a lead of the document. It's equivalent to [abstract] and [perex]. */ static lead(text = "") { return this.abstract(text); } /** Paragraph representing a perex of the document. It's equivalent to [abstract] and [lead].*/ static perex(text = "") { return this.abstract(text); } /** Paragraph containing a body of the document. It's equivalent to [text]. */ static body(text = "") { return new ParaSpec(model_1.Paragraph.TYPE_BODY, text !== null && text !== void 0 ? text : ""); } /** Paragraph containing a text of the document. It's equivalent to [body]. */ static text(text = "") { return this.body(text); } } exports.ParaSpec = ParaSpec; /** * The linguistic analyses the G3 API can perform; * [more detail][https://help.geneea.com/api_general/guide/analyses.html] */ var AnalysisType; (function (AnalysisType) { /** Perform all analyses at once */ AnalysisType["ALL"] = "ALL"; /** Recognize and standardize entities in text; * [more details][https://help.geneea.com/api_general/guide/entities.html] */ AnalysisType["ENTITIES"] = "ENTITIES"; /** Assign semantic tags to a document. * [more details][<https://help.geneea.com/api_general/guide/tags.html] */ AnalysisType["TAGS"] = "TAGS"; /** Relations between entities and their attributes; * [more details][<https://help.geneea.com/api_general/guide/relations.html] */ AnalysisType["RELATIONS"] = "RELATIONS"; /** Detect the emotions of the author contained in the text; * [more details][https://help.geneea.com/api_general/guide/sentiment.html] */ AnalysisType["SENTIMENT"] = "SENTIMENT"; /** Detect the language the text is written in; * [more details][https://help.geneea.com/api_general/guide/language.html] */ AnalysisType["LANGUAGE"] = "LANGUAGE"; })(AnalysisType || (exports.AnalysisType = AnalysisType = {})); const parseStrToAnalysisType = (typeStr) => { const at = AnalysisType[typeStr.toUpperCase()]; if (!at) throw new Error(`Invalid analysis type '${typeStr}'`); return at; }; exports.parseStrToAnalysisType = parseStrToAnalysisType; /** Typically used ISO 639-1 language codes. */ var LanguageCode; (function (LanguageCode) { LanguageCode["CS"] = "cs"; LanguageCode["DE"] = "de"; LanguageCode["EN"] = "en"; LanguageCode["ES"] = "es"; LanguageCode["PL"] = "pl"; LanguageCode["SK"] = "sk"; })(LanguageCode || (exports.LanguageCode = LanguageCode = {})); /** Typically used domains. For more info [see][https://help.geneea.com/api_general/guide/domains.html] */ var Domain; (function (Domain) { /** General media articles. */ Domain["MEDIA"] = "media"; /** Media articles covering news. */ Domain["NEWS"] = "news"; /** Media articles covering sport news. */ Domain["SPORT"] = "sport"; /** Tabloid articles. */ Domain["TABLOID"] = "tabloid"; /** Media articles covering technology and science. */ Domain["TECH"] = "tech"; /** General Voice-of-the customer documents (e.g. reviews). */ Domain["VOC"] = "voc"; /** Voice-of-the customer documents covering banking (e.g. reviews of banks). */ Domain["VOC_BANKING"] = "voc-banking"; /** Voice-of-the customer documents covering restaurants (e.g. reviews of restaurants). */ Domain["VOC_HOSPITALITY"] = "voc-hospitality"; })(Domain || (exports.Domain = Domain = {})); /** Typically used text types. */ var TextType; (function (TextType) { /** Text that is mostly grammatically, orthographically and typographically correct, e.g. news articles. */ TextType["CLEAN"] = "clean"; /** Text that ignores many formal grammatical, orthographical and typographical conventions, * e.g. social media posts. */ TextType["CASUAL"] = "casual"; })(TextType || (exports.TextType = TextType = {})); /** Supported diacritization models. */ var Diacritization; (function (Diacritization) { /** No diacritization is performed. */ Diacritization["NONE"] = "none"; /** Diacritics is added if needed. */ Diacritization["AUTO"] = "auto"; /** Diacritics is added to words without it if needed. */ Diacritization["YES"] = "yes"; /** Diacritics is first removed and then added if needed. */ Diacritization["REDO"] = "redo"; })(Diacritization || (exports.Diacritization = Diacritization = {})); /** Standard keys used by the G3 request. */ const STD_KEYS = new Set([ "id", "title", "text", "paraSpecs", "analyses", "htmlExtractor", "language", "langDetectPrior", "domain", "textType", "referenceDate", "diacritization", "returnMentions", "returnItemSentiment", "metadata", ]); const DATE_FORMAT_REGEX = /^(\d{4})-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01])$/; /** * An object encapsulating a single REST request for the G3 API. */ class Request { /** * * @param id Unique identifier of the document. * @param title The title or subject of the document, when available; mutually exclusive with the ``paraSpecs`` parameter. * @param text The main text of the document; mutually exclusive with the ``paraSpecs`` parameter. * @param paraSpecs The document paragraphs; mutually exclusive with the `title` and `text` parameters. * @param analyses What analyses to return. * @param language The language of the document as ISO 639-1; auto-detection will be used if omitted. * @param langDetectPrior The language detection prior; e.g. ‘de,en’. * @param domain The source domain from which the document originates. * See the [available domains][https://help.geneea.com/api_general/guide/domains.html] * @param textType The type or genre of text; not supported in public workflows/domains yet. * @param referenceDate Date to be used for the analysis as a reference; values: “NOW” or in format YYYY-MM-DD. * @param diacritization Determines whether to perform text diacritization. * @param returnMentions Should entity/tag/relation mentions be returned? No mentions are returned if null. * @param returnItemSentiment Should entity/mention/tag/relation etc. sentiment be returned? No sentiment is returned if null. * @param metadata Extra non-NLP type of information related to the document, key-value pairs. * @param customConfig */ constructor(id = null, title = null, text = null, paraSpecs = null, analyses = null, language = null, langDetectPrior = null, domain = null, textType = null, referenceDate = null, diacritization = null, returnMentions = null, returnItemSentiment = null, metadata = null, customConfig = null) { this.id = id; this.title = title; this.text = text; this.paraSpecs = paraSpecs; this.language = language; this.langDetectPrior = langDetectPrior; this.domain = domain; this.textType = textType; this.referenceDate = referenceDate; this.diacritization = diacritization; this.returnMentions = returnMentions; this.returnItemSentiment = returnItemSentiment; this.metadata = metadata; this.customConfig = customConfig; this.analyses = null; this.analyses = analyses ? new Set(analyses) : null; } /** Converts a request instance to a JSON compatible object. */ toJson() { const obj = {}; if (this.id !== null) obj.id = this.id; if (this.title !== null) obj.title = this.title; if (this.text !== null) obj.text = this.text; if (this.language !== null) obj.language = this.language; if (this.langDetectPrior !== null) obj.langDetectPrior = this.langDetectPrior; if (this.domain !== null) obj.domain = this.domain; if (this.textType !== null) obj.textType = this.textType; if (this.referenceDate !== null) obj.referenceDate = this.referenceDate; if (this.diacritization !== null) obj.diacritization = this.diacritization; if (this.paraSpecs) { obj.paraSpecs = this.paraSpecs.map((ps) => { return { type: ps.type, text: ps.text }; }); } if (this.analyses) { obj.analyses = Array.from(this.analyses) .map((a) => a.toLowerCase()) .sort(); } if (this.returnMentions) obj.returnMentions = true; if (this.returnItemSentiment) obj.returnItemSentiment = true; if (this.metadata && this.metadata.size > 0) obj.metadata = (0, common_1.serializeMap)(this.metadata); if (this.customConfig) { for (const entry of this.customConfig.entries()) { obj[entry[0]] = entry[1]; } } return obj; } /** Converts a request instance to a JSON string. */ toJsonString() { return JSON.stringify(this.toJson()); } /** Reads a request instance from a JSON object. */ // eslint-disable-next-line @typescript-eslint/no-explicit-any static fromJson(raw) { var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m; const title = (_a = raw.title) !== null && _a !== void 0 ? _a : null; const text = (_b = raw.text) !== null && _b !== void 0 ? _b : null; const paraSpecs = (_c = raw.paraSpecs) !== null && _c !== void 0 ? _c : null; if ((text !== null || title !== null) && paraSpecs !== null) throw new Error("Parameters text/title and paraSpecs are mutually exclusive."); if (text === null && paraSpecs === null) throw new Error("Either the text or paraSpecs parameter has to be provided"); const analyses = raw.analyses ? raw.analyses.map((a) => (0, exports.parseStrToAnalysisType)(a)) : null; let metadata = null; if (raw.metadata) { const entries = Object.entries(raw.metadata).map((e) => [e[0], e[1].toString()]); metadata = new Map(entries); } const unknownKeys = Object.keys(raw).filter((k) => !STD_KEYS.has(k)); const customConfig = unknownKeys.length > 0 ? new Map(unknownKeys.map((k) => [k, raw[k]])) : null; return new Request((_d = raw.id) !== null && _d !== void 0 ? _d : null, title, text, paraSpecs, analyses, (_e = raw.language) !== null && _e !== void 0 ? _e : null, (_f = raw.lanDetectPrior) !== null && _f !== void 0 ? _f : null, (_g = raw.domain) !== null && _g !== void 0 ? _g : null, (_h = raw.textType) !== null && _h !== void 0 ? _h : null, (_j = raw.referenceDate) !== null && _j !== void 0 ? _j : null, (_k = raw.diacritization) !== null && _k !== void 0 ? _k : null, (_l = raw.returnMentions) !== null && _l !== void 0 ? _l : null, (_m = raw.returnItemSentiment) !== null && _m !== void 0 ? _m : null, metadata, customConfig); } } exports.Request = Request; /** * Creates a builder with fields meant to be shared across requests. * * When analyzing multiple documents, one should: * * first, create a builder specifying all parameters shared by all the analyses to perform (e.g. [analyses] to perform, * [language prior][langDetectPrior]), * * then, use the build function to create individual requests specifying the parameters that are specific * for the analysis of each document (Id and text of the document, but possibly also `language`, etc). */ class RequestBuilder { constructor() { this._analyses = null; this._language = null; this._langDetectPrior = null; this._domain = null; this._textType = null; this._referenceDate = null; this._diacritization = null; this._returnMentions = null; this._returnItemSentiment = null; this._metadata = null; this._customConfig = null; } /** What analyses to return */ get analyses() { return this._analyses ? Array.from(this._analyses) : null; } /** What analyses to return */ set analyses(analyses) { this._analyses = new Set(analyses); } /** The language of the document as ISO 639-1; auto-detection will be used if omitted. */ get language() { return this._language; } /** The language of the document as ISO 639-1; auto-detection will be used if omitted. */ set language(language) { this._language = language; } /** The language detection prior; e.g. ‘de,en’. */ get langDetectPrior() { return this._langDetectPrior; } /** The language detection prior; e.g. ‘de,en’. */ set langDetectPrior(langDetectPrior) { this._langDetectPrior = langDetectPrior; } /** The source domain from which the document originates. * See the [available domains][https://help.geneea.com/api_general/guide/domains.html] */ get domain() { return this._domain; } /** The source domain from which the document originates. * See the [available domains][https://help.geneea.com/api_general/guide/domains.html] */ set domain(domain) { this._domain = domain; } /** The type or genre of text; not supported in public workflows/domains yet. */ get textType() { return this._textType; } /** The type or genre of text; not supported in public workflows/domains yet. */ set textType(textType) { this._textType = textType; } /** Date to be used for the analysis as a reference; either “NOW” or in format YYYY-MM-DD. */ get referenceDate() { return this._referenceDate; } /** Date to be used for the analysis as a reference; either “NOW” or in format YYYY-MM-DD. */ set referenceDate(referenceDate) { this._referenceDate = referenceDate ? this.formatRefDate(referenceDate) : null; } /** Determines whether to perform text diacritization */ get diacritization() { return this._diacritization; } /** Determines whether to perform text diacritization */ set diacritization(diacritization) { this._diacritization = diacritization; } /** Should entity/tag/relation mentions be returned? No mentions are returned if null. */ get returnMentions() { return this._returnMentions; } /** Should entity/tag/relation mentions be returned? No mentions are returned if null. */ set returnMentions(returnMentions) { this._returnMentions = returnMentions; } /** Should entity/mention/tag/relation etc. sentiment be returned? No sentiment is returned if null. */ get returnItemSentiment() { return this._returnItemSentiment; } /** Should entity/mention/tag/relation etc. sentiment be returned? No sentiment is returned if null. */ set returnItemSentiment(returnItemSentiment) { this._returnItemSentiment = returnItemSentiment; } /** Extra non-NLP type of information related to the document, key-value pairs. */ get metadata() { return this._metadata; } /** Extra non-NLP type of information related to the document, key-value pairs. */ set metadata(metadata) { this._metadata = metadata; } /** * Add [custom options][customConfig] to the request builder to be passed to the G3 API endpoint. * Existing custom options are overwritten. */ get customConfig() { return this._customConfig; } /** * Add [custom options][customConfig] to the request builder to be passed to the G3 API endpoint. * Existing custom options are overwritten. */ set customConfig(customConfig) { if (customConfig !== null && containsStdKey(customConfig.keys())) { throw new Error(`Keys ${Array.from(customConfig.keys()).sort()} overlap with the standard request keys`); } if (this._customConfig === null) { this._customConfig = customConfig; } else if (customConfig !== null && customConfig.size > 0) { for (const entry of customConfig.entries()) { this._customConfig.set(entry[0], entry[1]); } } } formatRefDate(date) { if (date.toUpperCase() === "NOW") return "NOW"; // eslint-disable-next-line prefer-const let { year, month, day } = this.parseRefDate(date); if (month.length < 2) month = "0" + month; if (day.length < 2) day = "0" + day; return [year, month, day].join("-"); } parseRefDate(date) { const matches = date.match(DATE_FORMAT_REGEX); if (matches === null) throw new Error(`Referene date ${date} is not valid.`); return { year: matches[1], month: matches[2], day: matches[3] }; } /** * Creates a new request object to be passed to the G3 client. * * @param options A RequestOptions object. * @returns Request object to be passed to the G3 client. */ build(options) { const { id, text, title, paraSpecs, language, referenceDate, metadata, customConfig, } = options; if ((text !== undefined || title !== undefined) && paraSpecs !== undefined) { throw new Error("Parameters text/title and paraSpecs are mutually exclusive"); } if (text === undefined && paraSpecs === undefined) { throw new Error("Either the text or paraSpecs parameter has to be provided"); } if (customConfig !== undefined && containsStdKey(customConfig.keys())) { throw new Error(`Keys ${Array.from(customConfig.keys()).sort()} overlap with the standard request keys`); } const refDate = referenceDate ? this.formatRefDate(referenceDate) : this.referenceDate; return new Request(id, title, text, paraSpecs, this.analyses, language !== null && language !== void 0 ? language : this.language, this.langDetectPrior, this.domain, this.textType, refDate, this.diacritization, this.returnMentions, this.returnItemSentiment, combineMaps(this.metadata, metadata !== null && metadata !== void 0 ? metadata : null), combineMaps(this.customConfig, customConfig !== null && customConfig !== void 0 ? customConfig : null)); } } exports.RequestBuilder = RequestBuilder; function containsStdKey(keys) { return Array.from(keys).filter((k) => STD_KEYS.has(k)).length > 0; } function combineMaps(a, b) { if (!a || a.size === 0) return b; if (!b || b.size === 0) return a; const c = new Map(); a.forEach((v, k) => c.set(k, v)); b.forEach((v, k) => c.set(k, v)); return c; }