geneea-nlp-client
Version:
The TypeScript Client for Geneea Interpretor G3 API.
461 lines (460 loc) • 20.3 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.RequestBuilder = exports.Request = exports.Diacritization = exports.TextType = exports.Domain = exports.LanguageCode = exports.parseStrToAnalysisType = exports.AnalysisType = exports.ParaSpec = void 0;
const common_1 = require("../../common/common");
const model_1 = require("../model");
/**
 * Object specifying the [text] and [type] of a single paragraph.
 */
class ParaSpec {
    /**
     * @param type Type of the paragraph, typically one of [Paragraph.TYPE_TITLE], [Paragraph.TYPE_ABSTRACT],
     * [Paragraph.TYPE_BODY]; possibly [Paragraph.TYPE_SECTION_HEADING]
     * @param text Text of the paragraph.
     */
    constructor(type, text) {
        this.type = type;
        this.text = text;
    }
    // NOTE: the alias factories below call the canonical factory through the class name
    // rather than `this`, so they keep working when detached from the class,
    // e.g. `texts.map(ParaSpec.text)`.
    /**
     * Paragraph representing a title of the whole document. It's equivalent to [subject].
     * @param text Title text; `null` and `undefined` are replaced by an empty string.
     */
    static title(text = "") {
        // The default parameter covers `undefined`; an explicit `null` is normalized here.
        return new ParaSpec(model_1.Paragraph.TYPE_TITLE, text === null ? "" : text);
    }
    /** Paragraph representing a subject of the document or email. It's equivalent to [title]. */
    static subject(text = "") {
        return ParaSpec.title(text);
    }
    /**
     * Paragraph representing an abstract of the document. It's equivalent to [lead] and [perex].
     * @param text Abstract text; `null` and `undefined` are replaced by an empty string.
     */
    static abstract(text = "") {
        return new ParaSpec(model_1.Paragraph.TYPE_ABSTRACT, text === null ? "" : text);
    }
    /** Paragraph representing a lead of the document. It's equivalent to [abstract] and [perex]. */
    static lead(text = "") {
        return ParaSpec.abstract(text);
    }
    /** Paragraph representing a perex of the document. It's equivalent to [abstract] and [lead]. */
    static perex(text = "") {
        return ParaSpec.abstract(text);
    }
    /**
     * Paragraph containing a body of the document. It's equivalent to [text].
     * @param text Body text; `null` and `undefined` are replaced by an empty string.
     */
    static body(text = "") {
        return new ParaSpec(model_1.Paragraph.TYPE_BODY, text === null ? "" : text);
    }
    /** Paragraph containing a text of the document. It's equivalent to [body]. */
    static text(text = "") {
        return ParaSpec.body(text);
    }
}
exports.ParaSpec = ParaSpec;
/**
 * Linguistic analyses that can be requested from the G3 API;
 * [more detail][https://help.geneea.com/api_general/guide/analyses.html]
 */
var AnalysisType;
(function (members) {
    /** Run every analysis in a single request. */
    members.ALL = "ALL";
    /**
     * Recognize and standardize entities in the text;
     * [more details][https://help.geneea.com/api_general/guide/entities.html]
     */
    members.ENTITIES = "ENTITIES";
    /**
     * Assign semantic tags to the document;
     * [more details][https://help.geneea.com/api_general/guide/tags.html]
     */
    members.TAGS = "TAGS";
    /**
     * Relations between entities and their attributes;
     * [more details][https://help.geneea.com/api_general/guide/relations.html]
     */
    members.RELATIONS = "RELATIONS";
    /**
     * Detect the emotions of the author contained in the text;
     * [more details][https://help.geneea.com/api_general/guide/sentiment.html]
     */
    members.SENTIMENT = "SENTIMENT";
    /**
     * Detect the language the text is written in;
     * [more details][https://help.geneea.com/api_general/guide/language.html]
     */
    members.LANGUAGE = "LANGUAGE";
})(AnalysisType || (exports.AnalysisType = AnalysisType = {}));
/**
 * Resolves an analysis name to the matching `AnalysisType` member, ignoring letter case.
 *
 * @param typeStr Name of the analysis, e.g. "entities".
 * @returns The matching `AnalysisType` value.
 * @throws Error when no member with that (upper-cased) name exists.
 */
const parseStrToAnalysisType = (typeStr) => {
    const memberName = typeStr.toUpperCase();
    const resolved = AnalysisType[memberName];
    if (resolved) {
        return resolved;
    }
    throw new Error(`Invalid analysis type '${typeStr}'`);
};
exports.parseStrToAnalysisType = parseStrToAnalysisType;
/** Commonly used ISO 639-1 language codes. */
var LanguageCode;
(function (codes) {
    codes.CS = "cs";
    codes.DE = "de";
    codes.EN = "en";
    codes.ES = "es";
    codes.PL = "pl";
    codes.SK = "sk";
})(LanguageCode || (exports.LanguageCode = LanguageCode = {}));
/** Commonly used domains. For more info [see][https://help.geneea.com/api_general/guide/domains.html] */
var Domain;
(function (domains) {
    /** General media articles. */
    domains.MEDIA = "media";
    /** Media articles covering news. */
    domains.NEWS = "news";
    /** Media articles covering sport news. */
    domains.SPORT = "sport";
    /** Tabloid articles. */
    domains.TABLOID = "tabloid";
    /** Media articles covering technology and science. */
    domains.TECH = "tech";
    /** General voice-of-the-customer documents (e.g. reviews). */
    domains.VOC = "voc";
    /** Voice-of-the-customer documents covering banking (e.g. reviews of banks). */
    domains.VOC_BANKING = "voc-banking";
    /** Voice-of-the-customer documents covering restaurants (e.g. reviews of restaurants). */
    domains.VOC_HOSPITALITY = "voc-hospitality";
})(Domain || (exports.Domain = Domain = {}));
/** Commonly used text types. */
var TextType;
(function (kinds) {
    /** Text that is mostly grammatically, orthographically and typographically correct, e.g. news articles. */
    kinds.CLEAN = "clean";
    /**
     * Text that ignores many formal grammatical, orthographical and typographical conventions,
     * e.g. social media posts.
     */
    kinds.CASUAL = "casual";
})(TextType || (exports.TextType = TextType = {}));
/** Supported diacritization modes. */
var Diacritization;
(function (modes) {
    /** No diacritization is performed. */
    modes.NONE = "none";
    /** Diacritics are added if needed. */
    modes.AUTO = "auto";
    /** Diacritics are added to words without them if needed. */
    modes.YES = "yes";
    /** Diacritics are first removed and then added if needed. */
    modes.REDO = "redo";
})(Diacritization || (exports.Diacritization = Diacritization = {}));
/**
 * Standard keys used by the G3 request.
 *
 * `Request.fromJson` treats every top-level key that is NOT in this set as a custom
 * configuration option, and `containsStdKey` uses the set to reject custom options
 * that would collide with a standard field.
 *
 * NOTE(review): "htmlExtractor" is reserved here, but the `Request` class in this file
 * neither reads nor writes it — confirm whether that is intentional.
 */
const STD_KEYS = new Set([
"id",
"title",
"text",
"paraSpecs",
"analyses",
"htmlExtractor",
"language",
"langDetectPrior",
"domain",
"textType",
"referenceDate",
"diacritization",
"returnMentions",
"returnItemSentiment",
"metadata",
]);
/**
 * Shape of an accepted reference date: a four-digit year, a month 1-12 and a day 1-31,
 * separated by "-". Month and day may be written with or without a leading zero.
 * Capture groups: 1 = year, 2 = month, 3 = day.
 *
 * The pattern only checks the shape; it does not check the day against the month
 * (e.g. "2023-02-31" matches).
 */
const DATE_FORMAT_REGEX = /^(\d{4})-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01])$/;
/**
 * An object encapsulating a single REST request for the G3 API.
 */
class Request {
    /**
     * Every parameter defaults to `null`, meaning "not specified".
     *
     * @param id Unique identifier of the document.
     * @param title The title or subject of the document, when available; mutually exclusive with the `paraSpecs` parameter.
     * @param text The main text of the document; mutually exclusive with the `paraSpecs` parameter.
     * @param paraSpecs The document paragraphs; mutually exclusive with the `title` and `text` parameters.
     * @param analyses What analyses to return; any iterable, stored as a Set.
     * @param language The language of the document as ISO 639-1; auto-detection will be used if omitted.
     * @param langDetectPrior The language detection prior; e.g. 'de,en'.
     * @param domain The source domain from which the document originates.
     * See the [available domains][https://help.geneea.com/api_general/guide/domains.html]
     * @param textType The type or genre of text; not supported in public workflows/domains yet.
     * @param referenceDate Date to be used for the analysis as a reference; values: "NOW" or in format YYYY-MM-DD.
     * @param diacritization Determines whether to perform text diacritization.
     * @param returnMentions Should entity/tag/relation mentions be returned? No mentions are returned if null.
     * @param returnItemSentiment Should entity/mention/tag/relation etc. sentiment be returned? No sentiment is returned if null.
     * @param metadata Extra non-NLP type of information related to the document, key-value pairs (a Map).
     * @param customConfig Map of extra options; `toJson` copies its entries to the top level of the request object.
     */
    constructor(id = null, title = null, text = null, paraSpecs = null, analyses = null, language = null, langDetectPrior = null, domain = null, textType = null, referenceDate = null, diacritization = null, returnMentions = null, returnItemSentiment = null, metadata = null, customConfig = null) {
        this.id = id;
        this.title = title;
        this.text = text;
        this.paraSpecs = paraSpecs;
        this.language = language;
        this.langDetectPrior = langDetectPrior;
        this.domain = domain;
        this.textType = textType;
        this.referenceDate = referenceDate;
        this.diacritization = diacritization;
        this.returnMentions = returnMentions;
        this.returnItemSentiment = returnItemSentiment;
        this.metadata = metadata;
        this.customConfig = customConfig;
        // Stored as a Set so duplicate analysis types collapse.
        this.analyses = analyses ? new Set(analyses) : null;
    }
    /**
     * Converts a request instance to a JSON compatible object.
     * Fields that are `null` are omitted; the two `return*` flags are emitted only when truthy.
     */
    toJson() {
        const obj = {};
        const scalarFields = [
            "id",
            "title",
            "text",
            "language",
            "langDetectPrior",
            "domain",
            "textType",
            "referenceDate",
            "diacritization",
        ];
        for (const field of scalarFields) {
            if (this[field] !== null) {
                obj[field] = this[field];
            }
        }
        if (this.paraSpecs) {
            obj.paraSpecs = this.paraSpecs.map((ps) => ({ type: ps.type, text: ps.text }));
        }
        if (this.analyses) {
            // Lower-cased and sorted so the serialized form does not depend on insertion order.
            obj.analyses = Array.from(this.analyses)
                .map((a) => a.toLowerCase())
                .sort();
        }
        if (this.returnMentions) {
            obj.returnMentions = true;
        }
        if (this.returnItemSentiment) {
            obj.returnItemSentiment = true;
        }
        if (this.metadata && this.metadata.size > 0) {
            obj.metadata = (0, common_1.serializeMap)(this.metadata);
        }
        if (this.customConfig) {
            // Custom options live at the top level of the request, next to the standard keys.
            for (const [key, value] of this.customConfig.entries()) {
                obj[key] = value;
            }
        }
        return obj;
    }
    /** Converts a request instance to a JSON string. */
    toJsonString() {
        return JSON.stringify(this.toJson());
    }
    /**
     * Reads a request instance from a JSON object.
     *
     * Keys outside `STD_KEYS` are collected into `customConfig`.
     *
     * @param raw Plain object, e.g. as produced by `toJson`.
     * @returns The reconstructed request.
     * @throws Error when `text`/`title` is combined with `paraSpecs`, when neither `text`
     * nor `paraSpecs` is present, or when an analysis name is not recognized.
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    static fromJson(raw) {
        // Missing (undefined) and explicit null values are both treated as "not specified".
        const orNull = (value) => (value === undefined || value === null ? null : value);
        const title = orNull(raw.title);
        const text = orNull(raw.text);
        const paraSpecs = orNull(raw.paraSpecs);
        if ((text !== null || title !== null) && paraSpecs !== null) {
            throw new Error("Parameters text/title and paraSpecs are mutually exclusive.");
        }
        if (text === null && paraSpecs === null) {
            throw new Error("Either the text or paraSpecs parameter has to be provided");
        }
        const analyses = raw.analyses
            ? raw.analyses.map((a) => (0, exports.parseStrToAnalysisType)(a))
            : null;
        let metadata = null;
        if (raw.metadata) {
            const entries = Object.entries(raw.metadata).map((e) => [e[0], e[1].toString()]);
            metadata = new Map(entries);
        }
        const unknownKeys = Object.keys(raw).filter((k) => !STD_KEYS.has(k));
        const customConfig = unknownKeys.length > 0
            ? new Map(unknownKeys.map((k) => [k, raw[k]]))
            : null;
        // The prior is read from `langDetectPrior` — the same key `toJson` writes — so it
        // survives a toJson/fromJson round trip.
        return new Request(orNull(raw.id), title, text, paraSpecs, analyses, orNull(raw.language), orNull(raw.langDetectPrior), orNull(raw.domain), orNull(raw.textType), orNull(raw.referenceDate), orNull(raw.diacritization), orNull(raw.returnMentions), orNull(raw.returnItemSentiment), metadata, customConfig);
    }
}
exports.Request = Request;
/**
 * Creates a builder with fields meant to be shared across requests.
 *
 * When analyzing multiple documents, one should:
 * * first, create a builder specifying all parameters shared by all the analyses to perform (e.g. [analyses] to perform,
 * [language prior][langDetectPrior]),
 * * then, use the build function to create individual requests specifying the parameters that are specific
 * for the analysis of each document (Id and text of the document, but possibly also `language`, etc).
 */
class RequestBuilder {
    /** Creates a builder with every shared field unset (`null`). */
    constructor() {
        this._analyses = null;
        this._language = null;
        this._langDetectPrior = null;
        this._domain = null;
        this._textType = null;
        this._referenceDate = null;
        this._diacritization = null;
        this._returnMentions = null;
        this._returnItemSentiment = null;
        this._metadata = null;
        this._customConfig = null;
    }
    /** What analyses to return; an array copy of the stored set, or `null` when unset. */
    get analyses() {
        return this._analyses ? Array.from(this._analyses) : null;
    }
    /** What analyses to return; `null` (or `undefined`) clears the selection. */
    set analyses(analyses) {
        // `new Set(null)` would produce an EMPTY set, which built requests then serialize
        // as `analyses: []` instead of omitting the field, so "unset" is kept as null.
        this._analyses = analyses === null || analyses === undefined ? null : new Set(analyses);
    }
    /** The language of the document as ISO 639-1; auto-detection will be used if omitted. */
    get language() {
        return this._language;
    }
    /** The language of the document as ISO 639-1; auto-detection will be used if omitted. */
    set language(language) {
        this._language = language;
    }
    /** The language detection prior; e.g. 'de,en'. */
    get langDetectPrior() {
        return this._langDetectPrior;
    }
    /** The language detection prior; e.g. 'de,en'. */
    set langDetectPrior(langDetectPrior) {
        this._langDetectPrior = langDetectPrior;
    }
    /**
     * The source domain from which the document originates.
     * See the [available domains][https://help.geneea.com/api_general/guide/domains.html]
     */
    get domain() {
        return this._domain;
    }
    /**
     * The source domain from which the document originates.
     * See the [available domains][https://help.geneea.com/api_general/guide/domains.html]
     */
    set domain(domain) {
        this._domain = domain;
    }
    /** The type or genre of text; not supported in public workflows/domains yet. */
    get textType() {
        return this._textType;
    }
    /** The type or genre of text; not supported in public workflows/domains yet. */
    set textType(textType) {
        this._textType = textType;
    }
    /** Date to be used for the analysis as a reference; either "NOW" or in format YYYY-MM-DD. */
    get referenceDate() {
        return this._referenceDate;
    }
    /**
     * Date to be used for the analysis as a reference; either "NOW" or in format YYYY-MM-DD.
     * The value is normalized by `formatRefDate`; a falsy value clears the date.
     */
    set referenceDate(referenceDate) {
        this._referenceDate = referenceDate
            ? this.formatRefDate(referenceDate)
            : null;
    }
    /** Determines whether to perform text diacritization */
    get diacritization() {
        return this._diacritization;
    }
    /** Determines whether to perform text diacritization */
    set diacritization(diacritization) {
        this._diacritization = diacritization;
    }
    /** Should entity/tag/relation mentions be returned? No mentions are returned if null. */
    get returnMentions() {
        return this._returnMentions;
    }
    /** Should entity/tag/relation mentions be returned? No mentions are returned if null. */
    set returnMentions(returnMentions) {
        this._returnMentions = returnMentions;
    }
    /** Should entity/mention/tag/relation etc. sentiment be returned? No sentiment is returned if null. */
    get returnItemSentiment() {
        return this._returnItemSentiment;
    }
    /** Should entity/mention/tag/relation etc. sentiment be returned? No sentiment is returned if null. */
    set returnItemSentiment(returnItemSentiment) {
        this._returnItemSentiment = returnItemSentiment;
    }
    /** Extra non-NLP type of information related to the document, key-value pairs. */
    get metadata() {
        return this._metadata;
    }
    /** Extra non-NLP type of information related to the document, key-value pairs. */
    set metadata(metadata) {
        this._metadata = metadata;
    }
    /**
     * The [custom options][customConfig] passed to the G3 API endpoint, or `null` when none were set.
     */
    get customConfig() {
        return this._customConfig;
    }
    /**
     * Add [custom options][customConfig] to the request builder to be passed to the G3 API endpoint.
     * Options are merged into those already set; existing options with the same key are overwritten.
     * Assigning `null` leaves the current options untouched.
     *
     * @throws Error when any key collides with a standard request key.
     */
    set customConfig(customConfig) {
        if (customConfig !== null && containsStdKey(customConfig.keys())) {
            throw new Error(`Keys ${Array.from(customConfig.keys()).sort()} overlap with the standard request keys`);
        }
        if (customConfig === null) {
            return;
        }
        if (this._customConfig === null) {
            // Keep a private copy: later merges must not write into the caller's Map.
            this._customConfig = new Map(customConfig);
            return;
        }
        for (const [key, value] of customConfig.entries()) {
            this._customConfig.set(key, value);
        }
    }
    /**
     * Normalizes a reference date.
     *
     * @param date "NOW" (any letter case) or a date matching `DATE_FORMAT_REGEX`.
     * @returns "NOW", or the date with month and day zero-padded to two digits.
     * @throws Error when the date does not match the expected format.
     */
    formatRefDate(date) {
        if (date.toUpperCase() === "NOW") {
            return "NOW";
        }
        const parts = this.parseRefDate(date);
        const month = parts.month.length < 2 ? `0${parts.month}` : parts.month;
        const day = parts.day.length < 2 ? `0${parts.day}` : parts.day;
        return [parts.year, month, day].join("-");
    }
    /**
     * Splits a date string into its year, month and day parts (as strings, unpadded).
     *
     * @throws Error when the date does not match `DATE_FORMAT_REGEX`.
     */
    parseRefDate(date) {
        const matches = date.match(DATE_FORMAT_REGEX);
        if (matches === null) {
            throw new Error(`Referene date ${date} is not valid.`);
        }
        return { year: matches[1], month: matches[2], day: matches[3] };
    }
    /**
     * Creates a new request object to be passed to the G3 client.
     *
     * Per-request `language` and `referenceDate` take precedence over the builder's values;
     * `metadata` and `customConfig` are merged with the builder's, per-request entries winning.
     *
     * @param options A RequestOptions object.
     * @returns Request object to be passed to the G3 client.
     * @throws Error when `text`/`title` is combined with `paraSpecs`, when neither `text` nor
     * `paraSpecs` is given, when a custom option collides with a standard request key, or when
     * `referenceDate` is malformed.
     */
    build(options) {
        const { id, text, title, paraSpecs, language, referenceDate, metadata, customConfig, } = options;
        if ((text !== undefined || title !== undefined) &&
            paraSpecs !== undefined) {
            throw new Error("Parameters text/title and paraSpecs are mutually exclusive");
        }
        if (text === undefined && paraSpecs === undefined) {
            throw new Error("Either the text or paraSpecs parameter has to be provided");
        }
        if (customConfig !== undefined && containsStdKey(customConfig.keys())) {
            throw new Error(`Keys ${Array.from(customConfig.keys()).sort()} overlap with the standard request keys`);
        }
        const isMissing = (value) => value === undefined || value === null;
        const refDate = referenceDate
            ? this.formatRefDate(referenceDate)
            : this.referenceDate;
        const effectiveLanguage = isMissing(language) ? this.language : language;
        const mergedMetadata = combineMaps(this.metadata, isMissing(metadata) ? null : metadata);
        const mergedConfig = combineMaps(this.customConfig, isMissing(customConfig) ? null : customConfig);
        return new Request(id, title, text, paraSpecs, this.analyses, effectiveLanguage, this.langDetectPrior, this.domain, this.textType, refDate, this.diacritization, this.returnMentions, this.returnItemSentiment, mergedMetadata, mergedConfig);
    }
}
exports.RequestBuilder = RequestBuilder;
/**
 * Tells whether at least one of the given keys is a standard request key.
 *
 * @param keys Iterable of key names (e.g. the result of `Map.prototype.keys()`).
 * @returns `true` when any key is contained in `STD_KEYS`, `false` otherwise.
 */
function containsStdKey(keys) {
    const candidates = Array.from(keys);
    return candidates.some((key) => STD_KEYS.has(key));
}
/**
 * Merges two optional Maps, entries of `b` taking precedence over those of `a`.
 *
 * When either side is missing or empty, the OTHER argument is returned as-is (no copy is
 * made, and it may itself be null/empty); a new Map is created only when both sides have entries.
 *
 * @param a Base map, or a null/undefined value.
 * @param b Overriding map, or a null/undefined value.
 * @returns `b` when `a` has no entries, `a` when `b` has no entries, otherwise a new merged Map.
 */
function combineMaps(a, b) {
    const hasNoEntries = (m) => !m || m.size === 0;
    if (hasNoEntries(a)) {
        return b;
    }
    if (hasNoEntries(b)) {
        return a;
    }
    // Entries are inserted in order, so keys of `b` overwrite equal keys of `a`.
    return new Map([...a, ...b]);
}