geneea-nlp-client
Version:
The TypeScript Client for Geneea Interpretor G3 API.
293 lines (292 loc) • 9.77 kB
JavaScript
;
// CommonJS interop boilerplate: sets the `__esModule` flag on the exports object and
// publishes `writeToJson` (a hoisted function declaration, so it is already defined here).
Object.defineProperty(exports, "__esModule", { value: true });
exports.writeToJson = writeToJson;
/* eslint-disable @typescript-eslint/no-explicit-any */
const offset_converter_1 = require("../../common/offset-converter");
const upos_1 = require("../../common/upos");
const common_1 = require("../../common/common");
/**
 * Serializes an Analysis into a plain JSON object in the format returned by the Geneea G3 API.
 *
 * A fresh Writer is created for every call, so no state is shared between conversions.
 *
 * @param analysis the analysis object to serialize
 * @returns the plain object produced by `Writer.toJson`
 */
function writeToJson(analysis) {
    const writer = new Writer();
    return writer.toJson(analysis);
}
/**
 * Converts an Analysis object model into the plain JSON structure of the G3 API.
 *
 * NOTE: the writer is stateful. `offMap` holds the offset mapping of the paragraph that is
 * currently being written, so toRawSentence()/toRawToken() only produce paragraph-correct
 * offsets while they are invoked (directly or indirectly) from toRawPara().
 */
class Writer {
    constructor() {
        // Placeholder mapping; toRawPara() installs a paragraph-specific one before tokens are written.
        this.offMap = new IdentityOffsetMapping();
    }

    /**
     * Builds the top-level JSON object for the whole analysis.
     * Empty collections and absent optional values are left out of the result.
     *
     * @param analysis the analysis to serialize
     * @returns a plain object suitable for JSON.stringify
     */
    toJson(analysis) {
        const obj = {};
        obj.version = "3.3.0";
        obj.language = { detected: analysis.language.detected };
        if (analysis.docId)
            obj.id = analysis.docId;
        if (analysis.paragraphs.length > 0)
            obj.paragraphs = analysis.paragraphs.map((p) => this.toRawPara(p));
        if (analysis.entities.length > 0)
            obj.entities = analysis.entities.map((e) => this.toRawEntity(e));
        if (analysis.tags.length > 0)
            obj.tags = analysis.tags.map((t) => this.toRawTag(t));
        if (analysis.relations.length > 0)
            obj.relations = analysis.relations.map((r) => this.toRawRelation(r));
        if (analysis.docSentiment) {
            obj.docSentiment = this.toRawSentiment(analysis.docSentiment);
        }
        if (analysis.docVectors) {
            obj.docVectors = this.toRawVectors(analysis.docVectors);
        }
        const id2sentiment = this.createRawItemSentiment(analysis);
        if (id2sentiment.size > 0)
            obj.itemSentiments = Object.fromEntries(id2sentiment);
        const id2vectors = this.createRawItemVectors(analysis);
        if (id2vectors.size > 0)
            obj.itemVectors = Object.fromEntries(id2vectors);
        // Explicit null/undefined check: a character count of 0 is a real value and must be kept.
        if (analysis.usedChars !== null && analysis.usedChars !== undefined)
            obj.usedChars = analysis.usedChars;
        if (analysis.metadata && analysis.metadata.size > 0)
            obj.metadata = Object.fromEntries(analysis.metadata);
        if (analysis.debugInfo)
            obj.debugInfo = analysis.debugInfo;
        return obj;
    }

    /**
     * Serializes one paragraph and all of its sentences.
     * Side effect: replaces `this.offMap` with a mapping built from this paragraph's texts.
     * `origText` is only written when it differs from `text`.
     */
    toRawPara(p) {
        // Must happen before the sentences are mapped; toRawToken() reads this.offMap.
        this.offMap = new JS2CpOffsetMapping(p.text, p.origText);
        const obj = {
            id: p.id,
            type: p.type,
            text: p.text,
        };
        if (p.origText !== p.text)
            obj.origText = p.origText;
        if (p.sentences.length > 0)
            obj.sentences = p.sentences.map((s) => this.toRawSentence(s));
        return obj;
    }

    /**
     * Serializes one sentence. `tokens` is written only when non-empty, while `tecto` is
     * written whenever `tectoTokens` is truthy (an empty array is therefore kept).
     */
    toRawSentence(s) {
        const obj = {
            id: s.id,
        };
        if (s.tokens.length > 0)
            obj.tokens = s.tokens.map((t) => this.toRawToken(t));
        if (s.tectoTokens)
            obj.tecto = s.tectoTokens.map((tt) => this.toRawTectoToken(tt));
        return obj;
    }

    /**
     * Serializes one token. The start offsets are translated through the current paragraph's
     * offset mapping; `origOff` and `origText` are written only when they differ from
     * `off` and `text`.
     */
    toRawToken(t) {
        const off = this.offMap.get(t.charSpan.start);
        const origOff = this.offMap.getOrig(t.origCharSpan.start);
        const obj = {
            id: t.id,
            off: off,
            text: t.text,
        };
        if (origOff !== off)
            obj.origOff = origOff;
        if (t.origText !== t.text)
            obj.origText = t.origText;
        if (t.deepLemma)
            obj.dLemma = t.deepLemma;
        if (t.lemma)
            obj.lemma = t.lemma;
        if (t.pos)
            obj.pos = (0, upos_1.UPosToStr)(t.pos);
        if (t.morphTag)
            obj.mTag = t.morphTag;
        if (t.feats.size > 0)
            obj.feats = (0, common_1.serializeMap)(t.feats);
        // The presence test uses `fnc`, the written value is `fullFnc`.
        // NOTE(review): presumably fullFnc is the complete form of fnc - confirm against the Token model.
        if (t.fnc)
            obj.fnc = t.fullFnc;
        if (t.parent)
            obj.parId = t.parent.id;
        return obj;
    }

    /**
     * Serializes one tecto token. `fnc` is lower-cased; `tokenIds` is always present and
     * falls back to an empty array when the tecto token has no token support.
     */
    toRawTectoToken(t) {
        const obj = {
            id: t.id,
            fnc: t.fnc.toLowerCase(),
            lemma: t.lemma,
        };
        if (t.parent)
            obj.parId = t.parent.id;
        if (t.feats.size > 0)
            obj.feats = (0, common_1.serializeMap)(t.feats);
        if (t.entityMention)
            obj.entityMentionId = t.entityMention.id;
        obj.tokenIds = t.tokens ? t.tokens.ids : [];
        return obj;
    }

    /** Serializes one entity together with its mentions and GKB properties. */
    toRawEntity(e) {
        const obj = {
            id: e.id,
            stdForm: e.stdForm,
            type: e.type,
        };
        if (e.gkbId)
            obj.gkbId = e.gkbId;
        if (e.feats.size > 0)
            obj.feats = (0, common_1.serializeMap)(e.feats);
        if (e.mentions.length > 0)
            obj.mentions = e.mentions.map((m) => this.toRawEntityMention(m));
        if (e.gkbProperties.length > 0)
            obj.gkbProperties = e.gkbProperties.map((p) => this.toRawGkbProperty(p));
        return obj;
    }

    /** Serializes one entity mention; `tokenIds` is always present (empty when there are no tokens). */
    toRawEntityMention(m) {
        const obj = {
            id: m.id,
            text: m.text,
            mwl: m.mwl,
        };
        if (m.feats.size > 0)
            obj.feats = (0, common_1.serializeMap)(m.feats);
        if (m.derivedFrom)
            obj.derivedFromEntityId = m.derivedFrom.id;
        obj.tokenIds = m.tokens ? m.tokens.ids : [];
        return obj;
    }

    /** Serializes one tag together with its mentions and GKB properties. */
    toRawTag(t) {
        const obj = {
            id: t.id,
            stdForm: t.stdForm,
            type: t.type,
            relevance: t.relevance,
        };
        if (t.gkbId)
            obj.gkbId = t.gkbId;
        if (t.feats.size > 0)
            obj.feats = (0, common_1.serializeMap)(t.feats);
        if (t.mentions.length > 0)
            obj.mentions = t.mentions.map((m) => this.toRawTagMention(m));
        if (t.gkbProperties.length > 0)
            obj.gkbProperties = t.gkbProperties.map((p) => this.toRawGkbProperty(p));
        return obj;
    }

    /** Serializes one tag mention; `tokenIds` is always present (empty when there are no tokens). */
    toRawTagMention(m) {
        const obj = {
            id: m.id,
            tokenIds: m.tokens ? m.tokens.ids : [],
        };
        if (m.feats.size > 0)
            obj.feats = (0, common_1.serializeMap)(m.feats);
        return obj;
    }

    /** Serializes one relation with its arguments, features and support. */
    toRawRelation(r) {
        const obj = {
            id: r.id,
            textRepr: r.textRepr,
            name: r.name,
            type: r.type,
        };
        if (r.args.length > 0)
            obj.args = r.args.map((a) => this.toRawRelationArgument(a));
        if (r.feats.size > 0)
            obj.feats = (0, common_1.serializeMap)(r.feats);
        if (r.support.length > 0)
            obj.support = r.support.map((s) => this.toRawRelationSupport(s));
        return obj;
    }

    /** Serializes one relation argument; `entityId` is written only when an entity is linked. */
    toRawRelationArgument(a) {
        const obj = {
            name: a.name,
            type: a.type,
        };
        if (a.entity)
            obj.entityId = a.entity.id;
        return obj;
    }

    /** Serializes one relation support record; both fields are optional. */
    toRawRelationSupport(s) {
        const obj = {};
        if (s.tokens)
            obj.tokenIds = s.tokens.ids;
        if (s.tectoToken)
            obj.tectoId = s.tectoToken.id;
        return obj;
    }

    /** Copies the four serialized sentiment fields into a plain object. */
    toRawSentiment(s) {
        return {
            mean: s.mean,
            label: s.label,
            positive: s.positive,
            negative: s.negative,
        };
    }

    /** Copies name, version and values of every vector into plain objects. */
    toRawVectors(vectors) {
        return vectors.map((v) => ({
            name: v.name,
            version: v.version,
            values: v.values,
        }));
    }

    /**
     * Serializes one GKB property. Each typed value slot is written only when it is set;
     * both `null` and `undefined` count as "not set", while `false`, `0` and `""` are kept.
     */
    toRawGkbProperty(p) {
        const obj = {
            name: p.name,
            label: p.label,
        };
        // Listed in the order in which the keys are emitted.
        const valueKeys = ["valueGkbId", "boolValue", "floatValue", "intValue", "strValue"];
        for (const key of valueKeys) {
            const value = p[key];
            if (value !== null && value !== undefined)
                obj[key] = value;
        }
        return obj;
    }

    /**
     * Collects the serialized sentiment of every item that has one.
     * @returns Map from item id to raw sentiment object
     */
    createRawItemSentiment(ana) {
        return this.collectItemValues(ana, (item) => item.sentiment, (s) => this.toRawSentiment(s));
    }

    /**
     * Collects the serialized vectors of every item that has some.
     * @returns Map from item id to array of raw vector objects
     */
    createRawItemVectors(ana) {
        return this.collectItemValues(ana, (item) => item.vectors, (v) => this.toRawVectors(v));
    }

    /**
     * Shared traversal for per-item sentiment and vectors.
     *
     * Visits paragraphs, their sentences, entities, their mentions, tags, their mentions and
     * relations (in that order) and records `convert(pick(item))` under `item.id` for every
     * item where `pick(item)` is neither `null` nor `undefined`.
     *
     * @param ana the analysis whose items are traversed
     * @param pick extracts the optional value from an item
     * @param convert turns the extracted value into its raw JSON form
     * @returns Map from item id to converted value, in traversal order
     */
    collectItemValues(ana, pick, convert) {
        const id2value = new Map();
        const register = (items) => {
            for (const item of items) {
                const value = pick(item);
                // Skip undefined as well as null so that items built without the field do not crash.
                if (value !== null && value !== undefined)
                    id2value.set(item.id, convert(value));
            }
        };
        register(ana.paragraphs);
        ana.paragraphs.forEach((p) => register(p.sentences));
        register(ana.entities);
        ana.entities.forEach((e) => register(e.mentions));
        register(ana.tags);
        ana.tags.forEach((t) => register(t.mentions));
        register(ana.relations);
        return id2value;
    }
}
/**
 * Offset mapping backed by JS2CpOffsetConverter instances, one for the paragraph text and
 * one for its original text. When both texts are identical a single converter is shared.
 */
class JS2CpOffsetMapping {
    /**
     * @param text the paragraph text that `get` offsets refer to
     * @param origText the original text that `getOrig` offsets refer to
     */
    constructor(text, origText) {
        const textConverter = new offset_converter_1.JS2CpOffsetConverter(text);
        let origConverter = textConverter;
        if (origText !== text) {
            // Texts differ, so the original text needs its own converter.
            origConverter = new offset_converter_1.JS2CpOffsetConverter(origText);
        }
        // Installed as per-instance closures over the two converters.
        this.get = (jsOff) => textConverter.convert(jsOff);
        this.getOrig = (jsOff) => origConverter.convert(jsOff);
    }
}
/** Offset mapping that hands every offset back unchanged. */
class IdentityOffsetMapping {
    /**
     * @param jsOff offset into the text
     * @returns the same offset
     */
    get(jsOff) {
        const sameOffset = jsOff;
        return sameOffset;
    }

    /**
     * @param jsOff offset into the original text
     * @returns the same offset
     */
    getOrig(jsOff) {
        const sameOffset = jsOff;
        return sameOffset;
    }
}