UNPKG

@wowool/portal

Version:

A library for natural language processing tasks including tokenization, entity recognition, anonymization, semantic chunking, and much more.

444 lines (432 loc) 11.7 kB
// src/utilities/annotation.utility.ts

/**
 * Maps the numeric tag stored at index 0 of a raw annotation array to the
 * annotation type name exposed by the parsed classes.
 */
const annotationNumberToType = {
  1: "Sentence",
  2: "Entity",
  3: "Token",
};

/**
 * @param {Annotation} annotation - A parsed annotation.
 * @returns {boolean} True when the annotation's `type` is "Sentence".
 */
function isSentence(annotation) {
  return annotation.type === "Sentence";
}

/**
 * @param {Annotation} annotation - A parsed annotation.
 * @returns {boolean} True when the annotation's `type` is "Entity".
 */
function isEntity(annotation) {
  return annotation.type === "Entity";
}

/**
 * @param {Annotation} annotation - A parsed annotation.
 * @returns {boolean} True when the annotation's `type` is "Token".
 */
function isToken(annotation) {
  return annotation.type === "Token";
}

/**
 * @param {Array} annotation - A raw (unparsed) annotation array.
 * @returns {boolean} True when the raw annotation carries the token tag (3).
 */
function isRawToken(annotation) {
  return annotation[0] === 3;
}

// src/classes/annotation.class.ts

/**
 * Base class wrapping a raw annotation array of the form
 * `[typeNumber, beginOffset, endOffset, ...]`.
 */
class Annotation {
  /**
   * @param {Array} raw - The raw annotation array.
   * @param {Array} rawChildren - Raw annotations associated with this one.
   */
  constructor(raw, rawChildren) {
    this.raw = raw;
    this.rawChildren = rawChildren;
  }

  /** Type name looked up from the numeric tag at index 0 (undefined if unknown). */
  get type() {
    return annotationNumberToType[this.raw[0]];
  }

  /** Value at index 1 of the raw annotation. */
  get beginOffset() {
    return this.raw[1];
  }

  /** Value at index 2 of the raw annotation. */
  get endOffset() {
    return this.raw[2];
  }

  /** @returns {{type: string, beginOffset: *, endOffset: *}} Plain-object view. */
  toJSON() {
    return {
      type: this.type,
      beginOffset: this.beginOffset,
      endOffset: this.endOffset,
    };
  }

  /** Prints the pretty-printed JSON form of this annotation to the console. */
  log() {
    console.log(JSON.stringify(this.toJSON(), null, 2));
  }
}

// src/classes/token.class.ts

/**
 * Converts a raw morpheme pair into an object.
 *
 * @param {Array} rawMorpheme - `[lemma, partOfSpeech]`.
 * @returns {{lemma: *, partOfSpeech: *}}
 */
function parseMorpheme(rawMorpheme) {
  return {
    lemma: rawMorpheme[0],
    partOfSpeech: rawMorpheme[1],
  };
}

/**
 * A token annotation. The raw form has exactly six elements:
 * `[3, beginOffset, endOffset, literal, properties, morphemes]`.
 */
class Token extends Annotation {
  /**
   * @param {Array} rawToken - The raw token array.
   * @throws {Error} When the raw array does not have exactly six elements.
   */
  constructor(rawToken) {
    super(rawToken, []);
    if (rawToken.length !== 6) {
      throw new Error(`Invalid token: ${rawToken}`);
    }
    this.literal = rawToken[3];
    this.properties = rawToken[4];
    this.morphemes = rawToken[5].map(parseMorpheme);
  }

  /** @returns {object} The base annotation fields plus literal, properties and morphemes. */
  toJSON() {
    return {
      ...super.toJSON(),
      literal: this.literal,
      properties: this.properties,
      morphemes: this.morphemes,
    };
  }
}

// src/utilities/assert.utility.ts

/**
 * Throws an `Error` carrying `message` when `condition` is falsy.
 *
 * @param {*} condition - Value expected to be truthy.
 * @param {string} message - Message for the thrown error.
 * @throws {Error} When `condition` is falsy.
 */
function assert(condition, message) {
  if (!condition) {
    throw new Error(message);
  }
}

// src/classes/entity.class.ts

/**
 * Converts an `Object.entries` pair into an attribute object.
 *
 * @param {Array} entry - `[key, values]`.
 * @returns {{key: string, values: *}}
 */
function parseAttribute(entry) {
  return {
    key: entry[0],
    values: entry[1],
  };
}

/**
 * An entity annotation. The raw form has exactly five elements:
 * `[2, beginOffset, endOffset, uri, attributes]`.
 */
class Entity extends Annotation {
  /**
   * @param {Array} rawEntity - The raw entity array.
   * @param {Array} rawAnnotations - Raw annotations to search for the tokens
   *   lying inside this entity's offsets.
   * @throws {Error} When the raw array does not have exactly five elements.
   */
  constructor(rawEntity, rawAnnotations) {
    super(rawEntity, rawAnnotations);
    if (rawEntity.length !== 5) {
      throw new Error(`Invalid entity: ${rawEntity}`);
    }
    this.uri = rawEntity[3];
    this.attributes = Object.entries(rawEntity[4]).map(parseAttribute);
    // Keep only the raw tokens whose offsets fall inside [beginOffset, endOffset].
    this.tokens = rawAnnotations
      .filter(isRawToken)
      .filter(
        (rawToken) =>
          rawToken[1] >= this.beginOffset && rawToken[2] <= this.endOffset
      )
      .map((rawAnnotation) => new Token(rawAnnotation));
  }

  /**
   * The literals of the entity's tokens joined with single spaces.
   *
   * @throws {Error} When the entity has no tokens.
   */
  get text() {
    assert(this.tokens.length > 0, "Entity has no tokens");
    return this.tokens.map((token) => token.literal).join(" ");
  }

  /** @returns {object} The base annotation fields plus uri and attributes. */
  toJSON() {
    return {
      ...super.toJSON(),
      uri: this.uri,
      attributes: this.attributes,
    };
  }
}

// src/classes/sentence.class.ts

/**
 * A sentence annotation. The raw form has four or five elements; index 3
 * holds the raw annotations contained in the sentence.
 */
class Sentence extends Annotation {
  /**
   * @param {Array} rawSentence - The raw sentence array.
   * @throws {Error} When the raw array has neither four nor five elements.
   */
  constructor(rawSentence) {
    super(rawSentence, []);
    if (rawSentence.length !== 4 && rawSentence.length !== 5) {
      throw new Error(`Invalid sentence ${rawSentence}`);
    }
    const rawSentenceAnnotations = rawSentence[3];
    this.annotations = [];
    for (const rawAnnotation of rawSentenceAnnotations) {
      switch (annotationNumberToType[rawAnnotation[0]]) {
        case "Sentence":
          this.annotations.push(new Sentence(rawAnnotation));
          break;
        case "Entity":
          this.annotations.push(
            new Entity(rawAnnotation, rawSentenceAnnotations)
          );
          break;
        case "Token":
          this.annotations.push(new Token(rawAnnotation));
          break;
        default:
          // Unknown annotation tags are skipped. Storing `undefined` for them
          // would make `entities`, `tokens` and `toJSON` throw a TypeError.
          break;
      }
    }
  }

  /**
   * The entity annotations of this sentence, in order. These are the
   * instances already parsed by the constructor (no re-parsing per access).
   */
  get entities() {
    return this.annotations.filter(isEntity);
  }

  /**
   * The token annotations of this sentence, in order. These are the
   * instances already parsed by the constructor (no re-parsing per access).
   */
  get tokens() {
    return this.annotations.filter(isToken);
  }

  /** The literals of the sentence's tokens joined with single spaces. */
  get text() {
    return this.tokens.map((token) => token.literal).join(" ");
  }

  /** Invokes `callback` for every entity of this sentence (via `Array#forEach`). */
  forEachEntity(callback) {
    this.entities.forEach(callback);
  }

  /** Invokes `callback` for every token of this sentence (via `Array#forEach`). */
  forEachToken(callback) {
    this.tokens.forEach(callback);
  }

  /** @returns {object} The base annotation fields plus the serialized annotations. */
  toJSON() {
    return {
      ...super.toJSON(),
      annotations: this.annotations.map((annotation) => annotation.toJSON()),
    };
  }
}

// src/classes/text-analysis.class.ts

/**
 * Parsed view over a raw text analysis. Sentences are built eagerly from
 * `rawTextAnalysis.results.sentences`; the flattened token and entity lists
 * are computed on first access and cached.
 */
class TextAnalysis {
  /**
   * @param {object} rawTextAnalysis - Raw analysis; when `results` is
   *   undefined the analysis has no sentences.
   */
  constructor(rawTextAnalysis) {
    this.rawTextAnalysis = rawTextAnalysis;
    this._tokens = undefined;
    this._entities = undefined;
    const results = rawTextAnalysis.results;
    this.sentences =
      results === undefined
        ? []
        : results.sentences.map((rawSentence) => new Sentence(rawSentence));
  }

  /** All tokens of all sentences, in order (cached after the first access). */
  get tokens() {
    if (this._tokens === undefined) {
      this._tokens = this.sentences.flatMap((sentence) => sentence.tokens);
    }
    return this._tokens;
  }

  /** All entities of all sentences, in order (cached after the first access). */
  get entities() {
    if (this._entities === undefined) {
      this._entities = this.sentences.flatMap((sentence) => sentence.entities);
    }
    return this._entities;
  }

  /** Invokes `callback` for every sentence (via `Array#forEach`). */
  forEachSentence(callback) {
    this.sentences.forEach(callback);
  }

  /**
   * Invokes `callback(token, index)` for every token, where `index` counts
   * across all sentences rather than restarting per sentence.
   */
  forEachToken(callback) {
    let index = 0;
    for (const sentence of this.sentences) {
      sentence.forEachToken((token) => {
        callback(token, index++);
      });
    }
  }

  /**
   * Invokes `callback(entity, index)` for every entity, where `index` counts
   * across all sentences rather than restarting per sentence.
   */
  forEachEntity(callback) {
    let index = 0;
    for (const sentence of this.sentences) {
      sentence.forEachEntity((entity) => {
        callback(entity, index++);
      });
    }
  }
}

// src/errors.ts

/** Base error carrying a `type` string next to the usual message. */
class PortalError extends Error {
  /**
   * @param {string} type - Error type identifier.
   * @param {string} message - Human-readable message.
   */
  constructor(type, message) {
    super(message);
    this.type = type;
    this.message = message;
  }

  /** @returns {{type: string, message: string}} */
  toJSON() {
    return {
      type: this.type,
      message: this.message,
    };
  }

  /** Writes the JSON form of this error to `console.error`. */
  log() {
    console.error(this.toJSON());
  }
}

/** Error raised by the client library itself; adds optional `details`. */
class PortalClientError extends PortalError {
  /**
   * @param {string} type - Error type identifier.
   * @param {string} message - Human-readable message.
   * @param {*} [details] - Extra information about the failure.
   */
  constructor(type, message, details) {
    // `type` and `message` are stored by the PortalError constructor.
    super(type, message);
    this.details = details;
  }

  /** @returns {object} The base fields plus `details`. */
  toJSON() {
    return {
      ...super.toJSON(),
      details: this.details,
    };
  }
}

/** Error built from a non-OK HTTP response; adds `statusCode` and `details`. */
class PortalApiError extends PortalError {
  /**
   * @param {string} type - Error type identifier.
   * @param {string} message - Human-readable message.
   * @param {number} statusCode - HTTP status code of the response.
   * @param {*} [details] - Extra information about the failure.
   */
  constructor(type, message, statusCode, details) {
    // `type` and `message` are stored by the PortalError constructor.
    super(type, message);
    this.statusCode = statusCode;
    this.details = details;
  }

  /** @returns {object} The base fields plus `statusCode` and `details`. */
  toJSON() {
    return {
      ...super.toJSON(),
      statusCode: this.statusCode,
      details: this.details,
    };
  }
}

// src/classes/analysis-document.class.ts

/** MIME type a raw document must declare to be accepted by AnalysisDocument. */
const ANALYSIS_DOCUMENT_MIME_TYPE =
  "application/vnd.wowool.document-analysis+json";

/**
 * Wrapper around a raw analysis document. Exposes the raw fields through
 * getters and the parsed `TextAnalysis` (when present) through `analysis`.
 */
class AnalysisDocument {
  /**
   * @param {object} _rawAnalysisDocument - Raw document; its `data.wowool_analysis`
   *   (if defined) is parsed into a TextAnalysis.
   * @throws {PortalClientError} "AnalysisDocumentParsingError" when the
   *   document's MIME type is not the analysis MIME type.
   */
  constructor(_rawAnalysisDocument) {
    this._rawAnalysisDocument = _rawAnalysisDocument;
    if (_rawAnalysisDocument.mimeType !== ANALYSIS_DOCUMENT_MIME_TYPE) {
      throw new PortalClientError(
        "AnalysisDocumentParsingError",
        `Invalid Wowool analysis document: document has MIME type ${_rawAnalysisDocument.mimeType}, expected ${ANALYSIS_DOCUMENT_MIME_TYPE}`
      );
    }
    const rawTextAnalysis = _rawAnalysisDocument.data.wowool_analysis;
    this._analysis =
      rawTextAnalysis === undefined
        ? undefined
        : new TextAnalysis(rawTextAnalysis);
  }

  get id() {
    return this._rawAnalysisDocument.id;
  }

  get mimeType() {
    return this._rawAnalysisDocument.mimeType;
  }

  get encoding() {
    return this._rawAnalysisDocument.encoding;
  }

  get data() {
    return this._rawAnalysisDocument.data;
  }

  get metadata() {
    return this._rawAnalysisDocument.metadata;
  }

  /**
   * The parsed text analysis.
   *
   * @throws {PortalClientError} "AnalysisNotFoundError" when the document
   *   carried no `wowool_analysis`.
   */
  get analysis() {
    if (!this._analysis) {
      throw new PortalClientError(
        "AnalysisNotFoundError",
        "This analysis document does not contain any analysis results."
      );
    }
    return this._analysis;
  }
}

// src/services/pipeline.service.ts
import { v4 as uuidv4 } from "uuid";

/**
 * Wraps a plain string into a text/plain input document with a fresh UUID;
 * any non-string value is returned unchanged.
 *
 * @param {string|object} document - Text or an already-built input document.
 * @returns {object} The input document to send to the API.
 */
function toInputDocument(document) {
  if (typeof document !== "string") {
    return document;
  }
  return {
    id: uuidv4(),
    mimeType: "text/plain",
    encoding: "utf-8",
    data: document,
    metadata: {},
  };
}

/** A list of pipeline steps bound to a Portal used to send the requests. */
class Pipeline {
  /**
   * @param {*} steps - Pipeline definition sent as `pipeline` in request bodies.
   * @param {Portal|string} [portalOrApiKey] - An existing Portal, an API key
   *   used to create one, or nothing to create a Portal with its defaults.
   */
  constructor(steps, portalOrApiKey) {
    this.steps = steps;
    if (portalOrApiKey === undefined) {
      this.portal = new Portal();
    } else if (portalOrApiKey instanceof Portal) {
      this.portal = portalOrApiKey;
    } else {
      this.portal = new Portal(portalOrApiKey);
    }
  }

  /**
   * Sends one document through the pipeline (POST `/pipelines/process`).
   *
   * @param {string|object} document - Text or an input document.
   * @returns {Promise<AnalysisDocument>} The parsed response document.
   * @throws {PortalApiError} When the response is not OK.
   */
  async process(document) {
    const response = await fetch(`${this.portal.baseUrl}/pipelines/process`, {
      method: "POST",
      headers: this.portal.headers,
      body: JSON.stringify({
        document: toInputDocument(document),
        pipeline: this.steps,
      }),
    });
    await this.portal.validateResponse(response);
    const rawAnalysisDocument = await response.json();
    return new AnalysisDocument(rawAnalysisDocument);
  }

  /**
   * Sends several documents through the pipeline in one request
   * (POST `/pipelines/process/batch`).
   *
   * @param {Array<string|object>} documents - Texts and/or input documents.
   * @returns {Promise<AnalysisDocument[]>} One parsed document per response entry.
   * @throws {PortalApiError} When the response is not OK.
   */
  async processBatch(documents) {
    const inputDocuments = documents.map((doc) => toInputDocument(doc));
    const response = await fetch(
      `${this.portal.baseUrl}/pipelines/process/batch`,
      {
        method: "POST",
        headers: this.portal.headers,
        body: JSON.stringify({
          documents: inputDocuments,
          pipeline: this.steps,
        }),
      }
    );
    await this.portal.validateResponse(response);
    const rawAnalysisDocuments = await response.json();
    return rawAnalysisDocuments.map(
      (rawAnalysisDocument) => new AnalysisDocument(rawAnalysisDocument)
    );
  }
}

// src/utilities/env.utility.ts

/**
 * Reads an environment variable when a `process.env` object exists.
 *
 * @param {string} name - Variable name.
 * @param {string} [defaultValue=""] - Returned when `process`/`process.env`
 *   is unavailable or the variable is undefined.
 * @returns {string}
 */
function getEnv(name, defaultValue = "") {
  if (typeof process !== "undefined" && typeof process.env !== "undefined") {
    const value = process.env[name];
    return value !== undefined ? value : defaultValue;
  }
  return defaultValue;
}

// src/services/portal.service.ts

/** Holds the API key and base URL and performs shared request handling. */
class Portal {
  /**
   * @param {string} [apiKey] - API key; falls back to the
   *   WOWOOL_PORTAL_API_KEY environment variable.
   * @param {string} [hostUrl] - Host URL; falls back to the WOWOOL_PORTAL_HOST
   *   environment variable, then to "https://api.wowool.com".
   * @throws {PortalClientError} "MissingApiKeyError" when no API key is found.
   */
  constructor(apiKey, hostUrl) {
    this.apiKey = apiKey != null ? apiKey : getEnv("WOWOOL_PORTAL_API_KEY", "");
    if (!this.apiKey) {
      throw new PortalClientError(
        "MissingApiKeyError",
        "Missing API key. Pass it to the constructor or set the WOWOOL_PORTAL_API_KEY environment variable."
      );
    }
    const rawBaseUrl =
      hostUrl != null
        ? hostUrl
        : getEnv("WOWOOL_PORTAL_HOST", "https://api.wowool.com");
    // Drop one trailing slash so the version suffix is not preceded by "//".
    const trimmedBaseUrl = rawBaseUrl.endsWith("/")
      ? rawBaseUrl.slice(0, -1)
      : rawBaseUrl;
    this.baseUrl = `${trimmedBaseUrl}/v1`;
  }

  /**
   * Calls GET `/ping` and returns the parsed JSON body.
   *
   * @returns {Promise<*>}
   * @throws {PortalApiError} When the response is not OK.
   */
  async ping() {
    const response = await fetch(`${this.baseUrl}/ping`, {
      method: "GET",
      headers: this.headers,
    });
    await this.validateResponse(response);
    return response.json();
  }

  /**
   * @param {*} steps - Pipeline definition.
   * @returns {Pipeline} A pipeline bound to this portal.
   */
  createPipeline(steps) {
    return new Pipeline(steps, this);
  }

  /** Request headers sent with every call, including the API key. */
  get headers() {
    return {
      "Content-Type": "application/json",
      "X-Client-Agent": "wowool-portal-typescript",
      "X-Client-Version": "1.1.1",
      "X-API-Key": this.apiKey,
    };
  }

  /**
   * Resolves when `response.ok` is true, otherwise throws via `throwApiError`.
   *
   * @param {Response} response - A fetch response.
   * @throws {PortalApiError} When the response is not OK.
   */
  async validateResponse(response) {
    if (response.ok) {
      return;
    }
    await this.throwApiError(response);
  }

  /**
   * Always throws a PortalApiError describing `response`.
   *
   * The error body is expected to be JSON with optional `type`, `message` and
   * `details` fields. A body that is not JSON, or that is JSON but not an
   * object, still yields a PortalApiError carrying the HTTP status code, so
   * callers can inspect `type` and `statusCode` on every API failure.
   *
   * @param {Response} response - A fetch response.
   * @throws {PortalApiError} Always.
   */
  async throwApiError(response) {
    const statusCode = response.status || 500;
    let payload;
    try {
      payload = await response.json();
    } catch (parseError) {
      const apiError = new PortalApiError(
        "UnknownError",
        `Failed to parse error response with status ${response.status}`,
        statusCode,
        undefined
      );
      // Keep the underlying parse failure reachable for debugging.
      apiError.cause = parseError;
      throw apiError;
    }
    // A body such as `null` or `"oops"` parses fine but has no fields to read.
    const error =
      payload !== null && typeof payload === "object" ? payload : {};
    throw new PortalApiError(
      error.type || "UnknownError",
      error.message || "Unknown error",
      statusCode,
      error.details || undefined
    );
  }
}

export {
  AnalysisDocument,
  Annotation,
  Entity,
  Pipeline,
  Portal,
  Sentence,
  Token,
  isEntity,
  isSentence,
  isToken,
};