UNPKG

@wowool/portal

Version:

A library for natural language processing tasks including tokenization, entity recognition, anonymization, semantic chunking, and much more.

621 lines (609 loc) 17.5 kB
import { v4 as uuidv4 } from "uuid";

// src/utilities/annotation.utility.ts

/**
 * Maps the numeric type tag stored at index 0 of a raw annotation array to
 * the annotation type name.
 */
const annotationNumberToType = Object.freeze({
  1: "Sentence",
  2: "Entity",
  3: "Token",
});

/**
 * Checks whether an annotation is a sentence.
 * @param annotation An object exposing a `type` property.
 * @returns `true` when `annotation.type` is "Sentence".
 */
function isSentence(annotation) {
  return annotation.type === "Sentence";
}

/**
 * Checks whether an annotation is an entity.
 * @param annotation An object exposing a `type` property.
 * @returns `true` when `annotation.type` is "Entity".
 */
function isEntity(annotation) {
  return annotation.type === "Entity";
}

/**
 * Checks whether an annotation is a token.
 * @param annotation An object exposing a `type` property.
 * @returns `true` when `annotation.type` is "Token".
 */
function isToken(annotation) {
  return annotation.type === "Token";
}

/**
 * Checks whether a raw annotation array carries the token type tag (3).
 * @param annotation The raw annotation array.
 * @returns `true` when the first element equals 3.
 */
function isRawToken(annotation) {
  return annotation[0] === 3;
}

// src/classes/annotation.class.ts

/**
 * Base class wrapping a raw annotation array laid out as
 * `[typeTag, beginOffset, endOffset, ...]`.
 */
class Annotation {
  /**
   * Creates an Annotation instance.
   * @param raw The raw annotation array.
   * @param rawChildren The raw child annotations.
   */
  constructor(raw, rawChildren) {
    this.raw = raw;
    this.rawChildren = rawChildren;
  }

  /**
   * The type of this annotation (Sentence, Entity, or Token).
   * `undefined` when the numeric tag is not a known annotation type.
   */
  get type() {
    return annotationNumberToType[this.raw[0]];
  }

  /**
   * The character offset where this annotation begins.
   */
  get beginOffset() {
    return this.raw[1];
  }

  /**
   * The character offset where this annotation ends.
   */
  get endOffset() {
    return this.raw[2];
  }

  /**
   * Converts the annotation to a JSON-serializable object.
   * @returns An object with `type`, `beginOffset` and `endOffset`.
   */
  toJSON() {
    return {
      type: this.type,
      beginOffset: this.beginOffset,
      endOffset: this.endOffset,
    };
  }

  /**
   * Logs the annotation as a formatted JSON string to the console.
   */
  log() {
    console.log(JSON.stringify(this.toJSON(), null, 2));
  }
}

// src/classes/token.class.ts

/**
 * Converts a raw `[lemma, partOfSpeech]` pair into a morpheme object.
 * @param rawMorpheme The raw morpheme array.
 * @returns An object with `lemma` and `partOfSpeech`.
 */
function parseMorpheme(rawMorpheme) {
  const [lemma, partOfSpeech] = rawMorpheme;
  return { lemma, partOfSpeech };
}

/**
 * A token annotation. The raw array has exactly six elements:
 * `[typeTag, beginOffset, endOffset, literal, properties, morphemes]`.
 */
class Token extends Annotation {
  /**
   * Creates a Token instance.
   * @param rawToken The raw token annotation array.
   * @throws Error if the raw array does not have exactly six elements.
   */
  constructor(rawToken) {
    super(rawToken, []);
    if (rawToken.length !== 6) {
      throw new Error(`Invalid token: ${rawToken}`);
    }
    this.literal = rawToken[3];
    this.properties = rawToken[4];
    // Wrap in an arrow so map's extra (index, array) arguments are not forwarded.
    this.morphemes = rawToken[5].map((rawMorpheme) => parseMorpheme(rawMorpheme));
  }

  /**
   * Converts the token to a JSON-serializable object.
   * @returns The base annotation fields plus `literal`, `properties` and `morphemes`.
   */
  toJSON() {
    return {
      ...super.toJSON(),
      literal: this.literal,
      properties: this.properties,
      morphemes: this.morphemes,
    };
  }
}

// src/utilities/assert.utility.ts

/**
 * Throws an Error carrying `message` when `condition` is falsy.
 * @param condition The value to check.
 * @param message The error message to throw with.
 * @throws Error if `condition` is falsy.
 */
function assert(condition, message) {
  if (!condition) {
    throw new Error(message);
  }
}

// src/classes/entity.class.ts

/**
 * Converts an `[key, values]` entry into an attribute object.
 * @param entry A key/value pair as produced by `Object.entries`.
 * @returns An object with `key` and `values`.
 */
function parseAttribute(entry) {
  const [key, values] = entry;
  return { key, values };
}

/**
 * An entity annotation. The raw array has exactly five elements:
 * `[typeTag, beginOffset, endOffset, uri, attributes]`.
 */
class Entity extends Annotation {
  /**
   * Creates an Entity instance.
   * @param rawEntity The raw entity annotation array.
   * @param rawAnnotations The list of all raw annotations in the sentence.
   * @throws Error if the raw array does not have exactly five elements.
   */
  constructor(rawEntity, rawAnnotations) {
    super(rawEntity, rawAnnotations);
    if (rawEntity.length !== 5) {
      throw new Error(`Invalid entity: ${rawEntity}`);
    }
    this.uri = rawEntity[3];
    this.attributes = Object.entries(rawEntity[4]).map((entry) => parseAttribute(entry));

    // The entity owns every raw token whose span lies inside its own span.
    const begin = this.beginOffset;
    const end = this.endOffset;
    this.tokens = rawAnnotations
      .filter(
        (rawAnnotation) =>
          isRawToken(rawAnnotation) && rawAnnotation[1] >= begin && rawAnnotation[2] <= end
      )
      .map((rawToken) => new Token(rawToken));
  }

  /**
   * The text content of the entity, constructed from its tokens.
   * @returns The token literals joined with a single space.
   * @throws Error if the entity has no tokens.
   */
  get text() {
    assert(this.tokens.length > 0, "Entity has no tokens");
    return this.tokens.map((token) => token.literal).join(" ");
  }

  /**
   * Converts the entity to a JSON-serializable object.
   * @returns The base annotation fields plus `uri` and `attributes`.
   */
  toJSON() {
    return {
      ...super.toJSON(),
      uri: this.uri,
      attributes: this.attributes,
    };
  }
}

// src/classes/sentence.class.ts

/**
 * Builds the annotation wrapper matching a raw annotation's type tag.
 * @param rawAnnotation The raw annotation array to wrap.
 * @param rawSentenceAnnotations All raw annotations of the enclosing sentence
 *   (handed to Entity so it can collect its tokens).
 * @returns A Sentence, Entity or Token, or `undefined` for an unknown type tag.
 */
function parseSentenceAnnotation(rawAnnotation, rawSentenceAnnotations) {
  switch (annotationNumberToType[rawAnnotation[0]]) {
    case "Sentence":
      return new Sentence(rawAnnotation);
    case "Entity":
      return new Entity(rawAnnotation, rawSentenceAnnotations);
    case "Token":
      return new Token(rawAnnotation);
    default:
      return undefined;
  }
}

/**
 * A sentence annotation. The raw array has four or five elements; index 3
 * holds the raw annotations contained in the sentence.
 */
class Sentence extends Annotation {
  /**
   * Creates a Sentence instance.
   * @param rawSentence The raw sentence annotation array.
   * @throws Error if the raw array does not have four or five elements.
   */
  constructor(rawSentence) {
    super(rawSentence, []);
    if (rawSentence.length !== 4 && rawSentence.length !== 5) {
      throw new Error(`Invalid sentence ${rawSentence}`);
    }
    const rawSentenceAnnotations = rawSentence[3];
    this.annotations = [];
    for (const rawAnnotation of rawSentenceAnnotations) {
      const annotation = parseSentenceAnnotation(rawAnnotation, rawSentenceAnnotations);
      // Unknown type tags are skipped: keeping an `undefined` entry would make
      // the entities/tokens getters and toJSON() throw a TypeError later on.
      if (annotation !== undefined) {
        this.annotations.push(annotation);
      }
    }
  }

  /**
   * Returns all entity annotations within this sentence.
   * @returns A new array holding the Entity objects built by the constructor.
   */
  get entities() {
    return this.annotations.filter(isEntity);
  }

  /**
   * Returns all token annotations within this sentence.
   * @returns A new array holding the Token objects built by the constructor.
   */
  get tokens() {
    return this.annotations.filter(isToken);
  }

  /**
   * The text content of the sentence, constructed from its tokens.
   * @returns The token literals joined with a single space.
   */
  get text() {
    return this.tokens.map((token) => token.literal).join(" ");
  }

  /**
   * Iterates over each entity in the sentence and invokes the callback.
   * @param callback The function to call for each entity.
   */
  forEachEntity(callback) {
    this.entities.forEach(callback);
  }

  /**
   * Iterates over each token in the sentence and invokes the callback.
   * @param callback The function to call for each token.
   */
  forEachToken(callback) {
    this.tokens.forEach(callback);
  }

  /**
   * Converts the sentence to a JSON-serializable object.
   * @returns The base annotation fields plus the serialized `annotations`.
   */
  toJSON() {
    return {
      ...super.toJSON(),
      annotations: this.annotations.map((annotation) => annotation.toJSON()),
    };
  }
}

// src/classes/text-analysis.class.ts

/**
 * Wraps a raw text analysis and exposes its sentences, tokens and entities.
 */
class TextAnalysis {
  /**
   * Creates a TextAnalysis instance from raw analysis data.
   * @param rawTextAnalysis The raw text analysis object. When it has no
   *   `results` property the analysis contains no sentences.
   */
  constructor(rawTextAnalysis) {
    this.rawTextAnalysis = rawTextAnalysis;
    // Lazily filled caches for the `tokens` and `entities` getters.
    this._tokens = undefined;
    this._entities = undefined;
    this.sentences =
      rawTextAnalysis.results === undefined
        ? []
        : rawTextAnalysis.results.sentences.map((rawSentence) => new Sentence(rawSentence));
  }

  /**
   * Returns all tokens in the analysis, across all sentences.
   * The array is computed on first access and cached.
   * @returns An array of Token objects.
   */
  get tokens() {
    if (this._tokens === undefined) {
      this._tokens = this.sentences.flatMap((sentence) => sentence.tokens);
    }
    return this._tokens;
  }

  /**
   * Returns all entities in the analysis, across all sentences.
   * The array is computed on first access and cached.
   * @returns An array of Entity objects.
   */
  get entities() {
    if (this._entities === undefined) {
      this._entities = this.sentences.flatMap((sentence) => sentence.entities);
    }
    return this._entities;
  }

  /**
   * Iterates over each sentence in the analysis and invokes the callback.
   * @param callback The function to call for each sentence.
   */
  forEachSentence(callback) {
    this.sentences.forEach(callback);
  }

  /**
   * Iterates over each token in the analysis and invokes the callback.
   * @param callback Called with `(token, index)`, where `index` counts tokens
   *   across all sentences.
   */
  forEachToken(callback) {
    let index = 0;
    for (const sentence of this.sentences) {
      for (const token of sentence.tokens) {
        callback(token, index++);
      }
    }
  }

  /**
   * Iterates over each entity in the analysis and invokes the callback.
   * @param callback Called with `(entity, index)`, where `index` counts
   *   entities across all sentences.
   */
  forEachEntity(callback) {
    let index = 0;
    for (const sentence of this.sentences) {
      for (const entity of sentence.entities) {
        callback(entity, index++);
      }
    }
  }
}

// src/errors.ts

/**
 * Base error for this library; carries a machine-readable `type`.
 */
class PortalError extends Error {
  /**
   * @param type The error type identifier.
   * @param message The human-readable error message.
   */
  constructor(type, message) {
    super(message);
    // Without an explicit name, stack traces and String(err) show "Error".
    this.name = "PortalError";
    this.type = type;
    this.message = message;
  }

  /**
   * Serializes the error to a JSON object.
   * @returns An object with `type` and `message`.
   */
  toJSON() {
    return { type: this.type, message: this.message };
  }

  /**
   * Logs the error to the console in JSON format.
   */
  log() {
    console.error(this.toJSON());
  }
}

/**
 * Error raised on the client side (invalid input, missing configuration).
 */
class PortalClientError extends PortalError {
  /**
   * @param type The error type identifier.
   * @param message The human-readable error message.
   * @param details Optional additional details.
   */
  constructor(type, message, details) {
    super(type, message);
    this.name = "PortalClientError";
    this.details = details;
  }

  /**
   * Serializes the client error to a JSON object, including details.
   */
  toJSON() {
    return { ...super.toJSON(), details: this.details };
  }
}

/**
 * Error raised when the API answers with a non-OK response.
 */
class PortalApiError extends PortalError {
  /**
   * @param type The error type identifier.
   * @param message The human-readable error message.
   * @param statusCode The HTTP status code of the response.
   * @param details Optional additional details.
   */
  constructor(type, message, statusCode, details) {
    super(type, message);
    this.name = "PortalApiError";
    this.statusCode = statusCode;
    this.details = details;
  }

  /**
   * Serializes the API error to a JSON object, including status code and details.
   */
  toJSON() {
    return {
      ...super.toJSON(),
      statusCode: this.statusCode,
      details: this.details,
    };
  }
}

// src/classes/analysis-document.class.ts

/**
 * Wraps a raw analysis document and gives access to its parsed text analysis.
 */
class AnalysisDocument {
  /**
   * Constructs an AnalysisDocument from a raw analysis document.
   * @param _rawAnalysisDocument The raw analysis document to wrap and validate.
   * @throws {PortalClientError} If the MIME type is not 'application/vnd.wowool.document-analysis+json'.
   */
  constructor(_rawAnalysisDocument) {
    this._rawAnalysisDocument = _rawAnalysisDocument;
    if (_rawAnalysisDocument.mimeType !== "application/vnd.wowool.document-analysis+json") {
      console.error("Invalid AnalysisDocument:", _rawAnalysisDocument);
      throw new PortalClientError(
        "AnalysisDocumentParsingError",
        `Invalid AnalysisDocument: MIME type ${_rawAnalysisDocument.mimeType}, expected application/vnd.wowool.document-analysis+json`
      );
    }
    const documentAnalysis = _rawAnalysisDocument.data;
    // `== null` covers both null and undefined: a document without data or
    // without analysis results is reported by the `analysis` getter instead of
    // crashing here with a TypeError.
    const rawTextAnalysis = documentAnalysis == null ? undefined : documentAnalysis.wowool_analysis;
    this._analysis = rawTextAnalysis == null ? undefined : new TextAnalysis(rawTextAnalysis);
  }

  /**
   * The unique identifier of the analysis document.
   * @returns The `id` of the raw document.
   */
  get id() {
    return this._rawAnalysisDocument.id;
  }

  /**
   * The MIME type of the analysis document.
   * @returns The `mimeType` of the raw document.
   */
  get mimeType() {
    return this._rawAnalysisDocument.mimeType;
  }

  /**
   * The encoding of the analysis document.
   */
  get encoding() {
    return this._rawAnalysisDocument.encoding;
  }

  /**
   * The raw data payload of the analysis document.
   */
  get data() {
    return this._rawAnalysisDocument.data;
  }

  /**
   * The metadata associated with the analysis document.
   */
  get metadata() {
    return this._rawAnalysisDocument.metadata;
  }

  /**
   * The text analysis results, if present.
   * @throws {PortalClientError} If no analysis results are available in the document.
   */
  get analysis() {
    if (!this._analysis) {
      throw new PortalClientError(
        "AnalysisNotFoundError",
        "This analysis document does not contain any analysis results."
      );
    }
    return this._analysis;
  }
}

// src/services/pipeline.service.ts

/**
 * Wraps a plain string into a text document with a freshly generated id;
 * any non-string value is returned unchanged.
 * @param document A document object or a string.
 * @returns The document to send to the API.
 */
function toInputDocument(document) {
  if (typeof document !== "string") {
    return document;
  }
  return {
    id: uuidv4(),
    mimeType: "text/plain",
    encoding: "utf-8",
    data: document,
    metadata: {},
  };
}

/**
 * Sends documents through a pipeline via the Portal API.
 */
class Pipeline {
  /**
   * Creates a Pipeline.
   * @param steps The steps to process the document.
   * @param portalOrOpts A Portal instance, an API key string, or an options
   *   object accepted by the Portal constructor. When omitted, a Portal is
   *   created with default options.
   * @throws {PortalClientError} If a Portal has to be created and no API key is available.
   */
  constructor(steps, portalOrOpts) {
    this.steps = steps;
    if (portalOrOpts == null) {
      this.portal = new Portal();
    } else if (portalOrOpts instanceof Portal) {
      this.portal = portalOrOpts;
    } else if (typeof portalOrOpts === "object") {
      // An options object must be passed through as-is; wrapping it as the
      // apiKey would send "[object Object]" as the key.
      this.portal = new Portal(portalOrOpts);
    } else {
      this.portal = new Portal({ apiKey: portalOrOpts });
    }
  }

  /**
   * Processes a single document through the pipeline.
   * @param document The input document or a string to be wrapped as a document.
   * @returns The processed AnalysisDocument.
   * @throws {PortalApiError} If the API returns an error response.
   */
  async process(document) {
    const inputDocument = toInputDocument(document);
    const response = await fetch(`${this.portal.baseUrl}/pipelines/process`, {
      method: "POST",
      headers: this.portal.headers,
      body: JSON.stringify({ document: inputDocument, pipeline: this.steps }),
    });
    await this.portal.validateResponse(response);
    const rawAnalysisDocument = await response.json();
    return new AnalysisDocument(rawAnalysisDocument);
  }

  /**
   * Processes multiple documents through the pipeline in a batch.
   * @param documents An array of input documents or strings.
   * @returns An array of processed AnalysisDocuments.
   * @throws {PortalApiError} If the API returns an error response.
   */
  async processBatch(documents) {
    const inputDocuments = documents.map((document) => toInputDocument(document));
    const response = await fetch(`${this.portal.baseUrl}/pipelines/process/batch`, {
      method: "POST",
      headers: this.portal.headers,
      body: JSON.stringify({ documents: inputDocuments, pipeline: this.steps }),
    });
    await this.portal.validateResponse(response);
    const rawAnalysisDocuments = await response.json();
    return rawAnalysisDocuments.map(
      (rawAnalysisDocument) => new AnalysisDocument(rawAnalysisDocument)
    );
  }
}

// src/utilities/env.utility.ts

/**
 * Reads an environment variable, tolerating runtimes without `process.env`.
 * @param name The environment variable name.
 * @param defaultValue Returned when the variable is unset or `process.env` is unavailable.
 * @returns The variable's value or `defaultValue`.
 */
function getEnv(name, defaultValue = "") {
  if (typeof process === "undefined" || typeof process.env === "undefined") {
    return defaultValue;
  }
  const value = process.env[name];
  return value !== undefined ? value : defaultValue;
}

// src/services/portal.service.ts

/**
 * Client for the Wowool Portal API.
 */
class Portal {
  /**
   * Constructs a new Portal service instance.
   * @param opts Optional settings.
   * @param opts.apiKey API key for authentication. Falls back to the
   *   WOWOOL_PORTAL_KEY environment variable.
   * @param opts.hostUrl Host URL of the API. Falls back to the
   *   WOWOOL_PORTAL_HOST environment variable, then to https://api.wowool.com.
   * @throws {PortalClientError} If the API key is missing.
   */
  constructor(opts = {}) {
    // `||` is deliberate: an empty-string key must fall back to the environment.
    this.apiKey = opts.apiKey || getEnv("WOWOOL_PORTAL_KEY", "");
    if (!this.apiKey) {
      throw new PortalClientError(
        "MissingApiKeyError",
        "Missing API key. Pass it to the constructor or set the WOWOOL_PORTAL_KEY environment variable."
      );
    }
    const rawBaseUrl = opts.hostUrl || getEnv("WOWOOL_PORTAL_HOST", "https://api.wowool.com");
    // Drop one trailing slash so the version suffix does not produce "//v1".
    const host = rawBaseUrl.endsWith("/") ? rawBaseUrl.slice(0, -1) : rawBaseUrl;
    this.baseUrl = `${host}/v1`;
  }

  /**
   * Pings the Wowool Portal API to check connectivity.
   * @returns {Promise<PingResponse>} The parsed JSON body of the ping response.
   * @throws {PortalApiError} If the API returns an error response.
   */
  async ping() {
    const response = await fetch(`${this.baseUrl}/ping`, {
      method: "GET",
      headers: this.headers,
    });
    await this.validateResponse(response);
    return response.json();
  }

  /**
   * Creates a new pipeline instance bound to this portal.
   * @param steps The pipeline steps to initialize.
   * @returns {Pipeline} The created pipeline instance.
   */
  createPipeline(steps) {
    return new Pipeline(steps, this);
  }

  /**
   * Returns the default headers for API requests.
   */
  get headers() {
    return {
      "Content-Type": "application/json",
      "X-Client-Agent": "wowool-portal-typescript",
      "X-Client-Version": "1.1.1",
      "X-API-Key": this.apiKey,
    };
  }

  /**
   * Validates the API response, throwing an error if not successful.
   * @param response The fetch API response object.
   * @throws {PortalApiError} If the response is not OK.
   */
  async validateResponse(response) {
    if (response.ok) {
      return;
    }
    await this.throwApiError(response);
  }

  /**
   * Throws a PortalApiError based on the API error response.
   * @param response The fetch API response object.
   * @throws {PortalApiError} Always. When the body cannot be parsed as JSON the
   *   error still carries the HTTP status code, and the parse failure is
   *   attached as `cause`.
   */
  async throwApiError(response) {
    const statusCode = response.status || 500;
    let parsed;
    try {
      parsed = await response.json();
    } catch (cause) {
      const parseError = new PortalApiError(
        "UnknownError",
        `Failed to parse error response with status ${response.status}`,
        statusCode,
        undefined
      );
      parseError.cause = cause;
      throw parseError;
    }
    // A JSON body such as `null` or a bare string has no error fields to read.
    const error = parsed !== null && typeof parsed === "object" ? parsed : {};
    throw new PortalApiError(
      error.type || "UnknownError",
      error.message || "Unknown error",
      statusCode,
      error.details || undefined
    );
  }
}

export {
  AnalysisDocument,
  Annotation,
  Entity,
  Pipeline,
  Portal,
  Sentence,
  Token,
  isEntity,
  isSentence,
  isToken,
};