@wowool/portal
Version:
A library for natural language processing tasks including tokenization, entity recognition, anonymization, semantic chunking, and much more.
621 lines (609 loc) • 17.5 kB
JavaScript
// src/utilities/annotation.utility.ts
/**
 * Lookup table translating the numeric code stored at index 0 of a raw
 * annotation array into the annotation type name.
 */
var annotationNumberToType = {
  "1": "Sentence",
  "2": "Entity",
  "3": "Token"
};
/**
 * Tells whether an annotation object reports the type "Sentence".
 * @param annotation Object exposing a `type` property.
 * @returns true when `annotation.type` is exactly "Sentence".
 */
function isSentence(annotation) {
  const kind = annotation.type;
  return kind === "Sentence";
}
/**
 * Tells whether an annotation object reports the type "Entity".
 * @param annotation Object exposing a `type` property.
 * @returns true when `annotation.type` is exactly "Entity".
 */
function isEntity(annotation) {
  const kind = annotation.type;
  return kind === "Entity";
}
/**
 * Tells whether an annotation object reports the type "Token".
 * @param annotation Object exposing a `type` property.
 * @returns true when `annotation.type` is exactly "Token".
 */
function isToken(annotation) {
  const kind = annotation.type;
  return kind === "Token";
}
/**
 * Tells whether a raw (unparsed) annotation array describes a token.
 * @param annotation Raw annotation whose first element is the numeric type code.
 * @returns true when the type code is 3, the code mapped to "Token" in
 *   annotationNumberToType.
 */
function isRawToken(annotation) {
  const typeCode = annotation[0];
  return typeCode === 3;
}
// src/classes/annotation.class.ts
var Annotation = class {
  /**
   * Wraps a raw annotation array.
   * @param raw The raw annotation array (type code, begin offset, end offset, ...).
   * @param rawChildren The raw child annotations kept alongside it.
   */
  constructor(raw, rawChildren) {
    this.raw = raw;
    this.rawChildren = rawChildren;
  }
  /**
   * Type name of this annotation, looked up from the numeric code at raw[0].
   */
  get type() {
    const typeCode = this.raw[0];
    return annotationNumberToType[typeCode];
  }
  /**
   * Character offset at which the annotation starts (raw[1]).
   */
  get beginOffset() {
    const start = this.raw[1];
    return start;
  }
  /**
   * Character offset at which the annotation stops (raw[2]).
   */
  get endOffset() {
    const end = this.raw[2];
    return end;
  }
  /**
   * Builds a plain object holding the type and both offsets.
   * @returns A JSON-serializable view of the annotation.
   */
  toJSON() {
    const { type, beginOffset, endOffset } = this;
    return { type, beginOffset, endOffset };
  }
  /**
   * Prints the toJSON() representation to the console, pretty-printed with
   * two-space indentation.
   */
  log() {
    const serialized = JSON.stringify(this.toJSON(), null, 2);
    console.log(serialized);
  }
};
// src/classes/token.class.ts
/**
 * Turns a raw morpheme pair into a named record.
 * @param rawMorpheme Array-like value holding the lemma at index 0 and the
 *   part of speech at index 1.
 * @returns An object with `lemma` and `partOfSpeech` properties.
 */
function parseMorpheme(rawMorpheme) {
  return {
    lemma: rawMorpheme[0],
    partOfSpeech: rawMorpheme[1]
  };
}
var Token = class extends Annotation {
  /**
   * Builds a Token from its raw annotation array.
   * The array must hold exactly six elements; elements 3, 4 and 5 carry the
   * literal text, the properties and the raw morphemes respectively.
   * @param rawToken The raw token annotation array.
   * @throws Error if the array does not have exactly six elements.
   */
  constructor(rawToken) {
    super(rawToken, []);
    const hasExpectedShape = rawToken.length === 6;
    if (!hasExpectedShape) {
      throw new Error(`Invalid token: ${rawToken}`);
    }
    const literal = rawToken[3];
    const properties = rawToken[4];
    const rawMorphemes = rawToken[5];
    this.literal = literal;
    this.properties = properties;
    this.morphemes = rawMorphemes.map((rawMorpheme) => parseMorpheme(rawMorpheme));
  }
  /**
   * Extends the base annotation JSON with the token-specific fields.
   * @returns A JSON-serializable object describing the token.
   */
  toJSON() {
    const base = super.toJSON();
    return Object.assign(base, {
      literal: this.literal,
      properties: this.properties,
      morphemes: this.morphemes
    });
  }
};
// src/utilities/assert.utility.ts
/**
 * Minimal runtime assertion helper.
 * @param condition Value expected to be truthy.
 * @param message Message of the Error thrown when the condition is falsy.
 * @throws Error when `condition` is falsy.
 */
function assert(condition, message) {
  if (condition) {
    return;
  }
  throw new Error(message);
}
// src/classes/entity.class.ts
/**
 * Converts a [key, values] entry into a named attribute record.
 * @param entry Array-like pair with the attribute key at index 0 and its
 *   values at index 1.
 * @returns An object with `key` and `values` properties.
 */
function parseAttribute(entry) {
  return {
    key: entry[0],
    values: entry[1]
  };
}
var Entity = class extends Annotation {
  /**
   * Builds an Entity from its raw annotation array.
   * The array must hold exactly five elements; element 3 is the URI and
   * element 4 is an object whose entries become the attributes. The entity's
   * tokens are the raw token annotations of the sentence whose offsets lie
   * within the entity's begin/end offsets.
   * @param rawEntity The raw entity annotation array.
   * @param rawAnnotations All raw annotations of the enclosing sentence.
   * @throws Error if the array does not have exactly five elements.
   */
  constructor(rawEntity, rawAnnotations) {
    super(rawEntity, rawAnnotations);
    const hasExpectedShape = rawEntity.length === 5;
    if (!hasExpectedShape) {
      throw new Error(`Invalid entity: ${rawEntity}`);
    }
    this.uri = rawEntity[3];
    const attributeEntries = Object.entries(rawEntity[4]);
    this.attributes = attributeEntries.map((entry) => parseAttribute(entry));
    // Keep only raw tokens fully contained in this entity's span.
    const coveredRawTokens = rawAnnotations.filter(
      (candidate) => isRawToken(candidate) && candidate[1] >= this.beginOffset && candidate[2] <= this.endOffset
    );
    this.tokens = coveredRawTokens.map((rawToken) => new Token(rawToken));
  }
  /**
   * Text of the entity: the literals of its tokens joined by single spaces.
   * @returns The space-joined token literals.
   * @throws Error if the entity has no tokens.
   */
  get text() {
    const tokenCount = this.tokens.length;
    assert(tokenCount > 0, "Entity has no tokens");
    const literals = this.tokens.map((token) => token.literal);
    return literals.join(" ");
  }
  /**
   * Extends the base annotation JSON with the URI and attributes.
   * @returns A JSON-serializable object describing the entity.
   */
  toJSON() {
    const base = super.toJSON();
    return Object.assign(base, {
      uri: this.uri,
      attributes: this.attributes
    });
  }
};
// src/classes/sentence.class.ts
var Sentence = class _Sentence extends Annotation {
  /**
   * Creates a Sentence instance.
   * The raw array must hold four or five elements; element 3 is the list of
   * raw annotations contained in the sentence. Each contained annotation is
   * parsed into a Sentence, Entity or Token according to its type code.
   * Annotations with an unrecognised type code are skipped, so that
   * `annotations` never contains `undefined` entries.
   * @param rawSentence The raw sentence annotation array.
   * @throws Error if the sentence is not valid.
   */
  constructor(rawSentence) {
    super(rawSentence, []);
    if (rawSentence.length !== 4 && rawSentence.length !== 5) {
      throw new Error(`Invalid sentence ${rawSentence}`);
    }
    const rawSentenceAnnotations = rawSentence[3];
    const annotations = [];
    for (const rawAnnotation of rawSentenceAnnotations) {
      switch (annotationNumberToType[rawAnnotation[0]]) {
        case "Sentence":
          annotations.push(new _Sentence(rawAnnotation));
          break;
        case "Entity":
          // Entities need the sibling annotations to collect their tokens.
          annotations.push(new Entity(rawAnnotation, rawSentenceAnnotations));
          break;
        case "Token":
          annotations.push(new Token(rawAnnotation));
          break;
        default:
          // Unknown type code: skip rather than store a hole that would
          // crash the entities/tokens getters and toJSON().
          break;
      }
    }
    this.annotations = annotations;
  }
  /**
   * Returns all entity annotations within this sentence.
   * The Entity objects built by the constructor are returned directly (in a
   * fresh array); they were created from the same raw data the getter would
   * otherwise re-parse.
   * @returns An array of Entity objects.
   */
  get entities() {
    return this.annotations.filter(isEntity);
  }
  /**
   * Returns all token annotations within this sentence.
   * The Token objects built by the constructor are returned directly (in a
   * fresh array).
   * @returns An array of Token objects.
   */
  get tokens() {
    return this.annotations.filter(isToken);
  }
  /**
   * The text content of the sentence, constructed from its tokens.
   * @returns The literal values of the sentence's tokens joined by spaces.
   */
  get text() {
    return this.tokens.map((token) => token.literal).join(" ");
  }
  /**
   * Iterates over each entity in the sentence and invokes the callback.
   * @param callback The function to call for each entity.
   */
  forEachEntity(callback) {
    this.entities.forEach(callback);
  }
  /**
   * Iterates over each token in the sentence and invokes the callback.
   * @param callback The function to call for each token.
   */
  forEachToken(callback) {
    this.tokens.forEach(callback);
  }
  /**
   * Converts the sentence to a JSON-serializable object, including the JSON
   * form of every contained annotation.
   * @returns An object representation of the sentence.
   */
  toJSON() {
    return {
      ...super.toJSON(),
      annotations: this.annotations.map(
        (annotation) => annotation.toJSON()
      )
    };
  }
};
// src/classes/text-analysis.class.ts
var TextAnalysis = class {
  /**
   * Wraps a raw text analysis object and parses its sentences.
   * When the raw object has no `results`, the analysis has no sentences.
   * @param rawTextAnalysis The raw text analysis object.
   */
  constructor(rawTextAnalysis) {
    this.rawTextAnalysis = rawTextAnalysis;
    // Lazily filled caches for the tokens/entities getters.
    this._tokens = undefined;
    this._entities = undefined;
    const hasResults = rawTextAnalysis.results !== undefined;
    this.sentences = hasResults
      ? rawTextAnalysis.results.sentences.map((rawSentence) => new Sentence(rawSentence))
      : [];
  }
  /**
   * All tokens of all sentences, computed on first access and cached.
   * @returns An array of Token objects.
   */
  get tokens() {
    if (this._tokens !== undefined) {
      return this._tokens;
    }
    this._tokens = this.sentences.flatMap((sentence) => sentence.tokens);
    return this._tokens;
  }
  /**
   * All entities of all sentences, computed on first access and cached.
   * @returns An array of Entity objects.
   */
  get entities() {
    if (this._entities !== undefined) {
      return this._entities;
    }
    this._entities = this.sentences.flatMap((sentence) => sentence.entities);
    return this._entities;
  }
  /**
   * Invokes the callback for every sentence of the analysis.
   * @param callback The function to call for each sentence.
   */
  forEachSentence(callback) {
    this.sentences.forEach(callback);
  }
  /**
   * Invokes the callback for every token of every sentence, passing the token
   * and a running index that keeps counting across sentences.
   * @param callback The function to call for each token.
   */
  forEachToken(callback) {
    let position = 0;
    this.sentences.forEach((sentence) => {
      sentence.forEachToken((token) => {
        callback(token, position);
        position += 1;
      });
    });
  }
  /**
   * Invokes the callback for every entity of every sentence, passing the
   * entity and a running index that keeps counting across sentences.
   * @param callback The function to call for each entity.
   */
  forEachEntity(callback) {
    let position = 0;
    this.sentences.forEach((sentence) => {
      sentence.forEachEntity((entity) => {
        callback(entity, position);
        position += 1;
      });
    });
  }
};
// src/errors.ts
var PortalError = class extends Error {
  /**
   * Base error for this library.
   * @param type Machine-readable error type, exposed as `type`.
   * @param message Human-readable error message.
   */
  constructor(type, message) {
    super(message);
    // Without this, `err.name` and `String(err)` report the generic "Error".
    this.name = "PortalError";
    this.type = type;
    // Kept as an explicit assignment so `message` is always an own property,
    // even when no message was supplied.
    this.message = message;
  }
  /**
   * Serializes the error to a JSON object holding its type and message.
   */
  toJSON() {
    return {
      type: this.type,
      message: this.message
    };
  }
  /**
   * Logs the toJSON() representation of the error with console.error.
   */
  log() {
    console.error(this.toJSON());
  }
};
var PortalClientError = class extends PortalError {
  /**
   * Error raised for problems detected on the client side.
   * @param type Machine-readable error type.
   * @param message Human-readable error message.
   * @param details Optional extra information about the error.
   */
  constructor(type, message, details) {
    // The base constructor already stores `type` and `message`.
    super(type, message);
    // Without this, `err.name` does not identify the concrete error class.
    this.name = "PortalClientError";
    this.details = details;
  }
  /**
   * Serializes the client error to a JSON object, including details.
   */
  toJSON() {
    return {
      ...super.toJSON(),
      details: this.details
    };
  }
};
var PortalApiError = class extends PortalError {
  /**
   * Error raised when the API answers with an error response.
   * @param type Machine-readable error type.
   * @param message Human-readable error message.
   * @param statusCode HTTP status code of the failed response.
   * @param details Optional extra information about the error.
   */
  constructor(type, message, statusCode, details) {
    // The base constructor already stores `type` and `message`.
    super(type, message);
    // Without this, `err.name` does not identify the concrete error class.
    this.name = "PortalApiError";
    this.statusCode = statusCode;
    this.details = details;
  }
  /**
   * Serializes the API error to a JSON object, including status code and details.
   */
  toJSON() {
    return {
      ...super.toJSON(),
      statusCode: this.statusCode,
      details: this.details
    };
  }
};
// src/classes/analysis-document.class.ts
var AnalysisDocument = class {
  /**
   * Wraps a raw analysis document after checking its MIME type.
   * If the document's data carries a `wowool_analysis` entry, it is parsed
   * into a TextAnalysis.
   * @param _rawAnalysisDocument The raw analysis document to wrap and validate.
   * @throws {PortalClientError} If the MIME type is not 'application/vnd.wowool.document-analysis+json'.
   */
  constructor(_rawAnalysisDocument) {
    this._rawAnalysisDocument = _rawAnalysisDocument;
    const expectedMimeType = "application/vnd.wowool.document-analysis+json";
    const actualMimeType = _rawAnalysisDocument.mimeType;
    if (actualMimeType !== expectedMimeType) {
      console.error("Invalid AnalysisDocument:", _rawAnalysisDocument);
      throw new PortalClientError(
        "AnalysisDocumentParsingError",
        `Invalid AnalysisDocument: MIME type ${_rawAnalysisDocument.mimeType}, expected ${expectedMimeType}`
      );
    }
    const rawTextAnalysis = _rawAnalysisDocument.data.wowool_analysis;
    if (rawTextAnalysis === undefined) {
      this._analysis = undefined;
    } else {
      this._analysis = new TextAnalysis(rawTextAnalysis);
    }
  }
  /**
   * Identifier of the wrapped document.
   * @returns The document ID.
   */
  get id() {
    const { id } = this._rawAnalysisDocument;
    return id;
  }
  /**
   * MIME type of the wrapped document.
   * @returns The MIME type string.
   */
  get mimeType() {
    const { mimeType } = this._rawAnalysisDocument;
    return mimeType;
  }
  /**
   * Encoding of the wrapped document.
   */
  get encoding() {
    const { encoding } = this._rawAnalysisDocument;
    return encoding;
  }
  /**
   * Raw data payload of the wrapped document.
   */
  get data() {
    const { data } = this._rawAnalysisDocument;
    return data;
  }
  /**
   * Metadata of the wrapped document.
   */
  get metadata() {
    const { metadata } = this._rawAnalysisDocument;
    return metadata;
  }
  /**
   * The parsed text analysis.
   * @throws {PortalClientError} If the document carried no analysis results.
   */
  get analysis() {
    if (this._analysis) {
      return this._analysis;
    }
    throw new PortalClientError(
      "AnalysisNotFoundError",
      "This analysis document does not contain any analysis results."
    );
  }
};
// src/services/pipeline.service.ts
import { v4 as uuidv4 } from "uuid";
/**
 * Wraps a plain string into a text/plain, utf-8 input document with a freshly
 * generated id and empty metadata; any non-string value is returned as is.
 */
function toPipelineInputDocument(input) {
  if (typeof input !== "string") {
    return input;
  }
  return {
    id: uuidv4(),
    mimeType: "text/plain",
    encoding: "utf-8",
    data: input,
    metadata: {}
  };
}
var Pipeline = class {
  /**
   * Creates a pipeline bound to a Portal.
   * @param steps The steps to process the document.
   * @param portalOrOpts A Portal instance to reuse, or a value handed to a new
   *   Portal as its `apiKey`; when omitted a Portal is created with defaults.
   */
  constructor(steps, portalOrOpts) {
    this.steps = steps;
    if (portalOrOpts instanceof Portal) {
      this.portal = portalOrOpts;
      return;
    }
    this.portal = portalOrOpts === undefined
      ? new Portal()
      : new Portal({ apiKey: portalOrOpts });
  }
  /**
   * Sends one document through the pipeline.
   * @param document The input document, or a string to be wrapped as one.
   * @returns The processed AnalysisDocument.
   */
  async process(document) {
    const inputDocument = toPipelineInputDocument(document);
    const endpoint = `${this.portal.baseUrl}/pipelines/process`;
    const requestInit = {
      method: "POST",
      headers: this.portal.headers,
      body: JSON.stringify({
        document: inputDocument,
        pipeline: this.steps
      })
    };
    const response = await fetch(endpoint, requestInit);
    await this.portal.validateResponse(response);
    const rawAnalysisDocument = await response.json();
    return new AnalysisDocument(rawAnalysisDocument);
  }
  /**
   * Sends several documents through the pipeline in a single request.
   * @param documents An array of input documents and/or strings.
   * @returns An array of processed AnalysisDocuments.
   */
  async processBatch(documents) {
    const inputDocuments = documents.map((entry) => toPipelineInputDocument(entry));
    const endpoint = `${this.portal.baseUrl}/pipelines/process/batch`;
    const requestInit = {
      method: "POST",
      headers: this.portal.headers,
      body: JSON.stringify({
        documents: inputDocuments,
        pipeline: this.steps
      })
    };
    const response = await fetch(endpoint, requestInit);
    await this.portal.validateResponse(response);
    const rawAnalysisDocuments = await response.json();
    return rawAnalysisDocuments.map((rawDocument) => new AnalysisDocument(rawDocument));
  }
};
// src/utilities/env.utility.ts
/**
 * Reads an environment variable, tolerating runtimes without `process.env`.
 * @param name Name of the environment variable.
 * @param defaultValue Value returned when the variable is not defined or no
 *   `process.env` exists (defaults to an empty string).
 * @returns The variable's value, or `defaultValue`.
 */
function getEnv(name, defaultValue = "") {
  const envAvailable = typeof process !== "undefined" && typeof process.env !== "undefined";
  if (!envAvailable) {
    return defaultValue;
  }
  const value = process.env[name];
  if (value === undefined) {
    return defaultValue;
  }
  return value;
}
// src/services/portal.service.ts
var Portal = class {
  /**
   * Constructs a new Portal service instance.
   * @param opts Optional settings object.
   * @param opts.apiKey API key for authentication. If not provided, it is read
   *   from the WOWOOL_PORTAL_KEY environment variable.
   * @param opts.hostUrl Host URL of the API. If not provided, it is read from
   *   the WOWOOL_PORTAL_HOST environment variable, falling back to
   *   "https://api.wowool.com". One trailing slash is removed and "/v1" is
   *   appended to form `baseUrl`.
   * @throws {PortalClientError} If the API key is missing.
   */
  constructor(opts = {}) {
    this.apiKey = opts.apiKey || getEnv("WOWOOL_PORTAL_KEY", "");
    if (!this.apiKey) {
      throw new PortalClientError(
        "MissingApiKeyError",
        "Missing API key. Pass it to the constructor or set the WOWOOL_PORTAL_KEY environment variable."
      );
    }
    // An empty WOWOOL_PORTAL_HOST is treated like an unset one; otherwise the
    // base URL would collapse to the relative path "/v1".
    const rawBaseUrl = opts.hostUrl || getEnv("WOWOOL_PORTAL_HOST", "") || "https://api.wowool.com";
    this.baseUrl = (rawBaseUrl.endsWith("/") ? rawBaseUrl.slice(0, -1) : rawBaseUrl) + "/v1";
  }
  /**
   * Pings the Wowool Portal API to check connectivity.
   * @returns {Promise<PingResponse>} The parsed JSON body of the ping response.
   * @throws {PortalApiError} If the API returns an error response.
   */
  async ping() {
    const response = await fetch(`${this.baseUrl}/ping`, {
      method: "GET",
      headers: this.headers
    });
    await this.validateResponse(response);
    const pingResponse = await response.json();
    return pingResponse;
  }
  /**
   * Creates a new pipeline instance bound to this portal.
   * @param steps The pipeline steps to initialize.
   * @returns {Pipeline} The created pipeline instance.
   */
  createPipeline(steps) {
    return new Pipeline(steps, this);
  }
  /**
   * Returns the default headers for API requests: JSON content type, client
   * identification and the API key.
   */
  get headers() {
    return {
      "Content-Type": "application/json",
      "X-Client-Agent": "wowool-portal-typescript",
      "X-Client-Version": "1.1.1",
      "X-API-Key": this.apiKey
    };
  }
  /**
   * Validates the API response, throwing an error if not successful.
   * @param response The fetch API response object.
   * @throws {PortalApiError} If the response is not OK.
   */
  async validateResponse(response) {
    if (response.ok) return;
    await this.throwApiError(response);
  }
  /**
   * Throws a PortalApiError based on the API error response.
   * The error body is expected to be a JSON object with optional `type`,
   * `message` and `details` fields. When the body cannot be parsed as JSON, or
   * is not an object, a PortalApiError of type "UnknownError" is thrown, so
   * callers can rely on a single error class for every failed response.
   * @param response The fetch API response object.
   * @throws {PortalApiError} Always throws with parsed error details.
   */
  async throwApiError(response) {
    const errorStatusCode = response.status || 500;
    let body;
    try {
      body = await response.json();
    } catch (_) {
      throw new PortalApiError(
        "UnknownError",
        `Failed to parse error response with status ${response.status}`,
        errorStatusCode
      );
    }
    // Valid JSON is not necessarily an object (e.g. `null` or a string).
    const error = body !== null && typeof body === "object" ? body : {};
    const errorType = error.type || "UnknownError";
    const errorMessage = error.message || "Unknown error";
    const errorDetails = error.details || void 0;
    throw new PortalApiError(
      errorType,
      errorMessage,
      errorStatusCode,
      errorDetails
    );
  }
};
export {
AnalysisDocument,
Annotation,
Entity,
Pipeline,
Portal,
Sentence,
Token,
isEntity,
isSentence,
isToken
};