@wowool/portal
Version:
A library for natural language processing tasks including tokenization, entity recognition, anonymization, semantic chunking, and much more.
444 lines (432 loc) • 11.7 kB
JavaScript
// src/utilities/annotation.utility.ts
/**
 * Lookup table from the numeric type code stored at index 0 of a raw
 * annotation tuple to the annotation type name used by the wrapper classes.
 */
var annotationNumberToType = { 1: "Sentence", 2: "Entity", 3: "Token" };
/**
 * Type guard for sentence annotations.
 *
 * @param {{type: string}} annotation - Object exposing a `type` property.
 * @returns {boolean} `true` when `annotation.type` is exactly "Sentence".
 */
function isSentence(annotation) {
  const kind = annotation.type;
  return kind === "Sentence";
}
/**
 * Type guard for entity annotations.
 *
 * @param {{type: string}} annotation - Object exposing a `type` property.
 * @returns {boolean} `true` when `annotation.type` is exactly "Entity".
 */
function isEntity(annotation) {
  const kind = annotation.type;
  return kind === "Entity";
}
/**
 * Type guard for token annotations.
 *
 * @param {{type: string}} annotation - Object exposing a `type` property.
 * @returns {boolean} `true` when `annotation.type` is exactly "Token".
 */
function isToken(annotation) {
  const kind = annotation.type;
  return kind === "Token";
}
/**
 * Checks a raw (not yet wrapped) annotation tuple for the token type code.
 *
 * @param {Array} annotation - Raw annotation tuple; index 0 is its numeric type code.
 * @returns {boolean} `true` when the type code is strictly the number 3,
 *   the code `annotationNumberToType` maps to "Token".
 */
function isRawToken(annotation) {
  const typeCode = annotation[0];
  return typeCode === 3;
}
// src/classes/annotation.class.ts
/**
 * Base wrapper around a raw annotation tuple. Exposes the fields every
 * annotation shares: the type name and the begin/end offsets.
 */
var Annotation = class {
  /**
   * @param {Array} raw - Raw annotation tuple: index 0 is the numeric type
   *   code, index 1 the begin offset, index 2 the end offset.
   * @param {Array} rawChildren - Raw annotations handed in by the caller;
   *   stored unmodified on the instance.
   */
  constructor(raw, rawChildren) {
    this.raw = raw;
    this.rawChildren = rawChildren;
  }

  /** Type name looked up from the numeric code; `undefined` for unmapped codes. */
  get type() {
    const typeCode = this.raw[0];
    return annotationNumberToType[typeCode];
  }

  /** Value stored at index 1 of the raw tuple. */
  get beginOffset() {
    const { raw } = this;
    return raw[1];
  }

  /** Value stored at index 2 of the raw tuple. */
  get endOffset() {
    const { raw } = this;
    return raw[2];
  }

  /**
   * @returns {{type: string, beginOffset: *, endOffset: *}} Plain-object
   *   snapshot of the shared annotation fields.
   */
  toJSON() {
    const { type, beginOffset, endOffset } = this;
    return { type, beginOffset, endOffset };
  }

  /** Prints the `toJSON()` representation to the console, indented by 2 spaces. */
  log() {
    const serialized = JSON.stringify(this.toJSON(), null, 2);
    console.log(serialized);
  }
};
// src/classes/token.class.ts
/**
 * Converts a raw morpheme tuple into a named record.
 *
 * @param {Array} rawMorpheme - Tuple whose index 0 is the lemma and index 1
 *   the part of speech.
 * @returns {{lemma: *, partOfSpeech: *}} The two fields under descriptive keys.
 */
function parseMorpheme(rawMorpheme) {
  return {
    lemma: rawMorpheme[0],
    partOfSpeech: rawMorpheme[1],
  };
}
/**
 * Wrapper for a raw token annotation: a 6-element tuple whose last three
 * slots hold the literal text, the properties and the raw morphemes.
 */
var Token = class extends Annotation {
  /**
   * @param {Array} rawToken - Raw token tuple; must have exactly 6 elements.
   * @throws {Error} When the tuple does not have 6 elements.
   */
  constructor(rawToken) {
    super(rawToken, []);
    const fieldCount = rawToken.length;
    if (fieldCount !== 6) {
      throw new Error(`Invalid token: ${rawToken}`);
    }
    const rawMorphemes = rawToken[5];
    this.literal = rawToken[3];
    this.properties = rawToken[4];
    this.morphemes = rawMorphemes.map(parseMorpheme);
  }

  /**
   * @returns {object} The base annotation fields extended with `literal`,
   *   `properties` and `morphemes`.
   */
  toJSON() {
    const base = super.toJSON();
    return {
      ...base,
      literal: this.literal,
      properties: this.properties,
      morphemes: this.morphemes,
    };
  }
};
// src/utilities/assert.utility.ts
/**
 * Minimal invariant check.
 *
 * @param {*} condition - Value tested for truthiness.
 * @param {string} message - Message of the error thrown when `condition` is falsy.
 * @throws {Error} When `condition` is falsy.
 */
function assert(condition, message) {
  if (condition) {
    return;
  }
  throw new Error(message);
}
// src/classes/entity.class.ts
/**
 * Converts a `[key, values]` entry (as produced by `Object.entries`) into a
 * named attribute record.
 *
 * @param {Array} entry - Pair whose index 0 is the key and index 1 the values.
 * @returns {{key: *, values: *}} The pair under descriptive keys.
 */
function parseAttribute(entry) {
  return {
    key: entry[0],
    values: entry[1],
  };
}
/**
 * Wrapper for a raw entity annotation: a 5-element tuple whose index 3 is
 * the entity URI and index 4 an object of attributes.
 */
var Entity = class extends Annotation {
  /**
   * @param {Array} rawEntity - Raw entity tuple; must have exactly 5 elements.
   * @param {Array} rawAnnotations - Raw annotations to search for the tokens
   *   lying inside this entity's offset range.
   * @throws {Error} When the tuple does not have 5 elements.
   */
  constructor(rawEntity, rawAnnotations) {
    super(rawEntity, rawAnnotations);
    if (rawEntity.length !== 5) {
      throw new Error(`Invalid entity: ${rawEntity}`);
    }
    this.uri = rawEntity[3];
    this.attributes = Object.entries(rawEntity[4]).map(parseAttribute);

    // Keep only raw tokens whose [begin, end] span lies within this entity's span.
    const begin = this.beginOffset;
    const end = this.endOffset;
    const coveredRawTokens = rawAnnotations.filter(
      (rawAnnotation) =>
        isRawToken(rawAnnotation) &&
        rawAnnotation[1] >= begin &&
        rawAnnotation[2] <= end
    );
    this.tokens = coveredRawTokens.map((rawToken) => new Token(rawToken));
  }

  /**
   * Literals of the covered tokens joined with single spaces.
   *
   * @throws {Error} When the entity covers no tokens.
   */
  get text() {
    const hasTokens = this.tokens.length > 0;
    assert(hasTokens, "Entity has no tokens");
    const literals = this.tokens.map((token) => token.literal);
    return literals.join(" ");
  }

  /**
   * @returns {object} The base annotation fields extended with `uri` and
   *   `attributes` (tokens are not included).
   */
  toJSON() {
    const base = super.toJSON();
    return {
      ...base,
      uri: this.uri,
      attributes: this.attributes,
    };
  }
};
// src/classes/sentence.class.ts
/**
 * Wrapper for a raw sentence annotation: a 4- or 5-element tuple whose
 * index 3 is the list of raw annotations contained in the sentence.
 */
var Sentence = class _Sentence extends Annotation {
  /**
   * Wraps every contained raw annotation in its matching class.
   *
   * @param {Array} rawSentence - Raw sentence tuple with 4 or 5 elements.
   * @throws {Error} When the tuple has any other length.
   */
  constructor(rawSentence) {
    super(rawSentence, []);
    if (rawSentence.length !== 4 && rawSentence.length !== 5) {
      throw new Error(`Invalid sentence ${rawSentence}`);
    }
    const rawSentenceAnnotations = rawSentence[3];
    const annotations = [];
    for (const rawAnnotation of rawSentenceAnnotations) {
      const typeName = annotationNumberToType[rawAnnotation[0]];
      if (typeName === "Sentence") {
        annotations.push(new _Sentence(rawAnnotation));
      } else if (typeName === "Entity") {
        // Entities receive the sibling annotations so they can collect their tokens.
        annotations.push(new Entity(rawAnnotation, rawSentenceAnnotations));
      } else if (typeName === "Token") {
        annotations.push(new Token(rawAnnotation));
      }
      // Annotations with an unmapped type code are skipped: storing
      // `undefined` for them would make the getters and toJSON() below
      // throw on the first property access.
    }
    this.annotations = annotations;
  }

  /**
   * Entity annotations of this sentence, in document order. The instances
   * built by the constructor are returned directly; each call yields a new array.
   */
  get entities() {
    return this.annotations.filter(isEntity);
  }

  /**
   * Token annotations of this sentence, in document order. The instances
   * built by the constructor are returned directly; each call yields a new array.
   */
  get tokens() {
    return this.annotations.filter(isToken);
  }

  /** Literals of the sentence's tokens joined with single spaces. */
  get text() {
    return this.tokens.map((token) => token.literal).join(" ");
  }

  /** @param {Function} callback - Invoked like an `Array.prototype.forEach` callback for each entity. */
  forEachEntity(callback) {
    this.entities.forEach(callback);
  }

  /** @param {Function} callback - Invoked like an `Array.prototype.forEach` callback for each token. */
  forEachToken(callback) {
    this.tokens.forEach(callback);
  }

  /**
   * @returns {object} The base annotation fields extended with the
   *   `toJSON()` form of every contained annotation.
   */
  toJSON() {
    return {
      ...super.toJSON(),
      annotations: this.annotations.map((annotation) => annotation.toJSON()),
    };
  }
};
// src/classes/text-analysis.class.ts
/**
 * Wrapper around a raw text analysis. Builds a `Sentence` per raw sentence
 * and offers flattened, memoized access to all tokens and entities.
 */
var TextAnalysis = class {
  /**
   * @param {object} rawTextAnalysis - Raw analysis object; when its `results`
   *   property is present, `results.sentences` is the list of raw sentences.
   */
  constructor(rawTextAnalysis) {
    this.rawTextAnalysis = rawTextAnalysis;
    this._tokens = void 0;
    this._entities = void 0;
    const results = rawTextAnalysis.results;
    // `== null` treats an explicit JSON `null` like a missing key; both mean
    // "no results" and must yield an empty analysis instead of a TypeError.
    this.sentences =
      results == null
        ? []
        : results.sentences.map((rawSentence) => new Sentence(rawSentence));
  }

  /** All tokens of all sentences, flattened; computed on first access, then cached. */
  get tokens() {
    if (this._tokens === void 0) {
      this._tokens = this.sentences.flatMap((sentence) => sentence.tokens);
    }
    return this._tokens;
  }

  /** All entities of all sentences, flattened; computed on first access, then cached. */
  get entities() {
    if (this._entities === void 0) {
      this._entities = this.sentences.flatMap((sentence) => sentence.entities);
    }
    return this._entities;
  }

  /** @param {Function} callback - Invoked like an `Array.prototype.forEach` callback for each sentence. */
  forEachSentence(callback) {
    this.sentences.forEach(callback);
  }

  /**
   * Visits every token of every sentence.
   *
   * @param {Function} callback - Called as `callback(token, index)`, where
   *   `index` keeps counting across sentence boundaries.
   */
  forEachToken(callback) {
    let index = 0;
    for (const sentence of this.sentences) {
      sentence.forEachToken((token) => {
        callback(token, index++);
      });
    }
  }

  /**
   * Visits every entity of every sentence.
   *
   * @param {Function} callback - Called as `callback(entity, index)`, where
   *   `index` keeps counting across sentence boundaries.
   */
  forEachEntity(callback) {
    let index = 0;
    for (const sentence of this.sentences) {
      sentence.forEachEntity((entity) => {
        callback(entity, index++);
      });
    }
  }
};
// src/errors.ts
/**
 * Base class for errors raised by this library. Carries a machine-readable
 * `type` next to the human-readable `message`.
 */
var PortalError = class extends Error {
  /**
   * @param {string} type - Error category identifier.
   * @param {string} message - Human-readable description.
   */
  constructor(type, message) {
    super(message);
    Object.assign(this, { type, message });
  }

  /** @returns {{type: string, message: string}} Plain-object form of the error. */
  toJSON() {
    const { type, message } = this;
    return { type, message };
  }

  /** Writes the `toJSON()` form of the error to `console.error`. */
  log() {
    const payload = this.toJSON();
    console.error(payload);
  }
};
/**
 * Error raised by the client-side code of this library (for example a
 * missing API key or an unparsable analysis document).
 */
var PortalClientError = class extends PortalError {
  /**
   * @param {string} type - Error category identifier.
   * @param {string} message - Human-readable description.
   * @param {*} [details] - Optional extra information about the failure.
   */
  constructor(type, message, details) {
    // The base constructor already stores `type` and `message`.
    super(type, message);
    this.details = details;
  }

  /** @returns {object} The base error fields extended with `details`. */
  toJSON() {
    const base = super.toJSON();
    return { ...base, details: this.details };
  }
};
/**
 * Error describing a failed API response; adds the HTTP status code to the
 * base error fields.
 */
var PortalApiError = class extends PortalError {
  /**
   * @param {string} type - Error category identifier.
   * @param {string} message - Human-readable description.
   * @param {number} statusCode - HTTP status code of the failed response.
   * @param {*} [details] - Optional extra information about the failure.
   */
  constructor(type, message, statusCode, details) {
    // The base constructor already stores `type` and `message`.
    super(type, message);
    this.statusCode = statusCode;
    this.details = details;
  }

  /** @returns {object} The base error fields extended with `statusCode` and `details`. */
  toJSON() {
    const base = super.toJSON();
    return {
      ...base,
      statusCode: this.statusCode,
      details: this.details,
    };
  }
};
// src/classes/analysis-document.class.ts
/**
 * Wrapper around a raw analysis document. Validates the document's MIME
 * type and exposes its fields plus the parsed text analysis, when present.
 */
var AnalysisDocument = class {
  /**
   * @param {object} _rawAnalysisDocument - Raw document with `id`, `mimeType`,
   *   `encoding`, `data` and `metadata`; `data.wowool_analysis` is the
   *   optional raw text analysis.
   * @throws {PortalClientError} "AnalysisDocumentParsingError" when the MIME
   *   type is not the Wowool document-analysis type.
   */
  constructor(_rawAnalysisDocument) {
    this._rawAnalysisDocument = _rawAnalysisDocument;

    const expectedMimeType = "application/vnd.wowool.document-analysis+json";
    const { mimeType } = _rawAnalysisDocument;
    if (mimeType !== expectedMimeType) {
      throw new PortalClientError(
        "AnalysisDocumentParsingError",
        `Invalid Wowool analysis document: document has MIME type ${mimeType}, expected ${expectedMimeType}`
      );
    }

    const rawTextAnalysis = _rawAnalysisDocument.data.wowool_analysis;
    if (rawTextAnalysis === void 0) {
      this._analysis = void 0;
    } else {
      this._analysis = new TextAnalysis(rawTextAnalysis);
    }
  }

  /** The raw document's `id`. */
  get id() {
    const { id } = this._rawAnalysisDocument;
    return id;
  }

  /** The raw document's `mimeType`. */
  get mimeType() {
    const { mimeType } = this._rawAnalysisDocument;
    return mimeType;
  }

  /** The raw document's `encoding`. */
  get encoding() {
    const { encoding } = this._rawAnalysisDocument;
    return encoding;
  }

  /** The raw document's `data`. */
  get data() {
    const { data } = this._rawAnalysisDocument;
    return data;
  }

  /** The raw document's `metadata`. */
  get metadata() {
    const { metadata } = this._rawAnalysisDocument;
    return metadata;
  }

  /**
   * The parsed text analysis.
   *
   * @throws {PortalClientError} "AnalysisNotFoundError" when the document
   *   carried no `wowool_analysis`.
   */
  get analysis() {
    if (this._analysis) {
      return this._analysis;
    }
    throw new PortalClientError(
      "AnalysisNotFoundError",
      "This analysis document does not contain any analysis results."
    );
  }
};
// src/services/pipeline.service.ts
import { v4 as uuidv4 } from "uuid";
/**
 * Normalizes one pipeline input: a plain string becomes a freshly
 * identified UTF-8 text/plain document, anything else is passed through.
 */
function toPipelineInputDocument(input) {
  if (typeof input !== "string") {
    return input;
  }
  return {
    id: uuidv4(),
    mimeType: "text/plain",
    encoding: "utf-8",
    data: input,
    metadata: {},
  };
}

/**
 * A sequence of processing steps that is executed remotely through a
 * `Portal` connection.
 */
var Pipeline = class {
  /**
   * @param {*} steps - Pipeline definition, sent verbatim as the `pipeline`
   *   field of each request.
   * @param {*} [portalOrApiKey] - An existing `Portal` to reuse, or a value
   *   forwarded to the `Portal` constructor; when omitted a `Portal` is
   *   created without arguments.
   */
  constructor(steps, portalOrApiKey) {
    this.steps = steps;
    if (portalOrApiKey instanceof Portal) {
      this.portal = portalOrApiKey;
    } else if (portalOrApiKey === void 0) {
      this.portal = new Portal();
    } else {
      this.portal = new Portal(portalOrApiKey);
    }
  }

  /**
   * Runs the pipeline on a single document.
   *
   * @param {string|object} document - Plain text (wrapped into a text/plain
   *   document with a generated id) or a ready-made document object.
   * @returns {Promise<AnalysisDocument>} The parsed response document.
   */
  async process(document) {
    const inputDocument = toPipelineInputDocument(document);
    const endpoint = `${this.portal.baseUrl}/pipelines/process`;
    const response = await fetch(endpoint, {
      method: "POST",
      headers: this.portal.headers,
      body: JSON.stringify({
        document: inputDocument,
        pipeline: this.steps,
      }),
    });
    await this.portal.validateResponse(response);
    const rawAnalysisDocument = await response.json();
    return new AnalysisDocument(rawAnalysisDocument);
  }

  /**
   * Runs the pipeline on several documents in one request.
   *
   * @param {Array<string|object>} documents - Plain texts and/or document
   *   objects; strings are wrapped as in `process`.
   * @returns {Promise<AnalysisDocument[]>} One parsed document per element
   *   of the response array.
   */
  async processBatch(documents) {
    const inputDocuments = documents.map((doc) => toPipelineInputDocument(doc));
    const endpoint = `${this.portal.baseUrl}/pipelines/process/batch`;
    const response = await fetch(endpoint, {
      method: "POST",
      headers: this.portal.headers,
      body: JSON.stringify({
        documents: inputDocuments,
        pipeline: this.steps,
      }),
    });
    await this.portal.validateResponse(response);
    const rawAnalysisDocuments = await response.json();
    return rawAnalysisDocuments.map(
      (rawAnalysisDocument) => new AnalysisDocument(rawAnalysisDocument)
    );
  }
};
// src/utilities/env.utility.ts
/**
 * Reads an environment variable, tolerating runtimes without `process.env`.
 *
 * @param {string} name - Name of the environment variable.
 * @param {string} [defaultValue=""] - Returned when `process.env` is
 *   unavailable or the variable is not set.
 * @returns {string} The variable's value (an empty string counts as set),
 *   otherwise `defaultValue`.
 */
function getEnv(name, defaultValue = "") {
  const hasProcessEnv =
    typeof process !== "undefined" && typeof process.env !== "undefined";
  if (!hasProcessEnv) {
    return defaultValue;
  }
  const value = process.env[name];
  return value === void 0 ? defaultValue : value;
}
// src/services/portal.service.ts
/**
 * Connection settings and low-level request helpers for the Wowool Portal
 * API: API key, versioned base URL, request headers and response validation.
 */
var Portal = class {
  /**
   * @param {string} [apiKey] - API key; when null/undefined the
   *   WOWOOL_PORTAL_API_KEY environment variable is used.
   * @param {string} [hostUrl] - Host URL; when null/undefined the
   *   WOWOOL_PORTAL_HOST environment variable is used, falling back to
   *   "https://api.wowool.com".
   * @throws {PortalClientError} "MissingApiKeyError" when no API key is found.
   */
  constructor(apiKey, hostUrl) {
    this.apiKey = apiKey != null ? apiKey : getEnv("WOWOOL_PORTAL_API_KEY", "");
    if (!this.apiKey) {
      throw new PortalClientError(
        "MissingApiKeyError",
        "Missing API key. Pass it to the constructor or set the WOWOOL_PORTAL_API_KEY environment variable."
      );
    }
    const rawBaseUrl = hostUrl != null ? hostUrl : getEnv("WOWOOL_PORTAL_HOST", "https://api.wowool.com");
    // Drop one trailing slash so the version suffix never produces "//v1".
    this.baseUrl = (rawBaseUrl.endsWith("/") ? rawBaseUrl.slice(0, -1) : rawBaseUrl) + "/v1";
  }

  /**
   * Calls the API's ping endpoint.
   *
   * @returns {Promise<*>} The parsed JSON body of the ping response.
   * @throws {PortalApiError} When the response is not OK.
   */
  async ping() {
    const response = await fetch(`${this.baseUrl}/ping`, {
      method: "GET",
      headers: this.headers
    });
    await this.validateResponse(response);
    const pingResponse = await response.json();
    return pingResponse;
  }

  /**
   * @param {*} steps - Pipeline definition.
   * @returns {Pipeline} A pipeline bound to this portal.
   */
  createPipeline(steps) {
    return new Pipeline(steps, this);
  }

  /** Headers sent with every request; a new object on each access. */
  get headers() {
    return {
      "Content-Type": "application/json",
      "X-Client-Agent": "wowool-portal-typescript",
      "X-Client-Version": "1.1.1",
      "X-API-Key": this.apiKey
    };
  }

  /**
   * Resolves for OK responses, otherwise rejects via `throwApiError`.
   *
   * @param {Response} response - Fetch response to check.
   * @throws {PortalApiError} When `response.ok` is falsy.
   */
  async validateResponse(response) {
    if (response.ok) return;
    await this.throwApiError(response);
  }

  /**
   * Converts a failed response into a `PortalApiError` and throws it.
   *
   * The error always carries the HTTP status code, including when the body
   * cannot be parsed as JSON (e.g. an HTML page from a gateway), so callers
   * can rely on a single error type for every failed request.
   *
   * @param {Response} response - The failed fetch response.
   * @throws {PortalApiError} Always.
   */
  async throwApiError(response) {
    const errorStatusCode = response.status || 500;
    let error;
    try {
      error = await response.json();
    } catch (_) {
      throw new PortalApiError(
        "UnknownError",
        `Failed to parse error response with status ${response.status}`,
        errorStatusCode
      );
    }
    // Valid JSON is not necessarily an object (`null`, a string, a number);
    // fall back to an empty payload so the defaults below apply.
    const payload = error !== null && typeof error === "object" ? error : {};
    const errorType = payload.type || "UnknownError";
    const errorMessage = payload.message || "Unknown error";
    const errorDetails = payload.details || void 0;
    throw new PortalApiError(
      errorType,
      errorMessage,
      errorStatusCode,
      errorDetails
    );
  }
};
// Public API of the bundle: the document and annotation wrappers, the two
// service classes (Pipeline, Portal) and the annotation type guards.
// TextAnalysis, the error classes and the remaining helpers stay internal.
export {
AnalysisDocument,
Annotation,
Entity,
Pipeline,
Portal,
Sentence,
Token,
isEntity,
isSentence,
isToken
};