UNPKG

@wowool/portal

Version:

A library for natural language processing tasks including tokenization, entity recognition, anonymization, semantic chunking, and much more.

456 lines (444 loc) 15.4 kB
/**
 * Response returned by the ping endpoint.
 */
interface PingResponse {
  /** Status of the API (e.g., 'ok'). */
  status: string;
  /** Version of the API. */
  apiVersion: string;
  /** Informational message. */
  message: string;
  /** Name of the client agent. */
  clientAgent: string;
  /** Minimum supported client version, when reported. */
  clientMinimumVersion?: string;
  /** Current client version, when reported. */
  clientCurrentVersion?: string;
}

/**
 * Object form of a pipeline step: a `name` plus an optional free-form
 * `options` bag.
 *
 * NOTE(review): presumably `name` identifies the step/application to run and
 * `options` is forwarded to it as-is — confirm against the Portal API.
 */
interface UID {
  name: string;
  options?: Record<string, any>;
}

/**
 * A single pipeline step, given either as a plain string or as a
 * {@link UID} object.
 */
type PipelineStep = string | UID;

/**
 * Represents the pipeline as a comma separated list or array of steps.
 */
type PipelineSteps = string | PipelineStep[];

/**
 * Supported MIME types for input documents.
 */
type InputMimeType =
  | "text/plain"
  | "text/html"
  | "text/markdown"
  | "application/pdf"
  | "application/rtf"
  | "application/msword"
  | "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
  | "application/vnd.wowool.document-analysis+json";

/**
 * MIME type for analysis documents.
 */
type AnalysisMimeType = "application/vnd.wowool.document-analysis+json";

/**
 * Metadata key-value pairs for documents.
 */
type Metadata = Record<string, string>;

/**
 * Generic document interface for handling documents of various MIME types.
 * @template MimeType - The MIME type of the document.
 */
interface DocumentInterface<MimeType> {
  /** Unique identifier for the document. */
  id: string;
  /** MIME type of the document. */
  mimeType: MimeType;
  /** Encoding of the document data. */
  encoding: string;
  /** Document data (type depends on context). */
  data: unknown;
  /** Additional metadata (e.g., author, date). */
  metadata: Metadata;
}

/**
 * Interface for input documents, restricted to supported input MIME types.
 */
interface InputDocumentInterface extends DocumentInterface<InputMimeType> {
}

/**
 * Interface for analysis documents, restricted to analysis MIME type.
 */
interface AnalysisDocumentInterface extends DocumentInterface<AnalysisMimeType> {
}

/**
 * Represents a diagnostic message, such as an error or warning.
 */
interface Diagnostic {
  /** Diagnostic message text. */
  message: string;
  /** Numeric code for the diagnostic type. */
  type: number;
  /** Identifier for the diagnostic, when available. */
  id?: string;
  /** Line number related to the diagnostic, when available. */
  line?: number;
  /** Offset within the line, when available. */
  offset?: number;
}

/**
 * Results returned by an application, including optional diagnostics.
 */
interface AppResults {
  /** Application-specific results. */
  results?: any;
  /** List of diagnostics, when any were produced. */
  diagnostics?: Diagnostic[];
}

/**
 * Results of a text analysis operation (the raw, untyped-sentence form).
 *
 * This interface is what the module exports under the public name
 * `TextAnalysis`; see the export list at the bottom of the file.
 * @extends AppResults
 */
interface TextAnalysis$1 extends AppResults {
  results?: {
    /** Detected language. */
    language: string;
    /** List of analyzed sentences. */
    sentences: any[];
    /** Additional metadata, when available. */
    metadata?: Metadata;
  };
  /** List of diagnostics, when any were produced. */
  diagnostics?: Diagnostic[];
}

/**
 * Mapping from document IDs to their analysis results.
 */
type DocumentAnalysis = Record<string, AppResults>;

/**
 * The kinds of annotation that can appear in an analysis document.
 */
type AnnotationType = "Sentence" | "Entity" | "Token";

/**
 * Represents a generic annotation in the analysis document.
 * Provides access to annotation type, offsets, and serialization helpers.
 */
declare class Annotation {
  /** The raw annotation array this instance was created from. */
  readonly raw: any[];
  /** The raw child annotations this instance was created from. */
  readonly rawChildren: any[];

  /**
   * Creates an Annotation instance.
   * @param raw The raw annotation array.
   * @param rawChildren The raw child annotations.
   */
  constructor(raw: any[], rawChildren: any[]);

  /**
   * The type of this annotation (Sentence, Entity, or Token).
   */
  get type(): AnnotationType;

  /**
   * The character offset where this annotation begins.
   */
  get beginOffset(): number;

  /**
   * The character offset where this annotation ends.
   */
  get endOffset(): number;

  /**
   * Converts the annotation to a JSON-serializable object.
   * @returns An object representation of the annotation.
   */
  toJSON(): {
    type: AnnotationType;
    beginOffset: number;
    endOffset: number;
  };

  /**
   * Logs the annotation as a formatted JSON string to the console.
   */
  log(): void;
}

/**
 * Represents a morpheme, the smallest meaningful unit in a token.
 */
interface Morpheme {
  lemma: string;
  partOfSpeech: string;
}

/**
 * Represents a token annotation, which may contain morphemes and properties.
 * Extends the Annotation class.
 */
declare class Token extends Annotation {
  /** The literal string value of the token. */
  literal: string;
  /** The properties associated with the token. */
  properties: string[];
  /** The list of morphemes in the token. */
  morphemes: Morpheme[];

  /**
   * Creates a Token instance.
   * @param rawToken The raw token annotation array.
   * @throws Error if the token is not valid.
   */
  constructor(rawToken: any[]);

  /**
   * Converts the token to a JSON-serializable object.
   * @returns An object representation of the token.
   */
  toJSON(): {
    literal: string;
    properties: string[];
    morphemes: Morpheme[];
    type: AnnotationType;
    beginOffset: number;
    endOffset: number;
  };
}

/**
 * Represents an attribute of an entity, consisting of a key and a list of values.
 */
interface Attribute {
  key: string;
  values: string[];
}

/**
 * Represents an entity annotation, which may have attributes and tokens.
 * Extends the Annotation class.
 */
declare class Entity extends Annotation {
  /** The URI identifying this entity. */
  uri: string;
  /** The attributes associated with this entity. */
  attributes: Attribute[];
  /** The tokens that make up this entity. */
  tokens: Token[];

  /**
   * Creates an Entity instance.
   * @param rawEntity The raw entity annotation array.
   * @param rawAnnotations The list of all raw annotations in the sentence.
   * @throws Error if the entity is not valid.
   */
  constructor(rawEntity: any, rawAnnotations: any[]);

  /**
   * The text content of the entity, constructed from its tokens.
   * @returns The concatenated literal values of the entity's tokens.
   * @throws Error if the entity has no tokens.
   */
  get text(): string;

  /**
   * Converts the entity to a JSON-serializable object.
   * Note that the declared result does not include the entity's tokens.
   * @returns An object representation of the entity.
   */
  toJSON(): {
    uri: string;
    attributes: Attribute[];
    type: AnnotationType;
    beginOffset: number;
    endOffset: number;
  };
}

/**
 * Represents a sentence annotation, which may contain entities and tokens.
 * Extends the Annotation class.
 */
declare class Sentence extends Annotation {
  /** The list of child annotations (entities, tokens, or nested sentences). */
  annotations: Annotation[];

  /**
   * Creates a Sentence instance.
   * @param rawSentence The raw sentence annotation array.
   * @throws Error if the sentence is not valid.
   */
  constructor(rawSentence: any[]);

  /**
   * Returns all entity annotations within this sentence.
   * @returns An array of Entity objects.
   */
  get entities(): Entity[];

  /**
   * Returns all token annotations within this sentence.
   * @returns An array of Token objects.
   */
  get tokens(): Token[];

  /**
   * The text content of the sentence, constructed from its tokens.
   * @returns The concatenated literal values of the sentence's tokens.
   */
  get text(): string;

  /**
   * Iterates over each entity in the sentence and invokes the callback.
   * @param callback The function to call for each entity.
   */
  forEachEntity(callback: (entity: Entity, index: number) => void): void;

  /**
   * Iterates over each token in the sentence and invokes the callback.
   * @param callback The function to call for each token.
   */
  forEachToken(callback: (token: Token, index: number) => void): void;

  /**
   * Converts the sentence to a JSON-serializable object that carries the
   * sentence's own type/offsets plus a list of its child annotations.
   * @returns An object representation of the sentence.
   */
  toJSON(): {
    annotations: {
      type: AnnotationType;
      beginOffset: number;
      endOffset: number;
    }[];
    type: AnnotationType;
    beginOffset: number;
    endOffset: number;
  };
}

/**
 * Represents the results of a text analysis, including sentences, tokens, and entities.
 *
 * This class is not part of the module's export list; callers obtain
 * instances through {@link AnalysisDocument.analysis}.
 */
declare class TextAnalysis {
  private readonly rawTextAnalysis;
  /** The list of sentences in the analysis. */
  readonly sentences: Sentence[];
  // NOTE(review): presumably caches backing the `tokens` / `entities`
  // getters — the implementation is not visible here; confirm.
  private _tokens?;
  private _entities?;

  /**
   * Creates a TextAnalysis instance from raw analysis data.
   * @param rawTextAnalysis The raw text analysis object.
   */
  constructor(rawTextAnalysis: TextAnalysis$1);

  /**
   * Returns all tokens in the analysis, across all sentences.
   * @returns An array of Token objects.
   */
  get tokens(): Token[];

  /**
   * Returns all entities in the analysis, across all sentences.
   * @returns An array of Entity objects.
   */
  get entities(): Entity[];

  /**
   * Iterates over each sentence in the analysis and invokes the callback.
   * @param callback The function to call for each sentence.
   */
  forEachSentence(callback: (sentence: Sentence, index: number) => void): void;

  /**
   * Iterates over each token in the analysis and invokes the callback.
   * @param callback The function to call for each token.
   */
  forEachToken(callback: (token: Token, index: number) => void): void;

  /**
   * Iterates over each entity in the analysis and invokes the callback.
   * @param callback The function to call for each entity.
   */
  forEachEntity(callback: (entity: Entity, index: number) => void): void;
}

/**
 * Provides a typed wrapper around a raw analysis document, including validation and access to analysis results.
 * Exposes metadata, MIME type, and analysis results in a type-safe manner.
 */
declare class AnalysisDocument implements AnalysisDocumentInterface {
  private readonly _rawAnalysisDocument;
  /** The parsed text analysis, if available. */
  private readonly _analysis?;

  /**
   * Constructs an AnalysisDocument from a raw analysis document.
   * @param _rawAnalysisDocument The raw analysis document to wrap and validate.
   * @throws {PortalClientError} If the MIME type is not 'application/vnd.wowool.document-analysis+json'.
   */
  constructor(_rawAnalysisDocument: AnalysisDocumentInterface);

  /**
   * The unique identifier of the analysis document.
   * @returns The document ID string.
   */
  get id(): string;

  /**
   * The MIME type of the analysis document.
   * @returns The MIME type string.
   */
  get mimeType(): AnalysisMimeType;

  /**
   * The encoding of the analysis document.
   */
  get encoding(): string;

  /**
   * The raw data payload of the analysis document.
   */
  get data(): unknown;

  /**
   * The metadata associated with the analysis document.
   */
  get metadata(): Metadata;

  /**
   * The text analysis results, if present.
   * @throws {PortalClientError} If no analysis results are available in the document.
   */
  get analysis(): TextAnalysis;
}

// NOTE(review): the three guards below return plain `boolean`, so they do not
// narrow `Annotation` to its subclass at the call site. Their implementations
// are not visible here (presumably a check on `annotation.type`); if they are
// guaranteed to accept only real subclass instances, type predicates
// (`annotation is Sentence`, etc.) would be a backward-compatible upgrade.

/**
 * Reports whether the given annotation is a sentence annotation.
 * @param annotation The annotation to inspect.
 */
declare function isSentence(annotation: Annotation): boolean;

/**
 * Reports whether the given annotation is an entity annotation.
 * @param annotation The annotation to inspect.
 */
declare function isEntity(annotation: Annotation): boolean;

/**
 * Reports whether the given annotation is a token annotation.
 * @param annotation The annotation to inspect.
 */
declare function isToken(annotation: Annotation): boolean;

/**
 * Pipeline class for processing documents through defined steps.
 */
declare class Pipeline {
  /** The steps this pipeline was constructed with. */
  private readonly steps;
  /**
   * The Portal instance used for API communication.
   */
  private readonly portal;

  /**
   * Constructs a Pipeline with the given steps and Portal instance.
   * @param steps The steps to process the document.
   * @param portal An existing Portal instance.
   */
  constructor(steps: PipelineSteps, portal: Portal);

  /**
   * Constructs a Pipeline with the given steps and API key.
   * @param steps The steps to process the document.
   * @param apiKey Optional API key for authentication.
   */
  constructor(steps: PipelineSteps, apiKey?: string);

  /**
   * Processes a single document through the pipeline.
   * @param document The input document or a string to be wrapped as a document.
   * @returns The processed AnalysisDocument.
   */
  process(document: InputDocumentInterface | string): Promise<AnalysisDocument>;

  /**
   * Processes multiple documents through the pipeline in a batch.
   * @param documents An array of input documents or strings.
   * @returns An array of processed AnalysisDocuments.
   */
  processBatch(documents: (InputDocumentInterface | string)[]): Promise<AnalysisDocument[]>;
}

/**
 * Connection settings accepted by the {@link Portal} constructor.
 */
interface PortalOptions {
  /** Optional API key for authentication. If not provided, reads from environment. */
  apiKey?: string;
  /** Optional base URL for the API. If not provided, reads from environment or defaults. */
  hostUrl?: string;
}

/**
 * Connection to the Portal API.
 * Handles authentication, API requests, and error management.
 */
declare class Portal {
  private readonly apiKey;
  /** The base URL used for API requests. */
  readonly baseUrl: string;

  /**
   * Constructs a new Portal service instance.
   * @param opts Optional connection settings; the whole object may be omitted.
   * @param opts.apiKey Optional API key for authentication. If not provided, reads from environment.
   * @param opts.hostUrl Optional base URL for the API. If not provided, reads from environment or defaults.
   * @throws {PortalClientError} If the API key is missing.
   */
  constructor(opts?: PortalOptions);

  /**
   * Pings the Wowool Portal API to check connectivity.
   * @returns The ping response from the API.
   * @throws {PortalApiError} If the API returns an error response.
   */
  ping(): Promise<PingResponse>;

  /**
   * Creates a new pipeline instance with the specified steps.
   * @param steps The pipeline steps to initialize.
   * @returns The created pipeline instance.
   */
  createPipeline(steps: PipelineSteps): Pipeline;

  /**
   * Returns the default headers for API requests.
   */
  get headers(): Record<string, string>;

  /**
   * Validates the API response, throwing an error if not successful.
   * @param response The fetch API response object.
   * @throws {PortalApiError} If the response is not OK.
   */
  validateResponse(response: Response): Promise<void>;

  /**
   * Throws a PortalApiError based on the API error response.
   * @param response The fetch API response object.
   * @throws {PortalApiError} Always throws with parsed error details.
   */
  private throwApiError;
}

// The public name `TextAnalysis` is bound to the raw-results interface
// (`TextAnalysis$1`), not to the `TextAnalysis` class declared above.
export {
  AnalysisDocument,
  type AnalysisDocumentInterface,
  type AnalysisMimeType,
  Annotation,
  type AppResults,
  type Diagnostic,
  type DocumentAnalysis,
  type DocumentInterface,
  Entity,
  type InputDocumentInterface,
  type InputMimeType,
  type Metadata,
  Pipeline,
  Portal,
  Sentence,
  type TextAnalysis$1 as TextAnalysis,
  Token,
  isEntity,
  isSentence,
  isToken,
};