// genaiscript
// Version:
// A CLI for GenAIScript, a generative AI scripting framework.
// 148 lines (147 loc) • 7.11 kB
// TypeScript
/**
* GenAIScript supporting runtime.
* This module declares helpers for text classification, iterative content improvement,
* data transformation, PDF-to-markdown conversion, workspace file trees, and readable
* web page content extraction in the GenAIScript environment.
*/
import { delay, uniq, uniqBy, chunk } from "es-toolkit";
import { z } from "zod";
/**
* Re-exported utilities for general use: `delay`, `uniq`, `uniqBy`, and `chunk`
* from "es-toolkit", and `z` from "zod".
*/
export { delay, uniq, uniqBy, z, chunk };
/**
* Options for classifying data using AI models.
*
* Combines the classification-specific settings below with every field of
* `PromptGeneratorOptions` except `choices`.
* NOTE(review): `choices` is presumably omitted because classification derives the
* choices from the labels — confirm against the implementation.
*/
export type ClassifyOptions = {
/**
* When true, adds an 'other' category to handle cases that don't match the defined labels.
*/
other?: boolean;
/**
* When true, provides explanatory text before the classification result.
*/
explanations?: boolean;
/**
* Context used to run the classification prompt.
*/
ctx?: ChatGenerationContext;
} & Omit<PromptGeneratorOptions, "choices">;
/**
* Classifies input text into predefined categories using AI.
* Inspired by https://github.com/prefecthq/marvin.
*
* @param text - Text content to classify, or a prompt generator function that produces it.
* @param labels - Object mapping each label name to its description.
* @param options - Classification settings: whether to add an "other" category, whether to
* request explanations, and the context to run the prompt in.
* @returns The chosen label, confidence metrics, log probabilities, the full answer text,
* and usage statistics.
* @throws Error if fewer than two labels are provided (including "other").
*/
export declare function classify<L extends Record<string, string>>(text: StringLike | PromptGenerator, labels: L, options?: ClassifyOptions): Promise<{
/** Chosen label: one of the keys of `labels`, or "other". */
label: keyof typeof labels | "other";
/** Optional confidence metric. NOTE(review): presumably derived from the log probabilities — confirm. */
entropy?: number;
/** Optional confidence metric. NOTE(review): presumably the log probability of the chosen label — confirm. */
logprob?: number;
/** Optional confidence metric. NOTE(review): presumably the chosen label's probability as a percentage — confirm. */
probPercent?: number;
/** Full answer text. */
answer: string;
/** Optional log probabilities, keyed by label name (including "other"). */
logprobs?: Record<keyof typeof labels | "other", Logprob>;
/** Optional usage statistics for the prompt run. */
usage?: RunPromptUsage;
}>;
/**
* Enhances content generation by applying iterative improvements.
*
* Declared to return `void`, so callers get no value or promise back.
* NOTE(review): presumably the improvement step is registered on the context and runs
* as part of the generation — confirm against the implementation.
*
* @param options - Configuration for the improvement process.
* @param options.ctx - Chat generation context to use. Defaults to the environment generator if not provided.
* @param options.repeat - Number of improvement iterations to perform. Defaults to 1.
* @param options.instructions - Custom instructions for improvement, applied in each iteration.
* Defaults to "Make it better!".
*/
export declare function makeItBetter(options?: {
/** Chat generation context to use. */
ctx?: ChatGenerationContext;
/** Number of improvement iterations to perform. */
repeat?: number;
/** Custom instructions applied in each iteration. */
instructions?: string;
}): void;
/**
* Converts unstructured text or data into structured JSON format.
* Inspired by https://github.com/prefecthq/marvin.
*
* @param data - Input text, or a prompt generator function, to convert.
* @param itemSchema - JSON schema defining the target data structure. If `options.multiple`
* is true, this will be treated as an array schema.
* @param options - Prompt generator options plus conversion settings: `multiple`,
* custom `instructions`, and the context (`ctx`) to run in.
* @returns An object containing the converted data, error information if applicable,
* and the raw text response.
*/
export declare function cast(data: StringLike | PromptGenerator, itemSchema: JSONSchema, options?: PromptGeneratorOptions & {
/** When true, the schema is treated as an array schema. */
multiple?: boolean;
/** Custom instructions for the conversion, as text or a prompt generator function. */
instructions?: string | PromptGenerator;
/** Chat generation context to use. */
ctx?: ChatGenerationContext;
}): Promise<{
/** Converted data; typed `unknown`, so callers must narrow or validate it before use. */
data?: unknown;
/** Error information, if applicable. */
error?: string;
/** Raw text response. */
text: string;
}>;
/**
* Converts a PDF file to markdown format with intelligent formatting preservation.
* The text and images from the PDF are analyzed to ensure accurate markdown formatting.
*
* @param file - PDF file to convert.
* @param options - Prompt generator options and PDF parsing options, plus custom
* `instructions` and the context (`ctx`) to run in. The `renderAsImage` PDF parsing
* option is excluded from the accepted options.
* NOTE(review): presumably because the function controls image rendering itself — confirm.
* @returns An object containing the original pages, rendered images, and markdown content for each page.
*/
export declare function markdownifyPdf(file: WorkspaceFile, options?: PromptGeneratorOptions & Omit<ParsePDFOptions, "renderAsImage"> & {
/** Custom instructions for the conversion, as text or a prompt generator function. */
instructions?: string | PromptGenerator;
/** Chat generation context to use. */
ctx?: ChatGenerationContext;
}): Promise<{
/** Original pages of the PDF. */
pages: string[];
/** Rendered images. */
images: string[];
/** Markdown content for each page. */
markdowns: string[];
}>;
/**
* Creates a tree representation of files in the workspace.
*
* @param glob - Glob pattern to match files.
* @param options - Workspace grep options plus the tree generation settings below.
* @param options.query - Optional search query to filter files.
* @param options.size - Whether to include file sizes in the output.
* @param options.ignore - Patterns to exclude from the results.
* @param options.frontmatter - Frontmatter fields to extract from markdown files. Only applies to markdown files.
* @param options.preview - Custom function to generate file previews based on file and stats.
* @returns A formatted string representing the file tree structure, including metadata and file sizes if specified.
*/
export declare function fileTree(glob: string, options?: WorkspaceGrepOptions & {
/** Optional search query, as a string or regular expression, used to filter files. */
query?: string | RegExp;
/** Whether to include file sizes in the output. */
size?: boolean;
/** One pattern, or an array of patterns, to exclude from the results. */
ignore?: ElementOrArray<string>;
/** Frontmatter fields to extract; only applies to markdown files. */
frontmatter?: OptionsOrString<"title" | "description" | "keywords" | "tags">[];
/** Custom preview generator; receives the file and its stats and may return its result asynchronously. */
preview?: (file: WorkspaceFile, stats: FileStats) => Awaitable<unknown>;
}): Promise<string>;
/**
* Injects @mozilla/readability into a page to extract structured data from an article.
* The page content is evaluated with the Readability library to parse out details such as
* title, content, text, and metadata.
*
* @typeParam T - Type of the extracted `content` field. Defaults to `string`.
* @param page - The browser page instance to evaluate and extract content from.
* @returns The parsed article details (title, content, text content, length, excerpt, byline,
* direction, site name, language, and published time), or null if parsing fails.
* Each field may itself be null or undefined.
* @see https://github.com/mishushakov/llm-scraper/
*/
export declare function parseReadableContent<T = string>(page: BrowserPage): Promise<null | {
/** article title */
title: string | null | undefined;
/** HTML string of processed article content */
content: T | null | undefined;
/** text content of the article, with all the HTML tags removed */
textContent: string | null | undefined;
/** length of an article, in characters */
length: number | null | undefined;
/** article description, or short excerpt from the content */
excerpt: string | null | undefined;
/** author metadata */
byline: string | null | undefined;
/** content direction */
dir: string | null | undefined;
/** name of the site */
siteName: string | null | undefined;
/** content language */
lang: string | null | undefined;
/** published time */
publishedTime: string | null | undefined;
}>;