/*
 * weavebot-core
 * Version:
 * Generic content processing framework for web scraping and AI extraction
 * 180 lines (177 loc) • 5.61 kB
 * text/typescript
 */
import { z } from 'zod';
/**
 * Configuration object for the processor. Every field is optional.
 */
interface ProcessorConfig {
/** Schema identifier as a string. Presumably used when no schema is given explicitly — TODO confirm against the implementation. */
defaultSchema?: string;
/** Processor plugins keyed by string. NOTE(review): whether the key must equal `ProcessorPlugin.name` is not stated here. */
processors?: Record<string, ProcessorPlugin>;
/** Storage adapters keyed by string. NOTE(review): whether the key must equal `StorageAdapter.name` is not stated here. */
storage?: Record<string, StorageAdapter>;
/** Optional logger conforming to the `Logger` interface. */
logger?: Logger;
}
/**
 * The closed set of scraping strategy names: `'static'`, `'spa'` or `'auto'`.
 * NOTE(review): what each strategy does is defined by the implementation, not by this declaration.
 */
type ScrapingStrategy = 'static' | 'spa' | 'auto';
/**
 * Optional tuning knobs for a scrape. Every field is optional; defaults are
 * not declared here.
 */
interface ScrapingOptions {
/** One of the `ScrapingStrategy` names. */
strategy?: ScrapingStrategy;
/** Numeric timeout. NOTE(review): units (presumably milliseconds) are not specified in this declaration — confirm. */
timeout?: number;
/** Selector string; presumably a CSS selector to wait for before reading the page — TODO confirm. */
waitForSelector?: string;
/** User-agent string. */
userAgent?: string;
/** Boolean flag for JavaScript execution during scraping. */
enableJavaScript?: boolean;
/** Numeric retry count. NOTE(review): whether this counts total attempts or additional attempts is not specified here. */
retries?: number;
}
/**
 * Context object passed alongside the input to `ProcessingStep.execute`,
 * `ProcessorPlugin.execute` and `Pipeline.execute`.
 *
 * All named fields are optional. The trailing index signature additionally
 * permits any other string key, typed as `unknown`, so misspelled field names
 * are not rejected by the compiler.
 */
interface ProcessingContext {
url?: string;
data?: unknown;
metadata?: Record<string, unknown>;
userId?: string;
sessionId?: string;
/** Numeric step index. NOTE(review): zero- vs one-based is not specified here. */
stepIndex?: number;
stepName?: string;
/** Pipeline identifier as a string; presumably the pipeline's `name` — TODO confirm. */
pipeline?: string;
/** Open-ended extension point: any additional key is allowed and must be narrowed from `unknown` before use. */
[key: string]: unknown;
}
/**
 * A single named unit of work that can be placed in a `Pipeline`'s `steps`.
 */
interface ProcessingStep {
/** Name of the step. */
name: string;
/**
 * Runs the step asynchronously.
 *
 * @param input - Untyped input; implementations must narrow it before use.
 * @param context - The processing context for this invocation.
 * @returns A promise of an untyped result.
 */
execute(input: unknown, context: ProcessingContext): Promise<unknown>;
}
/**
 * A named, described plugin that can be registered under
 * `ProcessorConfig.processors`.
 */
interface ProcessorPlugin {
/** Name of the plugin. */
name: string;
/** Human-readable description of the plugin. */
description: string;
/**
 * Runs the plugin asynchronously.
 *
 * @param params - Untyped parameters; implementations must narrow them before use.
 * @param context - The processing context for this invocation.
 * @returns A promise of an untyped result.
 */
execute(params: unknown, context: ProcessingContext): Promise<unknown>;
}
/**
 * A stored record as returned by `StorageAdapter` create/update/fetch calls.
 */
interface StorageRecord {
/** Record identifier. */
id: string;
/** Field values keyed by field name; values are untyped. */
fields: Record<string, unknown>;
/** Creation time as a string. NOTE(review): the format (e.g. ISO 8601) is not specified here — confirm. */
createdTime: string;
/** Last-update time as a string, when present; same format caveat as `createdTime`. */
updatedTime?: string;
}
/**
 * Contract for a storage backend. All operations are asynchronous.
 */
interface StorageAdapter {
/** Name of the adapter. */
name: string;
/**
 * Prepares the adapter for use.
 *
 * @param config - Adapter-specific settings with untyped values.
 */
initialize(config: Record<string, unknown>): Promise<void>;
/**
 * Creates a record.
 *
 * @param data - Untyped payload to store.
 * @param schema - Optional schema identifier.
 * @returns The stored record.
 */
create(data: unknown, schema?: string): Promise<StorageRecord>;
/**
 * Updates the record with the given id.
 *
 * NOTE(review): `Partial<unknown>` does not describe a partial record; a
 * mapped type over `unknown` collapses to `{}`, so any non-nullish value is
 * accepted. Consider a concrete type such as `Record<string, unknown>` in the
 * originating source if that is the intent.
 *
 * @param id - Identifier of the record to update.
 * @param data - Update payload.
 * @returns The updated record.
 */
update(id: string, data: Partial<unknown>): Promise<StorageRecord>;
/**
 * Fetches records.
 *
 * @param filter - Optional filter; behavior when omitted is implementation-defined.
 * @returns The matching records.
 */
fetch(filter?: StorageFilter): Promise<StorageRecord[]>;
/**
 * Deletes the record with the given id.
 *
 * @param id - Identifier of the record to delete.
 */
delete(id: string): Promise<void>;
/**
 * Optional capability: callers must check that the method exists before
 * invoking it.
 *
 * @param schema - Zod schema for the table.
 * @param tableName - Name of the table.
 */
createTable?(schema: z.ZodType, tableName: string): Promise<void>;
}
/**
 * Query options accepted by `StorageAdapter.fetch`. Every field is optional.
 */
interface StorageFilter {
/** Schema identifier to restrict the query to. */
schema?: string;
/** Numeric limit; presumably the maximum number of records returned — TODO confirm. */
limit?: number;
/** Numeric offset; presumably the number of records to skip — TODO confirm. */
offset?: number;
/** Conditions keyed by string with untyped values. NOTE(review): matching semantics (equality vs. operators) are adapter-defined. */
where?: Record<string, unknown>;
/** Ordered list of sort keys, each with a field name and an `'asc'` or `'desc'` direction. */
orderBy?: {
field: string;
direction: 'asc' | 'desc';
}[];
}
/**
 * A named collection of `ProcessingStep`s that can itself be executed.
 */
interface Pipeline {
/** Name of the pipeline. */
name: string;
/** The steps that make up the pipeline. */
steps: ProcessingStep[];
/**
 * Runs the pipeline asynchronously.
 * NOTE(review): how `steps` are sequenced and how results are passed between
 * them is defined by the implementation, not by this declaration.
 *
 * @param input - Untyped input.
 * @param context - The processing context for this run.
 * @returns A promise of an untyped result.
 */
execute(input: unknown, context: ProcessingContext): Promise<unknown>;
}
/**
 * Builder for `Pipeline` objects.
 */
interface PipelineBuilder {
/**
 * Adds a step and returns a `PipelineBuilder`, which allows calls to be chained.
 *
 * @param step - The step to add.
 */
addStep(step: ProcessingStep): PipelineBuilder;
/** Produces the `Pipeline`. */
build(): Pipeline;
}
/**
 * Parameters for a web-scraping request.
 */
interface WebScrapingParams {
/** URL to scrape. */
url: string;
/**
 * Scraping strategy.
 * NOTE(review): `ScrapingOptions` also carries a `strategy` field; which one
 * takes precedence when both are set is not defined in this declaration.
 */
strategy?: ScrapingStrategy;
/** Additional scraping options. */
options?: ScrapingOptions;
}
/**
 * Parameters for an AI extraction request.
 */
interface AIExtractionParams {
/** The scraped content to extract from. */
content: ScrapedContent;
/** Either a schema identifier string or a Zod schema object. */
schema: string | z.ZodType;
/** Optional URL. NOTE(review): `content` already has a required `url`; the relationship between the two is not stated here. */
url?: string;
/** Optional model identifier as a string. */
model?: string;
}
/**
 * Parameters for a storage request.
 */
interface StorageParams {
/** Untyped payload to store. */
data: unknown;
/** Schema identifier (required). */
schema: string;
/** Optional adapter identifier; presumably a key of `ProcessorConfig.storage` — TODO confirm. */
adapter?: string;
}
/**
 * The result of scraping a page. All fields are required.
 */
interface ScrapedContent {
/** URL associated with the content. */
url: string;
/** Page title. */
title: string;
/** Textual content. */
text: string;
/** HTML content. */
html: string;
/**
 * Free-form metadata.
 * NOTE(review): values are typed `any`, so reads from this map are not
 * type-checked, unlike the `Record<string, unknown>` used elsewhere in this file.
 */
metadata: Record<string, any>;
/** When the content was extracted. */
extractedAt: Date;
}
/**
 * Base error class. `ScrapingError`, `ExtractionError`, `ValidationError`,
 * `StorageError` and `PipelineError` all extend it.
 */
declare class WeaveBotError extends Error {
/** Error type string; read-only. */
readonly type: string;
/** Context attached to the error; read-only and always present on the instance. */
readonly context: Record<string, unknown>;
/**
 * @param message - Error message.
 * @param type - Error type string.
 * @param context - Optional context. NOTE(review): the value `context` takes when omitted is set by the implementation.
 */
constructor(message: string, type: string, context?: Record<string, unknown>);
}
/**
 * `WeaveBotError` subclass for scraping failures.
 * Its constructor takes no `type` argument; presumably the subclass supplies a
 * fixed `type` itself — TODO confirm against the implementation.
 */
declare class ScrapingError extends WeaveBotError {
/**
 * @param message - Error message.
 * @param context - Optional context.
 */
constructor(message: string, context?: Record<string, unknown>);
}
/**
 * `WeaveBotError` subclass for extraction failures.
 * Its constructor takes no `type` argument; presumably the subclass supplies a
 * fixed `type` itself — TODO confirm against the implementation.
 */
declare class ExtractionError extends WeaveBotError {
/**
 * @param message - Error message.
 * @param context - Optional context.
 */
constructor(message: string, context?: Record<string, unknown>);
}
/**
 * `WeaveBotError` subclass for validation failures.
 * Its constructor takes no `type` argument; presumably the subclass supplies a
 * fixed `type` itself — TODO confirm against the implementation.
 */
declare class ValidationError extends WeaveBotError {
/**
 * @param message - Error message.
 * @param context - Optional context.
 */
constructor(message: string, context?: Record<string, unknown>);
}
/**
 * `WeaveBotError` subclass for storage failures.
 * Its constructor takes no `type` argument; presumably the subclass supplies a
 * fixed `type` itself — TODO confirm against the implementation.
 */
declare class StorageError extends WeaveBotError {
/**
 * @param message - Error message.
 * @param context - Optional context.
 */
constructor(message: string, context?: Record<string, unknown>);
}
/**
 * `WeaveBotError` subclass for pipeline failures.
 * Its constructor takes no `type` argument; presumably the subclass supplies a
 * fixed `type` itself — TODO confirm against the implementation.
 */
declare class PipelineError extends WeaveBotError {
/**
 * @param message - Error message.
 * @param context - Optional context.
 */
constructor(message: string, context?: Record<string, unknown>);
}
/**
 * Logging contract with four level methods and a timing helper. Each method
 * takes a message or operation name plus an optional context map.
 */
interface Logger {
/** Logs at debug level. */
debug(message: string, context?: Record<string, unknown>): void;
/** Logs at info level. */
info(message: string, context?: Record<string, unknown>): void;
/** Logs at warn level. */
warn(message: string, context?: Record<string, unknown>): void;
/** Logs at error level. */
error(message: string, context?: Record<string, unknown>): void;
/**
 * Starts timing an operation and returns a zero-argument callback.
 * NOTE(review): presumably calling the returned function ends the timer and
 * records the duration — confirm against the implementation.
 *
 * @param operation - Name of the operation being timed.
 * @param context - Optional context map.
 */
time(operation: string, context?: Record<string, unknown>): () => void;
}
/**
 * Named shape for logging context: a few well-known optional fields plus an
 * index signature that admits any other string key.
 *
 * Note that the `Logger` methods in this file declare their context parameter
 * as `Record<string, unknown>`, not as `LogContext`.
 */
interface LogContext {
userId?: string;
sessionId?: string;
url?: string;
operation?: string;
/** Numeric duration. NOTE(review): units are not specified in this declaration. */
duration?: number;
/** Any additional key is allowed; values are `unknown`. */
[key: string]: unknown;
}
/**
 * Input handed to the processor.
 */
interface ProcessorInput {
/** Kind of input: `'url'`, `'text'` or `'file'`. */
type: 'url' | 'text' | 'file';
/**
 * The input as a string regardless of `type`.
 * NOTE(review): for `'file'` it is not stated whether this is a path or the file contents — confirm.
 */
data: string;
/** Optional schema identifier. */
schema?: string;
/** Optional free-form options with untyped values. */
options?: Record<string, unknown>;
}
/**
 * Outcome of a processing run.
 *
 * `data` and `error` are both optional and independent of `success` at the
 * type level, so the compiler does not enforce that exactly one of them is
 * present.
 */
interface ProcessorResult {
/** Whether processing succeeded. */
success: boolean;
/** Untyped result payload. */
data?: unknown;
/** Structured error details; field names mirror `WeaveBotError` (`type`, `message`, `context`). */
error?: {
type: string;
message: string;
context?: Record<string, unknown>;
};
/** Run metadata; always present. Additional keys are allowed via the index signature. */
metadata: {
/** Numeric processing time. NOTE(review): units are not specified in this declaration. */
processingTime: number;
strategy?: string;
model?: string;
[key: string]: unknown;
};
}
/**
 * Context passed to `CommandHandler.execute`.
 */
interface CommandContext {
/** Identifier of the invoking user (required). */
userId: string;
/** Optional session identifier. */
sessionId?: string;
/** Platform identifier as a string; the set of valid values is not declared here. */
platform: string;
/** Command arguments as a list of strings. */
args: string[];
/** Optional free-form metadata with untyped values. */
metadata?: Record<string, unknown>;
}
/**
 * A handler for one command.
 */
interface CommandHandler {
/** Command string this handler is associated with. */
command: string;
/** Human-readable description of the command. */
description: string;
/**
 * Runs the command asynchronously.
 *
 * @param context - Invocation context (user, platform, arguments).
 * @returns The command's result.
 */
execute(context: CommandContext): Promise<CommandResult>;
}
/**
 * Result returned by `CommandHandler.execute`.
 */
interface CommandResult {
/** Whether the command succeeded. */
success: boolean;
/** Message string (required). */
message: string;
/** Optional untyped payload. */
data?: unknown;
/** Optional error as a plain string (unlike `ProcessorResult.error`, which is an object). */
error?: string;
}
/**
 * Options for newsletter generation. Every field is optional; defaults are
 * not declared here.
 */
interface NewsletterOptions {
/** Start of the date range. NOTE(review): inclusive vs. exclusive is not specified. */
startDate?: Date;
/** End of the date range. NOTE(review): inclusive vs. exclusive is not specified. */
endDate?: Date;
/** Numeric cap on events. */
maxEvents?: number;
/** Numeric cap on updates. */
maxUpdates?: number;
/** Boolean flag for including upcoming items. */
includeUpcoming?: boolean;
/** Template identifier as a string. */
template?: string;
}
// Re-exports every declaration above under a short alias. The six error
// classes (WeaveBotError, ScrapingError, ExtractionError, ValidationError,
// StorageError, PipelineError) are exported as values; everything else is a
// type-only export. NOTE(review): the single-letter aliases look
// bundler-generated; consumers presumably import the full names from the
// package entry point rather than from this file — confirm.
export { type AIExtractionParams as A, type CommandHandler as C, ExtractionError as E, type Logger as L, type NewsletterOptions as N, type Pipeline as P, type StorageAdapter as S, ValidationError as V, type WebScrapingParams as W, type LogContext as a, type ProcessingStep as b, type ProcessingContext as c, type PipelineBuilder as d, type ProcessorConfig as e, type ProcessorPlugin as f, type CommandContext as g, type CommandResult as h, type ProcessorInput as i, type ProcessorResult as j, type ScrapedContent as k, type StorageRecord as l, type StorageFilter as m, type ScrapingStrategy as n, type ScrapingOptions as o, type StorageParams as p, WeaveBotError as q, ScrapingError as r, StorageError as s, PipelineError as t };