/*
 * llm-stream-parser
 * Version: (not given in this listing)
 * A TypeScript library for parsing and processing structured data from LLM
 * streaming responses, with custom tag definitions and an event-driven architecture.
 * 322 lines (278 loc) • 8.5 kB • text/typescript
 */
/**
* Content and attribute transformation utilities
*/
import { BaseTag, NestedTag } from '../types/base';
import { TagDefinition } from '../types/schema';
import { ParserError } from '../types/errors';
/**
 * Factories for `(content: string) => string` transformers applied to tag content.
 *
 * Each static method only builds a transformer; nothing touches any content
 * until the returned function is called. Every transformer returns a string,
 * so the numeric and boolean helpers emit canonical string forms.
 */
export class ContentTransformers {
  /**
   * Build a transformer that strips leading and trailing whitespace.
   */
  static trim(): (content: string) => string {
    return (content: string) => content.trim();
  }

  /**
   * Build a transformer that lower-cases the whole content.
   */
  static toLowerCase(): (content: string) => string {
    return (content: string) => content.toLowerCase();
  }

  /**
   * Build a transformer that upper-cases the whole content.
   */
  static toUpperCase(): (content: string) => string {
    return (content: string) => content.toUpperCase();
  }

  /**
   * Build a transformer that upper-cases the first character and lower-cases
   * everything after it. Empty content is returned unchanged.
   */
  static capitalize(): (content: string) => string {
    return (content: string) => {
      // Work on the first code point rather than the first UTF-16 unit so a
      // leading astral-plane letter (stored as a surrogate pair) is not split.
      const firstCodePoint = content.codePointAt(0);
      if (firstCodePoint === undefined) return content;
      const head = String.fromCodePoint(firstCodePoint);
      return head.toUpperCase() + content.slice(head.length).toLowerCase();
    };
  }

  /**
   * Build a transformer that delegates to `String.prototype.replace`.
   *
   * @param searchValue - String or pattern to look for. A string, or a RegExp
   *   without the `g` flag, replaces the first match only.
   * @param replaceValue - Replacement text.
   */
  static replace(searchValue: string | RegExp, replaceValue: string): (content: string) => string {
    return (content: string) => content.replace(searchValue, replaceValue);
  }

  /**
   * Build a transformer that deletes every `<...>` span from the content.
   *
   * This is a plain pattern match on `<[^>]*>`, not an HTML parser.
   */
  static stripHtml(): (content: string) => string {
    return (content: string) => content.replace(/<[^>]*>/g, '');
  }

  /**
   * Build a transformer that collapses each run of whitespace (spaces, tabs,
   * newlines) into a single space and trims both ends.
   */
  static normalizeWhitespace(): (content: string) => string {
    return (content: string) => content.replace(/\s+/g, ' ').trim();
  }

  /**
   * Build a transformer that re-emits the content as a canonical number string.
   *
   * The trimmed content is read with `parseFloat`, so a numeric prefix such as
   * `"12px"` is accepted as `12`.
   *
   * @param options.integer - When true, the parsed value is rounded with `Math.round`.
   * @param options.defaultValue - Emitted (as a string) when the content is not
   *   numeric. Without it, non-numeric content is returned unchanged.
   */
  static toNumber(
    options: { integer?: boolean; defaultValue?: number } = {}
  ): (content: string) => string {
    return (content: string) => {
      const num = parseFloat(content.trim());
      if (isNaN(num)) {
        return options.defaultValue !== undefined ? options.defaultValue.toString() : content;
      }
      return options.integer ? Math.round(num).toString() : num.toString();
    };
  }

  /**
   * Build a transformer that maps recognised spellings to `'true'` / `'false'`.
   *
   * Matching ignores case and surrounding whitespace on both the content and
   * the configured value lists. Content that matches neither list is returned
   * unchanged.
   *
   * @param options.trueValues - Spellings treated as true
   *   (default `true`, `1`, `yes`, `on`).
   * @param options.falseValues - Spellings treated as false
   *   (default `false`, `0`, `no`, `off`).
   */
  static toBoolean(
    options: { trueValues?: string[]; falseValues?: string[] } = {}
  ): (content: string) => string {
    const normalize = (value: string): string => value.toLowerCase().trim();
    // Normalise the configured lists the same way as the content; otherwise a
    // caller-supplied value such as 'Y' or ' yes ' could never match.
    const trueValues = (options.trueValues || ['true', '1', 'yes', 'on']).map(value =>
      normalize(value)
    );
    const falseValues = (options.falseValues || ['false', '0', 'no', 'off']).map(value =>
      normalize(value)
    );
    return (content: string) => {
      const normalized = normalize(content);
      if (trueValues.includes(normalized)) {
        return 'true';
      }
      if (falseValues.includes(normalized)) {
        return 'false';
      }
      return content;
    };
  }

  /**
   * Build a transformer that runs the given transformers left to right,
   * feeding each one the previous result. With no transformers the content
   * passes through unchanged.
   */
  static chain(...transformers: Array<(content: string) => string>): (content: string) => string {
    return (content: string) => {
      return transformers.reduce((result, transformer) => transformer(result), content);
    };
  }

  /**
   * Wrap a user-supplied transformer so that anything it throws is rethrown
   * as a `ParserError`.
   *
   * @param fn - The transformer to wrap.
   * @param errorMessage - Message for the `ParserError`. When omitted (or
   *   empty), the message is `Content transformation failed: <cause>`.
   * @throws ParserError whenever `fn` throws.
   */
  static custom(
    fn: (content: string) => string,
    errorMessage?: string
  ): (content: string) => string {
    return (content: string) => {
      try {
        return fn(content);
      } catch (error) {
        // NOTE(review): the second argument is presumably an error code; the
        // `as any` cast bypasses its declared type — confirm against ParserError.
        throw new ParserError(
          errorMessage ||
            `Content transformation failed: ${
              error instanceof Error ? error.message : String(error)
            }`,
          'TRANSFORMATION_FAILED' as any
        );
      }
    };
  }
}
/**
 * Factories for attribute-map transformers.
 *
 * Every returned function accepts an optional attribute map and returns a new
 * object; the input map is never mutated. All key lookups consider own
 * properties only, so attribute names that collide with `Object.prototype`
 * members (`toString`, `constructor`, ...) are treated like any other name.
 */
export class AttributeTransformers {
  /** String spellings that convert to boolean `true` (compared lower-cased and trimmed). */
  private static readonly truthyStrings: readonly string[] = ['true', '1', 'yes', 'on'];

  /** Own-property test that ignores anything inherited from the prototype chain. */
  private static hasOwn(target: object, key: string): boolean {
    return Object.prototype.hasOwnProperty.call(target, key);
  }

  /**
   * Build a transformer that coerces selected attributes to a target type.
   *
   * - `'number'`: strings go through `parseFloat`, everything else through `Number`.
   * - `'boolean'`: strings are true only for `true`/`1`/`yes`/`on` (ignoring
   *   case and surrounding whitespace); other values use `Boolean`.
   * - `'string'`: `String(value)`.
   *
   * Attributes missing from the input are not created; attributes not named
   * in `typeMap` are copied through untouched.
   *
   * @param typeMap - Attribute name to target type.
   * @returns A transformer yielding a converted copy, or `{}` when called
   *   without attributes.
   */
  static convertTypes(
    typeMap: Record<string, 'string' | 'number' | 'boolean'>
  ): (attributes?: Record<string, unknown>) => Record<string, unknown> {
    return attributes => {
      if (!attributes) return {};
      const result: Record<string, unknown> = { ...attributes };
      for (const [attr, targetType] of Object.entries(typeMap)) {
        // `in` would also match inherited members such as `toString`.
        if (!AttributeTransformers.hasOwn(result, attr)) continue;
        const value = result[attr];
        switch (targetType) {
          case 'number':
            result[attr] = typeof value === 'string' ? parseFloat(value) : Number(value);
            break;
          case 'boolean':
            result[attr] =
              typeof value === 'string'
                ? AttributeTransformers.truthyStrings.includes(value.toLowerCase().trim())
                : Boolean(value);
            break;
          case 'string':
            result[attr] = String(value);
            break;
        }
      }
      return result;
    };
  }

  /**
   * Build a transformer that renames attributes.
   *
   * Keys absent from `mapping` (or mapped to an empty string) keep their
   * name. If several keys end up with the same name, the one visited last wins.
   *
   * @param mapping - Old attribute name to new attribute name.
   * @returns A transformer yielding a renamed copy, or `{}` when called
   *   without attributes.
   */
  static rename(
    mapping: Record<string, string>
  ): (attributes?: Record<string, unknown>) => Record<string, unknown> {
    return attributes => {
      if (!attributes) return {};
      const result: Record<string, unknown> = {};
      for (const [key, value] of Object.entries(attributes)) {
        // Only honour own entries: `mapping['toString']` would otherwise
        // resolve to the inherited function and be used as the new key.
        const target = AttributeTransformers.hasOwn(mapping, key) ? mapping[key] : undefined;
        const newKey = target || key;
        result[newKey] = value;
      }
      return result;
    };
  }

  /**
   * Build a transformer that keeps only the listed attributes.
   *
   * @param allowedAttributes - Names to keep; names the input does not own are skipped.
   * @returns A transformer yielding the filtered copy, or `{}` when called
   *   without attributes.
   */
  static filter(
    allowedAttributes: string[]
  ): (attributes?: Record<string, unknown>) => Record<string, unknown> {
    return attributes => {
      if (!attributes) return {};
      const result: Record<string, unknown> = {};
      for (const attr of allowedAttributes) {
        // Own-property check so inherited members are never copied in.
        if (AttributeTransformers.hasOwn(attributes, attr)) {
          result[attr] = attributes[attr];
        }
      }
      return result;
    };
  }

  /**
   * Build a transformer that fills in default attributes.
   *
   * Values present on the input override the defaults; calling the
   * transformer without attributes yields a copy of the defaults.
   *
   * @param defaults - Attribute values to use when the input lacks them.
   */
  static addDefaults(
    defaults: Record<string, unknown>
  ): (attributes?: Record<string, unknown>) => Record<string, unknown> {
    return attributes => {
      return { ...defaults, ...attributes };
    };
  }
}
/**
 * Static helpers that update a parsed tag in place: running the transformers
 * declared on its definition, filling in defaults, and tidying content and
 * attributes.
 */
export class TagTransformer {
  /**
   * Run the definition's content and attribute transformers on a tag, in place.
   *
   * A transformer is skipped when the definition does not provide it, or when
   * the tag has no (truthy) content / no attributes for it to work on.
   *
   * @param tag - Tag to mutate.
   * @param definition - Definition that may carry `transformContent` and
   *   `transformAttributes`.
   * @throws ParserError - a `ParserError` thrown by a transformer is rethrown
   *   as-is; any other thrown value is converted with
   *   `ParserError.fromTransformation`.
   */
  static transform<T extends BaseTag>(tag: T | NestedTag, definition: TagDefinition<T>): void {
    try {
      // Content step
      if (definition.transformContent) {
        if (tag.content) {
          const nextContent = definition.transformContent(tag.content);
          tag.content = nextContent;
        }
      }

      // Attribute step
      if (definition.transformAttributes) {
        if (tag.attributes) {
          const nextAttributes = definition.transformAttributes(tag.attributes);
          tag.attributes = nextAttributes as Record<string, unknown>;
        }
      }
    } catch (failure) {
      // Parser errors pass through untouched; everything else is wrapped.
      throw failure instanceof ParserError
        ? failure
        : ParserError.fromTransformation(
            tag.tagName,
            failure instanceof Error ? failure : new Error(String(failure))
          );
    }
  }

  /**
   * Fill in the definition's defaults on a tag, in place.
   *
   * `defaultContent` replaces content that is missing, empty, or
   * whitespace-only. `defaultAttributes` are merged underneath the tag's own
   * attributes, so values already on the tag win.
   *
   * @param tag - Tag to mutate.
   * @param definition - Definition that may carry `defaultContent` and
   *   `defaultAttributes`.
   */
  static applyDefaults<T extends BaseTag>(tag: T | NestedTag, definition: TagDefinition<T>): void {
    const fallbackContent = definition.defaultContent;
    if (fallbackContent) {
      const hasRealContent = Boolean(tag.content) && tag.content.trim() !== '';
      if (!hasRealContent) {
        tag.content = fallbackContent;
      }
    }

    const fallbackAttributes = definition.defaultAttributes;
    if (fallbackAttributes) {
      tag.attributes = { ...fallbackAttributes, ...tag.attributes };
    }
  }

  /**
   * Tidy a tag's content and attributes, in place.
   *
   * @param tag - Tag to mutate.
   * @param options.trimContent - Trim the content (default `true`).
   * @param options.normalizeWhitespace - Collapse whitespace runs to a single
   *   space and trim (default `false`).
   * @param options.removeEmptyAttributes - Drop attributes whose value is
   *   `null`, `undefined`, or `''`; when none remain, the `attributes`
   *   property itself is deleted (default `false`).
   */
  static clean<T extends BaseTag>(
    tag: T | NestedTag,
    options: {
      trimContent?: boolean;
      normalizeWhitespace?: boolean;
      removeEmptyAttributes?: boolean;
    } = {}
  ): void {
    const {
      trimContent: shouldTrim = true,
      normalizeWhitespace: shouldCollapse = false,
      removeEmptyAttributes: shouldPrune = false,
    } = options;

    // Content: build the cleaned text locally, then write it back once.
    if (tag.content) {
      let text = tag.content;
      if (shouldTrim) {
        text = text.trim();
      }
      if (shouldCollapse) {
        text = text.replace(/\s+/g, ' ').trim();
      }
      if (shouldTrim || shouldCollapse) {
        tag.content = text;
      }
    }

    // Attributes: nothing to do unless there are some and pruning was requested.
    if (!tag.attributes || !shouldPrune) {
      return;
    }

    const kept: Record<string, unknown> = {};
    for (const key of Object.keys(tag.attributes)) {
      const value = tag.attributes[key];
      if (value == null || value === '') {
        continue;
      }
      kept[key] = value;
    }

    if (Object.keys(kept).length === 0) {
      delete tag.attributes;
    } else {
      tag.attributes = kept;
    }
  }
}