@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
39 lines (38 loc) • 1.14 kB
JavaScript
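/**
* HTML Chunker
*
* Strips HTML markup from the content and splits the remaining plain text
* into size-limited chunks. Splitting along semantic tags is not implemented yet.
*/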
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
/**
* HTML Chunker
*/
export class HTMLChunker extends BaseChunker {
strategy = "html";
getDefaultConfig() {
return {
...DEFAULT_CHUNKER_CONFIG,
maxSize: 1000,
overlap: 0,
};
}
async doChunk(content, config) {
const maxSize = config.maxSize ?? 1000;
// Strip HTML tags for text content
const textContent = this.stripHtml(content);
// Use simple character-based splitting for now
const segments = this.splitBySizeWithOverlap(textContent, maxSize, 0);
return segments.map((segment, index) => this.createChunk(segment.text, index, segment.start, segment.end));
}
/**
* Strip HTML tags from content
*/
stripHtml(html) {
return html
.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
.replace(/<[^>]+>/g, " ")
.replace(/\s+/g, " ")
.trim();
}
}