UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

64 lines (63 loc) 2.12 kB
/** * LaTeX Chunker * * Splits LaTeX documents by sections and environments. */ import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js"; /** * LaTeX Chunker */ export class LaTeXChunker extends BaseChunker { strategy = "latex"; getDefaultConfig() { return { ...DEFAULT_CHUNKER_CONFIG, maxSize: 1000, overlap: 0, }; } async doChunk(content, config) { const maxSize = config.maxSize ?? 1000; // Split by sections const sectionPattern = /\\(?:section|subsection|subsubsection|chapter|paragraph)\{[^}]+\}/g; const sections = []; let lastIndex = 0; let match; while ((match = sectionPattern.exec(content)) !== null) { if (match.index > lastIndex) { sections.push(content.slice(lastIndex, match.index)); } lastIndex = match.index; } if (lastIndex < content.length) { sections.push(content.slice(lastIndex)); } if (sections.length === 0) { sections.push(content); } const chunks = []; let offset = 0; for (const section of sections) { const trimmed = section.trim(); if (!trimmed) { continue; } if (trimmed.length <= maxSize) { const startOffset = content.indexOf(trimmed, offset); chunks.push(this.createChunk(trimmed, chunks.length, startOffset >= 0 ? startOffset : offset, startOffset >= 0 ? startOffset + trimmed.length : offset + trimmed.length)); if (startOffset >= 0) { offset = startOffset + 1; } } else { const segments = this.splitBySizeWithOverlap(trimmed, maxSize, 0); for (const segment of segments) { chunks.push(this.createChunk(segment.text, chunks.length, segment.start, segment.end)); } } } return chunks; } }