UNPKG

generator-begcode

Version:

Spring Boot + Angular/React/Vue in one handy generator

55 lines (54 loc) 1.87 kB
import { load } from 'cheerio';

/**
 * Splits an HTML document into plain-text chunks taken from its `div`,
 * `section` and `article` elements.
 */
export class HTMLChunker {
  /** Options object; only `maxChunkSize` is read by this class. */
  config;

  /**
   * @param {{ maxChunkSize?: number }} config - Chunking options.
   *   `maxChunkSize` is the maximum length (in UTF-16 code units) of a chunk
   *   before it is split. A missing, non-numeric or < 1 value disables
   *   splitting.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Normalises whitespace in a piece of text.
   *
   * In order: removes tab characters, removes the literal sequence
   * backslash-backslash-`t`, turns newlines into spaces, turns the literal
   * sequence backslash-backslash-`n` into a real newline, collapses runs of
   * spaces into a single space and trims both ends.
   *
   * @param {string} html - Text to clean up.
   * @returns {string} The cleaned text.
   */
  sanitize(html) {
    return html
      .replaceAll('\t', '')
      .replaceAll('\\\\t', '')
      .replaceAll('\n', ' ')
      .replaceAll('\\\\n', '\n')
      .replace(/ +(?= )/g, '')
      .trim();
  }

  /**
   * Extracts sanitized text chunks from the given HTML.
   *
   * For every `div`, `section` and `article` element (processed in that
   * order): if the element contains descendants matching the same selector,
   * each direct child matching the selector is extracted on its own;
   * otherwise the element is extracted together with the following siblings
   * up to the first one that does not match the selector.
   *
   * Text longer than `config.maxChunkSize` is split, preferably right before
   * the last `<` character found at or before the limit, otherwise exactly at
   * the limit.
   *
   * @param {string} html - HTML markup to chunk.
   * @returns {string[]} The collected chunks, in extraction order.
   */
  chunk(html) {
    const $ = load(html);
    const chunks = [];

    // Normalise the limit once. Anything that is not a number >= 1 means
    // "do not split"; a limit of 0 or less could never make progress.
    const limit = Math.floor(Number(this.config?.maxChunkSize));
    const hasLimit = limit >= 1;

    const extractContent = (element) => {
      let text = this.sanitize($(element).text().trim());

      while (hasLimit && text.length > limit) {
        let splitAt = text.lastIndexOf('<', limit);
        // -1: no '<' in range. 0: the text itself starts with '<' (always the
        // case right after a split at '<'); cutting there would yield an
        // empty head and an unchanged remainder, i.e. an endless loop.
        // In both cases fall back to a hard split at the limit.
        if (splitAt <= 0) {
          splitAt = limit;
        }

        const head = text.substring(0, splitAt).trim();
        if (head) {
          chunks.push(head);
        }
        text = this.sanitize(text.substring(splitAt).trim());
      }

      // Only the final piece is de-duplicated against earlier chunks.
      if (text && !chunks.includes(text)) {
        chunks.push(text);
      }
    };

    const processElement = (selector) => {
      $(selector).each((_index, element) => {
        const $element = $(element);

        if ($element.find(selector).length > 0) {
          $element.children(selector).each((_childIndex, child) => {
            extractContent(child);
          });
          return;
        }

        const siblings = $element.nextUntil(`:not(${selector})`).addBack();
        extractContent(siblings);
      });
    };

    ['div', 'section', 'article'].forEach(processElement);
    return chunks;
  }
}