UNPKG

@autobe/agent

Version:

AI backend server code generator

165 lines 6.93 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.validateScenarioFileNames = exports.validateSectionSectionContent = exports.validateUnitSectionContent = exports.validateModuleSectionContent = exports.validateEnglishOnly = exports.findNonEnglishCharacters = exports.containsNonEnglish = void 0; /** * Validation utilities for ensuring English-only content in analysis documents. * * These validators detect non-English characters (Chinese, Korean, Japanese, * etc.) that may be incorrectly generated by LLMs despite prompt instructions. */ /** * Regex pattern to detect non-English characters. Includes: * * - Chinese (CJK Unified Ideographs): \u4e00-\u9fff * - Korean (Hangul Syllables): \uac00-\ud7af * - Japanese Hiragana: \u3040-\u309f * - Japanese Katakana: \u30a0-\u30ff * - CJK Extension A: \u3400-\u4dbf * - CJK Compatibility Ideographs: \uf900-\ufaff */ const NON_ENGLISH_PATTERN = /[\u4e00-\u9fff\uac00-\ud7af\u3040-\u309f\u30a0-\u30ff\u3400-\u4dbf\uf900-\ufaff]/g; /** Check if text contains non-English characters. */ const containsNonEnglish = (text) => { return NON_ENGLISH_PATTERN.test(text); }; exports.containsNonEnglish = containsNonEnglish; /** * Find all non-English characters in text. Returns array of { char, index, * context } objects. */ const findNonEnglishCharacters = (text) => { const results = []; const regex = new RegExp(NON_ENGLISH_PATTERN.source, "g"); let match; while ((match = regex.exec(text)) !== null) { const start = Math.max(0, match.index - 20); const end = Math.min(text.length, match.index + 21); results.push({ char: match[0], index: match.index, context: text.slice(start, end), }); } return results; }; exports.findNonEnglishCharacters = findNonEnglishCharacters; /** * Validate that content is English-only. Returns validation result with details * if non-English characters found. */ const validateEnglishOnly = (content) => { const nonEnglish = (0, exports.findNonEnglishCharacters)(content); if (nonEnglish.length === 0) { return { valid: true, errors: [] }; } const errors = nonEnglish.slice(0, 5).map((item) => { return `Non-English character "${item.char}" found at index ${item.index}: "...${item.context}..."`; }); if (nonEnglish.length > 5) { errors.push(`... and ${nonEnglish.length - 5} more non-English characters`); } return { valid: false, errors }; }; exports.validateEnglishOnly = validateEnglishOnly; /** Validate module section content for English-only requirement. */ const validateModuleSectionContent = (sections) => { const allErrors = []; sections.forEach((section, index) => { const titleResult = (0, exports.validateEnglishOnly)(section.title); const purposeResult = (0, exports.validateEnglishOnly)(section.purpose); const contentResult = (0, exports.validateEnglishOnly)(section.content); if (!titleResult.valid) { allErrors.push(`Module section ${index} title: ${titleResult.errors.join("; ")}`); } if (!purposeResult.valid) { allErrors.push(`Module section ${index} purpose: ${purposeResult.errors.join("; ")}`); } if (!contentResult.valid) { allErrors.push(`Module section ${index} content: ${contentResult.errors.join("; ")}`); } }); return { valid: allErrors.length === 0, errors: allErrors, }; }; exports.validateModuleSectionContent = validateModuleSectionContent; /** Validate unit section content for English-only requirement. */ const validateUnitSectionContent = (sections) => { const allErrors = []; sections.forEach((section, index) => { const titleResult = (0, exports.validateEnglishOnly)(section.title); const purposeResult = (0, exports.validateEnglishOnly)(section.purpose); const contentResult = (0, exports.validateEnglishOnly)(section.content); if (!titleResult.valid) { allErrors.push(`Unit section ${index} title: ${titleResult.errors.join("; ")}`); } if (!purposeResult.valid) { allErrors.push(`Unit section ${index} purpose: ${purposeResult.errors.join("; ")}`); } if (!contentResult.valid) { allErrors.push(`Unit section ${index} content: ${contentResult.errors.join("; ")}`); } section.keywords.forEach((keyword, kwIndex) => { const kwResult = (0, exports.validateEnglishOnly)(keyword); if (!kwResult.valid) { allErrors.push(`Unit section ${index} keyword ${kwIndex}: ${kwResult.errors.join("; ")}`); } }); }); return { valid: allErrors.length === 0, errors: allErrors, }; }; exports.validateUnitSectionContent = validateUnitSectionContent; /** Validate section section content for English-only requirement. */ const validateSectionSectionContent = (sections) => { const allErrors = []; sections.forEach((section, index) => { const titleResult = (0, exports.validateEnglishOnly)(section.title); const contentResult = (0, exports.validateEnglishOnly)(section.content); if (!titleResult.valid) { allErrors.push(`Section ${index} title: ${titleResult.errors.join("; ")}`); } if (!contentResult.valid) { allErrors.push(`Section ${index} content: ${contentResult.errors.join("; ")}`); } }); return { valid: allErrors.length === 0, errors: allErrors, }; }; exports.validateSectionSectionContent = validateSectionSectionContent; /** * Validate scenario file names for correct format. Expected format: 00-toc.md, * 01-xxx.md, 02-xxx.md, ... */ const validateScenarioFileNames = (files) => { const allErrors = []; const filenamePattern = /^\d{2}-[a-z][a-z0-9-]*\.md$/; // Check first file is 00-toc.md if (files.length > 0 && files[0].filename !== "00-toc.md") { allErrors.push(`First file must be "00-toc.md", got "${files[0].filename}"`); } // Check all filenames match pattern files.forEach((file, index) => { if (!filenamePattern.test(file.filename)) { allErrors.push(`File ${index}: Invalid filename format "${file.filename}". Expected format: XX-name.md`); } }); // Check sequential numbering files.forEach((file, index) => { const expectedPrefix = index.toString().padStart(2, "0"); if (!file.filename.startsWith(expectedPrefix + "-")) { allErrors.push(`File ${index}: Expected prefix "${expectedPrefix}-", got "${file.filename}"`); } }); return { valid: allErrors.length === 0, errors: allErrors, }; }; exports.validateScenarioFileNames = validateScenarioFileNames; //# sourceMappingURL=validateEnglishOnly.js.map