// @autobe/agent
// Version:
// AI backend server code generator
// 165 lines • 6.93 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.validateScenarioFileNames = exports.validateSectionSectionContent = exports.validateUnitSectionContent = exports.validateModuleSectionContent = exports.validateEnglishOnly = exports.findNonEnglishCharacters = exports.containsNonEnglish = void 0;
/**
* Validation utilities for ensuring English-only content in analysis documents.
*
* These validators detect non-English characters (Chinese, Korean, Japanese,
* etc.) that may be incorrectly generated by LLMs despite prompt instructions.
*/
/**
 * Regex pattern to detect non-English characters. Includes:
 *
 * - Chinese (CJK Unified Ideographs): \u4e00-\u9fff
 * - Korean (Hangul Syllables): \uac00-\ud7af
 * - Japanese Hiragana: \u3040-\u309f
 * - Japanese Katakana: \u30a0-\u30ff
 * - CJK Extension A: \u3400-\u4dbf
 * - CJK Compatibility Ideographs: \uf900-\ufaff
 *
 * This instance carries the `g` flag, so calling `test()` or `exec()` on it
 * directly advances its `lastIndex` between calls. Build a fresh RegExp from
 * `.source` instead of matching with this shared object.
 */
const NON_ENGLISH_PATTERN = /[\u4e00-\u9fff\uac00-\ud7af\u3040-\u309f\u30a0-\u30ff\u3400-\u4dbf\uf900-\ufaff]/g;
/**
 * Non-global copy of NON_ENGLISH_PATTERN used for yes/no checks. Without the
 * `g` flag, `test()` always starts at index 0 and keeps no state between calls.
 */
const NON_ENGLISH_DETECTOR = new RegExp(NON_ENGLISH_PATTERN.source);
/**
 * Check if text contains non-English characters.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` when at least one character falls inside the
 *   ranges covered by NON_ENGLISH_PATTERN, `false` otherwise.
 */
const containsNonEnglish = (text) => {
    // Matching with the global-flagged pattern here made every second call on
    // the same matching input return false, because `lastIndex` was left
    // pointing past the previous match.
    return NON_ENGLISH_DETECTOR.test(text);
};
exports.containsNonEnglish = containsNonEnglish;
/**
 * Locate every non-English character in `text`.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{char: string, index: number, context: string}>} One entry
 *   per match, in order of appearance. `context` is the slice of `text` that
 *   reaches up to 20 UTF-16 code units on either side of the match.
 */
const findNonEnglishCharacters = (text) => {
    const CONTEXT_RADIUS = 20;
    // A private global-flagged copy keeps the scan position local to this call.
    const scanner = new RegExp(NON_ENGLISH_PATTERN.source, "g");
    const found = [];
    for (let hit = scanner.exec(text); hit !== null; hit = scanner.exec(text)) {
        const { 0: char, index } = hit;
        const from = Math.max(0, index - CONTEXT_RADIUS);
        const to = Math.min(text.length, index + CONTEXT_RADIUS + 1);
        found.push({ char, index, context: text.slice(from, to) });
    }
    return found;
};
exports.findNonEnglishCharacters = findNonEnglishCharacters;
/**
 * Validate that content is English-only.
 *
 * @param {string} content - Text to validate.
 * @returns {{valid: boolean, errors: string[]}} `valid` is true when no
 *   non-English character was found. Otherwise `errors` describes the first
 *   five offending characters, followed by a summary line counting the rest
 *   when there are more than five.
 */
const validateEnglishOnly = (content) => {
    const MAX_DETAILED = 5;
    const hits = (0, exports.findNonEnglishCharacters)(content);
    // With no hits this maps over an empty array, giving an empty error list.
    const errors = hits
        .slice(0, MAX_DETAILED)
        .map(({ char, index, context }) => `Non-English character "${char}" found at index ${index}: "...${context}..."`);
    const unreported = hits.length - MAX_DETAILED;
    if (unreported > 0) {
        errors.push(`... and ${unreported} more non-English characters`);
    }
    return { valid: hits.length === 0, errors };
};
exports.validateEnglishOnly = validateEnglishOnly;
/**
 * Validate module section content for the English-only requirement.
 *
 * @param {Array<{title: string, purpose: string, content: string}>} sections
 *   Module sections to check.
 * @returns {{valid: boolean, errors: string[]}} One error line per offending
 *   field, ordered by section and then title, purpose, content.
 */
const validateModuleSectionContent = (sections) => {
    const CHECKED_FIELDS = ["title", "purpose", "content"];
    const errors = sections.reduce((collected, section, position) => {
        for (const field of CHECKED_FIELDS) {
            const outcome = (0, exports.validateEnglishOnly)(section[field]);
            if (!outcome.valid) {
                collected.push(`Module section ${position} ${field}: ${outcome.errors.join("; ")}`);
            }
        }
        return collected;
    }, []);
    return { valid: errors.length === 0, errors };
};
exports.validateModuleSectionContent = validateModuleSectionContent;
/**
 * Validate unit section content for the English-only requirement.
 *
 * @param {Array<{title: string, purpose: string, content: string, keywords: string[]}>} sections
 *   Unit sections to check.
 * @returns {{valid: boolean, errors: string[]}} One error line per offending
 *   field or keyword. Within a section the order is title, purpose, content,
 *   then keywords by position.
 */
const validateUnitSectionContent = (sections) => {
    const CHECKED_FIELDS = ["title", "purpose", "content"];
    const errors = sections.reduce((collected, section, position) => {
        // Appends a labelled error line when `text` fails the English-only check.
        const inspect = (label, text) => {
            const outcome = (0, exports.validateEnglishOnly)(text);
            if (!outcome.valid) {
                collected.push(`Unit section ${position} ${label}: ${outcome.errors.join("; ")}`);
            }
        };
        for (const field of CHECKED_FIELDS) {
            inspect(field, section[field]);
        }
        section.keywords.forEach((keyword, keywordPosition) => {
            inspect(`keyword ${keywordPosition}`, keyword);
        });
        return collected;
    }, []);
    return { valid: errors.length === 0, errors };
};
exports.validateUnitSectionContent = validateUnitSectionContent;
/**
 * Validate section-level section content for the English-only requirement.
 *
 * @param {Array<{title: string, content: string}>} sections - Sections to check.
 * @returns {{valid: boolean, errors: string[]}} One error line per offending
 *   field, ordered by section and then title, content.
 */
const validateSectionSectionContent = (sections) => {
    const CHECKED_FIELDS = ["title", "content"];
    const errors = sections.reduce((collected, section, position) => {
        for (const field of CHECKED_FIELDS) {
            const outcome = (0, exports.validateEnglishOnly)(section[field]);
            if (!outcome.valid) {
                collected.push(`Section ${position} ${field}: ${outcome.errors.join("; ")}`);
            }
        }
        return collected;
    }, []);
    return { valid: errors.length === 0, errors };
};
exports.validateSectionSectionContent = validateSectionSectionContent;
/**
 * Validate scenario file names for correct format. Expected format: 00-toc.md,
 * 01-xxx.md, 02-xxx.md, ...
 *
 * @param {Array<{filename: string}>} files - Scenario files in their intended order.
 * @returns {{valid: boolean, errors: string[]}} Errors are grouped by rule:
 *   the first-file check, then every format violation, then every numbering
 *   violation.
 */
const validateScenarioFileNames = (files) => {
    const FILENAME_FORMAT = /^\d{2}-[a-z][a-z0-9-]*\.md$/;
    const isMessage = (entry) => entry !== null;
    // Rule 1: the list, when non-empty, must open with the table of contents.
    const tocErrors = files.length > 0 && files[0].filename !== "00-toc.md"
        ? [`First file must be "00-toc.md", got "${files[0].filename}"`]
        : [];
    // Rule 2: each name is two digits, a dash, a lowercase slug, then ".md".
    const formatErrors = files
        .map((file, index) => (FILENAME_FORMAT.test(file.filename)
            ? null
            : `File ${index}: Invalid filename format "${file.filename}". Expected format: XX-name.md`))
        .filter(isMessage);
    // Rule 3: the numeric prefix equals the file's zero-padded position.
    const orderErrors = files
        .map((file, index) => {
            const wanted = String(index).padStart(2, "0");
            return file.filename.startsWith(`${wanted}-`)
                ? null
                : `File ${index}: Expected prefix "${wanted}-", got "${file.filename}"`;
        })
        .filter(isMessage);
    const errors = [...tocErrors, ...formatErrors, ...orderErrors];
    return { valid: errors.length === 0, errors };
};
exports.validateScenarioFileNames = validateScenarioFileNames;
//# sourceMappingURL=validateEnglishOnly.js.map