// @autobe/agent
// Version:
// AI backend server code generator
// 320 lines (274 loc) • 10.2 kB
// text/typescript
import {
AutoBeAnalyze,
AutoBeAnalyzeWriteSectionEvent,
} from "@autobe/interface";
import YAML from "yaml";
// ─── Types ───
/**
 * A numeric constraint restated in prose that disagrees with the values
 * registered for the same attribute in the canonical domain-model file.
 */
export interface IProseConstraintConflict {
/**
 * Canonical registry key in `Entity.attribute` form. When several entities
 * share the attribute name, this is the first key indexed for that name.
 */
entityAttr: string;
/** Integers registered for `entityAttr` in the canonical registry. */
canonicalValues: number[];
/**
 * Every constraint number extracted from the prose line (not only the ones
 * that disagree with the canonical values).
 */
proseValues: number[];
/** Filename of the file whose section contains the prose line. */
file: string;
/** Title of the section that contains the prose line. */
sectionTitle: string;
/** The offending prose line, trimmed and cut to at most 200 characters. */
context: string;
}
/**
 * Input consumed by `detectProseConstraintConflicts`: one entry per file,
 * pairing the file scenario with the section-write events recorded for it.
 */
type FileSectionInput = Array<{
/** File scenario; its `filename` is used to tell the canonical file apart. */
file: AutoBeAnalyze.IFileScenario;
/**
 * Section-write events as an array of arrays. Each event's `sectionSections`
 * entries are read for their `title` and `content`.
 * NOTE(review): the outer grouping is presumably per module — confirm.
 */
sectionEvents: AutoBeAnalyzeWriteSectionEvent[][];
}>;
// ─── Constants ───
/**
 * Matches a fenced `yaml` code block, from the opening fence to the nearest
 * closing fence. Used to strip YAML blocks out of section content before the
 * remaining prose is scanned.
 */
const YAML_CODE_BLOCK_REGEX = /```yaml\n[\s\S]*?```/g;
/**
 * Filename of the canonical domain-model document. Its YAML blocks supply the
 * reference values, and the file itself is skipped during prose scanning.
 */
const CANONICAL_FILENAME = "02-domain-model.md";
/**
 * Numeric constraint patterns searched for in prose text. Every pattern is
 * global and case-insensitive and captures its number in group 1 (ranges also
 * capture the upper bound in group 2).
 *
 * A number is only picked up when it is part of a range or follows a
 * constraint keyword, e.g. "1-50 characters", "1–150 characters", "up to 2000
 * characters", "maximum 500 chars", "minimum 8 characters", "exceeds 300
 * characters", "at least 1 character", "at most 200 characters", "limited to
 * 300 characters". A bare "300 characters" with neither a range nor a keyword
 * in front of it is not matched by any of the patterns.
 */
const NUMERIC_PATTERNS: RegExp[] = [
// Range: "1-50 characters", "1–150 characters", "0–300 characters"
/(\d+)\s*[–\-]\s*(\d+)\s*(?:characters|chars?|unicode characters)/gi,
// Bound keyword + number: "up to 2000 characters", "minimum 8 characters"
/(?:up to|maximum|max|at most|no more than|exceeds?|at least|minimum|min|no less than)\s+(\d+)\s*(?:characters|chars?|unicode characters)/gi,
// Restriction phrase + number: "limited to 300 characters", "allows 50 chars"
/(?:limited to|restricted to|capped at|allow(?:s|ed)?)\s+(\d+)\s*(?:characters|chars?|unicode characters)/gi,
];
// ─── Helpers ───
/**
 * Backslash-escape every regular-expression metacharacter in `str` so the
 * result can be embedded in a `RegExp` source and match the text literally.
 *
 * @param str Text to be matched verbatim
 * @returns `str` with each metacharacter preceded by a backslash
 */
function escapeRegExp(str: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaCharacters, (matched) => "\\" + matched);
}
// ─── Canonical Registry ───
/**
 * Collect the canonical numeric constraint values declared in the YAML blocks
 * of the domain-model file.
 *
 * Every fenced `yaml` block of every section is parsed. A block contributes
 * only when it parses to an object with a string `entity` and an array
 * `attributes`; each attribute with a string `name` whose `constraints` text
 * contains at least one integer is stored under the key `Entity.attribute`.
 * When the same key shows up again, the later entry replaces the earlier one.
 *
 * @param canonicalFile File entry whose sections hold the canonical YAML
 * @returns Map from `Entity.attribute` to the integers found in its
 *   constraints
 */
function buildCanonicalNumericRegistry(
  canonicalFile: FileSectionInput[number],
): Map<string, number[]> {
  const canonical = new Map<string, number[]>();
  const fencedYaml = /```yaml\n([\s\S]*?)```/g;

  // Parse a single YAML block and record its attributes. Anything thrown while
  // handling the block (parse failure included) abandons just that block.
  const ingestYaml = (yamlText: string): void => {
    try {
      const doc = YAML.parse(yamlText);
      const isEntityDoc =
        !!doc &&
        typeof doc === "object" &&
        typeof doc.entity === "string" &&
        Array.isArray(doc.attributes);
      if (!isEntityDoc) return;

      for (const attribute of doc.attributes) {
        if (!attribute || typeof attribute.name !== "string") continue;
        const found = extractAllNumbers(String(attribute.constraints ?? ""));
        if (found.length > 0) {
          canonical.set(`${doc.entity}.${attribute.name}`, found);
        }
      }
    } catch {
      // skip parse errors
    }
  };

  for (const moduleEvents of canonicalFile.sectionEvents) {
    for (const event of moduleEvents) {
      for (const { content } of event.sectionSections) {
        for (const [, body] of content.matchAll(fencedYaml)) {
          ingestYaml(body ?? "");
        }
      }
    }
  }
  return canonical;
}
/**
 * Invert the registry keys into a lookup from bare attribute name to every
 * `Entity.attribute` key that ends in that name.
 *
 * Example: `"bio"` → `["User.bio"]`, `"title"` → `["Article.title",
 * "Todo.title"]`. The attribute name is everything after the first dot; keys
 * without a dot are left out.
 *
 * @param registry Map keyed by `Entity.attribute`
 * @returns Map from attribute name to the matching registry keys, in registry
 *   iteration order
 */
function buildAttributeNameIndex(
  registry: Map<string, number[]>,
): Map<string, string[]> {
  const byAttribute = new Map<string, string[]>();
  registry.forEach((_values, entityAttr) => {
    const separator = entityAttr.indexOf(".");
    if (separator === -1) return;

    const attribute = entityAttr.slice(separator + 1);
    const bucket = byAttribute.get(attribute);
    if (bucket === undefined) byAttribute.set(attribute, [entityAttr]);
    else bucket.push(entityAttr);
  });
  return byAttribute;
}
/**
 * Pull every run of digits out of a constraint string as a base-10 integer,
 * dropping duplicates and keeping first-seen order.
 *
 * Examples:
 *
 * - `"1-50, required"` → `[1, 50]`
 * - `"optional, maximum 2000 characters, may be null"` → `[2000]`
 *
 * @param value Constraint text to scan
 * @returns Distinct integers found in `value`; empty when there are none
 */
function extractAllNumbers(value: string): number[] {
  const digitRuns: string[] = value.match(/\d+/g) ?? [];
  const unique = new Set<number>();
  digitRuns.forEach((run) => {
    const parsed = Number.parseInt(run, 10);
    if (!Number.isNaN(parsed)) unique.add(parsed);
  });
  return Array.from(unique);
}
// ─── Prose Constraint Extraction (Value-Driven) ───
/**
 * One prose line that names a canonical attribute and states at least one
 * non-zero number that is absent from that attribute's canonical values.
 */
interface IProseMention {
/**
 * `Entity.attribute` key the mention is attributed to: the first key indexed
 * for the attribute name found on the line.
 */
entityAttr: string;
/** All constraint numbers extracted from the line. */
numbers: number[];
/** The line itself, trimmed and cut to at most 200 characters. */
context: string;
}
/**
 * Value-driven prose constraint extraction.
 *
 * Instead of finding backtick references first, this approach:
 *
 * 1. Finds lines with numeric constraint patterns (ranges, "up to N characters",
 *    "limited to N characters", etc.)
 * 2. Checks whether any canonical attribute name appears on that line as a
 *    whole word (case-insensitive)
 * 3. Compares the numbers against the union of canonical values of every
 *    `Entity.attribute` key sharing that attribute name; `0` is never treated
 *    as a conflict
 *
 * This catches all of the following regardless of backtick usage:
 *
 * - `User.bio`: 0-300 characters
 * - `bio` (0-500 chars)
 * - Bio text limited to 300 characters
 * - | bio | 0-300 chars |
 *
 * @param proseContent Section text with YAML blocks already removed
 * @param attrNameIndex Attribute name → `Entity.attribute` keys
 * @param registry `Entity.attribute` → canonical integers
 * @returns One mention per distinct (first matching key, number set) pair, in
 *   order of first appearance. Each mention owns its `numbers` array, sorted
 *   ascending.
 */
function extractProseConstraintMentions(
  proseContent: string,
  attrNameIndex: Map<string, string[]>,
  registry: Map<string, number[]>,
): IProseMention[] {
  // Step 1: keep only the lines that carry constraint-like numbers. A sorted
  // COPY is taken so the extractor's array is never reordered in place.
  const numericLines: Array<{ line: string; numbers: number[] }> = [];
  for (const line of proseContent.split("\n")) {
    const numbers = extractConstraintNumbers(line);
    if (numbers.length === 0) continue;
    numericLines.push({ line, numbers: [...numbers].sort((a, b) => a - b) });
  }
  if (numericLines.length === 0) return [];

  // Step 2: per attribute name, compile the word-boundary matcher and build
  // the union of canonical values once, instead of once per line.
  const candidates: Array<{
    owner: string;
    pattern: RegExp;
    allowed: Set<number>;
  }> = [];
  for (const [attrName, entityAttrs] of attrNameIndex) {
    // An index entry without keys has nothing to attribute a mention to.
    const owner = entityAttrs[0];
    if (owner === undefined) continue;

    const allowed = new Set<number>();
    for (const entityAttr of entityAttrs) {
      for (const value of registry.get(entityAttr) ?? []) allowed.add(value);
    }
    candidates.push({
      owner,
      pattern: new RegExp(`\\b${escapeRegExp(attrName)}\\b`, "i"),
      allowed,
    });
  }

  // Step 3: report lines that state a non-zero number outside the canonical
  // union. Keyed by owner + number set so only the first occurrence is kept.
  const mentions = new Map<string, IProseMention>();
  for (const { line, numbers } of numericLines) {
    for (const { owner, pattern, allowed } of candidates) {
      if (!pattern.test(line)) continue;
      if (!numbers.some((n) => n !== 0 && !allowed.has(n))) continue;

      const key = `${owner}:${numbers.join(",")}`;
      if (mentions.has(key)) continue;
      mentions.set(key, {
        entityAttr: owner,
        // Fresh copy: mentions from the same line must not alias one array.
        numbers: [...numbers],
        context: line.trim().slice(0, 200),
      });
    }
  }
  return [...mentions.values()];
}
/**
 * Collect the distinct integers captured by `NUMERIC_PATTERNS` in `text`.
 *
 * Only numbers sitting inside one of those constraint patterns are returned;
 * other digits in the text are ignored. Values keep first-seen order.
 *
 * @param text Text to scan (callers pass a single line)
 * @returns Distinct captured integers; empty when no pattern matches
 */
function extractConstraintNumbers(text: string): number[] {
  const collected = new Set<number>();
  NUMERIC_PATTERNS.forEach((pattern) => {
    // The patterns are shared global regexes; start every scan at offset 0.
    pattern.lastIndex = 0;
    for (const [, first, second] of text.matchAll(pattern)) {
      // Group 1 is always the (lower) number; group 2 only exists for ranges.
      for (const captured of [first, second]) {
        if (!captured) continue;
        const parsed = Number.parseInt(captured, 10);
        if (!Number.isNaN(parsed)) collected.add(parsed);
      }
    }
  });
  return Array.from(collected);
}
// ─── Main Detection ───
/**
 * Detect numeric constraint values restated in the prose of non-canonical
 * files that disagree with the canonical 02-domain-model.
 *
 * The canonical file's YAML blocks are turned into a registry of
 * `Entity.attribute` → integers. Every other file's sections are then scanned,
 * with YAML code blocks stripped, for lines that name a canonical attribute
 * next to a numeric constraint pattern. Backticks are not required for a line
 * to be picked up.
 *
 * @param props.files All files with their section-write events
 * @returns One conflict per offending mention; empty when the canonical file
 *   is missing or yields no numeric constraints
 */
export const detectProseConstraintConflicts = (props: {
  files: FileSectionInput;
}): IProseConstraintConflict[] => {
  const canonicalEntry = props.files.find(
    (entry) => entry.file.filename === CANONICAL_FILENAME,
  );
  if (canonicalEntry === undefined) return [];

  const registry = buildCanonicalNumericRegistry(canonicalEntry);
  if (registry.size === 0) return [];
  const attrNameIndex = buildAttributeNameIndex(registry);

  // The canonical file is the reference, so it is never scanned itself.
  const proseFiles = props.files.filter(
    ({ file }) => file.filename !== CANONICAL_FILENAME,
  );

  const found: IProseConstraintConflict[] = [];
  for (const { file, sectionEvents } of proseFiles) {
    for (const moduleEvents of sectionEvents) {
      for (const event of moduleEvents) {
        for (const section of event.sectionSections) {
          // YAML code blocks are handled by other validators; drop them here.
          const prose = section.content.replace(YAML_CODE_BLOCK_REGEX, "");
          const mentions = extractProseConstraintMentions(
            prose,
            attrNameIndex,
            registry,
          );
          for (const mention of mentions) {
            const canonicalValues = registry.get(mention.entityAttr);
            if (!canonicalValues) continue;

            // Zero is never counted as a disagreement.
            const disagrees = mention.numbers.some(
              (n) => n !== 0 && !canonicalValues.includes(n),
            );
            if (!disagrees) continue;

            found.push({
              entityAttr: mention.entityAttr,
              canonicalValues,
              proseValues: mention.numbers,
              file: file.filename,
              sectionTitle: section.title,
              context: mention.context,
            });
          }
        }
      }
    }
  }
  return found;
};
/**
 * Group prose conflicts by the file they were found in and render each one as
 * a feedback string.
 *
 * @param conflicts Conflicts to render
 * @returns Map from filename to that file's feedback strings, in input order;
 *   files without conflicts have no entry
 */
export const buildFileProseConflictMap = (
  conflicts: IProseConstraintConflict[],
): Map<string, string[]> => {
  const feedbackByFile = new Map<string, string[]>();
  conflicts.forEach(
    ({ entityAttr, canonicalValues, proseValues, sectionTitle, file }) => {
      const canonicalList = canonicalValues.join(", ");
      const proseList = proseValues.join(", ");
      const message = [
        `Prose constraint conflict: ${entityAttr} — `,
        `canonical values [${canonicalList}] (from ${CANONICAL_FILENAME}) vs `,
        `prose values [${proseList}] in "${sectionTitle}". `,
        `Remove the restated value and use a backtick reference to ${CANONICAL_FILENAME} instead.`,
      ].join("");

      const bucket = feedbackByFile.get(file);
      if (bucket === undefined) feedbackByFile.set(file, [message]);
      else bucket.push(message);
    },
  );
  return feedbackByFile;
};