scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.
134 lines (133 loc) • 4.29 kB
JavaScript
// File: src/modules/cleanupModule.ts
import chalk from "chalk";
/**
 * Phrase patterns that mark a line as conversational filler rather than payload.
 * They are tested against the trimmed, lower-cased line, so they are written in
 * lower case. Unanchored patterns match anywhere in the line.
 */
const NOISE_PHRASE_PATTERNS = [
  /^i\s/,
  /^here/,
  /^this/,
  /^the following/,
  /^below/,
  /^in this/,
  /^we have/,
  /the code above/,
  /ensures that/,
  /it handles/,
  /used to/,
  /note that/,
  /example/,
  /summary/,
  /added comments/,
];

/**
 * --- Helper: detect noise at top/bottom of content ---
 *
 * Decides whether a single line is wrapper noise that may be trimmed from the
 * start or end of the content.
 *
 * A line is noise when it is:
 *  - a bare markdown fence (three backticks, optionally followed by a language tag),
 *  - a single-line HTML comment, or
 *  - prose matching one of NOISE_PHRASE_PATTERNS.
 *
 * A line is never noise when it starts with a code comment marker (`//`, `/*`)
 * or with a JSON structural character (`{`, `}`, `[`, `]`). The JSON guard
 * matters because the phrase patterns are substring matches: without it a
 * single-line payload such as {"summary": "..."} would be classified as noise
 * and trimmed away in its entirety.
 *
 * @param {string} line - One line of content (untrimmed is fine).
 * @returns {boolean} true when the line should be discarded as noise.
 */
function isTopOrBottomNoise(line) {
  const trimmed = line.trim();

  // Fence lines and one-line HTML comments are always wrapper noise.
  if (/^```(?:\w+)?$/.test(trimmed)) {
    return true;
  }
  if (/^<!--.*-->$/.test(trimmed)) {
    return true;
  }

  // Code comments are kept as-is.
  if (trimmed.startsWith("//") || trimmed.startsWith("/*")) {
    return false;
  }

  // Lines that open or close a JSON structure are payload, whatever words they contain.
  if (/^[{}\[\]]/.test(trimmed)) {
    return false;
  }

  const lower = trimmed.toLowerCase();
  return NOISE_PHRASE_PATTERNS.some((pattern) => pattern.test(lower));
}
/**
 * Slice out the widest brace-delimited span of `text`: everything from the
 * first "{" through the last "}" (both inclusive). No brace balancing or JSON
 * validation is performed here.
 *
 * @param {string} text - Text that may contain an object literal.
 * @returns {string|null} The slice, or null when there is no "{" or no "}" after it.
 */
function extractObject(text) {
  const open = text.indexOf("{");
  if (open === -1) {
    return null;
  }
  // A missing "}" yields -1, which is also rejected by this comparison.
  const close = text.lastIndexOf("}");
  if (close <= open) {
    return null;
  }
  return text.slice(open, close + 1);
}
/**
 * Slice out the widest bracket-delimited span of `text`: everything from the
 * first "[" through the last "]" (both inclusive). No bracket balancing or
 * JSON validation is performed here.
 *
 * @param {string} text - Text that may contain an array literal.
 * @returns {string|null} The slice, or null when there is no "[" or no "]" after it.
 */
function extractArray(text) {
  const open = text.indexOf("[");
  if (open === -1) {
    return null;
  }
  // A missing "]" yields -1, which is also rejected by this comparison.
  const close = text.lastIndexOf("]");
  if (close <= open) {
    return null;
  }
  return text.slice(open, close + 1);
}
/**
 * Try parsing with fallback slice logic.
 *
 * Candidates are attempted in this order, returning the first that parses:
 *  1. the whole trimmed text, when it starts with "[" or "{";
 *  2. the array slice and the object slice (see extractArray / extractObject),
 *     ordered by which opening delimiter appears first in the text, so that an
 *     enclosing array is not bypassed in favour of an object nested inside it
 *     and arrays embedded in prose are found at all.
 *
 * Every candidate starts with "[" or "{", so a successful parse always yields
 * an array or an object; `null` therefore unambiguously means "nothing parsed".
 *
 * @param {string} content - Text that may contain JSON, possibly surrounded by prose.
 * @returns {*} The parsed array/object, or null when no candidate is valid JSON.
 */
function parseJsonWithFallback(content) {
  const trimmed = content.trim();

  const firstBrace = trimmed.indexOf("{");
  const firstBracket = trimmed.indexOf("[");
  const arrayComesFirst =
    firstBracket !== -1 && (firstBrace === -1 || firstBracket < firstBrace);

  const objectSlice = extractObject(trimmed);
  const arraySlice = extractArray(trimmed);

  const candidates = [];
  if (trimmed.startsWith("[") || trimmed.startsWith("{")) {
    candidates.push(trimmed);
  }
  if (arrayComesFirst) {
    candidates.push(arraySlice, objectSlice);
  } else {
    candidates.push(objectSlice, arraySlice);
  }

  // A slice is often identical to the trimmed text; parse each distinct string once.
  const attempted = new Set();
  for (const candidate of candidates) {
    if (candidate === null || attempted.has(candidate)) {
      continue;
    }
    attempted.add(candidate);
    try {
      return JSON.parse(candidate);
    } catch {
      // Best-effort parsing: an invalid candidate just means "try the next one".
    }
  }
  return null;
}
/**
 * --- Module ---
 *
 * Pipeline module that cleans text by removing `<think>` blocks, wrapper noise
 * lines, markdown fences and HTML comments, then tries to extract JSON from
 * what remains.
 */
export const cleanupModule = {
  name: "cleanup",
  description: "Removes markdown fences, fluff, and reasoning text; extracts valid JSON if possible.",
  groups: ["transform"],
  /**
   * Clean `input.content` and parse it as JSON when possible.
   *
   * @param {{ query: *, content: * }} input - `content` may be a string or any
   *   JSON-serialisable value; `query` is passed through untouched.
   * @returns {Promise<object>} One of:
   *   - `{ query, data }` with `data` the cleaned text, when the cleaned text
   *     contains neither "{" nor "[";
   *   - `{ query, content, data }` with `data` the parsed JSON, when parsing succeeds;
   *   - `{ query, content, data }` with `data` equal to the cleaned text, when
   *     parsing fails (a warning is logged).
   */
  async run(input) {
    // --- Normalize input ---
    let content;
    if (typeof input.content === "string") {
      content = input.content;
    } else if (input.content == null) {
      // Missing content is empty text, not the two-character JSON literal '""'.
      content = "";
    } else {
      // JSON.stringify returns undefined for values it cannot serialise (e.g. functions).
      content = JSON.stringify(input.content) ?? "";
    }

    // Strip thinking blocks before line trimming so that filler lines directly
    // after a leading <think>…</think> block are at the top and can be removed.
    content = content
      .replace(/\r\n/g, "\n")
      .replace(/<think>[\s\S]*?<\/think>/gi, "");

    // --- Trim top/bottom noise ---
    const lines = content.split("\n");
    const isDroppable = (line) => line.trim() === "" || isTopOrBottomNoise(line);
    let first = 0;
    let last = lines.length;
    while (first < last && isDroppable(lines[first])) {
      first += 1;
    }
    while (last > first && isDroppable(lines[last - 1])) {
      last -= 1;
    }

    // --- Strip remaining markdown fences and HTML comments ---
    content = lines
      .slice(first, last)
      .join("\n")
      .replace(/```(?:json)?/gi, "")
      .replace(/<!--.*?-->/gs, "")
      .trim();

    // If no JSON markers at all → treat as plain cleaned text
    if (!content.includes("{") && !content.includes("[")) {
      return { query: input.query, data: content };
    }

    // --- Parse JSON using simplified strategy ---
    const parsed = parseJsonWithFallback(content);
    if (parsed !== null) {
      return {
        query: input.query,
        content,
        data: parsed,
      };
    }

    console.warn(chalk.red("[cleanupModule] Failed to parse JSON — returning raw content."));
    return {
      query: input.query,
      content,
      data: content,
    };
  },
};