llm-code-format
Version:
Parsing and serialization of multiple code files in Markdown for LLMs
193 lines (192 loc) • 9.21 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const vitest_1 = require("vitest");
const streamingParser_1 = require("./streamingParser");
const sampleStreams_1 = require("./sampleStreams");
(0, vitest_1.describe)("StreamingMarkdownParser", () => {
// Recorders for parser events; each is reassigned to a fresh array before every test.
let fileNameChanges;
let codeLines;
let nonCodeLines;
// Parser instance under test; rebuilt before every test.
let parser;
// Callback set handed to the parser: each handler appends what it receives
// to the matching recorder array.
const callbacks = {
  async onFileNameChange(name, format) {
    fileNameChanges.push({ name, format });
  },
  async onCodeLine(text) {
    codeLines.push(text);
  },
  async onNonCodeLine(text) {
    nonCodeLines.push(text);
  },
};
(0, vitest_1.beforeEach)(() => {
  // Start every test with empty recorders and a new parser.
  [fileNameChanges, codeLines, nonCodeLines] = [[], [], []];
  parser = new streamingParser_1.StreamingMarkdownParser(callbacks);
});
(0, vitest_1.it)("should process a complete markdown block in one chunk", async () => {
  // A bold header followed by one fenced block, delivered as a single chunk.
  const markdown = "**index.html**\n```\n<html>\n</html>\n```\n";
  const expectedHeaders = [{ name: "index.html", format: "Bold Format" }];
  const expectedCode = ["<html>", "</html>"];

  await parser.processChunk(markdown);
  await parser.flushRemaining();

  (0, vitest_1.expect)(fileNameChanges).toEqual(expectedHeaders);
  (0, vitest_1.expect)(codeLines).toEqual(expectedCode);
});
(0, vitest_1.it)("should handle multiple chunks with split lines", async () => {
  // Chunk boundaries fall in the middle of the header line and of a code line.
  const pieces = ["**inde", "x.html**\n```\n<ht", "ml>\n</html>\n```"];
  for (const piece of pieces) {
    await parser.processChunk(piece);
  }
  await parser.flushRemaining();

  const expectedHeaders = [{ name: "index.html", format: "Bold Format" }];
  const expectedCode = ["<html>", "</html>"];
  (0, vitest_1.expect)(fileNameChanges).toEqual(expectedHeaders);
  (0, vitest_1.expect)(codeLines).toEqual(expectedCode);
});
(0, vitest_1.it)("should process multiple files and code blocks", async () => {
  // Two header + fence sections back to back in one chunk.
  const htmlSection = "**index.html**\n```\n<html>\n</html>\n```\n";
  const cssSection = "**styles.css**\n```\nbody { color: blue; }\n```\n";

  await parser.processChunk(htmlSection + cssSection);
  await parser.flushRemaining();

  const expectedHeaders = [
    { name: "index.html", format: "Bold Format" },
    { name: "styles.css", format: "Bold Format" },
  ];
  // Code lines from both files are recorded in stream order.
  const expectedCode = ["<html>", "</html>", "body { color: blue; }"];
  (0, vitest_1.expect)(fileNameChanges).toEqual(expectedHeaders);
  (0, vitest_1.expect)(codeLines).toEqual(expectedCode);
});
(0, vitest_1.it)("should handle code fence markers with language specifiers", async () => {
  // The opening fence carries a language tag ("js"); only the line between
  // the fences is expected as code.
  const markdown = "**script.js**\n```js\nconsole.log('Hello');\n```\n";
  const expectedHeaders = [{ name: "script.js", format: "Bold Format" }];
  const expectedCode = ["console.log('Hello');"];

  await parser.processChunk(markdown);
  await parser.flushRemaining();

  (0, vitest_1.expect)(fileNameChanges).toEqual(expectedHeaders);
  (0, vitest_1.expect)(codeLines).toEqual(expectedCode);
});
(0, vitest_1.it)("should flush remaining partial lines on flushRemaining", async () => {
  // The header is sent without a trailing newline before the flush.
  const unterminatedHeader = "**partial.html**";
  const expectedHeaders = [{ name: "partial.html", format: "Bold Format" }];

  await parser.processChunk(unterminatedHeader);
  await parser.flushRemaining();

  (0, vitest_1.expect)(fileNameChanges).toEqual(expectedHeaders);
});
(0, vitest_1.it)("should not trigger file name or code callbacks for irrelevant lines outside code fences", async () => {
  // A plain text line that is neither a header nor a code fence.
  const plainLine = "This is an irrelevant line\n";

  await parser.processChunk(plainLine);
  await parser.flushRemaining();

  // Only the non-code recorder should have received anything.
  (0, vitest_1.expect)(fileNameChanges).toEqual([]);
  (0, vitest_1.expect)(codeLines).toEqual([]);
  (0, vitest_1.expect)(nonCodeLines).toEqual(["This is an irrelevant line"]);
});
(0, vitest_1.it)("should handle nested chunks with header and code block boundaries", async () => {
  // A stream of two files whose chunk boundaries cut through code lines.
  const streamPieces = [
    // First file: header line
    "**index.html**\n",
    // Opening fence plus the start of a code line
    "```\n<ht",
    // Rest of that code line and the start of the next
    "ml>\n</ht",
    // Rest of the code line, then the closing fence
    "ml>\n```\n",
    // Plain text between the two files
    "Some irrelevant line\n",
    // Second file: header line
    "**styles.css**\n",
    // Opening fence plus the first half of a code line
    "```\nbody { color:",
    // Second half of the code line, then the closing fence
    " blue; }\n```\n",
  ];
  for (const piece of streamPieces) {
    await parser.processChunk(piece);
  }
  await parser.flushRemaining();

  const expectedHeaders = [
    { name: "index.html", format: "Bold Format" },
    { name: "styles.css", format: "Bold Format" },
  ];
  const expectedCode = ["<html>", "</html>", "body { color: blue; }"];
  const expectedNonCode = ["Some irrelevant line"];
  (0, vitest_1.expect)(fileNameChanges).toEqual(expectedHeaders);
  (0, vitest_1.expect)(codeLines).toEqual(expectedCode);
  (0, vitest_1.expect)(nonCodeLines).toEqual(expectedNonCode);
});
(0, vitest_1.it)("should handle bold format with extra text", async () => {
  // Commentary follows the bold header on the same line; the reported name
  // is expected to be just the bold part.
  const markdown = "**index.html** (some commentary)\n```\n<html>\n</html>\n```\n";
  const expectedHeaders = [{ name: "index.html", format: "Bold Format" }];
  const expectedCode = ["<html>", "</html>"];

  await parser.processChunk(markdown);
  await parser.flushRemaining();

  (0, vitest_1.expect)(fileNameChanges).toEqual(expectedHeaders);
  (0, vitest_1.expect)(codeLines).toEqual(expectedCode);
});
(0, vitest_1.it)("should handle bold format with parentheses and strip them", async () => {
  // Each header has a parenthesised note inside the bold markers; the
  // expected names below do not include it.
  const newFileSection = "**renderArcs.js (New file)**\n```\n// JavaScript content\n```\n";
  const modifiedFileSection = "**utils.js (Modified)**\n```\n// More content\n```\n";

  await parser.processChunk(newFileSection + modifiedFileSection);
  await parser.flushRemaining();

  const expectedHeaders = [
    { name: "renderArcs.js", format: "Bold Format" },
    { name: "utils.js", format: "Bold Format" },
  ];
  const expectedCode = ["// JavaScript content", "// More content"];
  (0, vitest_1.expect)(fileNameChanges).toEqual(expectedHeaders);
  (0, vitest_1.expect)(codeLines).toEqual(expectedCode);
});
(0, vitest_1.it)("should capture all non-code, non-header lines", async () => {
  // Plain text appears before the header, between header and fence, and
  // after the closing fence.
  const segments = [
    "This is a regular text line\n",
    "**index.html**\n",
    "This is a comment about the file\n",
    "```\n<html>\n</html>\n```\n",
    "Another comment line\n",
    "And one more line\n",
  ];

  await parser.processChunk(segments.join(""));
  await parser.flushRemaining();

  const expectedHeaders = [{ name: "index.html", format: "Bold Format" }];
  const expectedCode = ["<html>", "</html>"];
  const expectedNonCode = [
    "This is a regular text line",
    "This is a comment about the file",
    "Another comment line",
    "And one more line",
  ];
  (0, vitest_1.expect)(fileNameChanges).toEqual(expectedHeaders);
  (0, vitest_1.expect)(codeLines).toEqual(expectedCode);
  (0, vitest_1.expect)(nonCodeLines).toEqual(expectedNonCode);
});
(0, vitest_1.it)("should process sample streams and match expected files", async () => {
  // Every sample stream gets its own parser and collectors, so nothing
  // carries over from one sample to the next.
  for (const [index, sampleStream] of sampleStreams_1.sampleStreams.entries()) {
    // Prefix for assertion messages so a failure names the offending sample.
    const label = `sample stream ${index}`;
    // Code lines collected per file name. A Map is used instead of a plain
    // object so that a file name such as "constructor" cannot collide with
    // members inherited from Object.prototype.
    const fileContents = new Map();
    let currentFileName = null;
    const streamCallbacks = {
      onFileNameChange: async (fileName) => {
        currentFileName = fileName;
        // Keep lines already collected if the same name is announced again.
        if (!fileContents.has(fileName)) {
          fileContents.set(fileName, []);
        }
      },
      onCodeLine: async (line) => {
        // Code lines seen before any file name was announced are dropped.
        if (currentFileName !== null) {
          fileContents.get(currentFileName).push(line);
        }
      },
      onNonCodeLine: async () => {
        // Non-code lines are not part of file contents
      },
    };
    const streamParser = new streamingParser_1.StreamingMarkdownParser(streamCallbacks);
    // Feed the chunks strictly in order, then flush any buffered partial line.
    for (const chunk of sampleStream.chunks) {
      await streamParser.processChunk(chunk);
    }
    await streamParser.flushRemaining();
    // Join each file's collected lines into one newline-separated string.
    const builtFiles = new Map();
    for (const [fileName, lines] of fileContents) {
      builtFiles.set(fileName, lines.join("\n"));
    }
    // Every expected file must have been built with exactly the expected text.
    const expectedFileNames = Object.keys(sampleStream.expectedFiles);
    for (const expectedFileName of expectedFileNames) {
      (0, vitest_1.expect)(builtFiles.has(expectedFileName), `${label}: missing file "${expectedFileName}"`).toBe(true);
      (0, vitest_1.expect)(builtFiles.get(expectedFileName), `${label}: content of "${expectedFileName}"`).toBe(sampleStream.expectedFiles[expectedFileName]);
    }
    // No file may be built that the sample does not list as expected.
    for (const builtFileName of builtFiles.keys()) {
      (0, vitest_1.expect)(expectedFileNames, `${label}: unexpected file "${builtFileName}"`).toContain(builtFileName);
    }
  }
});
});