/*
 * llm-json-fix
 * Version: (not recorded)
 * Fix malformed JSON outputs from Large Language Models (LLMs)
 * 103 lines (102 loc) • 2.99 kB
 * JavaScript
 */
;
// Set the `__esModule` flag on the CommonJS exports object.
// NOTE(review): presumably emitted by a transpiler for ES-module interop — confirm against the build setup.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. The functions are declared further down in the
// file; function declarations are hoisted, so they can be assigned here.
exports.isWhitespace = isWhitespace;
exports.isLineTerminator = isLineTerminator;
exports.isDigit = isDigit;
exports.isAlpha = isAlpha;
exports.isIdentifierChar = isIdentifierChar;
exports.getPositionDetails = getPositionDetails;
exports.formatPosition = formatPosition;
exports.extractMarkdownCodeBlocks = extractMarkdownCodeBlocks;
exports.normalizeQuotes = normalizeQuotes;
exports.containsMarkdown = containsMarkdown;
exports.isLLMStyleOutput = isLLMStyleOutput;
/**
 * Tests the given value against the regular expression `\s`.
 *
 * @param {string} char - Character to inspect.
 * @returns {boolean} `true` when a whitespace match is found, otherwise `false`.
 */
function isWhitespace(char) {
    const whitespaceMatch = /\s/.exec(char);
    return whitespaceMatch !== null;
}
/**
 * Reports whether the given character is a line feed (`\n`) or a carriage
 * return (`\r`).
 *
 * @param {string} char - Character to inspect.
 * @returns {boolean} `true` for `'\n'` or `'\r'`, `false` for anything else.
 */
function isLineTerminator(char) {
    switch (char) {
        case '\n':
        case '\r':
            return true;
        default:
            return false;
    }
}
/**
 * Tests the given value for an ASCII decimal digit (`0` through `9`).
 *
 * @param {string} char - Character to inspect.
 * @returns {boolean} `true` when a digit is found, otherwise `false`.
 */
function isDigit(char) {
    const digitMatch = /[0-9]/.exec(char);
    return digitMatch !== null;
}
/**
 * Tests the given value for an ASCII letter (`a`-`z` or `A`-`Z`).
 *
 * @param {string} char - Character to inspect.
 * @returns {boolean} `true` when a letter is found, otherwise `false`.
 */
function isAlpha(char) {
    const letterMatch = /[a-zA-Z]/.exec(char);
    return letterMatch !== null;
}
/**
 * Reports whether a character may appear in an identifier: a letter
 * (per `isAlpha`), a digit (per `isDigit`), an underscore, or a dollar sign.
 *
 * @param {string} char - Character to inspect.
 * @returns {boolean} `true` when the character is accepted, otherwise `false`.
 */
function isIdentifierChar(char) {
    if (char === '_' || char === '$') {
        return true;
    }
    return isAlpha(char) || isDigit(char);
}
/**
 * Converts a character offset into a 1-based line and column.
 *
 * Only `'\n'` is treated as a line break. The column is the number of
 * characters between the last line break before `index` and `index`, plus one.
 *
 * @param {string} text - Full text the offset refers to.
 * @param {number} index - Offset into `text`; passed to `String.prototype.slice` as the end bound.
 * @returns {{line: number, column: number}} 1-based position.
 */
function getPositionDetails(text, index) {
    const preceding = text.slice(0, index);
    let line = 1;
    let lastBreak = -1;
    // Walk every newline before the offset, remembering where the last one sits.
    for (let at = preceding.indexOf('\n'); at !== -1; at = preceding.indexOf('\n', at + 1)) {
        line += 1;
        lastBreak = at;
    }
    // With no newline, lastBreak is -1, so the column is preceding.length + 1.
    const column = preceding.length - lastBreak;
    return { line, column };
}
/**
 * Renders the position of `index` within `text` as `line L, column C`,
 * using the values returned by `getPositionDetails`.
 *
 * @param {string} text - Full text the offset refers to.
 * @param {number} index - Offset into `text`.
 * @returns {string} Human-readable position string.
 */
function formatPosition(text, index) {
    const position = getPositionDetails(text, index);
    return `line ${position.line}, column ${position.column}`;
}
/**
 * Collects triple-backtick fenced sections from the text.
 *
 * A section is three backticks, an optional `json` tag, one or more
 * non-backtick characters, and three closing backticks. Each returned entry
 * is the whole match, fences included.
 *
 * @param {string} text - Text to scan.
 * @returns {string[] | null} Matched sections in order of appearance, or `null` when there are none.
 */
function extractMarkdownCodeBlocks(text) {
    const fenced = text.match(/```(?:json)?([^`]+)```/g);
    if (!fenced) {
        return null;
    }
    // Hand back a plain array copy of the match list.
    return [...fenced];
}
/**
 * Converts typographic ("curly") quotation marks to their ASCII equivalents.
 *
 * Left/right double quotation marks (U+201C, U+201D) become `"` and
 * left/right single quotation marks (U+2018, U+2019) become `'`. All other
 * characters, including quotes that are already ASCII, are left untouched.
 *
 * @param {string} text - Text that may contain typographic quotes.
 * @returns {string} The text with those quotes replaced by ASCII quotes.
 */
function normalizeQuotes(text) {
    // The classes are written as \u escapes so they cannot silently degrade
    // to ASCII quotes (making the replacement a no-op) if the file is re-encoded.
    return text
        .replace(/[\u201C\u201D]/g, '"')
        .replace(/[\u2018\u2019]/g, "'");
}
/**
 * Heuristically detects Markdown syntax in the text.
 *
 * The text counts as Markdown when any one of these is found: a
 * triple-backtick fence, a line starting with `#` characters followed by
 * whitespace, `**...**`, `*...*`, or `[...](...)`.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` if at least one indicator matches, otherwise `false`.
 */
function containsMarkdown(text) {
    const indicators = [
        /```/, // fenced code
        /^#+\s+/m, // heading at the start of any line
        /\*\*.+\*\*/, // bold
        /\*.+\*/, // italic
        /\[.+\]\(.+\)/ // link
    ];
    for (const indicator of indicators) {
        if (indicator.test(text)) {
            return true;
        }
    }
    return false;
}
/**
 * Heuristically detects natural-language framing of the kind LLMs wrap
 * around structured output.
 *
 * Returns `true` when any of these is found (case-insensitively for the
 * first three):
 *  - "here's" / "heres" followed by "the", "a", "an" or "your" and then
 *    "json", "output" or "response";
 *  - "I've" / "I'll" / "I'm" followed by "generated", "created" or "provided";
 *  - a "```json" fence opener;
 *  - a line consisting only of word characters and whitespace ending in a colon.
 *
 * Contractions are accepted with either the ASCII apostrophe or the
 * typographic apostrophe (U+2019).
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} `true` if at least one pattern matches, otherwise `false`.
 */
function isLLMStyleOutput(text) {
    const llmPatterns = [
        // U+2019 is written as an escape so it cannot degrade to a duplicate
        // ASCII apostrophe if the file is re-encoded.
        /here['\u2019]?s\s+(?:the|an?|your)\s+(?:json|output|response)/i,
        /I['\u2019]?(?:ve|ll|m)\s+(?:generated|created|provided)/i,
        /```json/i,
        /^\s*[\w\s]+:\s*$/m // Explanatory label on its own line, ending in a colon
    ];
    return llmPatterns.some((pattern) => pattern.test(text));
}