UNPKG

llm-json-fix

Version:

Fix malformed JSON outputs from Large Language Models (LLMs)

90 lines (89 loc) 2.52 kB
/**
 * Patterns whose presence suggests the text contains markdown formatting.
 * None of them carry the `g`/`y` flag, so `.test()` is stateless and the
 * literals can safely be shared across calls.
 */
const MARKDOWN_INDICATORS = [
  /```/, // Code blocks
  /^#+\s+/m, // Headers
  /\*\*.+\*\*/, // Bold
  /\*.+\*/, // Italic
  /\[.+\]\(.+\)/, // Links
];

/**
 * Patterns common in conversational LLM output that wraps a JSON payload.
 * Apostrophes accept both the straight (') and the typographic (U+2019)
 * form, written as an escape so the source survives re-encoding.
 */
const LLM_OUTPUT_PATTERNS = [
  /here['\u2019]?s\s+(?:the|an?|your)\s+(?:json|output|response)/i,
  /I['\u2019]?(?:ve|ll|m)\s+(?:generated|created|provided)/i,
  /```json/i,
  // Explanatory labels followed by a colon. `[\w\s]` already covers leading
  // whitespace, so no separate `\s*` prefix is needed (less backtracking).
  /^[\w\s]+:\s*$/m,
];

/**
 * Checks if a character is a whitespace character.
 *
 * @param {string} char - A single character.
 * @returns {boolean} True only for a one-character whitespace string.
 */
export function isWhitespace(char) {
  // The typeof guard matters: RegExp#test stringifies its argument, so a
  // non-string such as `undefined` (reading past end of input) must be rejected.
  return typeof char === 'string' && /^\s$/.test(char);
}

/**
 * Checks if a character is a line terminator.
 *
 * @param {string} char - A single character.
 * @returns {boolean} True for '\n' or '\r'.
 */
export function isLineTerminator(char) {
  return char === '\n' || char === '\r';
}

/**
 * Checks if a character is a digit.
 *
 * @param {string} char - A single character.
 * @returns {boolean} True only for a one-character string in 0-9.
 */
export function isDigit(char) {
  return typeof char === 'string' && /^[0-9]$/.test(char);
}

/**
 * Checks if a character is a letter.
 *
 * @param {string} char - A single character.
 * @returns {boolean} True only for a one-character string in a-z or A-Z.
 */
export function isAlpha(char) {
  // Without the guard, `undefined`/`null` would be stringified to
  // "undefined"/"null" and wrongly reported as alphabetic.
  return typeof char === 'string' && /^[a-zA-Z]$/.test(char);
}

/**
 * Checks if a character can be part of a valid identifier.
 *
 * @param {string} char - A single character.
 * @returns {boolean} True for ASCII letters, digits, '_' and '$'.
 */
export function isIdentifierChar(char) {
  return isAlpha(char) || isDigit(char) || char === '_' || char === '$';
}

/**
 * Get the position in the text as line:column.
 *
 * Lines are delimited by '\n'; both line and column are 1-based.
 *
 * @param {string} text - The full text.
 * @param {number} index - Offset into `text`.
 * @returns {{ line: number, column: number }} The 1-based position.
 */
export function getPositionDetails(text, index) {
  const lines = text.slice(0, index).split('\n');
  const line = lines.length;
  const column = lines[lines.length - 1].length + 1;
  return { line, column };
}

/**
 * Format a position as a string.
 *
 * @param {string} text - The full text.
 * @param {number} index - Offset into `text`.
 * @returns {string} A string of the form "line L, column C".
 */
export function formatPosition(text, index) {
  const { line, column } = getPositionDetails(text, index);
  return `line ${line}, column ${column}`;
}

/**
 * Extracts text that is likely markdown-formatted code blocks.
 *
 * @param {string} text - Text that may contain fenced code blocks.
 * @returns {string[] | null} Every fenced block, including the surrounding
 *   backtick fences and optional `json` tag, or null when none is found.
 */
export function extractMarkdownCodeBlocks(text) {
  const codeBlockRegex = /```(?:json)?([^`]+)```/g;
  // With a global regex, String#match already returns a fresh plain array
  // of full matches (or null), so no extra copy is needed.
  return text.match(codeBlockRegex);
}

/**
 * Normalizes different quote styles to standard quotes.
 *
 * Typographic double quotes (U+201C, U+201D) become `"` and typographic
 * single quotes (U+2018, U+2019) become `'`.
 *
 * @param {string} text - Text possibly containing typographic quotes.
 * @returns {string} The text with typographic quotes replaced.
 */
export function normalizeQuotes(text) {
  // Escapes are used instead of literal curly quotes so the character
  // classes cannot silently degrade to straight quotes on re-encoding.
  return text
    .replace(/[\u201C\u201D]/g, '"')
    .replace(/[\u2018\u2019]/g, "'");
}

/**
 * Checks if text contains common markdown indicators.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} True when any markdown indicator pattern matches.
 */
export function containsMarkdown(text) {
  return MARKDOWN_INDICATORS.some((pattern) => pattern.test(text));
}

/**
 * Checks if the text appears to be LLM formatted output with natural language.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} True when any LLM-style pattern matches.
 */
export function isLLMStyleOutput(text) {
  return LLM_OUTPUT_PATTERNS.some((pattern) => pattern.test(text));
}