llm-json-fix
Version:
Fix malformed JSON outputs from Large Language Models (LLMs)
1 lines • 6.66 kB
JavaScript
/**
 * llm-json-fix — repairs malformed JSON text produced by Large Language Models.
 *
 * UMD bundle. Depending on the host it is exposed as `module.exports`
 * (CommonJS), an anonymous AMD module, `exports.LLMJSONFix`, or the global
 * `LLMJSONFix`.
 *
 * Public API (all exposed as enumerable getters on the module object):
 *   - fixLLMJson(text, options)
 *   - LLMJSONFixError, UnrepairableJSONError, AmbiguousRepairError,
 *     BufferLimitExceededError
 */
!(function (root, factory) {
  if (typeof exports === "object" && typeof module === "object") {
    module.exports = factory();
  } else if (typeof define === "function" && define.amd) {
    define([], factory);
  } else if (typeof exports === "object") {
    exports.LLMJSONFix = factory();
  } else {
    root.LLMJSONFix = factory();
  }
})(
  // `this` is undefined at the top level of an ES module, so prefer the
  // standard global object and only fall back to `this` on old hosts.
  typeof globalThis !== "undefined"
    ? globalThis
    : typeof self !== "undefined"
      ? self
      : this,
  function () {
    "use strict";

    /* ------------------------------------------------------------------ */
    /* Errors                                                             */
    /* ------------------------------------------------------------------ */

    /**
     * Base class for every error raised by this library.
     *
     * @param {string} message - Human-readable description.
     * @param {number} [position] - Character index in the input the error refers to.
     */
    class LLMJSONFixError extends Error {
      constructor(message, position) {
        super(message);
        this.position = position;
        this.name = "LLMJSONFixError";
        if (Error.captureStackTrace) {
          Error.captureStackTrace(this, LLMJSONFixError);
        }
      }
    }

    /** Raised when the repair loop cannot make progress on the input. */
    class UnrepairableJSONError extends LLMJSONFixError {
      constructor(message, position) {
        super(message, position);
        this.name = "UnrepairableJSONError";
      }
    }

    /** Exported for callers; not thrown by the code in this bundle. */
    class AmbiguousRepairError extends LLMJSONFixError {
      constructor(message, position) {
        super(message, position);
        this.name = "AmbiguousRepairError";
      }
    }

    /** Exported for callers; not thrown by the code in this bundle. */
    class BufferLimitExceededError extends LLMJSONFixError {
      constructor(message, position) {
        super(message, position);
        this.name = "BufferLimitExceededError";
      }
    }

    /* ------------------------------------------------------------------ */
    /* Character helpers                                                  */
    /* ------------------------------------------------------------------ */

    /** @param {string} ch @returns {boolean} */
    function isWhitespace(ch) {
      return /\s/.test(ch);
    }

    /** @param {string} ch @returns {boolean} True for "\n" and "\r". */
    function isLineBreak(ch) {
      return ch === "\n" || ch === "\r";
    }

    /** @param {string} ch @returns {boolean} True for every character regex `.` refuses to match. */
    function isLineTerminator(ch) {
      return ch === "\n" || ch === "\r" || ch === "\u2028" || ch === "\u2029";
    }

    /** @param {string} ch @returns {boolean} */
    function isDigit(ch) {
      return /[0-9]/.test(ch);
    }

    /** @param {string} ch @returns {boolean} */
    function isLetter(ch) {
      return /[a-zA-Z]/.test(ch);
    }

    /**
     * Formats a character index as a 1-based "line L, column C" label.
     *
     * @param {string} text
     * @param {number} index
     * @returns {string}
     */
    function describePosition(text, index) {
      const lines = text.slice(0, index).split("\n");
      const column = lines[lines.length - 1].length + 1;
      return `line ${lines.length}, column ${column}`;
    }

    /**
     * Reports whether `text` is already valid JSON whose top-level value is an
     * object or an array.
     *
     * Top-level strings are deliberately excluded: a model that returns its
     * whole payload as one JSON-encoded string (`"{\"a\":1}"`) still needs the
     * unwrapping heuristics.
     *
     * @param {string} text
     * @returns {boolean}
     */
    function parsesAsJsonContainer(text) {
      try {
        const value = JSON.parse(text);
        return typeof value === "object" && value !== null;
      } catch (err) {
        // Expected for the inputs this library exists to repair.
        return false;
      }
    }

    /* ------------------------------------------------------------------ */
    /* Preprocessing ("model specific fixes")                             */
    /* ------------------------------------------------------------------ */

    /**
     * Returns the trimmed body of the first Markdown code fence, or null.
     * A fence with a non-json language tag is only accepted when its body
     * starts with `{` or `[`.
     *
     * @param {string} text
     * @returns {string|null}
     */
    function extractFencedBlock(text) {
      const jsonFence = text.match(/```(?:json)?\s*\n([\s\S]*?)\n```/);
      if (jsonFence && jsonFence[1]) {
        return jsonFence[1].trim();
      }
      const anyFence = text.match(/```(?:\w*)?\s*\n([\s\S]*?)\n```/);
      if (anyFence && anyFence[1]) {
        const inner = anyFence[1].trim();
        if (/^\s*[{[]/.test(inner)) {
          return inner;
        }
      }
      return null;
    }

    /** Removes stray fence marker lines when no complete fence was found. */
    function stripFenceMarkers(text) {
      return text
        .replace(/^```json\s*\n/gm, "")
        .replace(/\n```\s*$/gm, "")
        .replace(/^```\s*\n/gm, "");
    }

    /**
     * Removes `//` and `/* ... *\/` comments that sit outside quoted strings,
     * then removes "(Note: ...)" / "[Note: ...]" asides.
     *
     * Comment markers inside single- or double-quoted strings are kept, so
     * values such as "http://example.com" survive. A string is considered
     * closed at the end of its line so that one stray quote in surrounding
     * prose cannot hide every later comment.
     *
     * @param {string} text
     * @returns {string}
     */
    function stripCommentsAndNotes(text) {
      let out = "";
      let openQuote = null; // quote character of the string we are inside, if any
      let i = 0;
      while (i < text.length) {
        const ch = text[i];
        if (openQuote !== null) {
          const next = text[i + 1];
          if (ch === "\\" && next !== undefined && !isLineTerminator(next)) {
            out += ch + next; // keep escape pairs together so \" does not end the string
            i += 2;
            continue;
          }
          if (ch === openQuote || isLineTerminator(ch)) {
            openQuote = null;
          }
          out += ch;
          i++;
          continue;
        }
        if (ch === '"' || ch === "'") {
          openQuote = ch;
          out += ch;
          i++;
          continue;
        }
        if (ch === "/" && text[i + 1] === "/") {
          i += 2;
          while (i < text.length && !isLineTerminator(text[i])) {
            i++;
          }
          continue;
        }
        if (ch === "/" && text[i + 1] === "*") {
          const end = text.indexOf("*/", i + 2);
          if (end !== -1) {
            i = end + 2;
            continue;
          }
          // Unterminated block comment: leave the text as it is.
        }
        out += ch;
        i++;
      }
      return out
        .replace(/\(\s*Note:.*?\)/g, "")
        .replace(/\[\s*Note:.*?\]/g, "");
    }

    /**
     * Narrows the text to a fenced block if one exists, otherwise to the
     * widest `{...}` or `[...]` span, otherwise returns it unchanged.
     *
     * @param {string} text
     * @returns {string}
     */
    function extractJsonSpan(text) {
      const fenced = extractFencedBlock(text);
      if (fenced) {
        return fenced;
      }
      const span = text.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
      return span && span[1] ? span[1] : text;
    }

    /** Drops trailing `...` / `"..."` placeholders that models use for "and so on". */
    function removeEllipsisPlaceholders(text) {
      return text
        .replace(/,\s*\.\.\.(\s*])/g, "$1")
        .replace(/,\s*\.\.\.(\s*})/g, "$1")
        .replace(/,\s*["']\.\.\.["'](\s*[\]}])/g, "$1")
        .replace(/,\s*\.\.\.$/gm, "");
    }

    /**
     * Unwraps JSON that was emitted as an escaped string: removes quotes
     * hugging brackets and un-escapes `\"` / `\'`.
     *
     * This is lossy (it cannot tell a deliberately escaped quote from an
     * over-escaped one), which is why the caller skips it for input that
     * already parses.
     */
    function unwrapStringifiedJson(text) {
      return text
        .replace(/"{/g, "{")
        .replace(/}"/g, "}")
        .replace(/"\[/g, "[")
        .replace(/\]"/g, "]")
        .replace(/\\"/g, '"')
        .replace(/\\'/g, "'");
    }

    /**
     * Balances `{}` / `[]` outside double-quoted strings.
     *
     * - On a closer that does not match the innermost opener, the text is cut
     *   just before that closer.
     * - If openers remain at the end, the matching closers are appended. When
     *   the text also ends inside a string, that string is terminated first
     *   so the closers do not become part of its value.
     *
     * @param {string} text
     * @returns {string}
     */
    function balanceBrackets(text) {
      const openers = [];
      let inString = false;
      let escaped = false;

      for (let i = 0; i < text.length; i++) {
        const ch = text[i];
        if (escaped) {
          escaped = false;
          continue;
        }
        if (ch === "\\") {
          escaped = true;
          continue;
        }
        if (ch === '"') {
          inString = !inString;
          continue;
        }
        if (inString) {
          continue;
        }
        if (ch === "{" || ch === "[") {
          openers.push(ch);
        } else if (ch === "}" || ch === "]") {
          const wanted = ch === "}" ? "{" : "[";
          if (openers.length === 0 || openers[openers.length - 1] !== wanted) {
            return text.substring(0, i);
          }
          openers.pop();
        }
      }

      if (openers.length === 0) {
        return text;
      }

      let result = text;
      if (inString) {
        if (escaped) {
          result = result.slice(0, -1); // a dangling backslash would escape the quote we add
        }
        result += '"';
      }
      for (let k = openers.length - 1; k >= 0; k--) {
        result += openers[k] === "{" ? "}" : "]";
      }
      return result;
    }

    /**
     * Repairs truncated output: balances brackets, or, when the brackets
     * already balance, removes a dangling half-written trailing entry and
     * trailing commas.
     */
    function repairTruncation(text) {
      const balanced = balanceBrackets(text);
      if (balanced !== text) {
        return balanced;
      }
      return text
        .replace(/,\s*"[^"]*$/g, "")
        .replace(/,\s*'[^']*$/g, "")
        .replace(/,(\s*[\]}])/g, "$1");
    }

    /**
     * Heuristic clean-up applied before the character-level repair.
     *
     * The lossy heuristics (ellipsis removal, un-escaping, truncation repair)
     * are skipped as soon as the text parses as a JSON object or array, so
     * valid JSON is never rewritten by them.
     *
     * @param {string} text - Raw model output.
     * @returns {string} Candidate JSON text.
     */
    function applyModelSpecificFixes(text) {
      if (parsesAsJsonContainer(text)) {
        return text;
      }

      let work = extractFencedBlock(text) || stripFenceMarkers(text);
      work = stripCommentsAndNotes(work);
      work = extractJsonSpan(work);
      if (parsesAsJsonContainer(work)) {
        return work;
      }

      work = removeEllipsisPlaceholders(work);
      work = unwrapStringifiedJson(work);
      return repairTruncation(work);
    }

    /* ------------------------------------------------------------------ */
    /* Character-level repair                                             */
    /* ------------------------------------------------------------------ */

    // Escapes that are legal in JSON and can be copied through unchanged.
    const SIMPLE_JSON_ESCAPES = '"\\/bfnrt';

    /**
     * Converts a complete single-quoted literal (quotes included) into a JSON
     * double-quoted string.
     *
     * - `\'` becomes a plain apostrophe (JSON has no `\'` escape).
     * - Legal JSON escapes (`\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t`,
     *   `\uXXXX`) are kept.
     * - Any other backslash is itself escaped so the output stays parseable.
     * - Bare double quotes are escaped.
     *
     * @param {string} literal - e.g. `'it\'s'`
     * @returns {string} e.g. `"it's"`
     */
    function toDoubleQuoted(literal) {
      const body = literal.slice(1, -1);
      let out = '"';
      for (let i = 0; i < body.length; i++) {
        const ch = body[i];
        if (ch !== "\\") {
          out += ch === '"' ? '\\"' : ch;
          continue;
        }
        const next = body[i + 1];
        if (next === undefined) {
          out += "\\\\";
          continue;
        }
        i++; // consume the escaped character as well
        if (next === "'") {
          out += "'";
        } else if (SIMPLE_JSON_ESCAPES.includes(next)) {
          out += "\\" + next;
        } else if (next === "u" && /^[0-9a-fA-F]{4}$/.test(body.slice(i + 1, i + 5))) {
          out += "\\u";
        } else {
          out += "\\\\" + next;
        }
      }
      return out + '"';
    }

    // Bare words mapped to JSON literals. The lookahead keeps identifiers that
    // merely start with a keyword (e.g. `null_count`) from being split.
    const KEYWORDS = [
      { pattern: /^true(?![A-Za-z0-9_])/i, literal: "true", length: 4, message: "Normalized to lowercase true" },
      { pattern: /^false(?![A-Za-z0-9_])/i, literal: "false", length: 5, message: "Normalized to lowercase false" },
      { pattern: /^null(?![A-Za-z0-9_])/i, literal: "null", length: 4, message: "Normalized to lowercase null" },
      { pattern: /^none(?![A-Za-z0-9_])/i, literal: "null", length: 4, message: "Converted Python None to null" },
    ];

    // Number grammar from the JSON specification (no leading zeros, no leading "+").
    const JSON_NUMBER = /^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$/;

    /**
     * Single left-to-right pass that rewrites near-JSON into JSON text.
     *
     * Handles: comments, trailing commas, single-quoted strings, unquoted
     * property names, `=` used as `:`, True/False/None, malformed numbers,
     * mismatched or missing closers and unterminated strings. Whitespace
     * outside strings is dropped, except that each line break is re-emitted
     * followed by one space per open container.
     *
     * @param {string} text
     * @param {{preserveComments?: boolean, verbose?: boolean}} [options]
     * @returns {string}
     * @throws {UnrepairableJSONError} If the scanner revisits a position.
     */
    function repairJson(text, options = {}) {
      if (text === "") {
        return "";
      }
      const { preserveComments = false, verbose = false } = options;

      let pos = 0;
      let output = "";
      let inString = false; // true while copying an unterminated string
      const containers = []; // "object" | "array", innermost last
      const visited = new Set(); // progress guard: each iteration must start at a new index
      const repairs = []; // filled only when verbose; not returned by this function

      function note(message) {
        if (verbose) {
          repairs.push({ index: pos, message });
        }
      }

      function peekNextSignificant() {
        let j = pos + 1;
        while (j < text.length && isWhitespace(text[j])) {
          j++;
        }
        return j >= text.length ? null : text[j];
      }

      while (pos < text.length) {
        if (visited.has(pos)) {
          throw new UnrepairableJSONError(
            `Infinite loop detected at ${describePosition(text, pos)}`,
            pos
          );
        }
        visited.add(pos);
        const ch = text[pos];

        // --- inside an unterminated string -------------------------------
        if (inString) {
          if (ch === "\\") {
            if (pos + 1 < text.length) {
              output += ch + text[pos + 1];
              pos += 2;
              continue;
            }
            // Lone backslash at the very end: drop it and close the string.
            output += '"';
            note("Added missing closing double quote at end of text");
            inString = false;
            pos++;
            continue;
          }
          output += ch;
          if (ch === '"') {
            inString = false;
          }
          pos++;
          continue;
        }

        // --- whitespace --------------------------------------------------
        if (isWhitespace(ch)) {
          pos++;
          if (isLineBreak(ch)) {
            output += containers.length > 0 ? "\n" + " ".repeat(containers.length) : "\n";
          }
          continue;
        }

        // --- comments ----------------------------------------------------
        if (ch === "/" && pos + 1 < text.length) {
          const next = text[pos + 1];
          if (next === "/") {
            if (preserveComments) {
              while (pos < text.length && !isLineBreak(text[pos])) {
                output += text[pos];
                pos++;
              }
              note("Preserved single-line comment");
              continue;
            }
            note("Removing single-line comment");
            pos += 2;
            while (pos < text.length && !isLineBreak(text[pos])) {
              pos++;
            }
            continue;
          }
          if (next === "*") {
            if (preserveComments) {
              output += "/*";
              pos += 2;
              while (pos + 1 < text.length && (text[pos] !== "*" || text[pos + 1] !== "/")) {
                output += text[pos];
                pos++;
              }
              if (pos + 1 < text.length) {
                output += "*/";
                pos += 2;
              }
              note("Preserved multi-line comment");
              continue;
            }
            note("Removing multi-line comment");
            pos += 2;
            while (pos + 1 < text.length && (text[pos] !== "*" || text[pos + 1] !== "/")) {
              pos++;
            }
            pos += 2;
            continue;
          }
        }

        // --- containers --------------------------------------------------
        if (ch === "{" || ch === "[") {
          containers.push(ch === "{" ? "object" : "array");
          output += ch;
          pos++;
          continue;
        }

        if (ch === "}" || ch === "]") {
          if (containers.length === 0) {
            note("Removing unmatched closing " + (ch === "}" ? "curly brace" : "square bracket"));
            pos++;
            continue;
          }
          const expected = containers.pop() === "object" ? "}" : "]";
          if (ch !== expected) {
            note(`Replacing ${ch} with ${expected}`);
          }
          output += expected;
          pos++;
          continue;
        }

        // --- commas ------------------------------------------------------
        if (ch === ",") {
          const upcoming = peekNextSignificant();
          if (upcoming === "}" || upcoming === "]") {
            note("Removing trailing comma");
          } else {
            output += ch;
          }
          pos++;
          continue;
        }

        // --- strings -----------------------------------------------------
        if (ch === '"' || ch === "'") {
          const start = pos;
          pos++;
          let end = pos;
          let escaped = false;
          while (end < text.length) {
            if (text[end] === "\\") {
              escaped = !escaped;
            } else {
              if (text[end] === ch && !escaped) {
                break;
              }
              escaped = false;
            }
            end++;
          }

          if (end < text.length) {
            const literal = text.substring(start, end + 1);
            if (ch === "'") {
              output += toDoubleQuoted(literal);
              note("Converted single quotes to double quotes in string");
            } else {
              output += literal;
            }
            pos = end + 1;
            continue;
          }

          // No closing quote anywhere: emit the opener and copy the rest in
          // string mode. `pos` already points at the first content character,
          // so it must not be advanced again here.
          inString = true;
          output += '"';
          if (ch === "'") {
            note("Converted single quote to double quote and treating as unclosed string");
          }
          continue;
        }

        if (ch === ":") {
          output += ch;
          pos++;
          continue;
        }

        // --- numbers -----------------------------------------------------
        if (isDigit(ch) || ch === "-" || ch === "+" || ch === ".") {
          let token = "";
          while (pos < text.length && (isDigit(text[pos]) || "-+.eE".includes(text[pos]))) {
            token += text[pos];
            pos++;
          }
          if (JSON_NUMBER.test(token)) {
            output += token;
          } else {
            note("Fixing invalid number format");
            const parsed = parseFloat(token);
            if (Number.isNaN(parsed)) {
              output += "0";
              note("Replaced invalid number with 0");
            } else {
              output += JSON.stringify(parsed);
            }
          }
          continue;
        }

        // --- bare words --------------------------------------------------
        if (isLetter(ch)) {
          // Six characters are enough for the longest keyword plus lookahead.
          const head = text.slice(pos, pos + 6);
          const keyword = KEYWORDS.find((entry) => entry.pattern.test(head));
          if (keyword) {
            output += keyword.literal;
            pos += keyword.length;
            note(keyword.message);
            continue;
          }

          if (containers[containers.length - 1] === "object") {
            const start = pos;
            let name = "";
            while (
              pos < text.length &&
              (isLetter(text[pos]) || isDigit(text[pos]) || text[pos] === "_")
            ) {
              name += text[pos];
              pos++;
            }
            while (pos < text.length && isWhitespace(text[pos])) {
              pos++;
            }
            if (pos < text.length && text[pos] === ":") {
              output += `"${name}":`;
              note("Added quotes around property name");
              pos++;
              continue;
            }
            pos = start; // not a property name after all
          }

          note("Skipping unrecognized token");
          pos++;
          continue;
        }

        // --- anything else -----------------------------------------------
        if (ch === "=" && peekNextSignificant() !== "=") {
          output += ":";
          note("Replaced = with :");
        }
        pos++;
      }

      if (inString) {
        output += '"';
        note("Added missing closing double quote at end of text");
      }

      while (containers.length > 0) {
        const closer = containers.pop() === "object" ? "}" : "]";
        output += closer;
        note(`Added missing closing ${closer}`);
      }

      return output;
    }

    /**
     * Repairs malformed JSON text produced by an LLM.
     *
     * @param {string} text - Raw model output.
     * @param {object} [options]
     * @param {boolean} [options.applyModelSpecificFixes=true] - Run the
     *   heuristic preprocessing (fence extraction, comment/note stripping,
     *   ellipsis removal, un-escaping, truncation repair) first.
     * @param {boolean} [options.preserveComments=false] - Keep comments in the
     *   character-level pass. The preprocessing stage strips comments
     *   regardless of this flag.
     * @param {boolean} [options.verbose=false] - Record repair notes
     *   internally; they are not part of the return value.
     * @param {string} [options.model] - Accepted for compatibility; not
     *   consulted by this implementation.
     * @returns {string} Repaired JSON text ("" for empty input).
     * @throws {UnrepairableJSONError} If the repair pass cannot make progress.
     */
    function fixLLMJson(text, options = {}) {
      const {
        applyModelSpecificFixes: shouldPreprocess = true,
        preserveComments = false,
        verbose = false,
      } = options;

      const candidate = shouldPreprocess ? applyModelSpecificFixes(text) : text;
      return repairJson(candidate, { preserveComments, verbose });
    }

    /* ------------------------------------------------------------------ */
    /* Module object                                                      */
    /* ------------------------------------------------------------------ */

    const members = {
      AmbiguousRepairError,
      BufferLimitExceededError,
      LLMJSONFixError,
      UnrepairableJSONError,
      fixLLMJson,
    };

    const api = {};
    if (typeof Symbol !== "undefined" && Symbol.toStringTag) {
      Object.defineProperty(api, Symbol.toStringTag, { value: "Module" });
    }
    Object.defineProperty(api, "__esModule", { value: true });
    for (const name of Object.keys(members)) {
      Object.defineProperty(api, name, {
        enumerable: true,
        get: () => members[name],
      });
    }

    return api;
  }
);