@yoannchb/wtf-json
Version:
Parse any kind of broken JSON to make scraping easier
3 lines (2 loc) • 5.12 kB
JavaScript
/**
 * wtf-json — a forgiving JSON parser (UMD bundle).
 *
 * Exposes a single function `wtfJson(input)`:
 *   - CommonJS: `module.exports`
 *   - AMD:      `define(factory)`
 *   - otherwise a `wtfJson` property on the global object.
 *
 * The input is tokenized with a small regex-driven tokenizer, turned into a
 * minimal AST, and finally converted into plain JavaScript values. Unquoted
 * keys/values, single-quoted and backtick strings, `undefined`, `NaN` and
 * missing commas between object entries are tolerated.
 */
!(function (root, factory) {
  if (typeof exports === "object" && typeof module !== "undefined") {
    module.exports = factory();
  } else if (typeof define === "function" && define.amd) {
    define(factory);
  } else {
    const target = typeof globalThis !== "undefined" ? globalThis : root || self;
    target.wtfJson = factory();
  }
})(this, function () {
  "use strict";

  /** Single-character escapes that map to a control character. */
  const SIMPLE_ESCAPES = new Map([
    ["n", "\n"],
    ["r", "\r"],
    ["t", "\t"],
    ["b", "\b"],
    ["f", "\f"],
    ["v", "\v"],
  ]);

  /**
   * Matches one escape sequence: `\uXXXX`, `\xXX`, or a backslash followed by
   * any single character (except a line terminator).
   */
  const ESCAPE_SEQUENCE = /\\(?:u([0-9a-fA-F]{4})|x([0-9a-fA-F]{2})|(.))/g;

  /** Turns the flat token list into a tree of typed nodes. */
  class AstBuilder {
    /**
     * Builds a human-readable error message for a token.
     * @param {{value: string, startLine: number, startColumn: number}} token
     * @returns {string}
     */
    getErrorMessage(token) {
      return `"${token.value}" is not valid JSON\nline: ${token.startLine}, column: ${token.startColumn}`;
    }

    /**
     * Strips the surrounding quotes (if the text both starts and ends with a
     * quote character) and decodes backslash escapes.
     *
     * `\uXXXX` and `\xXX` are decoded to the matching UTF-16 code unit, the
     * usual control escapes (`\n`, `\t`, ...) to their control character, and
     * any other escaped character to itself.
     *
     * @param {string} raw Raw token text.
     * @returns {string}
     */
    formatStr(raw) {
      const quote = /(`|'|")/;
      const isQuoted =
        quote.test(raw.charAt(0)) && quote.test(raw.charAt(raw.length - 1));
      const content = isQuoted ? raw.substring(1, raw.length - 1) : raw;

      return content.replace(ESCAPE_SEQUENCE, (match, unicodeHex, byteHex, char) => {
        if (unicodeHex !== undefined) {
          return String.fromCharCode(Number.parseInt(unicodeHex, 16));
        }
        if (byteHex !== undefined) {
          return String.fromCharCode(Number.parseInt(byteHex, 16));
        }
        return SIMPLE_ESCAPES.has(char) ? SIMPLE_ESCAPES.get(char) : char;
      });
    }

    /** @returns {{type: "BOOLEAN", value: boolean}} */
    appendBoolean(token) {
      return { type: "BOOLEAN", value: token.type === "TRUE_BOOLEAN" };
    }

    /** @returns {{type: "NULL_VALUE", value: null}} */
    appendNullValue(token) {
      return { type: "NULL_VALUE", value: null };
    }

    /** @returns {{type: "UNDEFINED_VALUE", value: undefined}} */
    appendUndefinedValue(token) {
      return { type: "UNDEFINED_VALUE", value: undefined };
    }

    /** @returns {{type: "NAN_VALUE", value: number}} */
    appendNaNValue(token) {
      return { type: "NAN_VALUE", value: NaN };
    }

    /** @returns {{type: "STRING", value: string}} */
    appendString(token) {
      return { type: "STRING", value: this.formatStr(token.value) };
    }

    /** @returns {{type: "ARRAY", properties: Array}} */
    appendArray(token) {
      return { type: "ARRAY", properties: [] };
    }

    /** @returns {{type: "OBJECT", properties: Array}} */
    appendObject(token) {
      return { type: "OBJECT", properties: [] };
    }

    /** @returns {{type: "NUMBER", value: number}} */
    appendNumber(token) {
      return { type: "NUMBER", value: Number(token.value) };
    }

    /** @returns {{type: "OBJECT_KEY", name: string, value: null}} */
    appendKey(token) {
      return { type: "OBJECT_KEY", name: this.formatStr(token.value), value: null };
    }

    /**
     * Builds the AST from a token list.
     *
     * A stack of open containers (root, arrays, objects, object keys) is kept;
     * each value is attached to the container on top of the stack. Tokens with
     * no structural meaning (whitespace, colons, ...) are ignored.
     *
     * @param {Array<{type: string, value: string}>} tokens
     * @returns {{type: "JSON", properties: Array}} Root node.
     */
    buildAST(tokens) {
      const root = { type: "JSON", properties: [] };
      const stack = [root];

      // Object keys hold a single `value`; every other container holds a list.
      const attach = (parent, node) => {
        if ("value" in parent) {
          parent.value = node;
        } else {
          parent.properties.push(node);
        }
      };
      const open = (parent, node) => {
        attach(parent, node);
        stack.push(node);
      };

      for (const token of tokens) {
        let parent = stack[stack.length - 1];

        switch (token.type) {
          case "TRUE_BOOLEAN":
          case "FALSE_BOOLEAN":
            attach(parent, this.appendBoolean(token));
            break;
          case "NULL":
            attach(parent, this.appendNullValue(token));
            break;
          case "UNDEFINED":
            attach(parent, this.appendUndefinedValue(token));
            break;
          case "NAN":
            attach(parent, this.appendNaNValue(token));
            break;
          case "NUMBER":
            attach(parent, this.appendNumber(token));
            break;
          case "STRING":
          case "UNKNOWN":
            if (parent.type === "OBJECT_KEY" && parent.value !== null) {
              // The current key already has its value, so a comma is missing:
              // close the key and continue with the enclosing object, so this
              // token starts a new key instead of overwriting the old value.
              stack.pop();
              parent = stack[stack.length - 1];
            }
            if (parent.type === "OBJECT") {
              open(parent, this.appendKey(token));
            } else {
              attach(parent, this.appendString(token));
            }
            break;
          case "START_BRACKET":
            open(parent, this.appendArray(token));
            break;
          case "START_BRACE":
            open(parent, this.appendObject(token));
            break;
          case "END_BRACKET":
            // Never pop the root, even on unbalanced input.
            if (stack.length !== 1) {
              stack.pop();
            }
            break;
          case "END_BRACE":
            if (stack.length !== 1) {
              // Close the pending key first, then the object itself.
              if (parent.type === "OBJECT_KEY") {
                stack.pop();
              }
              stack.pop();
            }
            break;
          case "COMA":
            if (parent.type === "OBJECT_KEY") {
              stack.pop();
            }
            break;
          default:
            break;
        }
      }

      return root;
    }
  }

  /** Option defaults merged under the options given to the tokenizer. */
  const DEFAULT_TOKENIZER_OPTIONS = {
    defaultType: "UNKNOWN",
    concatDefaultType: true,
  };

  /** Generic regex-driven tokenizer. */
  class Tokenizer {
    /**
     * @param {object} options
     * @param {Object<string, RegExp>} options.tokens Rules, tried in insertion order.
     * @param {string} [options.defaultType="UNKNOWN"] Type given to unmatched characters.
     * @param {boolean} [options.concatDefaultType=true] Merge adjacent default-type tokens.
     * @param {Function} [options.callback] Called with `(token, tokensSoFar)`; its return
     *   value replaces the token, and `null` drops it.
     * @param {boolean} [options.authorizeAdditionalTokens] Allow the callback to return
     *   token types that are not declared in `tokens`.
     * @throws {Error} When `options` or `options.tokens` is missing.
     */
    constructor(options) {
      if (!options || !options.tokens) {
        throw new Error('Invalide options: "tokens" is required');
      }
      this.options = Object.assign({}, DEFAULT_TOKENIZER_OPTIONS, options);
    }

    /** @returns {string} */
    getDefaultType() {
      return this.options.defaultType;
    }

    /** @returns {Object<string, RegExp>} */
    getTokens() {
      return this.options.tokens;
    }

    /** @returns {string[]} Every declared token type plus the default type. */
    getTokensName() {
      return [...Object.keys(this.options.tokens), this.getDefaultType()];
    }

    /**
     * Runs one rule against a string.
     * @param {string} text
     * @param {string} type
     * @param {RegExp} pattern
     * @returns {?{type: string, value: string, groups: (object|undefined), index: number}}
     */
    matcher(text, type, pattern) {
      pattern.lastIndex = 0;
      const found = pattern.exec(text);
      return found
        ? { type, value: found[0], groups: found.groups, index: found.index }
        : null;
    }

    /**
     * Splits `input` into tokens. Lines and columns are zero-based; the end
     * position is the position just after the token's last character.
     *
     * @param {string} input
     * @returns {Array<object>} Tokens; an empty array when `input` is not a string.
     * @throws {Error} When the callback returns an undeclared token type and
     *   `authorizeAdditionalTokens` is not set.
     */
    tokenize(input) {
      if (typeof input !== "string") {
        return [];
      }

      const tokens = [];
      const defaultType = this.getDefaultType();
      const concatDefault = this.options.concatDefaultType;
      const rules = Object.entries(this.options.tokens);
      // Computed once per call instead of once per token.
      const knownTypes = new Set(this.getTokensName());

      let offset = 0;
      let line = 0;
      let column = 0;

      while (offset < input.length) {
        const rest = input.slice(offset);

        // First rule that matches at the very start of the remaining text wins.
        let match = null;
        for (const [type, pattern] of rules) {
          pattern.lastIndex = 0;
          const found = pattern.exec(rest);
          if (found && found.index === 0) {
            match = { type, value: found[0], groups: found.groups, index: 0 };
            break;
          }
        }
        if (!match) {
          match = { type: defaultType, value: rest[0], index: 0 };
        }

        const text = match.value;
        const textLines = text.split("\n");
        const startLine = line;
        const startColumn = column;
        if (textLines.length > 1) {
          line += textLines.length - 1;
          column = textLines[textLines.length - 1].length;
        } else {
          column += text.length;
        }

        let token = {
          type: match.type,
          value: match.value,
          ...(match.groups ? { groups: match.groups } : {}),
          startLine,
          startColumn,
          endLine: line,
          endColumn: column,
        };
        if (this.options.callback) {
          token = this.options.callback(token, tokens);
        }

        // Always advance by the matched text, whatever the callback returned.
        offset += text.length;

        if (token === null) {
          continue;
        }

        if (!this.options.authorizeAdditionalTokens && !knownTypes.has(token.type)) {
          throw new Error(`Unknown token type returned by the callback: "${token.type}" (If you want to allow other types please set authorizeAdditionalTokens to true)`);
        }

        const previous = tokens.length !== 0 ? tokens[tokens.length - 1] : null;
        const shouldMerge =
          concatDefault &&
          token.type === defaultType &&
          previous !== null &&
          previous.type === defaultType;

        if (shouldMerge) {
          previous.value += token.value;
          previous.endColumn = token.endColumn;
          previous.endLine = token.endLine;
        } else {
          tokens.push(token);
        }
      }

      return tokens;
    }
  }

  /** Ready-made patterns for users of the tokenizer. */
  Tokenizer.BUILT_IN_RULES = {
    WORD: /\w+/,
    NUMBER: /\d+(?:\.\d+)?/,
    ONE_LINE_COMMENT: /\/\/.*/,
    MULTIPLE_LINE_COMMENT: /\/\*[\s\S]*?\*\//,
    STRING: /("|'|`)(?<content>(?:\\\1|.)*?)\1/,
    DOUBLE_QUOTE_STRING: /(")(?<content>(?:\\\1|.)*?)\1/,
    SINGLE_QUOTE_STRING: /(')(?<content>(?:\\\1|.)*?)\1/,
    GRAVE_ACCENT_STRING: /(`)(?<content>(?:\\\1|.)*?)\1/,
    WHITE_SPACES: /\s+/,
    NEW_LINES: /\n+/,
  };

  /**
   * Token rules of the lenient JSON grammar, tried in this order.
   *
   * STRING first tries a strict reading in which every backslash escapes the
   * character after it (so `"a\\"` ends at its own closing quote). When that
   * fails, it falls back to the permissive reading in which only `\<quote>`
   * is an escape, which keeps broken input such as `"C:\"` parseable.
   *
   * NUMBER accepts an optional exponent (`1e3`, `2.5E-1`).
   */
  const JSON_RULES = {
    STRING: /^("|'|`)(?:(?:\\[\s\S]|(?!\1)[^\\])*\1|(?:\\\1|.|\n)*?\1)/,
    NUMBER: /^-?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?/,
    WHITE_SPACE: /^\s+/,
    COMA: /^,/,
    COLON: /^:/,
    TRUE_BOOLEAN: /^true/,
    FALSE_BOOLEAN: /^false/,
    NULL: /^null/,
    UNDEFINED: /^undefined/,
    NAN: /^NaN/,
    START_BRACKET: /^\[/,
    END_BRACKET: /^\]/,
    START_BRACE: /^\{/,
    END_BRACE: /^\}/,
  };

  /** Converts text into JavaScript values via the tokenizer and AST builder. */
  class Parser {
    constructor() {
      this.tokenizer = new Tokenizer({ tokens: JSON_RULES });
      this.astBuilder = new AstBuilder();
    }

    /**
     * Parses (possibly broken) JSON text.
     *
     * @param {*} input Coerced to a string with `String(input)`.
     * @returns {*} The parsed value; an array of values when the text holds
     *   several top-level values; `undefined` when it holds none.
     */
    parse(input) {
      const text = String(input);
      const tokens = this.tokenizer.tokenize(text);
      const { properties } = this.astBuilder.buildAST(tokens);

      if (properties.length > 1) {
        return properties.map((node) => this.parseASTBranch(node));
      }
      if (properties.length > 0) {
        return this.parseASTBranch(properties[0]);
      }
      return undefined;
    }

    /** Dispatches on the node type. */
    parseASTBranch(node) {
      if (node.type === "OBJECT") {
        return this.parseObject(node);
      }
      if (node.type === "ARRAY") {
        return this.parseArray(node);
      }
      return this.parsePrimitif(node);
    }

    /** @returns {Array} */
    parseArray(node) {
      return node.properties.map((child) => this.parseASTBranch(child));
    }

    /**
     * Converts an OBJECT node into a plain object. Keys without a value map
     * to `null`; children that are not object keys are ignored.
     * @returns {object}
     */
    parseObject(node) {
      const result = {};
      for (const property of node.properties) {
        if (property.type !== "OBJECT_KEY") {
          continue;
        }
        const value =
          property.value === null ? null : this.parseASTBranch(property.value);
        // Define an own property rather than assigning: a plain assignment to
        // "__proto__" would replace the prototype of the result object.
        Object.defineProperty(result, property.name, {
          value,
          writable: true,
          enumerable: true,
          configurable: true,
        });
      }
      return result;
    }

    /** @returns {*} The literal value stored on the node. */
    parsePrimitif(node) {
      return node.value;
    }
  }

  const parser = new Parser();
  return parser.parse.bind(parser);
});
//# sourceMappingURL=index.js.map