UNPKG

oparser

Version:

A very forgiving key-value option parser

539 lines (463 loc) 15.7 kB
const { parseJSON } = require('json-alexander') const { ensureWrap } = require('./ensure-wrap') const { replaceInnerCharPattern } = require('./replace-inner') const COMMA = '_COMMA_' // const ARRAY_REGEX = /^\[(.*)\]$/ const ARRAY_REGEX = /^\s*\[([\s\S]*?)\]\s*$/ //const OBJECT_REGEX = /^\{(.*)\}$/ const OBJECT_REGEX = /^\s*\{([\s\S]*?)\}\s*$/ const TRAILING_COMMAS = /,+$/ // https://regex101.com/r/99mkDt/1 old /(?:,*[^\S]*)+?]$/ const TRAILING_ARRAY_COMMAS = /(?:,+[^\S]*)+?](,)*\s*/ // https://regex101.com/r/cy7mLe/4 const TRAILING_OBJECT_COMMAS = /(?:,[^\S]*)+(})(,*)\s*$/ const TRAILING_ARRAY_COMMAS_GLOBAL = /(?:,+[^\S]*)+?](,)*\s*/gm const TRIM_INNER_TRAILING_OBJECT_COMMA = /(,+[^\S]*)*?}\s*/m const HAS_JSON_TOKEN = /[{}\[\]"':,]/ // // Remove trailing object commas // value = value.replace(/(?:,*[^\S]*)*?}(,)*/gm, '}$1') // // Remove trailing array commas // value = value.replace(/(?:,+[^\S]*)+?]\s*$/gm, ']') // console.log(JSON.parse(value)) function isArrayLike(str) { if (typeof str !== 'string') return false return Boolean(ARRAY_REGEX.test(str)) } function isObjectLike(str) { if (typeof str !== 'string') return false return Boolean(OBJECT_REGEX.test(str)) } function replaceDoubleQuotes(input) { return replaceBetweenMarkers(input, /"/g, '__inner_dbl_quote__') } function replaceCommas(input) { return replaceBetweenMarkers(input, /,/g, COMMA) } function replaceBetweenMarkers(input, pattern, replace) { // Match content between __OPEN_JSON__OBJECT__ and __CLOSE_JSON__OBJECT__ const regexMarkers = /(__OPEN_JSON__OBJECT__)([\s\S]*?)(__CLOSE_JSON__OBJECT__)/g // Replace the double quotes within the markers const updatedString = input.replace(regexMarkers, (match, openMarker, content, closeMarker) => { // Replace all double quotes in the content between the markers const updatedContent = content.replace(pattern, replace) // Return the full string with markers and updated content return `${openMarker}${updatedContent}${closeMarker}` }) return updatedString } const CONFLICTING_INNER_JSON_IN_SINGLE = replaceInnerCharPattern("{\\[{", `'`, `'`, 2) const CONFLICTING_INNER_JSON_IN_DOUBLE = replaceInnerCharPattern("{\\[{", `"`, `"`, 2) const CONFLICTING_INNER_JSON_CLOSE_IN_DOUBLE = replaceInnerCharPattern("}\\]}", `"`, `"`, 2) const CONFLICTING_INNER_JSON_CLOSE_IN_SINGLE = replaceInnerCharPattern("}\\]}", `'`, `'`, 2) // const CONFLICTING_INNER_COLONS_SINGLE = replaceInnerCharPattern(":", `'`, `'`, 2) // const CONFLICTING_INNER_COLONS_DOUBLE = replaceInnerCharPattern(":", `"`, `"`, 2) function splitTopLevel(input, delimiter = ',') { const values = [] let quote = '' let escape = false let curlyDepth = 0 let arrayDepth = 0 let parenDepth = 0 let start = 0 for (let i = 0; i < input.length; i++) { const char = input[i] if (quote) { if (escape) { escape = false } else if (char === '\\') { escape = true } else if (char === quote) { quote = '' } continue } if (char === '"' || char === "'" || char === '`') { quote = char continue } if (char === '{') curlyDepth++ else if (char === '}' && curlyDepth) curlyDepth-- else if (char === '[') arrayDepth++ else if (char === ']' && arrayDepth) arrayDepth-- else if (char === '(') parenDepth++ else if (char === ')' && parenDepth) parenDepth-- if ( char === delimiter && curlyDepth === 0 && arrayDepth === 0 && parenDepth === 0 ) { values.push(input.slice(start, i)) start = i + 1 } } values.push(input.slice(start)) return values } function parseArrayString(value) { const inner = value.match(ARRAY_REGEX) if (!inner || typeof inner[1] === 'undefined') { return value } return splitTopLevel(inner[1].replace(TRAILING_COMMAS, '')).map((x) => { return convert(x.trim()) }) } function parseArrayStringByBalance(value) { const inner = value.match(ARRAY_REGEX) if (!inner || !inner[1]) { return value } const composeValue = inner[1] .replace(TRAILING_COMMAS, '') .split(',') .reduce((acc, curr) => { const open = (curr.match(/{/g) || []).length const close = (curr.match(/}/g) || []).length const arrayOpen = (curr.match(/\[/g) || []).length const arrayClose = (curr.match(/\]/g) || []).length acc.objectOpenCount += open acc.objectCloseCount += close acc.arrayOpenCount += arrayOpen acc.arrayCloseCount += arrayClose const sealObject = acc.objectOpenCount > 0 && acc.objectOpenCount === acc.objectCloseCount const sealArray = acc.arrayOpenCount > 0 && acc.arrayOpenCount === acc.arrayCloseCount if (acc.objectOpenCount > 0 && !sealObject || acc.arrayOpenCount > 0 && !sealArray) { acc.next += curr + ',' return acc } if (sealObject || sealArray) { if (sealObject) { acc.objectOpenCount = 0 acc.objectCloseCount = 0 } if (sealArray) { acc.arrayOpenCount = 0 acc.arrayCloseCount = 0 } acc.values.push(acc.next + curr) acc.next = '' return acc } acc.values.push(curr) return acc }, { next: '', values: [], arrayOpenCount: 0, arrayCloseCount: 0, objectOpenCount: 0, objectCloseCount: 0, }) return composeValue.values.map((x) => { return convert(x.trim()) }) } function hasQuotedArrayDelimiter(value) { let quote = '' let escape = false for (let i = 0; i < value.length; i++) { const char = value[i] if (quote) { if (escape) { escape = false } else if (char === '\\') { escape = true } else if (char === quote) { quote = '' } else if (char === ',' || char === '[' || char === ']') { return true } continue } if (char === '"' || char === "'" || char === '`') { quote = char } } return false } function shouldUseArrayFallback(value, parsed) { if (!Array.isArray(parsed) || !hasQuotedArrayDelimiter(value)) { return false } const inner = value.match(ARRAY_REGEX) if (!inner || typeof inner[1] === 'undefined') { return false } const topLevelValues = splitTopLevel(inner[1].replace(TRAILING_COMMAS, '')) return topLevelValues.every((x) => { const item = x.trim() return !isArrayLike(item) && !isObjectLike(item) && item.indexOf('\n') === -1 }) } function normalizeLooseBooleanLiterals(value) { if (!isArrayLike(value) && !isObjectLike(value)) { return value } const booleanLiteralPattern = /\b(?:true|false)\b/ig let booleanLiteral let hasNonLowercaseBoolean = false while ((booleanLiteral = booleanLiteralPattern.exec(value))) { if (booleanLiteral[0] !== booleanLiteral[0].toLowerCase()) { hasNonLowercaseBoolean = true break } } if (!hasNonLowercaseBoolean) { return value } let result = '' let quote = '' let escape = false let previousSignificant = '' for (let i = 0; i < value.length; i++) { const char = value[i] if (quote) { result += char if (escape) { escape = false } else if (char === '\\') { escape = true } else if (char === quote) { quote = '' } continue } if (char === '"' || char === "'" || char === '`') { quote = char result += char continue } const literal = value.slice(i).match(/^(true|false)\b/i) const nextChar = literal ? value[i + literal[0].length] : '' const hasLiteralTerminator = !nextChar || /[\s,\]}]/.test(nextChar) if ( literal && hasLiteralTerminator && (previousSignificant === ':' || previousSignificant === '[' || previousSignificant === ',') ) { result += literal[0].toLowerCase() i += literal[0].length - 1 previousSignificant = literal[0].slice(-1).toLowerCase() continue } result += char if (!/\s/.test(char)) { previousSignificant = char } } return result } function cleanObjectString(value) { return value .replace(/\n/g, '\\n') // .replace(/__INNER_COLON__/g, ':') .replace(/_COMMA_/g, ',') // .replace(/__EQ__/g, '=') .replace(/__inner_dbl_quote__/g, '\\"') .replace(/__OPEN_JSON__OBJECT__/g, '{[{') .replace(/__CLOSE_JSON__OBJECT__/g, '}]}') } function hasLeadingZeroNumberShape(value) { return /^[-+]?0\d/.test(value) } // const CONFLICTING_CURLIES_IN_SINGLE = replaceInnerCharPattern("}", `'`, `'`, 2) // const CONFLICTING_CURLIES_IN_DOUBLE = replaceInnerCharPattern("}", `\\[`, `\\]`, 1) function formatObj(value) { // const hasConflictingInnerColonInSingle = CONFLICTING_INNER_COLONS_SINGLE.test(value) // const hasConflictingInnerColonInDouble = CONFLICTING_INNER_COLONS_DOUBLE.test(value) const hasConflictingInnerJsonInSingle = CONFLICTING_INNER_JSON_IN_SINGLE.test(value) const hasConflictingInnerJsonInDouble = CONFLICTING_INNER_JSON_IN_DOUBLE.test(value) // const hasConflictingCurliesInSingle = CONFLICTING_CURLIES_IN_SINGLE.test(value) // const hasConflictingCurliesInDouble = CONFLICTING_CURLIES_IN_DOUBLE.test(value) const hasConflictingJsonClose = CONFLICTING_INNER_JSON_CLOSE_IN_DOUBLE.test(value) const hasConflictingJsonCloseSingle = CONFLICTING_INNER_JSON_CLOSE_IN_SINGLE.test(value) if (hasConflictingInnerJsonInDouble) { value = value.replace(CONFLICTING_INNER_JSON_IN_DOUBLE, '__OPEN_JSON__OBJECT__') } if (hasConflictingInnerJsonInSingle) { value = value.replace(CONFLICTING_INNER_JSON_IN_SINGLE, '__OPEN_JSON__OBJECT__') } if (hasConflictingJsonClose) { value = value.replace(CONFLICTING_INNER_JSON_CLOSE_IN_DOUBLE, '__CLOSE_JSON__OBJECT__') } if (hasConflictingJsonCloseSingle) { value = value.replace(CONFLICTING_INNER_JSON_CLOSE_IN_SINGLE, '__CLOSE_JSON__OBJECT__') } // if (hasConflictingInnerColonInSingle) { // value = value.replace(CONFLICTING_INNER_COLONS_SINGLE, '__INNER_COLON__') // } // if (hasConflictingInnerColonInDouble) { // value = value.replace(CONFLICTING_INNER_COLONS_DOUBLE, '__INNER_COLON__') // } // console.log('CONFLICTING_INNER_COLONS_SINGLE', CONFLICTING_INNER_COLONS_SINGLE) // console.log('CONFLICTING_INNER_COLONS_DOUBLE', CONFLICTING_INNER_COLONS_DOUBLE) /* console.log('BEFORE', value) console.log('───────────────────────────────') /** */ const inner = replaceDoubleQuotes( replaceCommas( value.replace(/^{|}$/g, '') ) ) const kvs = inner.split(',').map((x) => x.trim()) /* console.log('inner', inner) console.log('kvs', kvs) /** */ const splitter = ':' const newObjectString = kvs.reduce((acc, c, i) => { const curr = c.trim() /* console.log('curr', curr) /** */ const comma = (kvs.length - 1 === i) ? '' : ',\n' if (isObjectLike(curr)) { acc += `{` + formatObj(curr) + `}${comma}` return acc } // const splitter = curr.match(/^([A-Z-a-z0-9]*)__INNER_COLON__/) ? '__INNER_COLON__' : ':' const parts = curr.split(splitter) /* console.log('splitter', splitter) console.log(`parts ${parts.length}`, parts) /** */ // if (parts.length !== 2) { // return acc // } /* Pop off key */ const k = parts.shift().trim() // console.log('k', k) /* Join the rest of the parts */ const value = parts.join(splitter).trim() const v = cleanObjectString(value) /* console.log('New value', value) console.log('Cleaned value', v) /** */ // console.log('v', v) acc += `${ensureWrap(k, '"')}: ${ensureWrap(v, '"')}${comma}` // console.log('parts', parts) return acc }, '') /* console.log('newObjectString', newObjectString) /** */ return newObjectString } function convert(value) { /* console.log('convert value', value) console.log('convert type', typeof value) /** */ if (/^false$/i.test(value)) { return false } if (/^true$/i.test(value)) { return true } if (value === 'null') { return null } // Empty or whitespace-only strings should stay as-is, not become 0 if (value === '' || /^\s+$/.test(value)) { return value } if (hasLeadingZeroNumberShape(value)) { return value } const isNumber = Number(value) if (typeof isNumber === 'number' && !isNaN(isNumber)) { return isNumber } // remove double escaped quotes if (value.indexOf('\\"') > -1) { value = value.replace(/\\"/g, '\"') // .replace(/\\\"/g, '\"') } /* Skip loose JSON parser when value has no JSON structural characters. parseJSON only changes plain strings if they're whitespace-padded booleans, which the regex checks above already handled for trimmed values. */ if (!HAS_JSON_TOKEN.test(value)) { return value } let cleaner try { /* First try simple parse */ return JSON.parse(value) } catch (e) { /* Then try deeper parse */ try { /* Remove trailing commas */ // console.log('value', value) if (isArrayLike(value) || isObjectLike(value)) { /* Clean JSON lines */ const cleanLines = value .split('\n') .map((line) => { return line .replace(TRIM_INNER_TRAILING_OBJECT_COMMA, '}') // .replace(TRAILING_OBJECT_COMMAS, '}$1') .replace(TRAILING_ARRAY_COMMAS, ']') }).join('\n') // console.log('cleanLines', cleanLines) // console.log('isArrayLike', value) /* Trim trailing commas in arrays ,] */ value = cleanLines /* Trim trailing commas in object ,}, */ .replace(TRAILING_OBJECT_COMMAS, '}') /* Trim trailing commas in arrays ,] */ .replace(TRAILING_ARRAY_COMMAS_GLOBAL, ']') // trim inner newlines // .replace(/(,*[^\S]*)*?},\n*\s*/g, '},') cleaner = value // console.log('cleaner', value) } // console.log('value', value) value = normalizeLooseBooleanLiterals(value) const val = parseJSON(value) // last attempt to format an object like { one: two } if (isArrayLike(value) && shouldUseArrayFallback(value, val)) { return parseArrayString(value) } // console.log('Do it', val) return val } catch (err) { // console.log('parse error', err) // console.log('json val', value) /* Convert object looking string into values */ if (isObjectLike(value) && value.indexOf(':') > -1) { // console.log('isObjectLike value', cleaner) const newObjectString = formatObj(cleaner || value) if (!newObjectString) { throw new Error('Could not parse object') } // console.log('newObjectString', newObjectString) const objToTry =`{ ${newObjectString.replace(/,$/, '')} }` /* console.log('objToTry', objToTry) /** */ return parseJSON(objToTry) } /* Convert array looking string into values */ if (typeof value === 'string' && ARRAY_REGEX.test(value)) { return parseArrayStringByBalance(value) } } /* Fix fallthrough strings remove surrounding strings if (value.startsWith('"') && value.endsWith('"')) { return value.replace(/^"|"$/g, '') } if (value.startsWith("'") && value.endsWith("'")) { return value.replace(/^'|'$/g, '') } */ } return value } module.exports = { convert, isObjectLike, isArrayLike, parseJSON, COMMA }