UNPKG

@limetech/lime-elements

Version:
88 lines (87 loc) 2.87 kB
/**
 * Tokenize a text fragment for syntax highlighting.
 *
 * Returns the original text as a single plain token when no language is
 * given, when the language is not supported, or when there is nothing to
 * tokenize (empty or non-string input).
 *
 * @param text - the text to tokenize
 * @param language - the language identifier (e.g. "json")
 * @returns array of syntax tokens, each shaped as `{ value, type }`
 */
export function tokenize(text, language) {
    // Non-string input (e.g. `undefined`) takes the same plain-token path
    // as the "no language" case instead of throwing on `text.length`.
    if (!language || typeof text !== 'string' || text.length === 0) {
        return [{ value: text, type: 'plain' }];
    }

    if (language === 'json') {
        return tokenizeJson(text);
    }

    return [{ value: text, type: 'plain' }];
}

// ─── JSON tokenizer ─────────────────────────────────────────────────

/**
 * Patterns for the regex-based JSON tokenizer, as `[pattern, tokenType]`
 * pairs. Order matters: the patterns are joined into one alternation, so
 * earlier entries win when several could match at the same position.
 *
 * Entries must not contain capturing groups of their own; the lookup in
 * `getMatchedTokenType` relies on capture group `i + 1` belonging to
 * entry `i`.
 */
const JSON_PATTERNS = [
    // String literals (keys and values)
    [/"(?:[^"\\]|\\.)*"/, 'string'],
    // Numbers
    [/-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/, 'number'],
    // Booleans
    [/\b(?:true|false)\b/, 'boolean'],
    // Null
    [/\bnull\b/, 'null'],
    // Punctuation
    [/[{}[\]:,]/, 'punctuation'],
];

/**
 * All JSON patterns combined into a single global regex, with each
 * pattern wrapped in its own capture group.
 */
const JSON_REGEX = new RegExp(
    JSON_PATTERNS.map(([re]) => `(${re.source})`).join('|'),
    'g'
);

/**
 * Sticky matcher for "optional whitespace followed by a colon". Being
 * sticky, it only matches exactly at `lastIndex`, which lets the key
 * check run against the original text without slicing a substring.
 */
const JSON_KEY_SUFFIX = /\s*:/y;

/**
 * Tokenize JSON text using `JSON_REGEX`.
 * Handles partial lines (individual lines of a JSON document): anything
 * the patterns do not recognize is emitted as a `plain` token, so
 * concatenating all token values reproduces the input.
 *
 * @param text - the JSON text (or fragment) to tokenize
 * @returns array of syntax tokens, each shaped as `{ value, type }`
 */
function tokenizeJson(text) {
    const tokens = [];
    let lastIndex = 0;

    // The regex is shared module state with the `g` flag; always start
    // scanning from the beginning of the new input.
    JSON_REGEX.lastIndex = 0;

    let match = JSON_REGEX.exec(text);
    while (match !== null) {
        const value = match[0];
        const matchEnd = match.index + value.length;

        // Plain text before this match
        if (match.index > lastIndex) {
            tokens.push({
                value: text.slice(lastIndex, match.index),
                type: 'plain',
            });
        }

        // Determine which capture group matched
        let type = getMatchedTokenType(match);

        // Distinguish JSON keys from string values:
        // a key is a string followed by optional whitespace and a colon.
        if (type === 'string') {
            JSON_KEY_SUFFIX.lastIndex = matchEnd;
            if (JSON_KEY_SUFFIX.test(text)) {
                type = 'key';
            }
        }

        tokens.push({ value, type });

        lastIndex = matchEnd;
        match = JSON_REGEX.exec(text);
    }

    // Remaining plain text
    if (lastIndex < text.length) {
        tokens.push({ value: text.slice(lastIndex), type: 'plain' });
    }

    return tokens;
}

/**
 * Determine which pattern matched by checking capture groups.
 *
 * @param match - the regex match result produced by `JSON_REGEX`
 * @returns the token type of the first pattern whose capture group
 * participated in the match; `'plain'` as a defensive fallback when no
 * group is set
 */
function getMatchedTokenType(match) {
    for (const [index, [, type]] of JSON_PATTERNS.entries()) {
        // Group 0 is the whole match, so pattern `index` is group `index + 1`.
        if (match[index + 1] !== undefined) {
            return type;
        }
    }

    return 'plain';
}