UNPKG

ink

Version:
316 lines 12.3 kB
const bellCharacter = '\u0007'; const escapeCharacter = '\u001B'; const stringTerminatorCharacter = '\u009C'; const csiCharacter = '\u009B'; const oscCharacter = '\u009D'; const dcsCharacter = '\u0090'; const pmCharacter = '\u009E'; const apcCharacter = '\u009F'; const sosCharacter = '\u0098'; const isCsiParameterCharacter = (character) => { const codePoint = character.codePointAt(0); return codePoint !== undefined && codePoint >= 0x30 && codePoint <= 0x3f; }; const isCsiIntermediateCharacter = (character) => { const codePoint = character.codePointAt(0); return codePoint !== undefined && codePoint >= 0x20 && codePoint <= 0x2f; }; const isCsiFinalCharacter = (character) => { const codePoint = character.codePointAt(0); return codePoint !== undefined && codePoint >= 0x40 && codePoint <= 0x7e; }; const isEscapeIntermediateCharacter = (character) => { const codePoint = character.codePointAt(0); return codePoint !== undefined && codePoint >= 0x20 && codePoint <= 0x2f; }; const isEscapeFinalCharacter = (character) => { const codePoint = character.codePointAt(0); return codePoint !== undefined && codePoint >= 0x30 && codePoint <= 0x7e; }; const isC1ControlCharacter = (character) => { const codePoint = character.codePointAt(0); return codePoint !== undefined && codePoint >= 0x80 && codePoint <= 0x9f; }; // Standards references: // ECMA-48 control functions and CSI byte classes: https://ecma-international.org/publications-and-standards/standards/ecma-48/ // xterm CSI parameter/intermediate/final format notes: https://invisible-island.net/xterm/ecma-48-parameter-format.html // xterm/OSC BEL termination behavior: https://davidrg.github.io/ckwin/dev/ctlseqs.html const readCsiSequence = (text, fromIndex) => { let index = fromIndex; while (index < text.length) { const character = text[index]; if (!isCsiParameterCharacter(character)) { break; } index++; } const parameterString = text.slice(fromIndex, index); const intermediateStartIndex = index; while (index < text.length) { const character = text[index]; if (!isCsiIntermediateCharacter(character)) { break; } index++; } const intermediateString = text.slice(intermediateStartIndex, index); const finalCharacter = text[index]; if (finalCharacter === undefined || !isCsiFinalCharacter(finalCharacter)) { return undefined; } return { endIndex: index + 1, parameterString, intermediateString, finalCharacter, }; }; const findControlStringTerminatorIndex = (text, fromIndex, allowBellTerminator) => { for (let index = fromIndex; index < text.length; index++) { const character = text[index]; if (allowBellTerminator && character === bellCharacter) { return index + 1; } if (character === stringTerminatorCharacter) { return index + 1; } if (character === escapeCharacter) { const followingCharacter = text[index + 1]; // Tmux escapes ESC bytes in payload as ESC ESC. if (followingCharacter === escapeCharacter) { index++; continue; } if (followingCharacter === '\\') { return index + 2; } } } return undefined; }; const readEscapeSequence = (text, fromIndex) => { let index = fromIndex; while (index < text.length) { const character = text[index]; if (!isEscapeIntermediateCharacter(character)) { break; } index++; } const intermediateString = text.slice(fromIndex, index); const finalCharacter = text[index]; if (finalCharacter === undefined || !isEscapeFinalCharacter(finalCharacter)) { return undefined; } return { endIndex: index + 1, intermediateString, finalCharacter, }; }; // Centralize control-string rules so ESC and C1 paths do not diverge. const getControlStringFromEscapeIntroducer = (character) => { switch (character) { case ']': { return { type: 'osc', allowBellTerminator: true }; } case 'P': { return { type: 'dcs', allowBellTerminator: false }; } case '^': { return { type: 'pm', allowBellTerminator: false }; } case '_': { return { type: 'apc', allowBellTerminator: false }; } case 'X': { return { type: 'sos', allowBellTerminator: false }; } default: { return undefined; } } }; const getControlStringFromC1Introducer = (character) => { switch (character) { case oscCharacter: { return { type: 'osc', allowBellTerminator: true }; } case dcsCharacter: { return { type: 'dcs', allowBellTerminator: false }; } case pmCharacter: { return { type: 'pm', allowBellTerminator: false }; } case apcCharacter: { return { type: 'apc', allowBellTerminator: false }; } case sosCharacter: { return { type: 'sos', allowBellTerminator: false }; } default: { return undefined; } } }; export const hasAnsiControlCharacters = (text) => { if (text.includes(escapeCharacter)) { return true; } for (const character of text) { if (isC1ControlCharacter(character)) { return true; } } return false; }; const malformedFromIndex = (tokens, text, textStartIndex, fromIndex) => { if (fromIndex > textStartIndex) { tokens.push({ type: 'text', value: text.slice(textStartIndex, fromIndex) }); } // Treat the remainder as invalid so callers can drop it as one unsafe unit. tokens.push({ type: 'invalid', value: text.slice(fromIndex) }); return tokens; }; export const tokenizeAnsi = (text) => { if (!hasAnsiControlCharacters(text)) { return [{ type: 'text', value: text }]; } const tokens = []; let textStartIndex = 0; for (let index = 0; index < text.length;) { const character = text[index]; if (character === undefined) { break; } if (character === escapeCharacter) { const followingCharacter = text[index + 1]; if (followingCharacter === undefined) { return malformedFromIndex(tokens, text, textStartIndex, index); } if (followingCharacter === '[') { const csiSequence = readCsiSequence(text, index + 2); if (csiSequence === undefined) { return malformedFromIndex(tokens, text, textStartIndex, index); } if (index > textStartIndex) { tokens.push({ type: 'text', value: text.slice(textStartIndex, index) }); } tokens.push({ type: 'csi', value: text.slice(index, csiSequence.endIndex), parameterString: csiSequence.parameterString, intermediateString: csiSequence.intermediateString, finalCharacter: csiSequence.finalCharacter, }); index = csiSequence.endIndex; textStartIndex = index; continue; } const escapeControlString = getControlStringFromEscapeIntroducer(followingCharacter); if (escapeControlString !== undefined) { const controlStringTerminatorIndex = findControlStringTerminatorIndex(text, index + 2, escapeControlString.allowBellTerminator); if (controlStringTerminatorIndex === undefined) { return malformedFromIndex(tokens, text, textStartIndex, index); } if (index > textStartIndex) { tokens.push({ type: 'text', value: text.slice(textStartIndex, index) }); } tokens.push({ type: escapeControlString.type, value: text.slice(index, controlStringTerminatorIndex), }); index = controlStringTerminatorIndex; textStartIndex = index; continue; } const escapeSequence = readEscapeSequence(text, index + 1); if (escapeSequence === undefined) { // Incomplete escape sequences with intermediates are malformed control strings. if (isEscapeIntermediateCharacter(followingCharacter)) { return malformedFromIndex(tokens, text, textStartIndex, index); } if (index > textStartIndex) { tokens.push({ type: 'text', value: text.slice(textStartIndex, index) }); } // Ignore lone ESC and continue tokenizing the rest. index++; textStartIndex = index; continue; } if (index > textStartIndex) { tokens.push({ type: 'text', value: text.slice(textStartIndex, index) }); } tokens.push({ type: 'esc', value: text.slice(index, escapeSequence.endIndex), intermediateString: escapeSequence.intermediateString, finalCharacter: escapeSequence.finalCharacter, }); index = escapeSequence.endIndex; textStartIndex = index; continue; } if (character === csiCharacter) { const csiSequence = readCsiSequence(text, index + 1); if (csiSequence === undefined) { return malformedFromIndex(tokens, text, textStartIndex, index); } if (index > textStartIndex) { tokens.push({ type: 'text', value: text.slice(textStartIndex, index) }); } tokens.push({ type: 'csi', value: text.slice(index, csiSequence.endIndex), parameterString: csiSequence.parameterString, intermediateString: csiSequence.intermediateString, finalCharacter: csiSequence.finalCharacter, }); index = csiSequence.endIndex; textStartIndex = index; continue; } const c1ControlString = getControlStringFromC1Introducer(character); if (c1ControlString !== undefined) { const controlStringTerminatorIndex = findControlStringTerminatorIndex(text, index + 1, c1ControlString.allowBellTerminator); if (controlStringTerminatorIndex === undefined) { return malformedFromIndex(tokens, text, textStartIndex, index); } if (index > textStartIndex) { tokens.push({ type: 'text', value: text.slice(textStartIndex, index) }); } tokens.push({ type: c1ControlString.type, value: text.slice(index, controlStringTerminatorIndex), }); index = controlStringTerminatorIndex; textStartIndex = index; continue; } if (character === stringTerminatorCharacter) { if (index > textStartIndex) { tokens.push({ type: 'text', value: text.slice(textStartIndex, index) }); } tokens.push({ type: 'st', value: character }); index++; textStartIndex = index; continue; } // Strip remaining C1 controls as standalone functions. if (isC1ControlCharacter(character)) { if (index > textStartIndex) { tokens.push({ type: 'text', value: text.slice(textStartIndex, index) }); } tokens.push({ type: 'c1', value: character }); index++; textStartIndex = index; continue; } index++; } if (textStartIndex < text.length) { tokens.push({ type: 'text', value: text.slice(textStartIndex) }); } return tokens; }; //# sourceMappingURL=ansi-tokenizer.js.map