ansi-stream-parser
Version:
A parser for ANSI escape sequence text (SGR) that works on streaming/partial input
456 lines (394 loc) • 12.9 kB
text/typescript
/**
 * Color payload carried by `set-fg-color` / `set-bg-color` tokens, tagged by
 * the palette it refers to.
 *
 * - `"16"`: palette index derived from a basic SGR color code. The lookup in
 *   this file maps codes 30-37 / 40-47 to 0-7 and the bright codes 90-97 /
 *   100-107 to 8-15.
 * - `"256"`: the number that follows `38;5` / `48;5`. It is passed through as
 *   parsed (no range check); `null` when no valid integer follows.
 * - `"rgb"`: the three numbers that follow `38;2` / `48;2`, passed through as
 *   parsed (no range check).
 *
 * NOTE(review): the tokenizer visible in this file reports missing/incomplete
 * RGB values as an `unknown` token rather than `rgb: null` — confirm whether
 * the `null` variant is still produced anywhere.
 */
export type RawColor =
| { type: "16"; code: number }
| { type: "256"; code: number | null } // null = missing color number
| { type: "rgb"; rgb: [number, number, number] | null }; // null = missing/incomplete RGB values
/**
 * One unit of tokenizer output, discriminated by `type`.
 *
 * A single SGR sequence can yield several tokens (one per parameter), and a
 * single parameter can yield more than one token (code 22 disables both bold
 * and dim).
 */
export type Token =
// Run of plain text between escape sequences
| { type: "text"; text: string }
// Color changes (basic 16-color codes and the extended 38/48 forms)
| { type: "set-fg-color"; color: RawColor }
| { type: "set-bg-color"; color: RawColor }
// Color resets (codes 39 / 49)
| { type: "reset-fg-color" }
| { type: "reset-bg-color" }
// Full reset (code 0, or an SGR sequence with no parameters at all)
| { type: "reset-all" }
// Text attributes; `enable` is true for the "set" code and false for the "remove" code
| { type: "bold"; enable: boolean }
| { type: "dim"; enable: boolean }
| { type: "italic"; enable: boolean }
| { type: "underline"; enable: boolean }
| { type: "blink"; enable: boolean }
| { type: "reverse"; enable: boolean }
| { type: "hidden"; enable: boolean }
| { type: "strikethrough"; enable: boolean }
// Anything that was recognised as an escape sequence but not understood:
// non-SGR CSI sequences, sequences cut short by an unexpected character, and
// malformed extended-color parameters. `sequence` holds the raw text.
| { type: "unknown"; sequence: string };
/**
 * Turns one numeric SGR parameter into the token(s) it stands for.
 * The code is passed in so a single handler can serve a whole range of codes.
 */
type TokenHandler = (code: number) => Token | Token[];
/**
 * Builds the table that maps a simple (single-parameter) SGR code to a handler
 * producing its token(s).
 *
 * Codes that are absent from the table (for example 6, 26, or the extended
 * color introducers 38/48) have no entry; callers decide how to treat them.
 * Color handlers derive the palette index from the code they are called with.
 */
function createSGRLookup(): Record<number, TokenHandler> {
  const lookup: Record<number, TokenHandler> = {};

  // Registers the same handler for eight consecutive codes starting at `firstCode`.
  const registerColorRange = (firstCode: number, handler: TokenHandler): void => {
    for (let offset = 0; offset < 8; offset++) {
      lookup[firstCode + offset] = handler;
    }
  };

  // 16-color foreground (30-37) -> palette 0-7
  registerColorRange(30, (code: number): Token => {
    return { type: "set-fg-color", color: { type: "16", code: code - 30 } };
  });
  // 16-color background (40-47) -> palette 0-7
  registerColorRange(40, (code: number): Token => {
    return { type: "set-bg-color", color: { type: "16", code: code - 40 } };
  });
  // Bright 16-color foreground (90-97) -> palette 8-15
  registerColorRange(90, (code: number): Token => {
    return { type: "set-fg-color", color: { type: "16", code: code - 90 + 8 } };
  });
  // Bright 16-color background (100-107) -> palette 8-15
  registerColorRange(100, (code: number): Token => {
    return { type: "set-bg-color", color: { type: "16", code: code - 100 + 8 } };
  });

  // Resets
  lookup[0] = () => ({ type: "reset-all" });
  lookup[39] = () => ({ type: "reset-fg-color" });
  lookup[49] = () => ({ type: "reset-bg-color" });

  // Text attributes
  lookup[1] = () => ({ type: "bold", enable: true });
  lookup[2] = () => ({ type: "dim", enable: true });
  lookup[3] = () => ({ type: "italic", enable: true });
  lookup[4] = () => ({ type: "underline", enable: true });
  lookup[5] = () => ({ type: "blink", enable: true });
  lookup[7] = () => ({ type: "reverse", enable: true });
  lookup[8] = () => ({ type: "hidden", enable: true });
  lookup[9] = () => ({ type: "strikethrough", enable: true });

  // Remove text attributes
  lookup[21] = () => ({ type: "bold", enable: false });
  // 22 clears both intensity attributes, so it yields two tokens.
  lookup[22] = () => [
    { type: "bold", enable: false },
    { type: "dim", enable: false },
  ];
  lookup[23] = () => ({ type: "italic", enable: false });
  lookup[24] = () => ({ type: "underline", enable: false });
  lookup[25] = () => ({ type: "blink", enable: false });
  lookup[27] = () => ({ type: "reverse", enable: false });
  lookup[28] = () => ({ type: "hidden", enable: false });
  lookup[29] = () => ({ type: "strikethrough", enable: false });

  return lookup;
}
// Built once when the module is evaluated; consulted for every simple SGR
// parameter, so the table is not rebuilt per tokenizer or per sequence.
const SGR_LOOKUP = createSGRLookup();
/**
 * UTF-16 code units the tokenizer compares against via `charCodeAt`, so the
 * scanning loops work on numbers instead of one-character strings.
 */
const CHAR_CODES = {
ESC: 0x1b, // 27 - Escape character
LEFT_BRACKET: 0x5b, // 91 - '[' (follows ESC to open a CSI sequence)
SEMICOLON: 0x3b, // 59 - ';' (parameter separator)
AT: 0x40, // 64 - '@' (lowest code accepted as a sequence terminator)
TILDE: 0x7e, // 126 - '~' (highest code accepted as a sequence terminator)
LOWER_M: 0x6d, // 109 - 'm' (terminator that marks an SGR sequence)
MINUS: 0x2d, // 45 - '-'
DIGIT_0: 0x30, // 48 - '0'
DIGIT_9: 0x39, // 57 - '9'
} as const;
/**
 * Streaming tokenizer handle returned by `createTokenizer`.
 *
 * - `push` parses the next chunk of input (prefixed by whatever an earlier
 *   call held back) and returns the tokens that are complete so far. An
 *   escape sequence that is still unterminated at the end of the chunk is
 *   held back and completed by a later `push`.
 * - `reset` discards any held-back partial sequence.
 */
export type Tokenizer = {
push(input: string): Token[];
reset(): void;
};
/**
 * Reports whether a character code ends a CSI sequence, i.e. lies in the
 * inclusive range '@' (0x40) through '~' (0x7e).
 */
const isTerminatorCode = (charCode: number): boolean =>
  CHAR_CODES.AT <= charCode && charCode <= CHAR_CODES.TILDE;
/**
 * Reports whether a character code may appear in the parameter section of a
 * sequence as this tokenizer understands it: a decimal digit, ';' or '-'.
 */
const isParameterChar = (charCode: number): boolean => {
  if (charCode === CHAR_CODES.SEMICOLON || charCode === CHAR_CODES.MINUS) {
    return true;
  }
  return charCode >= CHAR_CODES.DIGIT_0 && charCode <= CHAR_CODES.DIGIT_9;
};
/**
 * Scans forward from `start` for the next ';'.
 *
 * @param str - text to scan
 * @param start - index at which scanning begins
 * @param maxEnd - exclusive upper bound for the scan; defaults to `str.length`
 * @returns the index of the first ';' in `[start, bound)`, or the bound when
 *   there is none. When `start` is already at or past the bound, `start` is
 *   returned unchanged.
 */
function findNextSemicolon(
  str: string,
  start: number,
  maxEnd?: number,
): number {
  const bound = maxEnd ?? str.length;
  let index = start;
  for (; index < bound; index++) {
    if (str.charCodeAt(index) === CHAR_CODES.SEMICOLON) {
      break;
    }
  }
  return index;
}
/**
 * Reconstructs the text of a CSI sequence: the `ESC [` introducer followed by
 * the characters of `input` in `[start, end)`.
 *
 * Indices outside the string contribute nothing, so an `end` past the end of
 * `input` is harmless.
 */
function buildSequence(input: string, start: number, end: number): string {
  let sequence = "\x1b" + "[";
  for (let index = start; index < end; index++) {
    sequence += input.charAt(index);
  }
  return sequence;
}
/**
 * Parses a base-10 integer from `str[start, end)` without creating a
 * substring.
 *
 * Accepts an optional leading '-' followed by one or more digits. `end` is
 * clamped to the string length.
 *
 * @returns the parsed value (`-0` is normalised to `0`), or `null` when the
 *   range is empty or out of bounds, consists only of '-', or contains any
 *   non-digit character.
 */
export function parseIntFromRange(
  str: string,
  start: number,
  end: number,
): number | null {
  const rangeIsUnusable = start < 0 || start >= end || start >= str.length;
  if (rangeIsUnusable) return null;

  // Never read past the end of the string.
  const limit = Math.min(end, str.length);
  if (start >= limit) return null;

  const isNegative = str.charCodeAt(start) === 45; // 45 = '-'
  let cursor = isNegative ? start + 1 : start;
  // A lone minus sign is not a number.
  if (isNegative && cursor >= limit) return null;

  let magnitude = 0;
  while (cursor < limit) {
    const digit = str.charCodeAt(cursor) - 48; // 48 = '0'
    if (digit < 0 || digit > 9) return null;
    magnitude = magnitude * 10 + digit;
    cursor++;
  }

  // "-0" yields plain 0 rather than negative zero.
  if (!isNegative || magnitude === 0) return magnitude;
  return -magnitude;
}
/**
 * Creates a streaming tokenizer for text that contains ANSI CSI escape
 * sequences, with SGR (`ESC [ … m`) sequences decoded into style tokens.
 *
 * Input may be split at arbitrary points: a sequence that is still
 * unterminated at the end of a `push` is held back and completed by the next
 * `push`. `reset` drops any held-back data.
 *
 * Behaviour worth knowing:
 * - Only digits, ';' and '-' are accepted between `ESC [` and the terminator.
 *   Any other non-terminator character ends the sequence early: what was read
 *   so far is emitted as an `unknown` token and scanning resumes at that
 *   character.
 * - Non-SGR CSI sequences are emitted whole as `unknown` tokens.
 * - An ESC that is not followed by '[' is emitted as literal text.
 * - Simple SGR codes without a table entry and unparseable parameters are
 *   skipped silently.
 */
export function createTokenizer(): Tokenizer {
  // Tail of the previous push() that ended in the middle of an escape sequence.
  let buffer = "";

  /**
   * Decodes the extended color form introduced by 38 (foreground) or 48
   * (background): `38;5;N` (256-color) or `38;2;R;G;B` (RGB).
   *
   * @param code - 38 or 48
   * @param input - text being parsed
   * @param segmentStart - index of the first character of the 38/48 parameter
   * @param segmentEnd - index of the separator that follows the 38/48 parameter
   * @param paramsEnd - index of the sequence terminator ('m')
   * @param tokens - output list the resulting token is appended to
   * @returns index of the separator that ends the parameters consumed here;
   *   parsing of further parameters resumes right after it. The value can lie
   *   beyond `paramsEnd` when expected values are missing.
   */
  const parseExtendedColor = (
    code: number,
    input: string,
    segmentStart: number,
    segmentEnd: number,
    paramsEnd: number,
    tokens: Token[],
  ): number => {
    const type = code === 38 ? "set-fg-color" : "set-bg-color";

    // Mode selector that follows the introducer: 5 = 256-color, 2 = RGB.
    const modeStart = segmentEnd + 1;
    const modeEnd = findNextSemicolon(input, modeStart, paramsEnd);
    const mode = parseIntFromRange(input, modeStart, modeEnd);

    if (mode === 5) {
      const colorStart = modeEnd + 1;
      const colorEnd = findNextSemicolon(input, colorStart, paramsEnd);
      // A missing color number is reported as `code: null`, not as an error.
      tokens.push({
        type,
        color: {
          type: "256",
          code: parseIntFromRange(input, colorStart, colorEnd),
        },
      });
      return colorEnd;
    }

    if (mode === 2) {
      const redStart = modeEnd + 1;
      const redEnd = findNextSemicolon(input, redStart, paramsEnd);
      const red = parseIntFromRange(input, redStart, redEnd);

      const greenEnd = findNextSemicolon(input, redEnd + 1, paramsEnd);
      const green = parseIntFromRange(input, redEnd + 1, greenEnd);

      const blueEnd = findNextSemicolon(input, greenEnd + 1, paramsEnd);
      const blue = parseIntFromRange(input, greenEnd + 1, blueEnd);

      if (red !== null && green !== null && blue !== null) {
        tokens.push({ type, color: { type: "rgb", rgb: [red, green, blue] } });
      } else {
        // Incomplete RGB: report everything from the introducer through the
        // terminator as an unknown sequence.
        tokens.push({
          type: "unknown",
          sequence: buildSequence(input, segmentStart, paramsEnd + 1),
        });
      }
      // Three value slots are consumed either way.
      return blueEnd;
    }

    // Invalid or missing mode: the rest of the parameter list is reported as
    // unknown and no further parameters are processed.
    tokens.push({
      type: "unknown",
      sequence: buildSequence(input, segmentStart, paramsEnd + 1),
    });
    return paramsEnd;
  };

  /**
   * Converts the parameter list of one SGR sequence into tokens.
   *
   * @param input - text being parsed
   * @param startPos - index of the first parameter character (just after `ESC [`)
   * @param endPos - index of the terminating 'm'
   */
  const handleSGR = (
    input: string,
    startPos: number,
    endPos: number,
  ): Token[] => {
    const tokens: Token[] = [];

    // Special case: completely empty params means reset.
    if (startPos === endPos) {
      tokens.push({ type: "reset-all" });
      return tokens;
    }

    let paramStart = startPos;
    while (paramStart <= endPos) {
      const paramEnd = findNextSemicolon(input, paramStart, endPos);
      const code = parseIntFromRange(input, paramStart, paramEnd);

      // Separator after which the next parameter starts.
      let separator = paramEnd;

      if (code === 38 || code === 48) {
        // Extended colors consume the parameters that follow them.
        separator = parseExtendedColor(
          code,
          input,
          paramStart,
          paramEnd,
          endPos,
          tokens,
        );
      } else if (code !== null) {
        const handler = SGR_LOOKUP[code];
        // Codes without a handler are ignored.
        if (handler) {
          const result = handler(code);
          if (Array.isArray(result)) {
            tokens.push(...result);
          } else {
            tokens.push(result);
          }
        }
      }

      paramStart = separator + 1;
    }

    return tokens;
  };

  /**
   * Tokenizes `input`, prefixed by any data held back from earlier calls.
   *
   * @returns the tokens that could be completed; adjacent plain text is
   *   merged into a single `text` token between escape sequences.
   */
  const push = (input: string): Token[] => {
    const tokens: Token[] = [];
    const fullInput = buffer + input;
    buffer = "";

    // Text is accumulated in pieces and joined once when it has to be emitted.
    const textChunks: string[] = [];
    const flushText = (): void => {
      if (textChunks.length > 0) {
        tokens.push({ type: "text", text: textChunks.join("") });
        textChunks.length = 0;
      }
    };

    let i = 0;
    while (i < fullInput.length) {
      if (fullInput.charCodeAt(i) !== CHAR_CODES.ESC) {
        // Plain text: take the whole run up to the next ESC in one slice.
        const textStart = i;
        while (
          i < fullInput.length &&
          fullInput.charCodeAt(i) !== CHAR_CODES.ESC
        ) {
          i++;
        }
        textChunks.push(fullInput.slice(textStart, i));
        continue;
      }

      if (i + 1 >= fullInput.length) {
        // ESC is the last character: cannot tell yet what it starts.
        flushText();
        buffer = fullInput.slice(i);
        break;
      }

      if (fullInput.charCodeAt(i + 1) !== CHAR_CODES.LEFT_BRACKET) {
        // ESC that does not open a CSI sequence is passed through as text.
        // Step over the ESC itself before scanning: the scan stops at ESC, so
        // starting on it would never advance and the outer loop would spin
        // forever.
        const textStart = i;
        i++;
        while (
          i < fullInput.length &&
          fullInput.charCodeAt(i) !== CHAR_CODES.ESC
        ) {
          i++;
        }
        textChunks.push(fullInput.slice(textStart, i));
        continue;
      }

      // CSI sequence: emit pending text first so token order matches input order.
      flushText();

      let j = i + 2;
      let resolved = false;
      while (j < fullInput.length) {
        const currentCode = fullInput.charCodeAt(j);

        if (isTerminatorCode(currentCode)) {
          if (currentCode === CHAR_CODES.LOWER_M) {
            tokens.push(...handleSGR(fullInput, i + 2, j));
          } else {
            // Non-SGR sequence - emit as unknown, terminator included.
            tokens.push({
              type: "unknown",
              sequence: fullInput.slice(i, j + 1),
            });
          }
          i = j + 1;
          resolved = true;
          break;
        }

        if (!isParameterChar(currentCode)) {
          // Neither a parameter character nor a terminator: give up on the
          // sequence, emit what was read (without the offending character)
          // and continue scanning from that character.
          tokens.push({ type: "unknown", sequence: fullInput.slice(i, j) });
          i = j;
          resolved = true;
          break;
        }

        j++;
      }

      if (!resolved) {
        // Ran out of input before a terminator: keep the partial sequence
        // for the next push().
        buffer = fullInput.slice(i);
        break;
      }
    }

    flushText();
    return tokens;
  };

  /** Discards any partial escape sequence held back by earlier pushes. */
  const reset = (): void => {
    buffer = "";
  };

  return {
    push,
    reset,
  };
}