// Package: @slugkit/sdk
// Version: (not recorded)
// SlugKit SDK for JavaScript/TypeScript applications
// Source listing: 556 lines (478 loc) • 15.3 kB, text/typescript
// Pattern Parser for SlugKit patterns
// Based on the EBNF grammar from the README and C++ implementation
import {
CompareOperator,
NumberBase,
SizeLimit,
Selector,
NumberGen,
SpecialCharGen,
EmojiGen,
PatternElement,
GlobalSettings,
ParsedPattern
} from "./parser-types";
/**
 * Recursive-descent parser for SlugKit pattern strings.
 *
 * A pattern is arbitrary text interleaved with `{...}` placeholders and
 * optionally terminated by a single `[...]` global-settings section. The
 * parser walks the input one UTF-16 code unit at a time and throws an `Error`
 * describing the first problem it encounters.
 *
 * An instance is bound to one input string; `parse()` may be called on it
 * repeatedly and yields an equal result each time.
 */
export class PatternParser {
  // Character classes are hoisted so they are compiled once, not per character.
  // None of them carries the `g`/`y` flag, so `test()` is stateless.
  private static readonly WHITESPACE = /\s/;
  private static readonly DIGIT = /\d/;
  private static readonly IDENT_START = /[a-zA-Z_]/;
  private static readonly IDENT_PART = /[a-zA-Z0-9_]/;
  private static readonly COMPARISON_START = /[<>=!]/;
  private static readonly SHORT_BASE = /[dxr]/;
  /** Anything except the characters that terminate an option value. */
  private static readonly OPTION_VALUE_CHAR = /[^,\]}\s]/;
  /** Characters that may follow a backslash in arbitrary text. */
  private static readonly ESCAPABLE: readonly string[] = ['{', '}', '\\'];

  /** Index of the next unread character of `input`. */
  private pos = 0;
  private input: string;

  constructor(input: string) {
    this.input = input;
  }

  // ---------------------------------------------------------------------
  // Low-level cursor helpers
  // ---------------------------------------------------------------------

  /** True once every character of the input has been consumed. */
  private isEof(): boolean {
    return this.pos >= this.input.length;
  }

  /** Returns the next character without consuming it, or `null` at end of input. */
  private peek(): string | null {
    return this.isEof() ? null : this.input.charAt(this.pos);
  }

  /** Consumes and returns the next character, or returns `null` (without moving) at end of input. */
  private next(): string | null {
    const char = this.peek();
    if (char !== null) {
      this.pos++;
    }
    return char;
  }

  /** True when the next character equals `expected`; never consumes. */
  private match(expected: string): boolean {
    return this.peek() === expected;
  }

  /** True when there is a next character and it satisfies `pattern`; never consumes. */
  private peekMatches(pattern: RegExp): boolean {
    const char = this.peek();
    return char !== null && pattern.test(char);
  }

  /** Consumes the next character only if it equals `expected`; reports whether it did. */
  private consumeIf(expected: string): boolean {
    if (!this.match(expected)) {
      return false;
    }
    this.pos++;
    return true;
  }

  /**
   * Consumes one character and requires it to be `expected`.
   *
   * @throws Error naming the offending character (or "end of input") and the
   *   position at which `expected` was required.
   */
  private expect(expected: string): void {
    // Capture the position first: next() does not advance at end of input, so
    // deriving it afterwards as `pos - 1` would point at the wrong character.
    const at = this.pos;
    const char = this.next();
    if (char !== expected) {
      const found = char === null ? 'end of input' : `'${char}'`;
      throw new Error(`Expected '${expected}', got ${found} at position ${at}`);
    }
  }

  /** Consumes the longest run of characters satisfying `pattern` and returns it (possibly empty). */
  private takeWhile(pattern: RegExp): string {
    const start = this.pos;
    while (this.peekMatches(pattern)) {
      this.pos++;
    }
    return this.input.slice(start, this.pos);
  }

  /** Consumes any run of whitespace at the cursor. */
  private skipWhitespace(): void {
    while (this.peekMatches(PatternParser.WHITESPACE)) {
      this.pos++;
    }
  }

  // ---------------------------------------------------------------------
  // Lexical pieces
  // ---------------------------------------------------------------------

  /**
   * Parses a non-empty run of decimal digits.
   *
   * @throws Error when the cursor is not on a digit.
   */
  private parseNumber(): number {
    const digits = this.takeWhile(PatternParser.DIGIT);
    if (digits === '') {
      throw new Error(`Expected number at position ${this.pos}`);
    }
    return parseInt(digits, 10);
  }

  /**
   * Parses an identifier: a letter or underscore followed by letters, digits
   * or underscores.
   *
   * @throws Error when the cursor is not on a valid first character.
   */
  private parseIdentifier(): string {
    if (!this.peekMatches(PatternParser.IDENT_START)) {
      throw new Error(`Expected identifier start at position ${this.pos}`);
    }
    // Every valid start character is also a valid continuation character, so
    // a single scan covers the whole identifier.
    return this.takeWhile(PatternParser.IDENT_PART);
  }

  /**
   * Parses a tag name (letters, digits, underscores; may start with a digit).
   *
   * @throws Error when no tag characters are present.
   */
  private parseTag(): string {
    const tag = this.takeWhile(PatternParser.IDENT_PART);
    if (tag === '') {
      throw new Error(`Empty tag at position ${this.pos}`);
    }
    return tag;
  }

  /**
   * Parses a sequence of `+tag` / `-tag` entries, optionally separated by
   * whitespace, and sorts them into include and exclude lists.
   */
  private parseTags(): { include: string[]; exclude: string[] } {
    const include: string[] = [];
    const exclude: string[] = [];
    for (;;) {
      let bucket: string[];
      if (this.consumeIf('+')) {
        bucket = include;
      } else if (this.consumeIf('-')) {
        bucket = exclude;
      } else {
        break;
      }
      bucket.push(this.parseTag());
      this.skipWhitespace(); // Allow optional whitespace between tags
    }
    return { include, exclude };
  }

  /** Parses a comparison operator followed by optional whitespace and a number. */
  private parseSizeLimit(): SizeLimit {
    const op = this.parseComparisonOp();
    this.skipWhitespace();
    const value = this.parseNumber();
    return { op, value };
  }

  /**
   * Parses one of `<`, `<=`, `>`, `>=`, `==`, `!=`.
   *
   * @throws Error when the cursor is not on an operator, or when `=` / `!` is
   *   not followed by `=`.
   */
  private parseComparisonOp(): CompareOperator {
    switch (this.peek()) {
      case '<':
        this.next();
        return this.consumeIf('=') ? CompareOperator.Le : CompareOperator.Lt;
      case '>':
        this.next();
        return this.consumeIf('=') ? CompareOperator.Ge : CompareOperator.Gt;
      case '=':
        this.next();
        if (this.consumeIf('=')) {
          return CompareOperator.Eq;
        }
        throw new Error(`Expected '==' at position ${this.pos - 1}`);
      case '!':
        this.next();
        if (this.consumeIf('=')) {
          return CompareOperator.Ne;
        }
        throw new Error(`Expected '!=' at position ${this.pos - 1}`);
      default:
        throw new Error(`Expected comparison operator at position ${this.pos}`);
    }
  }

  /**
   * Parses whitespace-separated `key=value` options until something that
   * cannot start a key is reached.
   *
   * A value runs up to (not including) the first whitespace, `,`, `]` or `}`
   * and may therefore be empty. A repeated key keeps its last value.
   */
  private parseOptions(): Record<string, string> {
    const options: Record<string, string> = {};
    for (;;) {
      this.skipWhitespace();
      if (!this.peekMatches(PatternParser.IDENT_START)) {
        break;
      }
      const key = this.parseIdentifier();
      this.expect('=');
      options[key] = this.takeWhile(PatternParser.OPTION_VALUE_CHAR);
    }
    return options;
  }

  /**
   * Parses the single-letter number base that may directly follow the length
   * in a number generator (`d`, `x` or `r`).
   *
   * @throws Error for any other character, including end of input.
   */
  private parseShortNumberBase(): NumberBase {
    const char = this.next();
    switch (char) {
      case 'd':
        return NumberBase.Dec;
      case 'x':
        return NumberBase.Hex;
      case 'r':
        return NumberBase.RomanLower;
      default:
        throw new Error(`Invalid short number base: ${char}`);
    }
  }

  /**
   * Parses a spelled-out number base (`dec`, `hex`, `HEX`, `roman`, `ROMAN`).
   *
   * @throws Error when the identifier is not one of the known bases.
   */
  private parseNumberBase(): NumberBase {
    const base = this.parseIdentifier();
    switch (base) {
      case 'dec':
        return NumberBase.Dec;
      case 'hex':
        return NumberBase.Hex;
      case 'HEX':
        return NumberBase.HexUpper;
      case 'roman':
        return NumberBase.RomanLower;
      case 'ROMAN':
        return NumberBase.Roman;
      default:
        throw new Error(`Invalid number base: ${base}`);
    }
  }

  // ---------------------------------------------------------------------
  // Shared optional sections (used by selectors, emoji and global settings)
  // ---------------------------------------------------------------------

  /** Parses tags plus trailing whitespace if the cursor is on `+` or `-`; otherwise consumes nothing. */
  private parseOptionalTags(): { include: string[]; exclude: string[] } {
    if (!this.match('+') && !this.match('-')) {
      return { include: [], exclude: [] };
    }
    const tags = this.parseTags();
    this.skipWhitespace();
    return tags;
  }

  /** Parses a size limit plus trailing whitespace if the cursor is on `<`, `>`, `=` or `!`; otherwise consumes nothing. */
  private parseOptionalSizeLimit(): SizeLimit | undefined {
    if (!this.peekMatches(PatternParser.COMPARISON_START)) {
      return undefined;
    }
    const limit = this.parseSizeLimit();
    this.skipWhitespace();
    return limit;
  }

  /** Parses options if the cursor is on an identifier start; otherwise returns an empty record. */
  private parseOptionalOptions(): Record<string, string> {
    return this.peekMatches(PatternParser.IDENT_START) ? this.parseOptions() : {};
  }

  // ---------------------------------------------------------------------
  // Placeholder contents
  // ---------------------------------------------------------------------

  /**
   * Parses a dictionary selector: `kind[@language][:[tags][size limit][options]]`.
   * The cursor must be on the first character of `kind`.
   */
  private parseSelector(): Selector {
    const kind = this.parseIdentifier();
    const language = this.consumeIf('@') ? this.parseIdentifier() : undefined;

    let includeTags: string[] = [];
    let excludeTags: string[] = [];
    let sizeLimit: SizeLimit | undefined;
    let options: Record<string, string> = {};

    if (this.consumeIf(':')) {
      this.skipWhitespace();
      const tags = this.parseOptionalTags();
      includeTags = tags.include;
      excludeTags = tags.exclude;
      sizeLimit = this.parseOptionalSizeLimit();
      options = this.parseOptionalOptions();
    }

    return { kind, language, includeTags, excludeTags, sizeLimit, options };
  }

  /**
   * Parses the contents of one `{...}` placeholder (without the braces),
   * dispatching on the leading identifier: `number`, `special` and `emoji`
   * select the dedicated generators, anything else is a selector.
   */
  private parseElement(): PatternElement {
    const start = this.pos;
    const identifier = this.parseIdentifier();
    switch (identifier) {
      case 'number':
        return this.parseNumberGen();
      case 'special':
        return this.parseSpecialCharGen();
      case 'emoji':
        return this.parseEmojiGen();
      default:
        // Not a keyword: rewind so parseSelector re-reads it as the kind.
        this.pos = start;
        return this.parseSelector();
    }
  }

  /**
   * Parses the remainder of an emoji generator; the `emoji` keyword has
   * already been consumed. Accepts `:[tags][options]`.
   *
   * @throws Error when a language (`@...`) or a size constraint is given.
   */
  private parseEmojiGen(): EmojiGen {
    if (this.match('@')) {
      throw new Error(`Emoji generator does not support language at position ${this.pos}`);
    }

    let includeTags: string[] = [];
    let excludeTags: string[] = [];
    let options: Record<string, string> = {};

    if (this.consumeIf(':')) {
      this.skipWhitespace();
      const tags = this.parseOptionalTags();
      includeTags = tags.include;
      excludeTags = tags.exclude;
      if (this.peekMatches(PatternParser.COMPARISON_START)) {
        throw new Error(`Emoji generator does not support size constraints at position ${this.pos}`);
      }
      options = this.parseOptionalOptions();
    }

    return { kind: 'emoji', includeTags, excludeTags, options };
  }

  /**
   * Parses the remainder of a number generator; the `number` keyword has
   * already been consumed. Accepts `:length` followed by either a short base
   * letter (`5x`) or `,` and a spelled-out base (`5,hex`). Without a `:`
   * section the result is length 1, decimal.
   *
   * Trailing `key=value` options are syntax-checked and then discarded.
   */
  private parseNumberGen(): NumberGen {
    let maxLength = 1;
    let base = NumberBase.Dec;

    if (this.consumeIf(':')) {
      maxLength = this.parseNumber();
      if (this.peekMatches(PatternParser.SHORT_BASE)) {
        base = this.parseShortNumberBase();
      } else if (this.consumeIf(',')) {
        this.skipWhitespace();
        base = this.parseNumberBase();
      }
      this.skipWhitespace();
      void this.parseOptionalOptions();
    }

    return { maxLength, base };
  }

  /**
   * Parses the remainder of a special-character generator; the `special`
   * keyword has already been consumed. Accepts `:n` (exactly n) or `:min-max`.
   * Without a `:` section both bounds are 1.
   *
   * Trailing `key=value` options are syntax-checked and then discarded.
   *
   * @throws Error when `min` is greater than `max`.
   */
  private parseSpecialCharGen(): SpecialCharGen {
    let minLength = 1;
    let maxLength = 1;

    if (this.consumeIf(':')) {
      minLength = this.parseNumber();
      maxLength = minLength;
      if (this.consumeIf('-')) {
        maxLength = this.parseNumber();
        if (minLength > maxLength) {
          throw new Error(`Invalid range: start (${minLength}) cannot be greater than end (${maxLength})`);
        }
      }
      this.skipWhitespace();
      void this.parseOptionalOptions();
    }

    return { minLength, maxLength };
  }

  /**
   * Parses the contents of the `[...]` global-settings section (without the
   * brackets): `[@language] [tags] [size limit] [options]`.
   */
  private parseGlobalSettings(): GlobalSettings {
    let language: string | undefined;
    if (this.consumeIf('@')) {
      language = this.parseIdentifier();
      this.skipWhitespace();
    }

    const tags = this.parseOptionalTags();
    const sizeLimit = this.parseOptionalSizeLimit();
    this.skipWhitespace();
    const options = this.parseOptionalOptions();

    return {
      language,
      includeTags: tags.include,
      excludeTags: tags.exclude,
      sizeLimit,
      options
    };
  }

  // ---------------------------------------------------------------------
  // Public API
  // ---------------------------------------------------------------------

  /**
   * Parses the whole input.
   *
   * @returns The placeholders in order, the optional global settings, and the
   *   literal text around the placeholders. `textChunks` always has exactly
   *   one more entry than `elements`; chunks are raw slices of the input, so
   *   escape sequences (`\{`, `\}`, `\\`) appear in them unmodified.
   * @throws Error on the first syntax error (unclosed or unmatched brace or
   *   bracket, invalid escape, malformed placeholder, or content after the
   *   global-settings section).
   */
  public parse(): ParsedPattern {
    // Always start from the beginning so a second call on the same instance
    // does not see an exhausted cursor and report the input as plain text.
    this.pos = 0;

    const elements: PatternElement[] = [];
    const textChunks: string[] = [];
    let globalSettings: GlobalSettings | undefined;
    let arbitraryStart = 0;
    let arbitraryEnd = this.input.length;

    while (!this.isEof()) {
      if (this.match('{')) {
        // Literal text preceding this placeholder.
        textChunks.push(this.input.slice(arbitraryStart, this.pos));
        this.next();
        elements.push(this.parseElement());
        this.expect('}');
        arbitraryStart = this.pos;
      } else if (this.match('[')) {
        // Global settings end the pattern; trailing text stops at the '['.
        arbitraryEnd = this.pos;
        this.next();
        globalSettings = this.parseGlobalSettings();
        this.expect(']');
        this.skipWhitespace();
        if (!this.isEof()) {
          throw new Error(`Unexpected character after global settings at position ${this.pos}`);
        }
        break;
      } else if (this.match('\\')) {
        this.next();
        if (this.isEof()) {
          throw new Error(`Unexpected end of pattern after escape character at position ${this.pos - 1}`);
        }
        const escapedChar = this.next();
        if (escapedChar === null || !PatternParser.ESCAPABLE.includes(escapedChar)) {
          throw new Error(`Invalid escaped character '${escapedChar}' at position ${this.pos - 1}`);
        }
        // The escape is only validated here; it stays in the raw text chunk.
      } else if (this.match('}')) {
        throw new Error(`Unmatched closing brace at position ${this.pos}`);
      } else if (this.match(']')) {
        throw new Error(`Unmatched closing bracket at position ${this.pos}`);
      } else {
        this.next(); // ordinary literal character
      }
    }

    // Trailing literal text (possibly empty) after the last placeholder.
    textChunks.push(this.input.slice(arbitraryStart, arbitraryEnd));

    return { elements, globalSettings, textChunks };
  }

  /**
   * Convenience wrapper: parses `pattern` with a fresh parser instance.
   *
   * @throws Error on the first syntax error in `pattern`.
   */
  public static parse(pattern: string): ParsedPattern {
    return new PatternParser(pattern).parse();
  }

  /**
   * Validates a pattern string without throwing exceptions.
   * This is a non-throwing version of the parse() method.
   *
   * @param pattern - The pattern string to validate
   * @returns true if the pattern parses successfully, false otherwise
   *
   * @example
   * ```typescript
   * // Valid patterns
   * PatternParser.validate('{noun}') // true
   * PatternParser.validate('{noun@en}') // true
   * PatternParser.validate('{number:5,hex}') // true
   *
   * // Invalid patterns
   * PatternParser.validate('{noun') // false - unclosed brace
   * PatternParser.validate('{noun:>abc}') // false - non-numeric size limit
   * PatternParser.validate('{number:5,invalid}') // false - invalid base
   * ```
   */
  public static validate(pattern: string): boolean {
    try {
      PatternParser.parse(pattern);
      return true;
    } catch {
      return false;
    }
  }
}