UNPKG

messageformat

Version:

Intl.MessageFormat / Unicode MessageFormat 2 parser, runtime and polyfill

358 lines (357 loc) 10.2 kB
import { parseNameValue, parseUnquotedLiteralValue } from "../cst/names.js";
import { MessageSyntaxError } from "../errors.js";

/** Bidirectional control characters that `ws()` skips together with whitespace. */
const bidiChars = new Set('\u061C\u200E\u200F\u2066\u2067\u2068\u2069');

/** Characters that `ws()` counts as whitespace. */
const whitespaceChars = new Set('\t\n\r \u3000');

//// Parser State ////

// Module-level cursor and input; both are reset at the start of every parseMessage() call.
let pos;
let source;

//// Utilities & Error Wrappers ////

// These indirections allow for the function names to be mangled,
// while keeping the error class name intact.
const MissingSyntax = (pos, expected) =>
  new MessageSyntaxError('missing-syntax', pos, pos + expected.length, expected);
const SyntaxError = (...args) => new MessageSyntaxError(...args);

/**
 * Asserts that `searchString` occurs in the source at the current position.
 *
 * @param {string} searchString - The exact text expected at `pos`.
 * @param {boolean} consume - If true, advance `pos` past the matched text.
 * @throws {MessageSyntaxError} 'missing-syntax' if the text is not found at `pos`.
 */
function expect(searchString, consume) {
  if (source.startsWith(searchString, pos)) {
    if (consume) pos += searchString.length;
  } else {
    throw MissingSyntax(pos, searchString);
  }
}

/**
 * Assigns `value` as an own, enumerable, writable data property of `target`.
 *
 * A plain `target[key] = value` on an ordinary object invokes the inherited
 * `__proto__` setter when `key` is "__proto__": the entry would not become an
 * own property and the object's prototype could be replaced by the parsed
 * value. Defining the property explicitly treats every parsed name alike.
 *
 * @param {object} target - The object receiving the property.
 * @param {string} key - The property name.
 * @param {unknown} value - The property value.
 */
function setOwn(target, key, value) {
  Object.defineProperty(target, key, {
    value,
    enumerable: true,
    writable: true,
    configurable: true
  });
}

/**
 * Parses a message source string into a message data structure.
 *
 * @param {string} source_ - The message source text.
 * @returns {object} Either `{ type: 'message', declarations, pattern }` or,
 *   when a `.match` statement follows the declarations,
 *   `{ type: 'select', declarations, selectors, variants }`.
 * @throws {MessageSyntaxError} On any syntax error found while parsing.
 */
export function parseMessage(source_) {
  pos = 0;
  source = source_;
  const decl = declarations();
  if (source.startsWith('.match', pos)) return selectMessage(decl);
  const quoted = decl.length > 0 || source.startsWith('{{', pos);
  // For an unquoted pattern, the whitespace skipped by declarations()
  // belongs to the pattern text, so rewind to the start.
  if (!quoted && pos > 0) pos = 0;
  const pattern_ = pattern(quoted);
  if (quoted) {
    ws();
    if (pos < source.length) {
      throw SyntaxError('extra-content', pos, source.length);
    }
  }
  return { type: 'message', declarations: decl, pattern: pattern_ };
}

/**
 * Parses a `.match` statement: its selectors followed by all variants up to
 * the end of the source. Expects `pos` to be at the start of `.match`.
 *
 * @param {object[]} declarations - The already-parsed declarations.
 * @returns {object} `{ type: 'select', declarations, selectors, variants }`.
 * @throws {MessageSyntaxError} 'empty-token' if no selector variable is given.
 */
function selectMessage(declarations) {
  pos += 6; // '.match'
  ws(true);
  const selectors = [];
  while (source[pos] === '$') {
    selectors.push(variable());
    ws(true);
  }
  if (selectors.length === 0) throw SyntaxError('empty-token', pos);
  const variants = [];
  while (pos < source.length) {
    variants.push(variant());
    ws();
  }
  return { type: 'select', declarations, selectors, variants };
}

/**
 * Parses one variant: a list of keys followed by a quoted pattern.
 * A key is either the catch-all `*` or a literal, whose value is normalized
 * with `String.prototype.normalize()`.
 *
 * @returns {object} `{ keys, value }`, where `value` is the parsed pattern.
 */
function variant() {
  const keys = [];
  while (pos < source.length) {
    // After the first key, whitespace is required unless the pattern's '{' follows.
    ws(keys.length ? '{' : false);
    const next = source[pos];
    if (next === '{') break;
    if (next === '*') {
      keys.push({ type: '*' });
      pos += 1;
    } else {
      const key = literal(true);
      key.value = key.value.normalize();
      keys.push(key);
    }
  }
  return { keys, value: pattern(true) };
}

/**
 * Parses a pattern: a sequence of text strings and `{...}` placeholders.
 *
 * @param {boolean} quoted - If true, the pattern must be wrapped in `{{` and
 *   `}}`. If false, the pattern runs to the end of the source and a bare `}`
 *   is an error.
 * @returns {Array} The pattern parts, in source order.
 * @throws {MessageSyntaxError} 'missing-syntax' for a missing `{{` or `}}`;
 *   'parse-error' for a `}` in an unquoted pattern.
 */
function pattern(quoted) {
  if (quoted) {
    if (source.startsWith('{{', pos)) pos += 2;
    else throw MissingSyntax(pos, '{{');
  }
  const pattern = [];
  loop: while (pos < source.length) {
    switch (source[pos]) {
      case '{': {
        pattern.push(expression(true));
        break;
      }
      case '}':
        if (!quoted) throw SyntaxError('parse-error', pos);
        break loop;
      default: {
        pattern.push(text());
      }
    }
  }
  if (quoted) {
    if (source.startsWith('}}', pos)) pos += 2;
    else throw MissingSyntax(pos, '}}');
  }
  return pattern;
}

/**
 * Parses the leading `.input` and `.local` declarations, skipping the
 * whitespace before and after each. Stops without consuming at `.match`
 * or at anything that does not start with `.`.
 *
 * @returns {object[]} The parsed declarations, in source order.
 * @throws {MessageSyntaxError} 'parse-error' for any other `.`-prefixed keyword.
 */
function declarations() {
  const declarations = [];
  ws();
  loop: while (source[pos] === '.') {
    // All recognised keywords are six characters long.
    const keyword = source.slice(pos, pos + 6);
    switch (keyword) {
      case '.input':
        declarations.push(inputDeclaration());
        break;
      case '.local':
        declarations.push(localDeclaration());
        break;
      case '.match':
        break loop;
      default:
        throw SyntaxError('parse-error', pos);
    }
    ws();
  }
  return declarations;
}

/**
 * Parses an `.input {...}` declaration. Expects `pos` at the start of `.input`.
 *
 * @returns {object} `{ type: 'input', name, value }`, where `name` is the name
 *   of the expression's variable argument.
 * @throws {MessageSyntaxError} 'bad-input-expression' if the placeholder is
 *   not an expression with a variable argument.
 */
function inputDeclaration() {
  pos += 6; // '.input'
  ws();
  expect('{', false);
  const valueStart = pos;
  const value = expression(false);
  if (value.type === 'expression' && value.arg?.type === 'variable') {
    return { type: 'input', name: value.arg.name, value };
  }
  throw SyntaxError('bad-input-expression', valueStart, pos);
}

/**
 * Parses a `.local $name = {...}` declaration. Expects `pos` at the start of
 * `.local`.
 *
 * @returns {object} `{ type: 'local', name, value }`.
 */
function localDeclaration() {
  pos += 6; // '.local'
  ws(true);
  expect('$', true);
  const name_ = name();
  ws();
  expect('=', true);
  ws();
  expect('{', false);
  const value = expression(false);
  return { type: 'local', name: name_, value };
}

/**
 * Parses a `{...}` placeholder starting at `pos`: an expression with an
 * optional argument and optional `:function` annotation, or (when allowed)
 * a `#open`, `/close` or `#standalone/` markup element. Either form may be
 * followed by `@attributes`.
 *
 * @param {boolean} allowMarkup - Whether markup is permitted at this position.
 * @returns {object} An object with `type: 'expression'` or `type: 'markup'`.
 * @throws {MessageSyntaxError} 'parse-error' for an unexpected sigil, or for
 *   markup that has an argument or appears where it is not allowed;
 *   'empty-token' for a placeholder with neither argument, function nor markup.
 */
function expression(allowMarkup) {
  const start = pos;
  pos += 1; // '{'
  ws();
  const arg = value(false);
  if (arg) ws('}');
  const sigil = source[pos];
  let functionRef;
  let markup;
  switch (sigil) {
    case '@':
    case '}':
      break;
    case ':': {
      pos += 1; // ':'
      functionRef = { type: 'function', name: identifier() };
      const options_ = options();
      if (options_) functionRef.options = options_;
      break;
    }
    case '#':
    case '/': {
      if (arg || !allowMarkup) throw SyntaxError('parse-error', pos);
      pos += 1; // '#' or '/'
      const kind = sigil === '#' ? 'open' : 'close';
      markup = { type: 'markup', kind, name: identifier() };
      const options_ = options();
      if (options_) markup.options = options_;
      break;
    }
    default:
      throw SyntaxError('parse-error', pos);
  }
  const attributes_ = attributes();
  // A trailing '/' turns an open markup element into a standalone one.
  if (markup?.kind === 'open' && source[pos] === '/') {
    markup.kind = 'standalone';
    pos += 1; // '/'
  }
  expect('}', true);
  if (functionRef) {
    const exp = arg
      ? { type: 'expression', arg, functionRef: functionRef }
      : { type: 'expression', functionRef: functionRef };
    if (attributes_) exp.attributes = attributes_;
    return exp;
  }
  if (markup) {
    if (attributes_) markup.attributes = attributes_;
    return markup;
  }
  if (!arg) throw SyntaxError('empty-token', start, pos);
  return attributes_
    ? { type: 'expression', arg, attributes: attributes_ }
    : { type: 'expression', arg };
}

/**
 * Parses a sequence of `name=value` options, stopping at `@`, `/` or `}`.
 * Requires and consumes leading and trailing whitespace, except where
 * a `/` or `}` follows directly.
 *
 * @returns {object | null} An object mapping option names to their values,
 *   or `null` if no options were found.
 * @throws {MessageSyntaxError} 'duplicate-option-name' if a name is repeated.
 */
function options() {
  ws('/}');
  const options = {};
  let isEmpty = true;
  while (pos < source.length) {
    const next = source[pos];
    if (next === '@' || next === '/' || next === '}') break;
    const start = pos;
    const name_ = identifier();
    if (Object.hasOwn(options, name_)) {
      throw SyntaxError('duplicate-option-name', start, pos);
    }
    ws();
    expect('=', true);
    ws();
    // Define as an own property so that a name such as "__proto__" is stored
    // like any other option rather than hitting the prototype setter.
    setOwn(options, name_, value(true));
    isEmpty = false;
    ws('/}');
  }
  return isEmpty ? null : options;
}

/**
 * Parses a sequence of `@name` or `@name=literal` attributes.
 * An attribute without a value is recorded as `true`.
 *
 * @returns {object | null} An object mapping attribute names to a literal or
 *   `true`, or `null` if no attributes were found.
 * @throws {MessageSyntaxError} 'duplicate-attribute' if a name is repeated.
 */
function attributes() {
  const attributes = {};
  let isEmpty = true;
  while (source[pos] === '@') {
    const start = pos;
    pos += 1; // '@'
    const name_ = identifier();
    if (Object.hasOwn(attributes, name_)) {
      throw SyntaxError('duplicate-attribute', start, pos);
    }
    ws('=/}');
    // Own-property definition keeps a name such as "__proto__" from being
    // swallowed by the prototype setter (see setOwn).
    if (source[pos] === '=') {
      pos += 1; // '='
      ws();
      setOwn(attributes, name_, literal(true));
      ws('/}');
    } else {
      setOwn(attributes, name_, true);
    }
    isEmpty = false;
  }
  return isEmpty ? null : attributes;
}

/**
 * Reads literal pattern text from `pos` up to the next unescaped `{` or `}`,
 * or to the end of the source, resolving backslash escapes.
 *
 * @returns {string} The unescaped text.
 * @throws {MessageSyntaxError} 'bad-escape' if a backslash is not followed by
 *   one of `\`, `{`, `|` or `}`.
 */
function text() {
  let value = '';
  let i = pos;
  loop: for (; i < source.length; ++i) {
    switch (source[i]) {
      case '\\': {
        const esc = source[i + 1];
        if (!'\\{|}'.includes(esc)) throw SyntaxError('bad-escape', i, i + 2);
        // Flush the text before the escape, then append the escaped character.
        value += source.substring(pos, i) + esc;
        i += 1;
        pos = i + 1;
        break;
      }
      case '{':
      case '}':
        break loop;
    }
  }
  value += source.substring(pos, i);
  pos = i;
  return value;
}

/**
 * Parses a variable reference or a literal at `pos`.
 *
 * @param {boolean} required - Passed on to `literal()`: whether a missing
 *   value is an error.
 * @returns {object | undefined} The parsed value, or `undefined` if no value
 *   is present and it is not required.
 */
function value(required) {
  return source[pos] === '$' ? variable() : literal(required);
}

/**
 * Parses a `$name` variable reference. Expects `pos` at the `$`.
 *
 * @returns {object} `{ type: 'variable', name }`.
 */
function variable() {
  pos += 1; // '$'
  return { type: 'variable', name: name() };
}

/**
 * Parses a quoted (`|...|`) or unquoted literal at `pos`.
 *
 * @param {boolean} required - Whether a missing literal is an error.
 * @returns {object | undefined} `{ type: 'literal', value }`, or `undefined`
 *   if no literal is present and it is not required.
 * @throws {MessageSyntaxError} 'empty-token' if a required literal is missing.
 */
function literal(required) {
  if (source[pos] === '|') return quotedLiteral();
  const value = parseUnquotedLiteralValue(source, pos);
  if (!value) {
    if (required) throw SyntaxError('empty-token', pos);
    else return undefined;
  }
  pos += value.length;
  return { type: 'literal', value };
}

/**
 * Parses a `|...|` quoted literal, resolving backslash escapes.
 * Expects `pos` at the opening `|`.
 *
 * @returns {object} `{ type: 'literal', value }`.
 * @throws {MessageSyntaxError} 'bad-escape' for an invalid escape sequence;
 *   'missing-syntax' if the closing `|` is not found.
 */
function quotedLiteral() {
  pos += 1; // '|'
  let value = '';
  for (let i = pos; i < source.length; ++i) {
    switch (source[i]) {
      case '\\': {
        const esc = source[i + 1];
        if (!'\\{|}'.includes(esc)) throw SyntaxError('bad-escape', i, i + 2);
        value += source.substring(pos, i) + esc;
        i += 1;
        pos = i + 1;
        break;
      }
      case '|':
        value += source.substring(pos, i);
        pos = i + 1;
        return { type: 'literal', value };
    }
  }
  throw MissingSyntax(source.length, '|');
}

/**
 * Parses an identifier: a name, optionally followed by `:` and a second name.
 *
 * @returns {string} The identifier, as `name` or `namespace:name`.
 */
function identifier() {
  const name_ = name();
  if (source[pos] === ':') {
    pos += 1;
    return name_ + ':' + name();
  }
  return name_;
}

/**
 * Parses a name at `pos` using `parseNameValue()` and advances past it.
 *
 * @returns {string} The parsed name value.
 * @throws {MessageSyntaxError} 'empty-token' if no name is found at `pos`.
 */
function name() {
  const name = parseNameValue(source, pos);
  if (!name) throw SyntaxError('empty-token', pos);
  pos = name.end;
  return name.value;
}

/**
 * Skips whitespace and bidi control characters at the current position.
 *
 * @param {boolean | string} [req=false] - `false`: whitespace is optional.
 *   `true`: at least one whitespace character is required. A string:
 *   whitespace is required unless the character after the skipped run is one
 *   of the characters in the string.
 * @throws {MessageSyntaxError} 'missing-syntax' if required whitespace is absent.
 */
function ws(req = false) {
  let next = source[pos];
  let hasWS = false;
  if (req) {
    // Leading bidi characters alone do not satisfy the whitespace requirement.
    while (bidiChars.has(next)) next = source[++pos];
    while (whitespaceChars.has(next)) {
      next = source[++pos];
      hasWS = true;
    }
  }
  while (bidiChars.has(next) || whitespaceChars.has(next)) next = source[++pos];
  if (req && !hasWS && (req === true || !req.includes(source[pos]))) {
    throw MissingSyntax(pos, "' '");
  }
}