@atomic-ehr/ucum
Version:
TypeScript implementation of UCUM (Unified Code for Units of Measure)
374 lines (373 loc) • 12.3 kB
JavaScript
import { Lexer } from './lexer.js';
import { prefixes } from '../prefixes.js';
import { units } from '../units.js';
export class Parser {
tokens;
current = 0;
errors = [];
warnings = [];
input;
constructor(input) {
this.input = input;
const lexer = new Lexer(input);
this.tokens = lexer.tokenize();
}
parse() {
const ast = this.tryParseExpression();
// Check for trailing tokens
if (!this.isAtEnd() && this.errors.length === 0) {
const token = this.peek();
this.reportError('unexpected_token', `Unexpected token: ${token.value}`, token);
}
return {
ast,
errors: this.errors,
warnings: this.warnings,
input: this.input
};
}
tryParseExpression() {
try {
return this.parseMainTerm();
}
catch (e) {
// Error already reported via reportError
return undefined;
}
}
reportError(type, message, token) {
const errorToken = token || this.peek();
this.errors.push({
type,
message,
position: errorToken.position,
length: errorToken.length,
token: errorToken
});
}
reportWarning(type, message, token, suggestion) {
const warnToken = token || this.peek();
this.warnings.push({
type,
message,
position: warnToken.position,
length: warnToken.length,
suggestion
});
}
parseMainTerm() {
// Handle optional leading /
if (this.check('SLASH')) {
this.advance();
const operand = this.parseTerm();
if (!operand) {
throw new Error('Parse failed');
}
return {
type: 'unary',
operator: '/',
operand
};
}
const term = this.parseTerm();
if (!term) {
throw new Error('Parse failed');
}
return term;
}
parseTerm() {
let left = this.parseComponent();
if (!left)
return undefined;
// Handle binary operators with left-to-right associativity
while (this.match('DOT', 'SLASH')) {
const operator = this.previous().value;
const right = this.parseComponent();
if (!right) {
this.reportError('unexpected_eof', 'Expected expression after operator');
return left; // Return partial AST
}
left = {
type: 'binary',
operator,
left,
right
};
}
return left;
}
parseComponent() {
// Handle parentheses
if (this.match('LPAREN')) {
const expr = this.parseTerm();
if (!expr) {
this.reportError('syntax', 'Expected expression inside parentheses');
this.synchronize();
return undefined;
}
if (!this.check('RPAREN')) {
this.reportError('unexpected_eof', 'Missing closing parenthesis');
// Continue as if it was there
}
else {
this.consume('RPAREN', "Expected ')' after expression");
}
return {
type: 'group',
expression: expr
};
}
// Handle annotations without preceding unit (standalone annotation)
if (this.match('LBRACE')) {
const annotation = this.parseAnnotationContent();
if (!this.check('RBRACE')) {
this.reportError('unexpected_eof', 'Missing closing brace');
}
else {
this.consume('RBRACE', "Expected '}' after annotation");
}
// Check for very long annotations
if (annotation.length > 50) {
this.reportWarning('ambiguous', 'Very long annotation might be an error', this.previous(), 'Consider using shorter, clearer annotations');
}
return {
type: 'factor',
value: 1,
annotation
};
}
// Handle special units starting with digits (10*, 10^)
if (this.check('DIGIT') && this.peek().value === '10' && this.peekNext()) {
const next = this.peekNext();
if (next && (next.type === 'STAR' || next.type === 'CARET')) {
return this.parseSpecialUnit();
}
}
// Handle numbers (factors)
if (this.check('DIGIT')) {
return this.parseFactor();
}
// Otherwise, parse as annotatable unit
return this.parseAnnotatable();
}
parseAnnotatable() {
const unit = this.parseSimpleUnit();
if (!unit)
return undefined;
// Check for annotation
if (this.match('LBRACE')) {
const annotation = this.parseAnnotationContent();
if (!this.check('RBRACE')) {
this.reportError('unexpected_eof', 'Missing closing brace');
}
else {
this.consume('RBRACE', "Expected '}' after annotation");
}
// Check for very long annotations
if (annotation.length > 50) {
this.reportWarning('ambiguous', 'Very long annotation might be an error', this.previous(), 'Consider using shorter, clearer annotations');
}
unit.annotation = annotation;
}
return unit;
}
parseSimpleUnit() {
if (this.isAtEnd()) {
this.reportError('unexpected_eof', 'Expected unit');
return undefined;
}
const token = this.advance();
if (token.type !== 'ATOM') {
this.reportError('unexpected_token', `Expected unit atom, got ${token.type}`, token);
return undefined;
}
const value = token.value;
// Try to match prefixes (longest match first)
let prefix;
let atom = value;
// First check if the whole value is a valid unit
if (!units[value]) {
// Check for 2-character prefix (da)
if (value.length >= 2 && prefixes[value.substring(0, 2)]) {
const potentialAtom = value.substring(2);
if (units[potentialAtom]) {
prefix = value.substring(0, 2);
atom = potentialAtom;
}
}
// Check for 1-character prefix
else if (value.length >= 1 && prefixes[value.substring(0, 1)]) {
const potentialAtom = value.substring(1);
if (units[potentialAtom]) {
prefix = value.substring(0, 1);
atom = potentialAtom;
}
}
}
const unit = {
type: 'unit',
atom
};
if (prefix) {
unit.prefix = prefix;
}
// Check for exponent
const exponent = this.parseExponent();
if (exponent !== null) {
unit.exponent = exponent.value;
if (exponent.format) {
unit.exponentFormat = exponent.format;
}
}
return unit;
}
parseSpecialUnit() {
// Handle 10* and 10^
const ten = this.advance(); // consume '10'
const op = this.advance(); // consume '*' or '^'
let atom;
if (op.type === 'STAR') {
atom = '10*';
}
else if (op.type === 'CARET') {
atom = '10^';
}
else {
this.reportError('syntax', `Unexpected special unit format: 10${op.value}`, op);
return undefined;
}
const unit = {
type: 'unit',
atom
};
// Check for exponent
const exponent = this.parseExponent();
if (exponent !== null) {
unit.exponent = exponent.value;
if (exponent.format) {
unit.exponentFormat = exponent.format;
}
}
return unit;
}
parseExponent() {
// Check for different exponent formats
if (this.match('CARET')) {
// ^exponent format
const sign = this.match('MINUS') ? -1 : (this.match('PLUS'), 1);
if (!this.check('DIGIT')) {
this.reportError('syntax', 'Expected digits after ^');
return null;
}
const digits = this.advance();
return { value: sign * parseInt(digits.value), format: '^' };
}
else if (this.match('PLUS', 'MINUS')) {
// +exponent or -exponent format
const isNegative = this.previous().value === '-';
if (!this.check('DIGIT')) {
this.reportError('syntax', 'Expected digits after sign');
return null;
}
const digits = this.advance();
return { value: (isNegative ? -1 : 1) * parseInt(digits.value), format: '+' };
}
else if (this.check('DIGIT')) {
// Direct digit (superscript notation)
const digits = this.advance();
return { value: parseInt(digits.value) };
}
return null;
}
parseFactor() {
if (!this.check('DIGIT')) {
this.reportError('invalid_number', 'Expected number');
return undefined;
}
const digits = this.advance();
const factor = {
type: 'factor',
value: parseInt(digits.value)
};
// Check for annotation
if (this.match('LBRACE')) {
const annotation = this.parseAnnotationContent();
if (!this.check('RBRACE')) {
this.reportError('unexpected_eof', 'Missing closing brace');
}
else {
this.consume('RBRACE', "Expected '}' after annotation");
}
// Check for very long annotations
if (annotation.length > 50) {
this.reportWarning('ambiguous', 'Very long annotation might be an error', this.previous(), 'Consider using shorter, clearer annotations');
}
factor.annotation = annotation;
}
return factor;
}
parseAnnotationContent() {
let content = '';
while (!this.check('RBRACE') && !this.isAtEnd()) {
content += this.advance().value;
}
return content;
}
synchronize() {
this.advance(); // Skip the problematic token
while (!this.isAtEnd()) {
// Look for recovery points
if (this.peek().type === 'SLASH' ||
this.peek().type === 'DOT' ||
this.peek().type === 'RPAREN') {
return;
}
this.advance();
}
}
// Helper methods
match(...types) {
for (const type of types) {
if (this.check(type)) {
this.advance();
return true;
}
}
return false;
}
check(type) {
if (this.isAtEnd())
return false;
return this.peek().type === type;
}
advance() {
if (!this.isAtEnd())
this.current++;
return this.previous();
}
isAtEnd() {
return this.peek().type === 'EOF';
}
peek() {
return this.tokens[this.current] || { type: 'EOF', value: '', position: this.tokens[this.tokens.length - 1]?.position || 0, length: 0 };
}
peekNext() {
if (this.current + 1 < this.tokens.length) {
return this.tokens[this.current + 1] || null;
}
return null;
}
previous() {
return this.tokens[this.current - 1] || { type: 'EOF', value: '', position: 0, length: 0 };
}
consume(type, message) {
if (this.check(type))
return this.advance();
this.reportError('unexpected_token', message);
throw new Error('Parse failed');
}
}
export function parseUnit(input) {
const parser = new Parser(input);
return parser.parse();
}