/*
 * @composita/compiler
 * Version:
 * Composita language compiler.
 * 528 lines (471 loc) • 17.6 kB
 * text/typescript
 */
import { SourceLocation, SourceRange, SourcePosition } from '../source-location/location';
import {
Token,
IdentifierToken,
TextToken,
FixToken,
ErrorToken,
NumberToken,
HexNumberToken,
IntegerNumberToken,
FloatNumberToken,
Tag,
tagFromString,
} from '../tokens/tokens';
import { Optional } from '@composita/ts-utility-types';
import { Diagnosis, CompilerDiagnostic } from '../diagnosis/diagnosis';
import { DiagnosticSeverity } from 'vscode-languageserver-types';
/**
 * Single characters the lexer needs to name explicitly: the whitespace it
 * recognises, plus the characters that an escape sequence can translate to.
 */
const enum SpecialCharacter {
    // Blank and line-structure characters.
    Space = ' ',
    HorizontalTab = '\t',
    VerticalTab = '\v',
    FormFeed = '\f',
    LineFeed = '\n',
    CarriageReturn = '\r',

    // Control characters.
    Null = '\0',
    Backspace = '\b',

    // Escape introducer and quote characters.
    Backslash = '\\',
    DoubleQuote = '"',
    SingleQuote = "'",
}
/** Escape codes (the character following a backslash) and the character each one stands for. */
const ESCAPE_SEQUENCE_TARGETS = new Map<string, SpecialCharacter>([
    ['0', SpecialCharacter.Null],
    ['b', SpecialCharacter.Backspace],
    ['t', SpecialCharacter.HorizontalTab],
    ['v', SpecialCharacter.VerticalTab],
    ['f', SpecialCharacter.FormFeed],
    ['r', SpecialCharacter.CarriageReturn],
    ['n', SpecialCharacter.LineFeed],
    ['\\', SpecialCharacter.Backslash],
    ["'", SpecialCharacter.SingleQuote],
    ['"', SpecialCharacter.DoubleQuote],
]);

/**
 * Translates an escape code into the character it denotes.
 *
 * @param character The character that follows a backslash, or `undefined` when there is none.
 * @returns The denoted special character, or `undefined` when `character` is
 *          missing or is not a known escape code.
 */
function toSpecialCharacter(character: Optional<string>): Optional<SpecialCharacter> {
    return character === undefined ? undefined : ESCAPE_SEQUENCE_TARGETS.get(character);
}
/**
 * Mutable cursor of the lexer: the look-ahead tokens already produced plus the
 * current offset into the source and its line / in-line position.
 */
class LexerState {
    /** Tokens that were read ahead but not yet consumed. */
    public tokenCache: Array<Token> = [];
    /** Index of the current character in the source. */
    public currentPosition = 0;
    /** Position of the current character within its line. */
    public currentLinePosition = 0;
    /** Zero-based line of the current character. */
    public currentLine = 0;

    /**
     * Creates an independent snapshot of this state.
     *
     * The token cache array is duplicated (the tokens themselves are shared),
     * so later pushes or shifts on either state do not affect the other.
     */
    copy(): LexerState {
        return Object.assign(new LexerState(), {
            tokenCache: [...this.tokenCache],
            currentPosition: this.currentPosition,
            currentLinePosition: this.currentLinePosition,
            currentLine: this.currentLine,
        });
    }
}
/**
 * Converts source text into a stream of tokens.
 *
 * Tokens are produced lazily through {@link Lexer.peek} and {@link Lexer.next}.
 * The complete cursor (position plus look-ahead cache) can be checkpointed with
 * {@link Lexer.saveState} and rolled back with {@link Lexer.restoreState}.
 * Lexical problems inside numbers are reported to the `Diagnosis` passed to the
 * constructor; malformed text and unknown characters yield `ErrorToken`s.
 */
export class Lexer {
    /**
     * @param diagnosis Receiver for the diagnostics logged while lexing.
     * @param uri Document identifier stored in every created source location.
     * @param code The complete source text to tokenize.
     */
    constructor(diagnosis: Diagnosis, uri: string, code: string) {
        this.diagnosis = diagnosis;
        this.uri = uri;
        // Array.from iterates a string by code point, so every array element is
        // one full character and all positions below count code points.
        this.code = Array.from(code);
        this.state = new LexerState();
        this.stateStack = new Array<LexerState>();
    }

    private readonly diagnosis: Diagnosis;
    private readonly uri: string;
    private readonly code: Array<string>;
    private state: LexerState;
    private stateStack: Array<LexerState>;

    /**
     * Moves the cursor forward by up to `n` characters, stopping early at the
     * end of the text and keeping line / in-line position in sync.
     */
    private advance(n = 1): void {
        for (let remaining = n; remaining > 0 && !this.isEOT(); remaining -= 1) {
            // The line counter changes when a line feed is stepped over.
            if (this.isLineFeed()) {
                this.state.currentLine += 1;
                this.state.currentLinePosition = 0;
            } else {
                this.state.currentLinePosition += 1;
            }
            this.state.currentPosition += 1;
        }
    }

    /** Returns the character under the cursor, or `undefined` at the end of the text. */
    private getCurrentCharacter(): Optional<string> {
        return this.isEOT() ? undefined : this.code[this.state.currentPosition];
    }

    private createSourceLocation(range: SourceRange): SourceLocation {
        return new SourceLocation(this.uri, range);
    }

    /** Captures the cursor as a (line, in-line position) pair. */
    private createSourcePosition(): SourcePosition {
        return new SourcePosition(this.state.currentLine, this.state.currentLinePosition);
    }

    /** Builds the location that spans from `start` to the current cursor position. */
    private createLocationFrom(start: SourcePosition): SourceLocation {
        return this.createSourceLocation(new SourceRange(start, this.createSourcePosition()));
    }

    /** Builds the zero-width end-of-text token at the current cursor position. */
    private createEOTToken(): FixToken {
        const endPosition = this.createSourcePosition();
        return new FixToken(this.createSourceLocation(new SourceRange(endPosition, endPosition)), Tag.EOT);
    }

    private isSpace(): boolean {
        return this.getCurrentCharacter() === SpecialCharacter.Space;
    }

    /** True for a horizontal or a vertical tab. */
    private isTab(): boolean {
        const currentCharacter = this.getCurrentCharacter();
        return currentCharacter === SpecialCharacter.HorizontalTab || currentCharacter === SpecialCharacter.VerticalTab;
    }

    private isFormFeed(): boolean {
        return this.getCurrentCharacter() === SpecialCharacter.FormFeed;
    }

    private isLineFeed(): boolean {
        return this.getCurrentCharacter() === SpecialCharacter.LineFeed;
    }

    private isCarriageReturn(): boolean {
        return this.getCurrentCharacter() === SpecialCharacter.CarriageReturn;
    }

    private isBackslash(): boolean {
        return this.getCurrentCharacter() === SpecialCharacter.Backslash;
    }

    /** True for the ASCII letters `a`-`z` and `A`-`Z`. */
    private isLetter(): boolean {
        const currentCharacter = this.getCurrentCharacter();
        return (
            currentCharacter !== undefined &&
            ((currentCharacter >= 'a' && currentCharacter <= 'z') ||
                (currentCharacter >= 'A' && currentCharacter <= 'Z'))
        );
    }

    private isDot(): boolean {
        return this.getCurrentCharacter() === '.';
    }

    /** True when the cursor is on the first of two consecutive dots. */
    private isEllipsis(): boolean {
        return (
            this.isDot() &&
            this.state.currentPosition + 1 < this.code.length &&
            this.code[this.state.currentPosition + 1] === '.'
        );
    }

    /** True for the decimal digits `0`-`9`. */
    private isNumber(): boolean {
        const currentCharacter = this.getCurrentCharacter();
        return currentCharacter !== undefined && currentCharacter >= '0' && currentCharacter <= '9';
    }

    /** True for the letter digits of a hex number (`a`-`f`, `A`-`F`); decimal digits are not included. */
    private isHexCharacter(): boolean {
        const currentCharacter = this.getCurrentCharacter();
        return (
            currentCharacter !== undefined &&
            ((currentCharacter >= 'a' && currentCharacter <= 'f') ||
                (currentCharacter >= 'A' && currentCharacter <= 'F'))
        );
    }

    private isDoubleQuote(): boolean {
        return this.getCurrentCharacter() === SpecialCharacter.DoubleQuote;
    }

    private isWhitespace(): boolean {
        return this.isSpace() || this.isTab() || this.isLineFeed() || this.isCarriageReturn() || this.isFormFeed();
    }

    /** True once the cursor has moved past the last character. */
    private isEOT(): boolean {
        return this.state.currentPosition >= this.code.length;
    }

    /**
     * True when the current character maps to the open-parenthesis tag and the
     * next one to the asterisk tag (presumably the two characters `(*`).
     */
    private isCommentStart(): boolean {
        const currentCharacter = this.getCurrentCharacter();
        return (
            currentCharacter !== undefined &&
            tagFromString(currentCharacter) === Tag.OpenParentheses &&
            this.state.currentPosition + 1 < this.code.length &&
            tagFromString(this.code[this.state.currentPosition + 1]) === Tag.Asterisk
        );
    }

    /**
     * True when the current character maps to the asterisk tag and the next
     * one to the close-parenthesis tag (presumably the two characters `*)`).
     */
    private isCommentEnd(): boolean {
        const currentCharacter = this.getCurrentCharacter();
        return (
            currentCharacter !== undefined &&
            tagFromString(currentCharacter) === Tag.Asterisk &&
            this.state.currentPosition + 1 < this.code.length &&
            tagFromString(this.code[this.state.currentPosition + 1]) === Tag.CloseParentheses
        );
    }

    /**
     * Consumes a run of whitespace.
     *
     * @returns `true` when at least one character was consumed.
     */
    private skipWhitespace(): boolean {
        let hasSkipped = false;
        while (this.isWhitespace()) {
            hasSkipped = true;
            this.advance();
        }
        return hasSkipped;
    }

    /**
     * Consumes one comment if the cursor is at a comment start. Comments do
     * not nest: the first comment end closes it. A comment that is never
     * closed runs to the end of the text without a diagnostic.
     *
     * @returns `true` when a comment was consumed.
     */
    private skipComment(): boolean {
        if (!this.isCommentStart()) {
            return false;
        }
        this.advance(2);
        while (!this.isEOT() && !this.isCommentEnd()) {
            this.advance();
        }
        if (!this.isEOT()) {
            // Step over the two characters of the comment end.
            this.advance(2);
        }
        return true;
    }

    /** Consumes characters up to, but not including, the next whitespace or the end of the text. */
    private skipUntilWhitespace(): void {
        while (!this.isWhitespace() && !this.isEOT()) {
            this.advance();
        }
    }

    /**
     * Reads one (possibly escaped) character of a text literal and moves past it.
     *
     * @returns The character, the translated character for a known escape
     *          sequence, or `undefined` at the end of the text.
     */
    private readCharacter(): Optional<string> {
        const character = this.getCurrentCharacter();
        if (this.isBackslash()) {
            this.advance();
            const specialCharacter = toSpecialCharacter(this.getCurrentCharacter());
            if (specialCharacter !== undefined) {
                this.advance();
                return specialCharacter;
            }
            // NOTE(review): for an unknown escape the advance below also steps
            // over the character after the backslash, so `\q` yields only `\`
            // and the `q` is dropped - confirm that this is intended.
        }
        this.advance();
        return character;
    }

    /**
     * Reads a double-quoted text literal; the cursor must be on the opening quote.
     *
     * @returns A `TextToken` with the unescaped content, or an `ErrorToken`
     *          when the text ends before the closing quote.
     */
    private readText(): Token {
        let text = '';
        const start = this.createSourcePosition();
        this.advance(); // opening quote
        while (!this.isDoubleQuote()) {
            const character = this.readCharacter();
            if (character === undefined) {
                return new ErrorToken(this.createLocationFrom(start));
            }
            text = text + character;
        }
        // The loop is only left normally with the cursor on the closing quote.
        this.advance();
        return new TextToken(this.createLocationFrom(start), text);
    }

    /**
     * Reads a run of letters and digits.
     *
     * @returns A `FixToken` when the word maps to a tag, otherwise an `IdentifierToken`.
     */
    private readIdentifier(): Token {
        let identifier = '';
        const start = this.createSourcePosition();
        while (!this.isEOT() && (this.isLetter() || this.isNumber())) {
            identifier = identifier + this.getCurrentCharacter();
            this.advance();
        }
        const location = this.createLocationFrom(start);
        const tag = tagFromString(identifier);
        return tag !== undefined ? new FixToken(location, tag) : new IdentifierToken(location, identifier);
    }

    /** Returns the two-character tag formed by `tag` followed by `=`, if there is one. */
    private static toEqualCompoundTag(tag: Tag): Optional<Tag> {
        switch (tag) {
            case Tag.Less:
                return Tag.LessEqual;
            case Tag.Greater:
                return Tag.GreaterEqual;
            case Tag.Colon:
                return Tag.ColonEqual;
            default:
                return undefined;
        }
    }

    /**
     * Reads a one-character fixed token, extended to `LessEqual`,
     * `GreaterEqual` or `ColonEqual` when the character that follows maps to
     * the equal tag.
     *
     * @returns The `FixToken`, or an `ErrorToken` when the character maps to no
     *          tag (the character is consumed) or the text has ended.
     */
    private readFixToken(): Token {
        const start = this.createSourcePosition();
        const currentCharacter = this.getCurrentCharacter();
        if (currentCharacter === undefined) {
            return new ErrorToken(this.createLocationFrom(start));
        }
        const tag = tagFromString(currentCharacter);
        this.advance();
        if (tag === undefined) {
            return new ErrorToken(this.createLocationFrom(start));
        }
        const nextCharacter = this.getCurrentCharacter();
        if (nextCharacter !== undefined && tagFromString(nextCharacter) === Tag.Equal) {
            const compoundTag = Lexer.toEqualCompoundTag(tag);
            if (compoundTag !== undefined) {
                this.advance();
                return new FixToken(this.createLocationFrom(start), compoundTag);
            }
        }
        return new FixToken(this.createLocationFrom(start), tag);
    }

    /**
     * Reads the unsigned decimal digits of a scale factor and logs an error
     * when there is not even one digit.
     *
     * @returns The value of the digits, `0` when there are none.
     */
    private readScaleFactor(): number {
        let scaleFactor = 0;
        if (!this.isNumber()) {
            this.diagnosis.log(
                new CompilerDiagnostic(
                    new SourceRange(this.createSourcePosition(), this.createSourcePosition()),
                    DiagnosticSeverity.Error,
                    'Scale factor must be at least one digit long.',
                ),
            );
        }
        while (this.isNumber()) {
            const current = this.getCurrentCharacter();
            if (current !== undefined) {
                scaleFactor = scaleFactor * 10 + Number.parseInt(current, 10);
                this.advance();
            }
        }
        return scaleFactor;
    }

    /**
     * Reads an integer, hex or floating point number.
     *
     * - Hex numbers must end in `H` or `X`; an error is logged otherwise.
     * - A float is recognised by a dot after the first digits and may carry a
     *   scale factor `E[+|-]digits`. Its digits are handed to the token as an
     *   integer mantissa together with a decimal exponent, so the value is
     *   presumably `mantissa * 10^exponent`.
     * - Two consecutive dots end the number so that they can be read as an
     *   ellipsis afterwards.
     */
    private readNumber(): NumberToken {
        const start = this.createSourcePosition();
        let mantissa: Optional<string> = undefined;
        let isHex = false;
        let isFloat = false;
        let exponent = 0;
        while (this.isNumber() || this.isHexCharacter()) {
            if (this.isHexCharacter()) {
                isHex = true;
            }
            mantissa = mantissa === undefined ? this.getCurrentCharacter() : mantissa + this.getCurrentCharacter();
            this.advance();
            if (isFloat && isHex) {
                this.skipUntilWhitespace();
                this.diagnosis.log(
                    new CompilerDiagnostic(
                        new SourceRange(start, this.createSourcePosition()),
                        DiagnosticSeverity.Error,
                        'Floating point hex numbers are not supported.',
                    ),
                );
                break;
            }
            if (isFloat) {
                // Every digit behind the dot moves the decimal point one place to the left.
                exponent = exponent - 1;
            }
            if (this.isEllipsis()) {
                break;
            }
            if (this.isDot()) {
                this.advance();
                isFloat = true;
            }
            // Only checked for floats: without a dot, 'E' is read as a hex digit by the loop.
            if (isFloat && this.getCurrentCharacter() === 'E') {
                this.advance();
                const current = this.getCurrentCharacter();
                let prefix = 1;
                if (current === '-' || current === '+') {
                    prefix = current === '-' ? -1 : 1;
                    this.advance();
                }
                // The scale factor is added to the exponent already collected
                // for the fractional digits; overwriting it would turn 1.25E2
                // (mantissa 125) into 125 * 10^2 instead of 125 * 10^0.
                exponent = exponent + prefix * this.readScaleFactor();
                break;
            }
        }
        if (mantissa === undefined) {
            return new IntegerNumberToken(this.createLocationFrom(start), 0);
        }
        const hexModifier = this.getCurrentCharacter() === 'H' || this.getCurrentCharacter() === 'X';
        if (hexModifier) {
            this.advance();
        }
        if (isHex && !hexModifier) {
            this.skipUntilWhitespace();
            this.diagnosis.log(
                new CompilerDiagnostic(
                    new SourceRange(start, this.createSourcePosition()),
                    DiagnosticSeverity.Error,
                    'Missing "H" or "X" after hex number',
                ),
            );
        }
        if (isHex || hexModifier) {
            return new HexNumberToken(this.createLocationFrom(start), Number.parseInt(mantissa, 16));
        }
        if (isFloat) {
            return new FloatNumberToken(this.createLocationFrom(start), Number.parseInt(mantissa, 10), exponent);
        }
        return new IntegerNumberToken(this.createLocationFrom(start), Number.parseInt(mantissa, 10));
    }

    /** Reads the two dots of an ellipsis; the cursor must be on the first dot. */
    private readEllipsis(): FixToken {
        const start = this.createSourcePosition();
        this.advance(2);
        return new FixToken(this.createLocationFrom(start), Tag.Ellipsis);
    }

    /** Skips whitespace and comments, then reads the next token from the text. */
    private read(): Token {
        while (!this.isEOT() && (this.skipWhitespace() || this.skipComment())) {
            /* do nothing, just skip */
        }
        if (this.isEOT()) {
            return this.createEOTToken();
        }
        if (this.isDoubleQuote()) {
            return this.readText();
        }
        if (this.isLetter()) {
            return this.readIdentifier();
        }
        if (this.isNumber()) {
            return this.readNumber();
        }
        // Checked before readFixToken so that two dots are not read as two single-dot tokens.
        if (this.isEllipsis()) {
            return this.readEllipsis();
        }
        return this.readFixToken();
    }

    /** Returns the document identifier given to the constructor. */
    getUri(): string {
        return this.uri;
    }

    /** Pushes a snapshot of the current lexer state onto the state stack. */
    saveState(): void {
        this.stateStack.push(this.state.copy());
    }

    /** Discards the most recent snapshot without restoring it. */
    popSaveState(): void {
        this.stateStack.pop();
    }

    /**
     * Rolls the lexer back to the most recent snapshot and removes that
     * snapshot from the stack. Only warns on the console when there is none.
     */
    restoreState(): void {
        const lastState = this.stateStack.pop();
        if (lastState === undefined) {
            console.warn('No lexer state to restore.');
            return;
        }
        this.state = lastState;
    }

    /**
     * Returns the `n`-th upcoming token without consuming anything.
     *
     * @param n 1-based look-ahead distance; `peek(1)` is the token `next()` would return.
     * @returns The requested token, or the end-of-text token when the text
     *          ends before `n` tokens are available.
     * @throws RangeError when `n` is smaller than 1.
     */
    peek(n: number): Token {
        if (n < 1) {
            // TODO: replace with something cleverer
            throw new RangeError('peek range must be greater than 0.');
        }
        let cacheSize = this.state.tokenCache.length;
        if (n <= cacheSize) {
            return this.state.tokenCache[n - 1];
        }
        do {
            if (cacheSize > 0) {
                // Nothing follows the end of the text, so stop reading there.
                const lastToken = this.state.tokenCache[cacheSize - 1];
                if (lastToken instanceof FixToken && lastToken.getTag() === Tag.EOT) {
                    return lastToken;
                }
            }
            cacheSize = this.state.tokenCache.push(this.read());
        } while (n > cacheSize);
        return this.state.tokenCache[n - 1];
    }

    /** Consumes and returns the next token. */
    next(): Token {
        const nextToken = this.peek(1);
        this.state.tokenCache.shift();
        return nextToken;
    }
}