UNPKG

imba

Version:

Intuitive and powerful language for building webapps that fly

681 lines (567 loc) 20.5 kB
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

/**
 * Create a syntax highlighter with a fully declarative JSON style lexer description
 * using regular expressions.
 */

import { Token, TokenizationResult } from './token';
import { IState } from './types';
import * as monarchCommon from './common';

export interface ITokenizationSupport {
	getInitialState(): IState;

	// add offsetDelta to each of the returned indices
	tokenize(line: string, state: IState, offsetDelta: number): TokenizationResult;
}

/** Stack depth at or above which stack elements / line states are no longer cached. */
const CACHE_STACK_DEPTH = 10;

/**
 * Returns the `index`-th dot-separated segment of a state name
 * (`undefined` at runtime when the state has fewer segments).
 */
function statePart(state: string, index: number): string {
	return state.split('.')[index];
}

/**
 * Reuse the same stack elements up to a certain depth.
 */
class MonarchStackElementFactory {

	private static readonly _INSTANCE = new MonarchStackElementFactory(CACHE_STACK_DEPTH);

	/** Creates (or fetches from the shared cache) the element for `state` on top of `parent`. */
	public static create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		return this._INSTANCE.create(parent, state);
	}

	private readonly _maxCacheDepth: number;
	private readonly _entries: { [stackElementId: string]: MonarchStackElement; };

	constructor(maxCacheDepth: number) {
		this._maxCacheDepth = maxCacheDepth;
		// prototype-less object so state names can never collide with Object.prototype keys
		this._entries = Object.create(null);
	}

	public create(parent: MonarchStackElement | null, state: string): MonarchStackElement {
		if (parent !== null && parent.depth >= this._maxCacheDepth) {
			// no caching above a certain depth
			return new MonarchStackElement(parent, state);
		}

		// cache key: the parent's chain id followed by the new state
		let stackElementId = MonarchStackElement.getStackElementId(parent);
		if (stackElementId.length > 0) {
			stackElementId += '|';
		}
		stackElementId += state;

		const cached = this._entries[stackElementId];
		if (cached) {
			return cached;
		}

		const created = new MonarchStackElement(parent, state);
		this._entries[stackElementId] = created;
		return created;
	}
}

/**
 * One entry of the tokenizer state stack. Elements are immutable and linked to
 * their parent; `depth` is the parent's depth plus one (1 for a root element).
 */
class MonarchStackElement {

	public readonly parent: MonarchStackElement | null;
	public readonly state: string;
	public readonly depth: number;

	constructor(parent: MonarchStackElement | null, state: string) {
		this.parent = parent;
		this.state = state;
		this.depth = (this.parent ? this.parent.depth : 0) + 1;
	}

	/** Joins the states from `element` up to the root with `|` (empty string for `null`). */
	public static getStackElementId(element: MonarchStackElement | null): string {
		let result = '';
		while (element !== null) {
			if (result.length > 0) {
				result += '|';
			}
			result += element.state;
			element = element.parent;
		}
		return result;
	}

	/** Structural comparison of two stacks; stops early when both chains share an element. */
	private static _equals(a: MonarchStackElement | null, b: MonarchStackElement | null): boolean {
		while (a !== null && b !== null) {
			if (a === b) {
				return true;
			}
			if (a.state !== b.state) {
				return false;
			}
			a = a.parent;
			b = b.parent;
		}
		// equal only when both chains ended at the same time
		return a === null && b === null;
	}

	/** Distance between the first and the last tab character in the state name (0 when there is at most one). */
	public get indent(): number {
		return this.state.lastIndexOf('\t') - this.state.indexOf('\t');
	}

	/** Third dot-separated segment of the state name. */
	public get scope(): string {
		return this.part(2);
	}

	/**
	 * NOTE(review): returns the same segment as `scope` (index 2). Kept as-is because the
	 * intended index cannot be determined from this file - confirm against callers.
	 */
	public get detail(): string {
		return this.part(2);
	}

	/** Returns the `index`-th dot-separated segment of this element's state name. */
	public part(index: number): string {
		return statePart(this.state, index);
	}

	public equals(other: MonarchStackElement): boolean {
		return MonarchStackElement._equals(this, other);
	}

	/** Returns a new stack with `state` pushed on top of this element. */
	public push(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this, state);
	}

	/** Returns the parent element (`null` when this is the root). */
	public pop(): MonarchStackElement | null {
		return this.parent;
	}

	/** Returns the root element of this stack. */
	public popall(): MonarchStackElement {
		let result: MonarchStackElement = this;
		while (result.parent) {
			result = result.parent;
		}
		return result;
	}

	/** Replaces the top of the stack with `state` without changing the depth. */
	public switchTo(state: string): MonarchStackElement {
		return MonarchStackElementFactory.create(this.parent, state);
	}
}

/**
 * Reuse the same line states up to a certain depth.
 */
class MonarchLineStateFactory {

	private static readonly _INSTANCE = new MonarchLineStateFactory(CACHE_STACK_DEPTH);

	/** Creates (or fetches from the shared cache) the line state wrapping `stack`. */
	public static create(stack: MonarchStackElement): MonarchLineState {
		return this._INSTANCE.create(stack);
	}

	private readonly _maxCacheDepth: number;
	private readonly _entries: { [stackElementId: string]: MonarchLineState; };

	constructor(maxCacheDepth: number) {
		this._maxCacheDepth = maxCacheDepth;
		this._entries = Object.create(null);
	}

	public create(stack: MonarchStackElement): MonarchLineState {
		if (stack !== null && stack.depth >= this._maxCacheDepth) {
			// no caching above a certain depth
			return new MonarchLineState(stack);
		}

		const stackElementId = MonarchStackElement.getStackElementId(stack);
		const cached = this._entries[stackElementId];
		if (cached) {
			return cached;
		}

		const created = new MonarchLineState(stack);
		this._entries[stackElementId] = created;
		return created;
	}
}

/** Tokenizer state carried from the end of one line to the start of the next. */
class MonarchLineState implements IState {

	public readonly stack: MonarchStackElement;

	constructor(stack: MonarchStackElement) {
		this.stack = stack;
	}

	public clone(): IState {
		return MonarchLineStateFactory.create(this.stack);
	}

	public equals(other: IState): boolean {
		if (!(other instanceof MonarchLineState)) {
			return false;
		}
		return this.stack.equals(other.stack);
	}
}

interface IMonarchTokensCollector {
	enterMode(startOffset: number, modeId: string): void;
	emit(startOffset: number, type: string, stack?: MonarchStackElement): Token;
}

/** Collects every emitted token of a line into an array. */
class MonarchClassicTokensCollector implements IMonarchTokensCollector {

	private readonly _tokens: Token[] = [];
	private _language: string | null = null;

	/** Records the language id that subsequently emitted tokens are tagged with. */
	public enterMode(startOffset: number, modeId: string): void {
		this._language = modeId;
	}

	/** Creates a token of `type` at `startOffset`, stores it and returns it. */
	public emit(startOffset: number, type: string, stack?: MonarchStackElement): Token {
		const token = new Token(startOffset, type, this._language!);
		this._tokens.push(token);
		return token;
	}

	public finalize(endState: MonarchLineState): TokenizationResult {
		return new TokenizationResult(this._tokens, endState);
	}
}

export type ILoadStatus = { loaded: true; } | { loaded: false; promise: Promise<void>; };

/**
 * Line-based tokenizer driven by a compiled Monarch lexer description.
 */
export class MonarchTokenizer implements ITokenizationSupport {

	private readonly _modeId: string;
	private readonly _lexer: monarchCommon.ILexer;
	/** When true, per-rule match counts, hits and timings are accumulated on `rule.stats`. */
	public _profile: boolean;

	constructor(modeId: string, lexer: monarchCommon.ILexer) {
		this._modeId = modeId;
		this._lexer = lexer;
		this._profile = false;
	}

	public dispose(): void {
	}

	public getLoadStatus(): ILoadStatus {
		return { loaded: true };
	}

	/** Returns the line state for the lexer's start state. */
	public getInitialState(): IState {
		const rootState = MonarchStackElementFactory.create(null, this._lexer.start!);
		return MonarchLineStateFactory.create(rootState);
	}

	/**
	 * Tokenizes a single line.
	 *
	 * @param line the line text
	 * @param lineState the state returned for the previous line (or the initial state)
	 * @param offsetDelta value added to every emitted token offset
	 * @returns the emitted tokens together with the state at the end of the line
	 * @throws when the lexer definition is inconsistent (undefined state, no progress, ...)
	 */
	public tokenize(line: string, lineState: IState, offsetDelta: number): TokenizationResult {
		const tokensCollector = new MonarchClassicTokensCollector();
		const endLineState = this._tokenize(line, <MonarchLineState>lineState, offsetDelta, tokensCollector);
		return tokensCollector.finalize(endLineState);
	}

	private _tokenize(line: string, lineState: MonarchLineState, offsetDelta: number, collector: IMonarchTokensCollector): MonarchLineState {
		return this._myTokenize(line, lineState, offsetDelta, collector);
	}

	/** Name of `rule` for use in diagnostics, or `'(unknown)'` when no rule is available. */
	private _safeRuleName(rule: monarchCommon.IRule | null): string {
		if (rule) {
			return rule.name;
		}
		return '(unknown)';
	}

	/**
	 * Appends the synthetic `pop.*` / `push.*` token types needed to move from scope
	 * `from` to scope `to` onto `tokens`. Scopes are `-`-separated paths (e.g. `if-body`);
	 * segments beyond the common prefix are popped (deepest first) and then pushed.
	 * When `toState` is given, its second dot-separated segment is appended to each push id.
	 */
	private _rescope(from: string, to: string, tokens: string[], toState: string): void {
		if (from === to) {
			return;
		}

		const a = (from || '').split('-'); // if-body
		const b = (to || '').split('-'); // if

		// find out their common base (index 0 is never compared)
		let diff = 1;
		while (a[diff] && a[diff] === b[diff]) {
			diff++;
		}

		let level = a.length;
		while (level > diff) {
			level--;
			tokens.push('pop.' + a[level] + '.' + level);
		}

		while (b.length > diff) {
			const depth = diff++;
			let id = 'push.' + b[depth] + '.' + depth;
			if (toState) {
				id += '.' + statePart(toState, 1);
			}
			tokens.push(id);
		}
	}

	/**
	 * Core tokenizer loop: repeatedly matches the rules of the current state against the
	 * rest of the line, applies the matched action (state changes, group actions, rematch)
	 * and emits tokens through `tokensCollector`.
	 *
	 * @returns the line state wrapping the stack at the end of the line
	 */
	private _myTokenize(line: string, lineState: MonarchLineState, offsetDelta: number, tokensCollector: IMonarchTokensCollector): MonarchLineState {
		tokensCollector.enterMode(offsetDelta, this._modeId);

		const lineLength = line.length;
		const profile = this._profile;

		let stack = lineState.stack;
		// kept as `any`: `value`/`offset` are accessed on it and Token's declaration is not visible here
		let lastToken: any = null;
		let pos = 0;

		// regular expression group matching
		// these never need cloning or equality since they are only used within a line match
		interface GroupMatching {
			matches: string[];
			rule: monarchCommon.IRule | null;
			groups: { action: monarchCommon.FuzzyAction; matched: string; }[];
		}
		let groupMatching: GroupMatching | null = null;

		// See https://github.com/Microsoft/monaco-editor/issues/1235:
		// Evaluate rules at least once for an empty line
		let forceEvaluation = true;

		// synthetic scope tokens queued by _rescope, flushed after the next emitted token
		const append: string[] = [];

		// hang detection: every 1000 iterations check whether the position moved at all
		let tries = 0;
		let hangPos = -1;

		while (forceEvaluation || pos < lineLength) {
			tries++;
			if (tries > 1000) {
				if (pos === hangPos) {
					console.log('infinite recursion', pos, lineLength, stack, tokensCollector);
					throw new Error('infinite recursion in tokenizer?');
				} else {
					hangPos = pos;
					tries = 0;
				}
			}

			const pos0 = pos;
			const stackLen0 = stack.depth;
			const groupLen0 = groupMatching ? groupMatching.groups.length : 0;
			const state = stack.state;

			let matches: string[] | null = null;
			let matched: string | null = null;
			let action: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			let rule: monarchCommon.IRule | null = null;

			// check if we need to process group matches first
			if (groupMatching) {
				matches = groupMatching.matches;
				const groupEntry = groupMatching.groups.shift()!;
				matched = groupEntry.matched;
				action = groupEntry.action;
				rule = groupMatching.rule;

				// cleanup if necessary
				if (groupMatching.groups.length === 0) {
					groupMatching = null;
				}
			} else {
				// otherwise we match on the token stream

				if (!forceEvaluation && pos >= lineLength) {
					// nothing to do
					break;
				}

				forceEvaluation = false;

				// get the rules for this state
				let rules: monarchCommon.IRule[] | null = this._lexer.tokenizer[state];
				if (!rules) {
					rules = monarchCommon.findRules(this._lexer, state); // do parent matching
					if (!rules) {
						throw monarchCommon.createError(this._lexer, 'tokenizer state is not defined: ' + state);
					}
				}

				// try each rule until we match
				const restOfLine = line.slice(pos);
				for (const candidate of rules) {
					if (candidate.string !== undefined) {
						// literal rule: compared against the first character only
						if (restOfLine[0] === candidate.string) {
							matches = [candidate.string];
							matched = candidate.string;
							action = candidate.action;
							// remember the rule so diagnostics can name it
							rule = candidate;
							break;
						}
					} else if (pos === 0 || !candidate.matchOnlyAtLineStart) {
						if (profile) {
							candidate.stats.count++;
							const now = performance.now();
							matches = restOfLine.match(candidate.regex);
							candidate.stats.time += (performance.now() - now);
							if (matches) {
								candidate.stats.hits++;
							}
						} else {
							matches = restOfLine.match(candidate.regex);
						}
						if (matches) {
							matched = matches[0];
							action = candidate.action;
							// remember the rule so diagnostics can name it
							rule = candidate;
							break;
						}
					}
				}
			}

			// We matched 'rule' with 'matches' and 'action'
			if (!matches) {
				matches = [''];
				matched = '';
			}

			if (!action) {
				// bad: we didn't match anything, and there is no action to take
				// we need to advance the stream or we get progress trouble
				if (pos < lineLength) {
					matches = [line.charAt(pos)];
					matched = matches[0];
				}
				action = this._lexer.defaultToken;
			}

			if (matched === null) {
				// should never happen, needed for strict null checking
				break;
			}

			// advance stream
			pos += matched.length;

			// maybe call action function (used for 'cases')
			while (monarchCommon.isFuzzyAction(action) && monarchCommon.isIAction(action) && action.test) {
				action = action.test(matched, matches, state, pos === lineLength);
			}

			let result: monarchCommon.FuzzyAction | monarchCommon.FuzzyAction[] | null = null;
			// set the result: either a string or an array of actions
			if (typeof action === 'string' || Array.isArray(action)) {
				result = action;
			} else if (action.group) {
				result = action.group;
			} else if (action.token !== null && action.token !== undefined) {

				// do $n replacements?
				if (action.tokenSubst) {
					result = monarchCommon.substituteMatches(this._lexer, action.token, matched, matches, state);
				} else {
					result = action.token;
				}

				// state transformations
				if (action.goBack) { // back up the stream..
					pos = Math.max(0, pos - action.goBack);
				}

				if (action.switchTo && typeof action.switchTo === 'string') {
					let nextState = monarchCommon.substituteMatches(this._lexer, action.switchTo, matched, matches, state); // switch state without a push...
					if (nextState[0] === '@') {
						nextState = nextState.slice(1); // peel off starting '@'
					}

					if (!monarchCommon.findRules(this._lexer, nextState)) {
						throw monarchCommon.createError(this._lexer, 'trying to switch to a state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
					} else {
						const from = stack.scope;
						const to = statePart(nextState, 2);
						if (from !== to) {
							this._rescope(from, to, append, nextState);
						}
						stack = stack.switchTo(nextState);
					}
				} else if (action.transform && typeof action.transform === 'function') {
					throw monarchCommon.createError(this._lexer, 'action.transform not supported');
				} else if (action.next) {
					if (action.next === '@push') {
						if (stack.depth >= this._lexer.maxStack) {
							throw monarchCommon.createError(this._lexer, 'maximum tokenizer stack size reached: [' +
								stack.state + ',' + stack.parent!.state + ',...]');
						} else {
							stack = stack.push(state);
						}
					} else if (action.next === '@pop') {
						if (stack.depth <= 1) {
							throw monarchCommon.createError(this._lexer, 'trying to pop an empty stack in rule: ' + this._safeRuleName(rule));
						} else {
							const prev = stack;
							stack = stack.pop()!;
							const from = statePart(prev.state, 2);
							const to = statePart(stack.state, 2);
							if (from !== to) {
								this._rescope(from, to, append, stack.state);
							}
						}
					} else if (action.next === '@popall') {
						stack = stack.popall();
					} else {
						let nextState = monarchCommon.substituteMatches(this._lexer, action.next, matched, matches, state);
						if (nextState[0] === '@') {
							nextState = nextState.slice(1); // peel off starting '@'
						}

						const nextScope = statePart(nextState, 2);

						if (!monarchCommon.findRules(this._lexer, nextState)) {
							throw monarchCommon.createError(this._lexer, 'trying to set a next state \'' + nextState + '\' that is undefined in rule: ' + this._safeRuleName(rule));
						} else {
							if (nextScope !== stack.scope) {
								this._rescope(stack.scope || '', nextScope, append, nextState);
							}
							stack = stack.push(nextState);
						}
					}
				}

				if (action.log && typeof (action.log) === 'string') {
					monarchCommon.log(this._lexer, this._lexer.languageId + ': ' + monarchCommon.substituteMatches(this._lexer, action.log, matched, matches, state));
				}

				if (action.mark) {
					tokensCollector.emit(pos0 + offsetDelta, action.mark, stack);
				}
			}

			// check result
			if (result === null) {
				throw monarchCommon.createError(this._lexer, 'lexer rule has no well-defined action in rule: ' + this._safeRuleName(rule));
			}

			// is the result a group match?
			if (Array.isArray(result)) {
				if (groupMatching && groupMatching.groups.length > 0) {
					throw monarchCommon.createError(this._lexer, 'groups cannot be nested: ' + this._safeRuleName(rule));
				}
				if (matches.length !== result.length + 1) {
					throw monarchCommon.createError(this._lexer, 'matched number of groups does not match the number of actions in rule: ' + this._safeRuleName(rule));
				}
				let totalLen = 0;
				for (let i = 1; i < matches.length; i++) {
					totalLen += matches[i].length;
				}
				if (totalLen !== matched.length) {
					throw monarchCommon.createError(this._lexer, 'with groups, all characters should be matched in consecutive groups in rule: ' + this._safeRuleName(rule));
				}

				groupMatching = {
					rule: rule,
					matches: matches,
					groups: []
				};
				for (let i = 0; i < result.length; i++) {
					groupMatching.groups[i] = {
						action: result[i],
						matched: matches[i + 1]
					};
				}

				pos -= matched.length;
				// call recursively to initiate first result match
				continue;
			} else {
				// regular result

				// check for '@rematch'
				if (result === '@rematch') {
					pos -= matched.length;
					matched = '';  // better set the next state too..
					matches = null;
					result = '';
				}

				// check progress
				if (matched.length === 0) {
					if (lineLength === 0 || stackLen0 !== stack.depth || state !== stack.state || (!groupMatching ? 0 : groupMatching.groups.length) !== groupLen0) {
						if (typeof result === 'string' && result) {
							tokensCollector.emit(pos + offsetDelta, result, stack);
						}
						while (append.length > 0) {
							tokensCollector.emit(pos + offsetDelta, append.shift() as string, stack);
						}
						continue;
					} else {
						throw monarchCommon.createError(this._lexer, 'no progress in tokenizer in rule: ' + this._safeRuleName(rule));
					}
				}

				// return the result (and check for brace matching)
				// todo: for efficiency we could pre-sanitize tokenPostfix and substitutions
				let tokenType: string | null = null;
				if (monarchCommon.isString(result) && result.indexOf('@brackets') === 0) {
					const rest = result.slice('@brackets'.length);
					const bracket = findBracket(this._lexer, matched);
					if (!bracket) {
						throw monarchCommon.createError(this._lexer, '@brackets token returned but no bracket defined as: ' + matched);
					}
					tokenType = monarchCommon.sanitize(bracket.token + rest);
				} else {
					const postfixed = (result === '' ? '' : result + this._lexer.tokenPostfix);
					tokenType = monarchCommon.sanitize(postfixed);
				}

				const token = tokensCollector.emit(pos0 + offsetDelta, tokenType, stack);
				token.stack = stack;

				// the previous token's text ends where this token starts
				if (lastToken && lastToken !== token) {
					lastToken.value = line.slice(lastToken.offset - offsetDelta, pos0);
				}
				lastToken = token;

				while (append.length > 0) {
					tokensCollector.emit(pos + offsetDelta, append.shift() as string, stack);
				}
			}
		}

		// the final token runs to the end of the line
		if (lastToken && !lastToken.value) {
			lastToken.value = line.slice(lastToken.offset - offsetDelta);
		}

		return MonarchLineStateFactory.create(stack);
	}
}

/**
 * Searches for a bracket in the 'brackets' attribute that matches the input.
 *
 * @returns the bracket's token and whether `matched` is its opening or closing form,
 * or `null` when `matched` is empty or no bracket matches
 */
function findBracket(lexer: monarchCommon.ILexer, matched: string) {
	if (!matched) {
		return null;
	}
	matched = monarchCommon.fixCase(lexer, matched);

	for (const bracket of lexer.brackets) {
		if (bracket.open === matched) {
			return { token: bracket.token, bracketType: monarchCommon.MonarchBracket.Open };
		}
		if (bracket.close === matched) {
			return { token: bracket.token, bracketType: monarchCommon.MonarchBracket.Close };
		}
	}
	return null;
}

/** Creates a tokenizer for `modeId` driven by the compiled Monarch `lexer`. */
export function createTokenizationSupport(modeId: string, lexer: monarchCommon.ILexer): ITokenizationSupport {
	return new MonarchTokenizer(modeId, lexer);
}