UNPKG

llparse

Version:

[![Build Status](https://secure.travis-ci.org/indutny/llparse.svg)](http://travis-ci.org/indutny/llparse) [![NPM version](https://badge.fury.io/js/llparse.svg)](https://badge.fury.io/js/llparse)

531 lines (418 loc) 15.4 kB
'use strict';

const assert = require('assert');
const IR = require('llvm-ir');

const llparse = require('./');
const constants = llparse.constants;
const MatchSequence = llparse.MatchSequence;

const kOtherwise = llparse.symbols.kOtherwise;
const kNoAdvance = llparse.symbols.kNoAdvance;
const kCases = llparse.symbols.kCases;

const CCONV = constants.CCONV;

const BOOL = constants.BOOL;
const INT = constants.INT;
const TYPE_INPUT = constants.TYPE_INPUT;
const TYPE_OUTPUT = constants.TYPE_OUTPUT;
const TYPE_MATCH = constants.TYPE_MATCH;
const TYPE_INDEX = constants.TYPE_INDEX;
const TYPE_ERROR = constants.TYPE_ERROR;
const TYPE_REASON = constants.TYPE_REASON;

const ATTR_STATE = constants.ATTR_STATE;
const ATTR_POS = constants.ATTR_POS;
const ATTR_ENDPOS = constants.ATTR_ENDPOS;

const ARG_STATE = constants.ARG_STATE;
const ARG_POS = constants.ARG_POS;
const ARG_ENDPOS = constants.ARG_ENDPOS;

const SEQUENCE_COMPLETE = constants.SEQUENCE_COMPLETE;
const SEQUENCE_PAUSE = constants.SEQUENCE_PAUSE;
const SEQUENCE_MISMATCH = constants.SEQUENCE_MISMATCH;

/**
 * Compiles a graph of llparse nodes into IR using the `llvm-ir` builder.
 *
 * Every node becomes an internal function taking `(state, pos, endpos)`;
 * transitions between nodes are emitted as `musttail` calls. Two public
 * entry points are generated: `<prefix>_init` and `<prefix>_execute`.
 */
class Compiler {
  /**
   * @param {string} prefix - prefix for every generated symbol name
   */
  constructor(prefix) {
    this.prefix = prefix;
    this.ir = new IR();

    this.state = this.ir.struct(`${this.prefix}_state`);

    this.signature = {
      node: this.ir.signature(TYPE_OUTPUT, [
        [ this.state.ptr(), ATTR_STATE ],
        [ TYPE_INPUT, ATTR_POS ],
        [ TYPE_INPUT, ATTR_ENDPOS ]
      ]),
      callback: this.ir.signature(INT, [
        this.state.ptr(), TYPE_INPUT, TYPE_INPUT
      ])
    };

    this.state.field(this.signature.node.ptr(), 'current');
    this.state.field(TYPE_ERROR, 'error');
    this.state.field(TYPE_REASON, 'reason');
    this.state.field(TYPE_INDEX, 'index');
    this.state.field(TYPE_MATCH, 'match');

    // node => generated function
    this.nodeMap = new Map();
    // callback name => external declaration
    this.externalMap = new Map();
    // node name => number of functions already created with that name
    this.counter = new Map();

    // redirect blocks by `fn`, `target` and `pos.next`
    this.redirectCache = new Map();

    const matchSequence = new MatchSequence(this.prefix, this.ir, this.state);
    this.matchSequence = matchSequence.build();
  }

  /**
   * Compile the whole graph reachable from `root`.
   *
   * @param {object} root - root node of the parser graph
   * @returns {*} whatever `this.ir.build()` produces
   */
  build(root) {
    const rootFn = this.buildNode(root);

    this.buildInit(rootFn.ref());
    this.buildParse();

    return this.ir.build();
  }

  /**
   * Emit `<prefix>_init(state)`: stores `fn` into `state.current` and
   * initial values into the remaining state fields.
   *
   * @param {object} fn - reference to the root node function
   * @returns {object} the generated init function
   */
  buildInit(fn) {
    const sig = IR.signature(IR.void(), [ this.state.ptr() ]);
    const init = this.ir.fn(sig, this.prefix + '_init', [ ARG_STATE ]);

    const fields = {
      current: this.field(init, 'current'),
      error: this.field(init, 'error'),
      reason: this.field(init, 'reason'),
      index: this.field(init, 'index'),
      match: this.field(init, 'match')
    };

    // Field address computations must be pushed before the stores use them
    Object.keys(fields).forEach(key => init.body.push(fields[key]));

    const store = (field, type, value) => {
      init.body.push(IR._('store', [ type, value ],
        [ type.ptr(), field ]).void());
    };

    store(fields.current, fn.type, fn);
    store(fields.error, TYPE_ERROR, TYPE_ERROR.v(0));
    store(fields.reason, TYPE_REASON, TYPE_REASON.v(null));
    store(fields.index, TYPE_INDEX, TYPE_INDEX.v(0));
    store(fields.match, TYPE_MATCH, TYPE_MATCH.v(0));

    init.body.terminate('ret', IR.void());

    return init;
  }

  /**
   * Emit `<prefix>_execute(state, pos, endpos)`: calls the function stored
   * in `state.current`, stores its result back into `state.current`, and
   * returns the value of `state.error` loaded after the call.
   */
  buildParse() {
    const sig = IR.signature(TYPE_ERROR,
      [ this.state.ptr(), TYPE_INPUT, TYPE_INPUT ]);
    const parse = this.ir.fn(sig, this.prefix + '_execute',
      [ ARG_STATE, ARG_POS, ARG_ENDPOS ]);

    const body = parse.body;
    const nodeSig = this.signature.node;

    const currentPtr = this.field(parse, 'current');
    body.push(currentPtr);

    const current = IR._('load', nodeSig.ptr(),
      [ nodeSig.ptr().ptr(), currentPtr ]);
    body.push(current);

    const call = IR._(`call ${CCONV}`, [
      TYPE_OUTPUT, current, '(',
      this.state.ptr(), parse.arg(ARG_STATE), ',',
      TYPE_INPUT, parse.arg(ARG_POS), ',',
      TYPE_INPUT, parse.arg(ARG_ENDPOS),
      ')'
    ]);
    body.push(call);

    const errorPtr = this.field(parse, 'error');
    body.push(errorPtr);

    const error = IR._('load', TYPE_ERROR, [ TYPE_ERROR.ptr(), errorPtr ]);
    body.push(error);

    const bitcast = IR._('bitcast', [
      TYPE_OUTPUT, call, 'to', nodeSig.ptr()
    ]);
    body.push(bitcast);

    body.push(IR._('store', [ nodeSig.ptr(), bitcast ],
      [ nodeSig.ptr().ptr(), currentPtr ]).void());

    body.terminate('ret', [ TYPE_ERROR, error ]);
  }

  /**
   * Create an (empty) internal function for `node`. Repeated calls with the
   * same `node.name` get `_1`, `_2`, ... suffixes to keep names unique.
   *
   * @param {object} node - node the function is created for
   * @returns {object} the new IR function
   */
  createFn(node) {
    const index = this.counter.has(node.name) ?
      this.counter.get(node.name) : 0;
    this.counter.set(node.name, index + 1);

    const name = `${this.prefix}__${node.name}` +
      `${index === 0 ? '' : '_' + index}`;

    const fn = this.ir.fn(this.signature.node, name,
      [ ARG_STATE, ARG_POS, ARG_ENDPOS ]);

    fn.visibility = 'internal';
    fn.cconv = CCONV;

    // TODO(indutny): reassess `minsize`. Looks like it gives best performance
    // results right now, though.
    fn.attributes = 'nounwind minsize';

    // Errors are assumed to be rarely called
    if (node instanceof llparse.node.Error)
      fn.attributes += ' cold writeonly';

    return fn;
  }

  /**
   * Build (or fetch from cache) the function implementing `node`.
   *
   * @param {object} node - graph node to compile
   * @returns {object} the IR function for `node`
   * @throws {AssertionError} when a non-error node has no otherwise edge
   */
  buildNode(node) {
    if (this.nodeMap.has(node))
      return this.nodeMap.get(node);

    const fn = this.createFn(node);
    // Register before building the body so that cycles resolve to `fn`
    this.nodeMap.set(node, fn);

    const body = this.buildPrologue(node, fn);

    if (node instanceof llparse.node.Error) {
      const info = { node, fn, otherwise: null };
      this.buildError(info, body);
      return fn;
    }

    const otherwise = node[kOtherwise];
    assert.notStrictEqual(otherwise, null,
      `Node "${node.name}" must have \`.otherwise()\`/\`.skipTo()\``);

    const info = {
      node,
      fn,
      otherwise: otherwise.next,
      skip: otherwise.skip
    };

    const trie = new llparse.Trie(node.name);
    const combined = trie.combine(node[kCases]);

    this.buildTrie(info, body, combined);

    return fn;
  }

  /**
   * Emit a call to the external `callback` and a switch over its result
   * that redirects to the nodes listed in `info.node.map`.
   *
   * @param {object} info - build context (`fn`, `node`, ...)
   * @param {object} body - block to append to
   * @param {object} pos - `{ current, next }` position values
   * @param {string} callback - name of the external callback
   * @returns {object} the switch's `otherwise` block
   */
  buildInvoke(info, body, pos, callback) {
    let external;
    if (this.externalMap.has(callback)) {
      external = this.externalMap.get(callback);
    } else {
      external = this.ir.declare(this.signature.callback, callback);
      external.attributes = 'alwaysinline';
      this.externalMap.set(callback, external);
    }

    const returnType = this.signature.callback.ret;

    const call = IR._('call', [
      returnType, external, '(',
      this.state.ptr(), info.fn.arg(ARG_STATE), ',',
      TYPE_INPUT, info.fn.arg(ARG_POS), ',',
      TYPE_INPUT, info.fn.arg(ARG_ENDPOS),
      ')'
    ]);
    body.push(call);

    // Object keys are strings; coerce them to 32-bit integers
    const keys = Object.keys(info.node.map).map(key => key | 0);
    const s = this.buildSwitch(body, returnType, call, keys);

    s.cases.forEach((caseBody, i) => {
      const subNode = info.node.map[keys[i]];
      this.buildRedirect(info, caseBody, pos, this.buildNode(subNode));
    });

    return s.otherwise;
  }

  /**
   * Build a child of a trie node: either a direct transition (`next`) or a
   * nested trie placed into a function of its own.
   *
   * @param {object} info - build context of the parent
   * @param {object} body - block to append to
   * @param {object} pos - `{ current, next }` position values
   * @param {object} trie - child trie node
   * @returns {object|undefined} the sub-function for nested tries
   */
  buildSubTrie(info, body, pos, trie) {
    if (trie.type === 'next')
      return this.buildNext(info, body, pos, trie);

    const subFn = this.createFn(info.node);
    // NOTE(review): `info.skip` is not forwarded to the sub-trie - confirm
    // that this is intended.
    const subInfo = { fn: subFn, node: info.node, otherwise: info.otherwise };

    const subBody = this.buildPrologue(info.node, subFn);
    this.buildTrie(subInfo, subBody, trie);

    this.buildRedirect(info, body, pos, subFn);
    return subFn;
  }

  /**
   * Emit the `pos != endpos` check at the start of `fn`, unless the node is
   * flagged with `kNoAdvance`.
   *
   * @param {object} node - node being compiled
   * @param {object} fn - function being filled
   * @returns {object} block to continue building in
   */
  buildPrologue(node, fn) {
    if (node[kNoAdvance])
      return fn.body;

    // Check that we have enough chars to do the read
    fn.body.comment('--- Prologue ---');
    fn.body.comment('if (pos != endpos)');

    const cmp = IR._('icmp', [ 'ne', TYPE_INPUT, fn.arg(ARG_POS) ],
      fn.arg(ARG_ENDPOS));
    fn.body.push(cmp);

    const branch = fn.body.branch('br', [ BOOL, cmp ]);

    // Return self when `pos === endpos`
    branch.right.name = 'prologue_end';
    this.buildSelfReturn(fn, branch.right);

    branch.left.name = 'prologue_normal';
    return branch.left;
  }

  /**
   * Terminate `body` by returning `fn` itself, bitcast to `TYPE_OUTPUT`.
   *
   * @param {object} fn - function to return
   * @param {object} body - block to terminate
   */
  buildSelfReturn(fn, body) {
    const bitcast = IR._('bitcast', [ fn.type.ptr(), fn, 'to', TYPE_OUTPUT ]);
    body.push(bitcast);
    body.terminate('ret', [ TYPE_OUTPUT, bitcast ]);
  }

  /**
   * Emit the code for one trie level and the final `otherwise` redirect.
   *
   * @param {object} info - build context (`fn`, `node`, `otherwise`, `skip`)
   * @param {object} body - block to append to
   * @param {object|null} trie - trie node, or `null` when there are no cases
   * @returns {object} the block that received the `otherwise` redirect
   * @throws {Error} on a trie node type this method does not handle
   */
  buildTrie(info, body, trie) {
    const fn = info.fn;

    // Increment `pos` if not invoking external callback
    let pos = { current: fn.arg(ARG_POS), next: null };

    // NOTE: `sequence` has loop inside it - so it isn't going to use
    // `pos.next` anyway (as a matter of fact it doesn't get `pos` as an
    // argument at all)
    if (info.node[kNoAdvance] || (trie && trie.type === 'sequence')) {
      pos.next = pos.current;
    } else {
      body.comment('next = pos + 1');
      pos.next = IR._('getelementptr', TYPE_INPUT.to,
        [ TYPE_INPUT, fn.arg(ARG_POS) ],
        [ INT, INT.v(1) ]);
      body.push(pos.next);
    }

    if (info.node instanceof llparse.node.Invoke)
      body = this.buildInvoke(info, body, pos, info.node.callback);

    // Traverse the `trie`
    if (trie === null) {
      // no-op
    } else if (trie.type === 'single') {
      body = this.buildSingle(info, body, pos, trie.children);
    } else if (trie.type === 'sequence') {
      // NOTE: do not send `pos` here! (see comment above)
      const seq = this.buildSequence(info, body, trie);

      // NOTE: `sequence` implementation loops if there's enough data
      body = seq.body;
      pos = seq.pos;
    } else {
      // NOTE: `next` type must be parsed in `buildSubTrie`
      throw new Error('Unexpected trie node type: ' + trie.type);
    }

    // Do not increment `pos` when falling through, unless we're skipping
    const otherwisePos = { current: pos.current, next: pos.current };
    if (info.skip)
      otherwisePos.next = pos.next;

    this.buildRedirect(info, body, otherwisePos,
      this.buildNode(info.otherwise));

    return body;
  }

  /**
   * Terminate `body` with a `switch` over `what`.
   *
   * @param {object} body - block to terminate
   * @param {object} type - IR type of the switched value
   * @param {object} what - value to switch on
   * @param {number[]} values - case values, one block per value
   * @returns {{otherwise: object, cases: object[]}} the resulting blocks;
   *   `cases[i]` corresponds to `values[i]`
   */
  buildSwitch(body, type, what, values) {
    const cases = [];
    cases.push(IR.label('otherwise'));
    cases.push('[');
    values.forEach((value, i) => {
      cases.push(type, type.v(value));
      cases.push(',', IR.label(`case_${i}`));
    });
    cases.push(']');

    const blocks = body.terminate('switch', [ type, what ], cases);

    blocks[0].name = 'switch_otherwise';
    for (let i = 0; i < values.length; i++) {
      // Negative values get an `m` prefix instead of a minus sign
      const v = values[i] < 0 ? 'm' + (-values[i]) : values[i];
      blocks[i + 1].name = 'case_' + v;
    }

    return {
      otherwise: blocks[0],
      cases: blocks.slice(1)
    };
  }

  /**
   * Load the value at `pos` and switch over the keys of `children`.
   *
   * @param {object} info - build context
   * @param {object} body - block to append to
   * @param {object} pos - `{ current, next }` position values
   * @param {object[]} children - `{ key, child }` entries of the trie node
   * @returns {object} the switch's `otherwise` block
   */
  buildSingle(info, body, pos, children) {
    // Load the character
    const current = IR._('load', TYPE_INPUT.to,
      [ TYPE_INPUT, info.fn.arg(ARG_POS) ]);
    body.push(current);

    const keys = children.map(child => child.key);
    const s = this.buildSwitch(body, TYPE_INPUT.to, current, keys);

    s.cases.forEach((target, i) => {
      this.buildSubTrie(info, target, pos, children[i].child);
    });

    return s.otherwise;
  }

  /**
   * Emit a call to the shared match-sequence helper for `trie.select` and a
   * switch over the returned status.
   *
   * @param {object} info - build context
   * @param {object} body - block to append to
   * @param {object} trie - `sequence` trie node
   * @returns {{pos: object, body: object}} position values taken from the
   *   helper's result and the block reached on mismatch
   */
  buildSequence(info, body, trie) {
    assert(!info.node[kNoAdvance]);

    const seq = this.ir.data(trie.select);

    // Pointer to the first element of the sequence data
    const cast = IR._('getelementptr inbounds', seq.type.to,
      [ seq.type, seq ],
      [ INT, INT.v(0) ],
      [ INT, INT.v(0) ]);
    body.push(cast);

    const returnType = this.matchSequence.type.ret;

    const call = IR._(`call ${CCONV}`, [
      returnType, this.matchSequence, '(',
      this.state.ptr(), info.fn.arg(ARG_STATE), ',',
      TYPE_INPUT, info.fn.arg(ARG_POS), ',',
      TYPE_INPUT, info.fn.arg(ARG_ENDPOS), ',',
      TYPE_INPUT, cast, ',',
      INT, INT.v(seq.type.to.length),
      ')'
    ]);
    body.push(call);

    const status = IR._('extractvalue', [ returnType, call ],
      INT.v(returnType.lookup('status')));
    body.push(status);

    const current = IR._('extractvalue', [ returnType, call ],
      INT.v(returnType.lookup('current')));
    body.push(current);

    // This is lame, but it is easier to do it this way
    // (Optimizer will remove it, if it isn't needed)
    body.comment('next = pos + 1');
    const next = IR._('getelementptr', TYPE_INPUT.to,
      [ TYPE_INPUT, current ],
      [ INT, INT.v(1) ]);
    body.push(next);

    const pos = { current, next };

    const s = this.buildSwitch(body, INT, status, [
      SEQUENCE_COMPLETE,
      SEQUENCE_PAUSE,
      SEQUENCE_MISMATCH
    ]);

    // No other values are allowed
    s.otherwise.terminate('unreachable');

    const complete = s.cases[0];
    const pause = s.cases[1];
    const mismatch = s.cases[2];

    this.buildSubTrie(info, complete, pos, trie.children);
    this.buildSelfReturn(info.fn, pause);

    // Not equal
    // NOTE(review): no `state.index` reset is emitted here; presumably the
    // match-sequence helper takes care of it - confirm.
    return { pos, body: mismatch };
  }

  /**
   * Redirect to the node a `next` trie leaf points at, passing its value.
   *
   * @param {object} info - build context
   * @param {object} body - block to terminate
   * @param {object} pos - `{ current, next }` position values
   * @param {object} trie - `next` trie node (`next`, `value`)
   */
  buildNext(info, body, pos, trie) {
    return this.buildRedirect(info, body, pos, this.buildNode(trie.next),
      trie.value);
  }

  /**
   * Terminate `body` with a transition to `target`: a tail call with
   * `pos.next` as the new position, optionally preceded by a store of
   * `value` into `state.match`.
   *
   * Redirect blocks are shared between all callers within the same function
   * that go to the same target with the same `pos.next`. The position is
   * part of the cache key because the shared block hard-codes it in the
   * call.
   *
   * @param {object} info - build context (`fn` is used)
   * @param {object} body - block to terminate
   * @param {object} pos - `{ current, next }` position values
   * @param {object} target - function to transfer control to
   * @param {number|null} [value=null] - value for `state.match`, or `null`
   *   when nothing should be stored; `0` is a regular value
   * @throws {AssertionError} when redirects with and without a value share
   *   the same target and position
   */
  buildRedirect(info, body, pos, target, value = null) {
    const fn = info.fn;
    const mixMessage = '`.match()` and `.select()` with the same target';

    let byTarget = this.redirectCache.get(fn);
    if (byTarget === undefined) {
      byTarget = new Map();
      this.redirectCache.set(fn, byTarget);
    }

    let byPos = byTarget.get(target);
    if (byPos === undefined) {
      byPos = new Map();
      byTarget.set(target, byPos);
    }

    const cached = byPos.get(pos.next);
    if (cached !== undefined) {
      // Compare against `null` explicitly: `0` is a valid match value
      if (cached.phi) {
        assert.notStrictEqual(value, null, mixMessage);

        cached.phi.append([ '[', TYPE_MATCH.v(value), ',', body.ref(), ']' ]);
      } else {
        assert.strictEqual(value, null, mixMessage);
      }

      body.terminate('br', cached.target);
      return;
    }

    // Split, so that others could join us from code block above
    const redirect = body.jump('br');
    redirect.name = body.name + '_redirect';

    let phi = null;

    // Set `state.match` if needed
    if (value !== null) {
      redirect.comment('state.match = phi');
      phi = IR._('phi',
        [ TYPE_MATCH, '[', TYPE_MATCH.v(value), ',', body.ref(), ']' ]);
      redirect.push(phi);

      const matchField = this.field(fn, 'match');
      redirect.push(matchField);

      redirect.push(IR._('store', [ TYPE_MATCH, phi ],
        [ TYPE_MATCH.ptr(), matchField ]).void());
    }

    byPos.set(pos.next, { phi, target: redirect });

    // TODO(indutny): looks like `musttail` gives worse performance when calling
    // Invoke nodes (possibly others too).
    const call = IR._(`musttail call ${CCONV}`, [
      TYPE_OUTPUT, target, '(',
      this.state.ptr(), fn.arg(ARG_STATE), ',',
      TYPE_INPUT, pos.next, ',',
      TYPE_INPUT, fn.arg(ARG_ENDPOS),
      ')'
    ]);
    redirect.push(call);
    redirect.terminate('ret', [ TYPE_OUTPUT, call ]);
  }

  /**
   * Emit the body of an error node: store `code` and `reason` into the
   * state and return a null `TYPE_OUTPUT`.
   *
   * @param {object} info - build context (`fn`, `node`)
   * @param {object} body - block to fill and terminate
   * @returns {*} result of `body.terminate()`
   */
  buildError(info, body) {
    const code = info.node.code;
    const reason = this.ir.cstr(info.node.reason);

    const codeField = this.field(info.fn, 'error');
    body.push(codeField);

    const reasonField = this.field(info.fn, 'reason');
    body.push(reasonField);

    const castReason = IR._('bitcast', [
      reason.type, reason, 'to', TYPE_REASON
    ]);
    body.push(castReason);

    body.push(IR._('store', [ TYPE_ERROR, TYPE_ERROR.v(code) ],
      [ TYPE_ERROR.ptr(), codeField ]).void());
    body.push(IR._('store', [ TYPE_REASON, castReason ],
      [ TYPE_REASON.ptr(), reasonField ]).void());

    return body.terminate('ret', [ TYPE_OUTPUT, TYPE_OUTPUT.v(null) ]);
  }

  /**
   * Create (but do not push) a `getelementptr` instruction addressing the
   * state field `name` through the state argument of `fn`.
   *
   * @param {object} fn - function whose state argument is used
   * @param {string} name - state field name
   * @returns {object} the `getelementptr` instruction
   */
  field(fn, name) {
    const stateArg = fn.arg(ARG_STATE);

    return IR._('getelementptr', this.state,
      [ stateArg.type, stateArg ],
      [ INT, INT.v(0) ],
      [ INT, INT.v(this.state.lookup(name)) ]);
  }
}
module.exports = Compiler;