UNPKG

ccgjs

Version:

A combinatory categorial grammar (CCG) library for the web.

238 lines (193 loc) 5.61 kB
import Reader, {
  MachineReadableCCGNodeT,
  MachineReadableCCGNodeL,
  MachineReadableCCG,
} from './ccg.reader';

/** Non-leaf node value as produced by the reader, plus its index in `Metadata.nodes`. */
export type NodeT = MachineReadableCCGNodeT & { nIndex?: number };

/**
 * Leaf node value as produced by the reader, plus its index in
 * `Metadata.nodes` (`nIndex`) and its index in `Metadata.words` (`wIndex`).
 */
export type NodeL = MachineReadableCCGNodeL & {
  nIndex?: number;
  wIndex?: number;
};

/** A tree node: its value and up to two children. */
export type Node = {
  value: NodeT | NodeL;
  left?: Node;
  right?: Node;
};

/**
 * One entry of the derivation table.
 *
 * `from` and `to` are inclusive word indices (`wIndex`) of the first and last
 * word covered by the node. `opr` is only set for non-leaf nodes: `'<'` when
 * the node's `head` is not `0`, `'>'` otherwise.
 */
export type Derivation = {
  from: number;
  to: number;
  ccgCat: string;
  opr?: string;
};

/** Data collected while the tree is being built. */
export type Metadata = {
  /** `true` once the reader accepted the input and the tree was built. */
  isParsed: boolean;
  /** Leaf words joined by single spaces, in visiting order. */
  sentence: string;
  /** Leaf words, indexed by `wIndex`. */
  words: Array<string>;
  /** Category of each leaf, parallel to `words`. */
  ccgCats: Array<string>;
  /** Filled by `Tree.buildDerivations()`; one row per tree level, leaves in row 0. */
  derivations?: Array<Array<Derivation>>;
  /** Number of levels in the tree (the root is level 1). */
  height: number;
  /** Every node in pre-order, indexed by `nIndex`. */
  nodes: Array<Node>;
};

/** Maps a word index (`wIndex`) to its leaf node. */
export type IndexedWordMapper = { [key: number]: Node };

/** Options for `Tree.toString()`; `pretty` puts each node on its own indented line. */
export type ToStringOptions = { pretty?: boolean };

/**
 * Word span and level reported by a subtree while the derivation table is
 * built. `level` is the row index the parent of this subtree must use at
 * minimum (a leaf reports 1).
 */
type DerivSpan = { from: number; to: number; level: number };

/**
 * Tree built from the string form understood by `Reader`.
 *
 * When constructed with a non-empty string, `metadata` is always created;
 * `root` and `mappedIndexedWords` are only set if the reader accepts the input.
 */
export default class Tree {
  public root?: Node;
  public metadata?: Metadata;
  public mappedIndexedWords?: IndexedWordMapper;
  /** Scratch buffer shared by `toString` and `toStringUtil`. */
  private stringBuilder = '';

  /**
   * @param str Input handed to `Reader`. An empty string (the default)
   *   leaves the tree completely uninitialised.
   */
  constructor(str: string = '') {
    if (str.length > 0) {
      this.metadata = {
        isParsed: false,
        sentence: '',
        words: [],
        ccgCats: [],
        height: 0,
        nodes: [],
      };
      this.constructTree(str);
    }
  }

  /**
   * Calls `fn` with the root node (which may be `undefined`) and returns
   * whatever `fn` returns.
   */
  public traverse(fn: Function): any {
    return fn(this.root);
  }

  /**
   * Serialises the tree as nested parentheses.
   *
   * @param opts When `opts.pretty` is set, every node starts on a new line
   *   indented by two spaces per level.
   * @returns The serialised tree, or `''` when there is no root.
   */
  public toString(opts?: ToStringOptions): string {
    if (!this.root) {
      return '';
    }
    this.stringBuilder = '';
    this.toStringUtil(this.root, 0, opts);
    // Every node is emitted with one leading separator character (' ' or
    // '\n'); strip the one in front of the root.
    return this.stringBuilder.substring(1);
  }

  /**
   * (Re)builds `metadata.derivations`: one row per tree level, with the
   * leaves in row 0 and every other node in the row above its tallest child.
   *
   * @returns The derivation table, or `[]` when the tree has no root or no
   *   metadata.
   */
  public buildDerivations(): Array<Array<Derivation>> {
    if (!this.root || !this.metadata) {
      return [];
    }
    const rows = Array.from(
      { length: this.metadata.height },
      (): Array<Derivation> => []
    );
    this.metadata.derivations = rows;
    this.buildDerivUtil(this.root);
    return rows;
  }

  /** Runs the reader over `str` and, on success, builds the tree from its result. */
  private constructTree(str: string): void {
    const reader = new Reader(str);
    if (reader.read()) {
      const result: MachineReadableCCG = reader.result as MachineReadableCCG;
      this.buildTree(result);
      this.metadata!.isParsed = true;
    }
  }

  /** Builds the tree starting at level 1 for the root. */
  private buildTree(obj: MachineReadableCCG): void {
    this.buildTreeUtil(obj, 1);
  }

  /**
   * Creates the node for `obj`, records it in the metadata, attaches it to
   * `parent` on side `dir`, then recurses into the left and right children.
   *
   * Only reached from the constructor path, after `metadata` was created.
   * Note that the node's value is the reader's own object, so `nIndex` /
   * `wIndex` are written onto it.
   */
  private buildTreeUtil(
    obj: MachineReadableCCG,
    level: number,
    parent?: Node,
    dir?: 'left' | 'right'
  ): void {
    const meta = this.metadata!;
    const node: Node = { value: obj.node };

    if (this.root === undefined) {
      this.mappedIndexedWords = {};
      this.root = node;
    }

    node.value.nIndex = meta.nodes.length;
    meta.nodes.push(node);

    if (this.isNodeL(node)) {
      const leaf = node.value as NodeL;
      const wIndex = meta.words.length;
      leaf.wIndex = wIndex;
      meta.words.push(leaf.word);
      meta.ccgCats.push(leaf.ccgCat);
      this.mappedIndexedWords![wIndex] = node;
      if (meta.sentence === '') {
        meta.sentence = leaf.word;
      } else {
        meta.sentence += ` ${leaf.word}`;
      }
    }

    meta.height = Math.max(meta.height, level);

    if (dir === 'left') {
      parent!.left = node;
    }
    if (dir === 'right') {
      parent!.right = node;
    }

    if (obj.left) {
      this.buildTreeUtil(obj.left, level + 1, node, 'left');
    }
    if (obj.right) {
      this.buildTreeUtil(obj.right, level + 1, node, 'right');
    }
  }

  /**
   * Appends the serialisation of `node` and its subtree to `stringBuilder`.
   *
   * Non-leaf nodes are written as `<T ccgCat head dtrs>`, leaves as
   * `<L ccgCat modPOSTag origPOSTag word predArgCat>`, anything else as
   * `<ill-formed CCG Node>`.
   */
  private toStringUtil(
    node: Node,
    level: number,
    opts?: ToStringOptions
  ): void {
    this.stringBuilder += opts?.pretty
      ? '\n' + ' '.repeat(2 * level) + '('
      : ' (';

    if (this.isNodeT(node)) {
      const n = node.value as NodeT;
      this.stringBuilder += `<T ${n.ccgCat} ${n.head} ${n.dtrs}>`;
    } else if (this.isNodeL(node)) {
      const n = node.value as NodeL;
      this.stringBuilder += `<L ${n.ccgCat} ${n.modPOSTag} ${n.origPOSTag} ${n.word} ${n.predArgCat}>`;
    } else {
      this.stringBuilder += '<ill-formed CCG Node>';
    }

    if (node.left) {
      this.toStringUtil(node.left, level + 1, opts);
    }
    if (node.right) {
      this.toStringUtil(node.right, level + 1, opts);
    }
    this.stringBuilder += ')';
  }

  /** A node counts as non-leaf (`T`) when its value carries a `head`. */
  private isNodeT(node: Node): boolean {
    return (node.value as NodeT).head !== undefined;
  }

  /** A node counts as a leaf (`L`) when its value carries a `word`. */
  private isNodeL(node: Node): boolean {
    return (node.value as NodeL).word !== undefined;
  }

  /**
   * Adds the derivation entries for `node` and its subtree to
   * `metadata.derivations` and reports the word span the subtree covers.
   *
   * Assumes `metadata.derivations` has been sized by `buildDerivations()`
   * and that every non-leaf node has a left child.
   */
  private buildDerivUtil(node: Node): DerivSpan {
    const rows = this.metadata!.derivations!;

    if (this.isNodeL(node)) {
      const leaf = node.value as NodeL;
      const index = leaf.wIndex!;
      rows[0].push({ from: index, to: index, ccgCat: leaf.ccgCat });
      return { from: index, to: index, level: 1 };
    }

    const left = this.buildDerivUtil(node.left!);
    const right = node.right ? this.buildDerivUtil(node.right) : undefined;

    const derivation: Derivation = {
      from: left.from,
      // A node without a right child covers exactly the span of its left
      // child, so it must end where that child ends (not where it starts).
      to: right ? right.to : left.to,
      ccgCat: node.value.ccgCat,
      opr: (node.value as NodeT).head !== 0 ? '<' : '>',
    };

    // Place the node in the row directly above its tallest child.
    const row = right ? Math.max(left.level, right.level) : left.level;
    rows[row].push(derivation);
    return { from: derivation.from, to: derivation.to, level: row + 1 };
  }
}