UNPKG

@keymanapp/kmc-model

Version:

Keyman Developer lexical model compiler

120 lines 4.05 kB
/**
 * Returns a word breaker that joins spans of an existing word breaker.
 * Spans are joined if they are connected by a delimiter.
 *
 * A span is merged into the span before it only when the two are contiguous
 * (`previous.end === span.start`) and at least one side of the seam is a
 * joiner: either the incoming span is a joiner, or the span processed just
 * before it was a joiner.
 *
 * @param breaker The word breaker whose results will be decorated. It is
 *                called with the input and must return an iterable of spans
 *                carrying `start`, `end`, `length` and `text` properties.
 * @param joiners What delimiters should be used to join spans. A span counts
 *                as a joiner when its whole `text` strictly equals an entry.
 * @returns A function that runs `breaker` on its input and returns a new
 *          array holding the (possibly merged) spans in their original order.
 */
export function decorateWithJoin(breaker, joiners) {
  // Make a copy so that if the original array is accidentally mutated, it
  // won't affect the joiner.
  const delimiters = joiners.concat();

  return function (input) {
    const originalSpans = breaker(input);

    // Implements a finite-state transducer (FST) where:
    //  - Transductions are pushed onto a stack
    //  - There are three states:
    //    - empty stack (initial state)
    //    - unjoined (the last span processed was not a joiner)
    //    - joined (the last span processed was a joiner)
    //  - all three states are accepting states
    //  - there is NO backtracking on the input
    //    (hence the for-loop over the input tape)
    //  - each state is a JavaScript callback (function)
    let state = emptyStack;
    const stack = [];
    for (const span of originalSpans) {
      state = state(span);
    }
    return stack;

    /******************* States *******************/

    function emptyStack(span) {
      stack.push(span);
      return stateAfter(span);
    }

    function unjoined(span) {
      // NB: stack has at least one span in it
      if (!isJoiner(span)) {
        // Neither side of the seam is a joiner: span cannot be joined.
        stack.push(span);
        return unjoined;
      }
      if (spansAreBackToBack(lastFrom(stack), span)) {
        concatLastSpanInStackWith(span);
      } else {
        // Spans are non-contiguous, so don't join them!
        stack.push(span);
      }
      return joined;
    }

    function joined(span) {
      // NB: stack has at least one span in it
      if (spansAreBackToBack(lastFrom(stack), span)) {
        concatLastSpanInStackWith(span);
      } else {
        // Spans are non-contiguous and cannot be joined:
        stack.push(span);
      }
      // The next state depends only on the span just processed. In
      // particular, a joiner pushed after a gap must keep the machine in
      // `joined`; falling back to `unjoined` here would stop the following
      // contiguous word from attaching to that joiner.
      return stateAfter(span);
    }

    /****************** Helpers ******************/

    /**
     * Picks the state to enter once `span` sits on top of the stack
     * (either pushed as-is or merged into the previous top).
     */
    function stateAfter(span) {
      return isJoiner(span) ? joined : unjoined;
    }

    /**
     * Replaces the top of the stack with the concatenation of itself and
     * `span`.
     */
    function concatLastSpanInStackWith(span) {
      const lastIndex = stack.length - 1;
      stack[lastIndex] = concatenateSpans(stack[lastIndex], span);
    }
  };

  /**
   * Returns true when the span's entire text is one of the delimiters.
   */
  function isJoiner(span) {
    return includes(delimiters, span.text);
  }

  /**
   * Returns true when the spans are contiguous.
   * Order matters when calling this function!
   */
  function spansAreBackToBack(former, latter) {
    return former.end === latter.start;
  }

  /**
   * Builds a new span covering `former` followed by `latter`; neither input
   * is modified.
   *
   * @throws {Error} when `latter` does not start exactly where `former` ends.
   */
  function concatenateSpans(former, latter) {
    if (latter.start !== former.end) {
      throw new Error(`Cannot concatenate non-contiguous spans: ${JSON.stringify(former)}/${JSON.stringify(latter)}`);
    }
    return {
      start: former.start,
      end: latter.end,
      length: former.length + latter.length,
      text: former.text + latter.text
    };
  }

  /**
   * When Array.prototype.includes() doesn't exist:
   */
  function includes(haystack, needle) {
    for (const item of haystack) {
      if (item === needle) {
        return true;
      }
    }
    return false;
  }

  /**
   * Get the last element from the array.
   */
  function lastFrom(array) {
    return array[array.length - 1];
  }
}
//# sourceMappingURL=join-word-breaker-decorator.js.map