UNPKG

@keymanapp/kmc-model

Version:

Keyman Developer lexical model compiler

68 lines (66 loc) 2.93 kB
!function(){try{var e="undefined"!=typeof window?window:"undefined"!=typeof global?global:"undefined"!=typeof self?self:{},n=(new Error).stack;n&&(e._sentryDebugIds=e._sentryDebugIds||{},e._sentryDebugIds[n]="38aea5d1-2e0d-5cee-a124-9c5c6624cbf7")}catch(e){}}(); import { ModelCompilerError, ModelCompilerMessages } from "./model-compiler-messages.js"; export function decorateWithScriptOverrides(breaker, option) { if (option !== 'break-words-at-spaces') { throw new ModelCompilerError(ModelCompilerMessages.Error_UnsupportedScriptOverride({ option })); } /** * Matches if when a span contains a Southeast-Asian letter or mark anywhere. * This makes it a candidate for joining. * * See: tools/create-override-script-regexp.ts for how this RegExp was * generated. * * Last updated for Unicode 13.0.0. */ const HAS_SOUTHEAST_ASIAN_LETTER = /[\u0E01-\u0E3A\u0E40-\u0E4E\u0E81\u0E82\u0E84\u0E86-\u0E8A\u0E8C-\u0EA3\u0EA5\u0EA7-\u0EBD\u0EC0-\u0EC4\u0EC6\u0EC8-\u0ECD\u0EDC-\u0EDF\u1000-\u103F\u1050-\u108F\u109A-\u109D\u1780-\u17D3\u17D7\u17DC\u17DD\u30A1-\u30FA\u30FC-\u30FF]/; return function enhancedBreaker(phrase) { let originalSpans = breaker(phrase); if (originalSpans.length === 0) { return []; } let outputSpans = [originalSpans.shift()]; for (let currentSpan of originalSpans) { let previousSpan = lastFrom(outputSpans); if (spansAreBackToBack(previousSpan, currentSpan) && hasSouthEastAsianLetter(previousSpan) && hasSouthEastAsianLetter(currentSpan)) { // previous span SHOULD be joined with current! outputSpans[outputSpans.length - 1] = concatenateSpans(previousSpan, currentSpan); } else { outputSpans.push(currentSpan); } } return outputSpans; }; function hasSouthEastAsianLetter(span) { return HAS_SOUTHEAST_ASIAN_LETTER.test(span.text); } /** * Returns true when the spans are contiguous. * Order matters when calling this function! */ function spansAreBackToBack(former, latter) { return former.end === latter.start; } function concatenateSpans(former, latter) { if (latter.start !== former.end) { throw new Error(`Cannot concatenate non-contiguous spans: ${JSON.stringify(former)}/${JSON.stringify(latter)}`); } return { start: former.start, end: latter.end, length: former.length + latter.length, text: former.text + latter.text }; } /** * Get the last element from the array. */ function lastFrom(array) { return array[array.length - 1]; } } //# sourceMappingURL=script-overrides-decorator.js.map //# debugId=38aea5d1-2e0d-5cee-a124-9c5c6624cbf7