@keymanapp/kmc-model
Version:
Keyman Developer lexical model compiler
265 lines • 11.4 kB
JavaScript
/*
lexical-model-compiler.ts: base file for lexical model compiler.
*/
import ts from "typescript";
import { createTrieDataStructure } from "./build-trie.js";
import { ModelDefinitions } from "./model-definitions.js";
import { decorateWithJoin } from "./join-word-breaker-decorator.js";
import { decorateWithScriptOverrides } from "./script-overrides-decorator.js";
import { ModelCompilerError, ModelCompilerMessageContext, ModelCompilerMessages } from "./model-compiler-messages.js";
import { callbacks, setCompilerCallbacks } from "./compiler-callbacks.js";
;
;
;
/**
* @public
* Compiles a .model.ts file to a .model.js. The compiler does not read or write
* from filesystem or network directly, but relies on callbacks for all external
* IO.
*/
export class LexicalModelCompiler {
/**
* Initialize the compiler. There are currently no options
* specific to the lexical model compiler
* @param callbacks - Callbacks for external interfaces, including message
* reporting and file io
* @param options - Compiler options
* @returns always succeeds and returns true
*/
async init(callbacks, _options) {
setCompilerCallbacks(callbacks);
return true;
}
/**
* Compiles a .model.ts file to .model.js. Returns an object containing binary
* artifacts on success. The files are passed in by name, and the compiler
* will use callbacks as passed to the {@link LexicalModelCompiler.init}
* function to read any input files by disk.
* @param infile - Path to source file. Path will be parsed to find relative
* references in the .kmn file, such as icon or On Screen
* Keyboard file
* @param outfile - Path to output file. The file will not be written to, but
* will be included in the result for use by
* {@link LexicalModelCompiler.write}.
* @returns Binary artifacts on success, null on failure.
*/
async run(inputFilename, outputFilename) {
try {
let modelSource = this.loadFromFilename(inputFilename);
let containingDirectory = callbacks.path.dirname(inputFilename);
let code = this.generateLexicalModelCode('<unknown>', modelSource, containingDirectory);
const result = {
artifacts: {
js: {
data: new TextEncoder().encode(code),
filename: outputFilename ?? inputFilename.replace(/\.model\.ts$/, '.model.js')
}
}
};
return result;
}
catch (e) {
callbacks.reportMessage(e instanceof ModelCompilerError
? e.event
: ModelCompilerMessages.Fatal_UnexpectedException({ e: e }));
return null;
}
}
/**
* Write artifacts from a successful compile to disk, via callbacks methods.
* The artifacts written may include:
*
* - .model.js file - Javascript lexical model for web and touch platforms
*
* @param artifacts - object containing artifact binary data to write out
* @returns always returns true
*/
async write(artifacts) {
callbacks.fs.writeFileSync(artifacts.js.filename, artifacts.js.data);
return true;
}
/**
* @internal
* Loads a lexical model's source module from the given filename.
*
* @param filename - path to the model source file.
*/
loadFromFilename(filename) {
const data = callbacks.loadFile(filename);
if (!data) {
throw new ModelCompilerError(ModelCompilerMessages.Error_ModelFileNotFound({ filename }));
}
let sourceCode = new TextDecoder().decode(data);
// Compile the module to JavaScript code.
// NOTE: transpile module does a very simple TS to JS compilation.
// It DOES NOT check for types!
let compilationOutput = ts.transpile(sourceCode, {
// Our runtime only supports ES3 with Node/CommonJS modules on Android 5.0.
// When we drop Android 5.0 support, we can update this to a `ScriptTarget`
// matrix against target version of Keyman, here and in
// lexical-model-compiler.ts.
target: ts.ScriptTarget.ES3,
module: ts.ModuleKind.CommonJS,
});
// Turn the module into a function in which we can inject a global.
let moduleCode = '(function(exports){' + compilationOutput + '})';
// Run the module; its exports will be assigned to `moduleExports`.
let moduleExports = {};
let module = eval(moduleCode);
module(moduleExports);
if (!moduleExports['__esModule'] || !moduleExports['default']) {
ModelCompilerMessageContext.filename = filename;
throw new ModelCompilerError(ModelCompilerMessages.Error_NoDefaultExport());
}
return moduleExports['default'];
}
/**
* @internal
* Returns the generated code for the model that will ultimately be loaded by
* the LMLayer worker. This code contains all model parameters, and specifies
* word breakers and auxilary functions that may be required.
*
* @param model_id - The model ID. TODO: not sure if this is actually required!
* @param modelSource - A specification of the model to compile
* @param sourcePath - Where to find auxilary sources files
*/
generateLexicalModelCode(model_id, modelSource, sourcePath) {
// TODO: add metadata in comment
const filePrefix = `(function() {\n'use strict';\n`;
const fileSuffix = `})();`;
let func = filePrefix;
//
// Emit the model as code and data
//
switch (modelSource.format) {
case "custom-1.0":
let sources = modelSource.sources.map(function (source) {
return new TextDecoder().decode(callbacks.loadFile(callbacks.path.join(sourcePath, source)));
});
func += this.transpileSources(sources).join('\n');
func += `LMLayerWorker.loadModel(new ${modelSource.rootClass}());\n`;
break;
case "fst-foma-1.0":
throw new ModelCompilerError(ModelCompilerMessages.Error_UnimplementedModelFormat({ format: modelSource.format }));
case "trie-1.0":
// Convert all relative path names to paths relative to the enclosing
// directory. This way, we'll read the files relative to the model.ts
// file, rather than the current working directory.
let filenames = modelSource.sources.map(filename => callbacks.path.join(sourcePath, filename));
let definitions = new ModelDefinitions(modelSource);
func += definitions.compileDefinitions();
// Needs the actual searchTermToKey closure...
// Which needs the actual applyCasing closure as well.
func += `LMLayerWorker.loadModel(new models.TrieModel(${createTrieDataStructure(filenames, definitions.searchTermToKey)}, {\n`;
let wordBreakerSourceCode = compileWordBreaker(normalizeWordBreakerSpec(modelSource.wordBreaker));
func += ` wordBreaker: ${wordBreakerSourceCode},\n`;
// START - the lexical mapping option block
func += ` searchTermToKey: ${definitions.compileSearchTermToKey()},\n`;
if (modelSource.languageUsesCasing != null) {
func += ` languageUsesCasing: ${modelSource.languageUsesCasing},\n`;
} // else leave undefined.
if (modelSource.languageUsesCasing) {
func += ` applyCasing: ${definitions.compileApplyCasing()},\n`;
}
// END - the lexical mapping option block.
if (modelSource.punctuation) {
func += ` punctuation: ${JSON.stringify(modelSource.punctuation)},\n`;
}
func += `}));\n`;
break;
default:
throw new ModelCompilerError(ModelCompilerMessages.Error_UnknownModelFormat({ format: modelSource.format }));
}
func += fileSuffix;
return func;
}
/**
* @internal
*/
transpileSources(sources) {
return sources.map((source) => ts.transpileModule(source, {
compilerOptions: {
target: ts.ScriptTarget.ES3,
module: ts.ModuleKind.None,
}
}).outputText);
}
;
}
;
/**
* @internal
* Returns a JavaScript expression (as a string) that can serve as a word
* breaking function.
*/
function compileWordBreaker(spec) {
let wordBreakerCode = compileInnerWordBreaker(spec.use);
if (spec.joinWordsAt) {
wordBreakerCode = compileJoinDecorator(spec, wordBreakerCode);
}
if (spec.overrideScriptDefaults) {
wordBreakerCode = compileScriptOverrides(spec, wordBreakerCode);
}
return wordBreakerCode;
}
/**
* @internal
*/
function compileJoinDecorator(spec, existingWordBreakerCode) {
// Bundle the source of the join decorator, as an IIFE,
// like this: (function join(breaker, joiners) {/*...*/}(breaker, joiners))
// The decorator will run IMMEDIATELY when the model is loaded,
// by the LMLayer returning the decorated word breaker to the
// LMLayer model.
let joinerExpr = JSON.stringify(spec.joinWordsAt);
return `(${decorateWithJoin.toString()}(${existingWordBreakerCode}, ${joinerExpr}))`;
}
function compileScriptOverrides(spec, existingWordBreakerCode) {
return `(${decorateWithScriptOverrides.toString()}(${existingWordBreakerCode}, '${spec.overrideScriptDefaults}'))`;
}
/**
* @internal
* Compiles the base word breaker, that may be decorated later.
* Returns the source code of a JavaScript expression.
*/
function compileInnerWordBreaker(spec) {
if (typeof spec === "string") {
// It must be a builtin word breaker, so just instantiate it.
return `wordBreakers['${spec}']`;
}
else {
// It must be a function:
return spec.toString()
// Note: the .toString() might just be the property name, but we want a
// plain function:
.replace(/^wordBreak(ing|er)\b/, 'function');
}
}
/**
* @internal
* Given a word breaker specification in any of the messy ways,
* normalizes it to a common form that the compiler can deal with.
*/
function normalizeWordBreakerSpec(wordBreakerSpec) {
if (wordBreakerSpec == undefined) {
// Use the default word breaker when it's unspecified
return { use: 'default' };
}
else if (isSimpleWordBreaker(wordBreakerSpec)) {
// The word breaker was passed as a literal function; use its source code.
return { use: wordBreakerSpec };
}
else if (wordBreakerSpec.use) {
return wordBreakerSpec;
}
else {
throw new ModelCompilerError(ModelCompilerMessages.Error_UnknownWordBreaker({ spec: wordBreakerSpec.toString() }));
}
}
/**
* @internal
*/
function isSimpleWordBreaker(spec) {
return typeof spec === "function" || spec === "default" || spec === "ascii";
}
//# sourceMappingURL=lexical-model-compiler.js.map