@cspell/cspell-tools
Version:
Tools to assist with the development of cSpell
199 lines • 7.15 kB
JavaScript
import { mkdir } from 'node:fs/promises';
import * as path from 'node:path';
import { opAppend, opMap, pipe } from '@cspell/cspell-pipe/sync';
import * as Trie from 'cspell-trie-lib';
import { writeSeqToFile } from './fileWriter.js';
import { getLogger } from './logger.js';
import { normalizeTargetWords } from './wordListParser.js';
/**
 * Create the directory `p`, creating any missing parent directories as well.
 * Uses `mkdir` with `recursive: true`.
 *
 * @param {string} p - path of the directory to create.
 * @returns {Promise<void>} resolves once `mkdir` has completed.
 */
async function mkdirp(p) {
    await mkdir(p, { recursive: true });
}
// Header placed at the top of a compiled word list to indicate that the
// list has already been processed. It starts with an empty line.
const wordListHeader = '\n# cspell-tools: keep-case no-split';
// The same header as individual lines with surrounding whitespace removed.
const wordListHeaderLines = wordListHeader.split('\n').map((line) => line.trim());
/**
 * Compile `lines` into a word list and write it to `destFilename`.
 *
 * The written sequence is, in order: the cspell-tools header lines, one
 * `# cspell-dictionary: <directive>` line for each entry of
 * `options.dictionaryDirectives` (if any), an empty line, and finally the
 * words produced by `normalize(lines, options)`.
 *
 * @param lines - the source words.
 * @param destFilename - path of the file to write.
 * @param options - compile options; `dictionaryDirectives` is read here and
 *   the whole object is forwarded to `normalize`.
 * @returns the promise returned by the word list target.
 */
export async function compileWordList(lines, destFilename, options) {
    const words = normalize(lines, options);
    const directiveComments = (options.dictionaryDirectives ?? []).map(
        (directive) => `# cspell-dictionary: ${directive}`,
    );
    const preamble = [...wordListHeaderLines, ...directiveComments, ''];
    const output = pipe(preamble, opAppend(words));
    const writeWordList = createWordListTarget(destFilename);
    return writeWordList(output);
}
/**
 * Run the source lines through the target-word filter built from `options`.
 *
 * - With `options.removeDuplicates`, the lines first pass through
 *   `removeDuplicates`.
 * - With `options.sort`, the result is de-duplicated through a `Set` and
 *   returned as an array sorted with the default string comparison.
 *   Otherwise the piped iterable is returned as is.
 *
 * @param lines - the source words.
 * @param options - compile options (`removeDuplicates`, `sort`, plus whatever
 *   `normalizeTargetWords` reads).
 * @returns an iterable of words, or a sorted array when sorting is requested.
 */
function normalize(lines, options) {
    const filter = normalizeTargetWords(options);
    const source = options.removeDuplicates ? removeDuplicates(lines) : lines;
    const normalized = pipe(source, filter);
    if (options.sort) {
        const unique = new Set(normalized);
        return [...unique].sort();
    }
    return normalized;
}
/**
 * Remove every compound marker character (`*` and `+`) from `word`.
 *
 * @param {string} word - a word that may carry compound markers.
 * @returns {string} the word with all `*` and `+` characters removed.
 */
function stripCompoundAFix(word) {
    return word.replace(/[*+]/g, '');
}
/**
 * Lazily yield `words` with redundant entries removed.
 *
 * Distinct entries are grouped by their lower-case spelling with the compound
 * markers (`*`, `+`) stripped. Within a group:
 *
 * - a single entry is yielded unchanged;
 * - otherwise the entries are merged per spelling by `removeDuplicateForms`;
 * - if only one spelling remains, or there is no all-lower-case spelling,
 *   every merged form is yielded;
 * - otherwise the lower-case forms are yielded first. If they include
 *   `*word*`, the other-case spellings are dropped entirely; if not, an
 *   other-case form is yielded only when its lower-case text is not one of
 *   the lower-case forms already yielded.
 *
 * @param words - iterable of words, possibly carrying compound markers.
 * @returns a generator of the remaining words.
 */
function* removeDuplicates(words) {
    // Bucket the distinct entries by lower-case spelling without markers.
    const buckets = new Map();
    for (const entry of new Set(words)) {
        const key = stripCompoundAFix(entry.toLowerCase());
        const bucket = buckets.get(key);
        if (bucket) {
            bucket.push(entry);
        } else {
            buckets.set(key, [entry]);
        }
    }
    for (const bucket of buckets.values()) {
        if (bucket.length <= 1) {
            yield* bucket;
            continue;
        }
        // Spelling (markers removed, case kept) -> merged list of marked forms.
        const bySpelling = removeDuplicateForms(bucket);
        const spellings = [...bySpelling.keys()];
        // The lower-case spelling is only relevant when several spellings exist.
        const lower = spellings.length > 1 ? spellings[0].toLowerCase() : undefined;
        const lowerForms = lower === undefined ? undefined : bySpelling.get(lower);
        if (!lowerForms) {
            for (const mergedForms of bySpelling.values()) {
                yield* mergedForms;
            }
            continue;
        }
        // Handle upper / lower mix.
        bySpelling.delete(lower);
        const emitted = new Set(lowerForms);
        yield* lowerForms;
        if (emitted.has('*' + lower + '*')) {
            continue;
        }
        for (const mergedForms of bySpelling.values()) {
            for (const form of mergedForms) {
                if (!emitted.has(form.toLowerCase())) {
                    yield form;
                }
            }
        }
    }
}
/**
 * Bit flags describing which compound-marker forms of a word are present.
 * The marked form that `applyFlags` produces for each single flag is:
 *
 * - `none`: solo word            -> `word`
 * - `both`: required on both     -> `+word+`
 * - `pfx`:  required prefix      -> `word+`
 * - `sfx`:  required suffix      -> `+word`
 * - `all`:  optional on both     -> `*word*`
 *
 * `base` (0) is the empty starting value. Numeric keys map back to the flag
 * names, e.g. `Flags[1] === 'none'`.
 */
const Flags = {};
for (const [name, value] of [
    ['base', 0],
    ['none', 1 << 0],
    ['both', 1 << 1],
    ['pfx', 1 << 2],
    ['sfx', 1 << 3],
    ['all', 15],
]) {
    Flags[name] = value;
    Flags[value] = name;
}
/**
 * Convert a combination of `Flags` back into the shortest list of marked
 * forms of `word` (`*` = optional compound marker, `+` = required one).
 *
 * @param {string} word - the word without any compound markers.
 * @param {number} flags - bitwise OR of `Flags` values collected for `word`.
 * @returns {string[]} one or two marked forms. Any combination not listed
 *   explicitly (e.g. `pfx | sfx`) yields `['+word', 'word+']`.
 */
function applyFlags(word, flags) {
    switch (flags) {
        case Flags.none:
            return [word];
        case Flags.all:
            return ['*' + word + '*'];
        case Flags.both:
            return ['+' + word + '+'];
        case Flags.pfx:
            return [word + '+'];
        case Flags.sfx:
            return ['+' + word];
        case Flags.none | Flags.sfx:
            return ['*' + word];
        case Flags.none | Flags.pfx:
            return [word + '*'];
        case Flags.none | Flags.pfx | Flags.sfx:
            return [word + '*', '*' + word];
        case Flags.none | Flags.both:
            // the "correct" answer is [word, '+' + word + '+']
            // but practically it makes sense to allow all combinations.
            return ['*' + word + '*'];
        case Flags.none | Flags.both | Flags.sfx:
            return [word, '+' + word + '*'];
        case Flags.none | Flags.both | Flags.pfx:
            return [word, '*' + word + '+'];
        case Flags.both | Flags.pfx:
            return ['*' + word + '+'];
        case Flags.both | Flags.sfx:
            return ['+' + word + '*'];
        case Flags.both | Flags.pfx | Flags.sfx:
            return ['+' + word + '*', '*' + word + '+'];
        default:
            return ['+' + word, word + '+'];
    }
}
/**
 * Merge forms that differ only in their compound markers.
 *
 * Every form is reduced to its spelling without markers (case is kept). The
 * markers of all forms sharing a spelling are OR-ed together as `Flags`, and
 * `applyFlags` then turns each combination back into a minimal list of
 * marked forms.
 *
 * @param forms - iterable of words, possibly carrying `*` / `+` markers.
 * @returns {Map<string, string[]>} spelling without markers -> merged forms.
 */
function removeDuplicateForms(forms) {
    /**
     * Add the usages expressed by the markers of `word` to `flag`.
     * A trailing marker makes the word usable as a prefix, a leading marker
     * as a suffix; `*` is optional and `+` is required.
     */
    function flags(word, flag = 0) {
        const canBePrefix = word.endsWith('*');
        const mustBePrefix = !canBePrefix && word.endsWith('+');
        const isPrefix = canBePrefix || mustBePrefix;
        const canBeSuffix = word.startsWith('*');
        const mustBeSuffix = !canBeSuffix && word.startsWith('+');
        const isSuffix = canBeSuffix || mustBeSuffix;
        if (canBePrefix && canBeSuffix)
            return flag | Flags.all;
        if (mustBePrefix && mustBeSuffix)
            return flag | Flags.both;
        if (!isPrefix && !isSuffix)
            return flag | Flags.none;
        // Mixed markers. `*word+` stands for `word+` and `+word+`, and `+word*`
        // stands for `+word` and `+word+`. Without these two cases the markers
        // were silently dropped and the form was lost or rewritten.
        if (canBeSuffix && mustBePrefix)
            return flag | Flags.pfx | Flags.both;
        if (mustBeSuffix && canBePrefix)
            return flag | Flags.sfx | Flags.both;
        // From here on exactly one side carries a marker.
        flag |= isPrefix ? Flags.pfx : Flags.sfx;
        // An optional marker also allows the word to stand alone.
        if (canBePrefix || canBeSuffix)
            flag |= Flags.none;
        return flag;
    }
    const merged = new Map();
    for (const form of forms) {
        const key = stripCompoundAFix(form);
        merged.set(key, flags(form, merged.get(key)));
    }
    return new Map([...merged.entries()].map(([key, flag]) => {
        return [key, applyFlags(key, flag)];
    }));
}
/**
 * Create a writer for a word list file.
 *
 * @param destFilename - path of the file to write.
 * @returns a function that takes a sequence of lines, appends `'\n'` to each
 *   one and passes the result to the target created for `destFilename`.
 */
function createWordListTarget(destFilename) {
    const writeTarget = createTarget(destFilename);
    return function writeLines(seq) {
        const withNewlines = pipe(seq, opMap((line) => line + '\n'));
        return writeTarget(withNewlines);
    };
}
/**
 * Create a writer for `destFilename`.
 *
 * Creation of the destination directory is started immediately, when this
 * function is called. The returned writer waits for it to finish before
 * writing.
 *
 * @param destFilename - path of the file to write.
 * @returns an async function that writes a sequence to `destFilename` via
 *   `writeSeqToFile`.
 */
function createTarget(destFilename) {
    const dirReady = mkdirp(path.dirname(destFilename));
    return async function writeTarget(seq) {
        await dirReady;
        await writeSeqToFile(seq, destFilename);
    };
}
/**
 * Build a trie from `words` and write its serialized form to `destFilename`.
 *
 * @param words - the words to store in the trie.
 * @param destFilename - path of the file to write.
 * @param options - trie options, forwarded to `createTrieTarget`.
 * @returns {Promise<void>} resolves when the trie target has finished.
 */
export async function compileTrie(words, destFilename, options) {
    const writeTrie = createTrieTarget(destFilename, options);
    await writeTrie(words);
}
/**
 * Create a writer that turns a list of words into a serialized trie file.
 *
 * @param destFilename - path of the file to write.
 * @param options - `base` is the serialization base (defaults to 32);
 *   `trie4` selects format version 4, otherwise `trie3` selects version 3,
 *   otherwise version 1 is used.
 * @returns an async function that builds a trie from the words, consolidates
 *   it, serializes it and writes it to `destFilename`, logging each step.
 */
function createTrieTarget(destFilename, options) {
    const target = createTarget(destFilename);
    return async function buildAndWriteTrie(words) {
        const log = getLogger();
        log('Reading Words into Trie');
        const base = options.base ?? 32;
        let version = 1;
        if (options.trie4) {
            version = 4;
        } else if (options.trie3) {
            version = 3;
        }
        const { root } = Trie.buildTrie(words);
        log('Reduce duplicate word endings');
        const trie = Trie.consolidate(root);
        const fileName = path.basename(destFilename);
        log(`Writing to file ${fileName}`);
        const serialized = Trie.serializeTrie(trie, {
            base,
            comment: 'Built by cspell-tools.',
            version,
        });
        await target(serialized);
        log(`Done writing to file ${fileName}`);
    };
}
/** Internals exported under `__testing__`, presumably for use by unit tests. */
export const __testing__ = { wordListHeader: wordListHeader, removeDuplicates: removeDuplicates };
//# sourceMappingURL=wordListCompiler.js.map