@cspell/cspell-tools
Version:
Tools to assist with the development of cSpell
263 lines • 11.4 kB
JavaScript
import * as path from 'node:path';
import { pipeAsync, toArray } from '@cspell/cspell-pipe';
import { opAwaitAsync, opMapAsync } from '@cspell/cspell-pipe/operators';
import { opConcatMap, opMap, pipe } from '@cspell/cspell-pipe/sync';
import { isFileListSource, isFilePath, isFileSource } from '../config/index.js';
import { checkShasumFile, updateChecksumForFiles } from '../shasum/index.js';
import { stringToRegExp } from '../util/textRegex.js';
import { createAllowedSplitWordsFromFiles, createWordsCollectionFromFiles } from './createWordsCollection.js';
import { logWithTimestamp } from './logWithTimestamp.js';
import { readTextFile } from './readers/readTextFile.js';
import { streamSourceWordsFromFile } from './streamSourceWordsFromFile.js';
import { compileTrie, compileWordList } from './wordListCompiler.js';
import { normalizeTargetWords } from './wordListParser.js';
/**
 * Compile all targets in the request, optionally skipping unchanged targets
 * (conditional build) and updating the checksum file afterwards.
 * @param request - the compile request (targets, rootDir, shared target options).
 * @param options - optional compile options (cwd, filter, conditionalBuild).
 */
export async function compile(request, options) {
    const { targets } = request;
    const rootDir = path.resolve(request.rootDir || '.');
    const cwd = options?.cwd;
    // Request-level defaults; each target may override any of these.
    const targetDefaults = {
        sort: request.sort,
        generateNonStrict: request.generateNonStrict,
        removeDuplicates: request.removeDuplicates,
    };
    const conditional = options?.conditionalBuild || false;
    const checksumFile = resolveChecksumFile(request.checksumFile || conditional, rootDir);
    const dictionaryDirectives = request.dictionaryDirectives;
    const dependencies = new Set();
    for (const target of targets) {
        // Skip targets rejected by the optional filter (keep by default).
        if (!(options?.filter?.(target) ?? true)) {
            continue;
        }
        const effectiveTarget = { ...targetDefaults, ...target };
        const deps = await compileTarget(effectiveTarget, request, {
            rootDir,
            cwd,
            conditional,
            checksumFile,
            dictionaryDirectives,
        });
        for (const dep of deps) {
            dependencies.add(dep);
        }
    }
    if (checksumFile && dependencies.size) {
        logWithTimestamp('%s', `Update checksum: ${checksumFile}`);
        await updateChecksumForFiles(checksumFile, [...dependencies], { root: path.dirname(checksumFile) });
    }
    logWithTimestamp(`Complete.`);
}
/**
 * Resolve the checksum filename against the root directory.
 * A non-empty string is used as-is; any other truthy value selects the
 * default `./checksum.txt`; falsy values disable checksum tracking.
 * @returns the absolute checksum file path, or `undefined` when disabled.
 */
function resolveChecksumFile(checksumFile, root) {
    let cFilename;
    if (typeof checksumFile === 'string' && checksumFile) {
        cFilename = checksumFile;
    } else if (checksumFile) {
        cFilename = './checksum.txt';
    }
    return cFilename ? path.resolve(root, cFilename) : undefined;
}
/**
 * Compile a single target into a word list or trie file.
 * @param target - the target definition (name, format, sources, filters, ...).
 * @param options - the original compile request; passed through to the source readers.
 * @param compileOptions - resolved global options (rootDir, cwd, checksumFile, conditional, directives).
 * @returns the list of dependency file paths (relative to the checksum root) used to
 *   build this target, or `[]` when the conditional build found nothing changed.
 */
export async function compileTarget(target, options, compileOptions) {
    logWithTimestamp(`Start compile: ${target.name}`);
    const { rootDir, cwd, checksumFile, conditional } = compileOptions;
    const { format, sources, trieBase, sort = true, generateNonStrict = false, excludeWordsFrom = [], excludeWordsNotFoundIn = [], excludeWordsMatchingRegex, } = target;
    const targetDirectory = path.resolve(rootDir, target.targetDirectory ?? cwd ?? process.cwd());
    // Target-level directives take precedence over request-level ones.
    const dictionaryDirectives = target.dictionaryDirectives ?? compileOptions.dictionaryDirectives;
    const removeDuplicates = target.removeDuplicates ?? false;
    // A word is kept only when it passes all three filters:
    // not excluded by file, found in the include lists, and not matching an exclude regex.
    const excludeFromFilter = await createExcludeFilter(excludeWordsFrom);
    const includeFromFilter = await createIncludeFilter(excludeWordsNotFoundIn);
    const excludeRegexFilter = createExcludeRegexFilter(excludeWordsMatchingRegex);
    const excludeFilter = (word) => {
        return excludeFromFilter(word) && includeFromFilter(word) && excludeRegexFilter(word);
    };
    // Note: tries default generateNonStrict to true, word lists to false.
    const generateNonStrictTrie = target.generateNonStrict ?? true;
    const name = normalizeTargetName(target.name);
    const useTrie = format.startsWith('trie');
    const filename = resolveTarget(name, targetDirectory, useTrie, target.compress ?? false);
    // Read all sources up front; each entry holds the source path and its word stream.
    const filesToProcessAsync = pipeAsync(readSourceList(sources, rootDir), opMapAsync((src) => readFileSource(src, options)), opAwaitAsync());
    const filesToProcess = await toArray(filesToProcessAsync);
    const normalizer = normalizeTargetWords({
        sort: useTrie || sort,
        generateNonStrict,
        filter: excludeFilter,
        dictionaryDirectives,
        // removeDuplicates, // Add this in if we use it.
    });
    const checksumRoot = (checksumFile && path.dirname(checksumFile)) || rootDir;
    const deps = [
        ...calculateDependencies(filename, filesToProcess, [...excludeWordsFrom, ...excludeWordsNotFoundIn], checksumRoot),
    ];
    // Conditional build: skip the target entirely when the checksums of all
    // dependencies still match. Errors reading the checksum file mean "rebuild".
    if (conditional && checksumFile) {
        const check = await checkShasumFile(checksumFile, deps, checksumRoot).catch(() => undefined);
        if (check?.passed) {
            logWithTimestamp(`Skip ${target.name}, nothing changed.`);
            return [];
        }
    }
    // Choose the writer: trie output (already sorted by the normalizer) or plain word list.
    const action = useTrie
        ? async (words, dst) => {
            return compileTrie(pipe(words, normalizer), dst, {
                base: trieBase,
                sort: false,
                trie3: format === 'trie3',
                trie4: format === 'trie4',
                generateNonStrict: generateNonStrictTrie,
                dictionaryDirectives: undefined,
                // removeDuplicates, // Add this in if we use it.
            });
        }
        : async (words, dst) => {
            return compileWordList(pipe(words, normalizer), dst, {
                sort,
                generateNonStrict,
                dictionaryDirectives,
                removeDuplicates,
            });
        };
    await processFiles(action, filesToProcess, filename);
    logWithTimestamp(`Done compile: ${target.name}`);
    return deps;
}
/**
 * Collect the set of files (relative to rootDir) that this target depends on:
 * the target file itself, the exclude-list files, and every source file.
 */
function calculateDependencies(targetFile, filesToProcess, excludeFiles, rootDir) {
    const dependencies = new Set();
    const addDependency = (filename) => {
        const relPath = path.relative(rootDir, filename);
        dependencies.add(relPath);
        // Hunspell sources come in .aff/.dic pairs — track the sibling file too.
        dependencies.add(relPath.replace(/\.aff$/, '.dic'));
        dependencies.add(relPath.replace(/\.dic$/, '.aff'));
    };
    addDependency(targetFile);
    excludeFiles?.forEach((excludeFile) => addDependency(excludeFile));
    for (const fileToProcess of filesToProcess) {
        addDependency(fileToProcess.src);
    }
    return dependencies;
}
/** Shorten a path for log output by making it relative to the current working directory. */
function rel(filePath) {
    const cwd = process.cwd();
    return path.relative(cwd, filePath);
}
/**
 * Stream the words from every source file through the given action, writing the
 * merged result to `mergeTarget`. Logging happens lazily as the pipeline is consumed.
 */
async function processFiles(action, filesToProcess, mergeTarget) {
    const dst = mergeTarget;
    // Announce each file as the pipeline reaches it.
    const announce = opMap((entry) => {
        logWithTimestamp('Process "%s" to "%s"', rel(entry.src), rel(dst));
        return entry;
    });
    // Flatten each file's word stream; log after a file is fully drained.
    const emitWords = opConcatMap(function* (entry) {
        yield* entry.words;
        logWithTimestamp('Done processing %s', rel(entry.src));
    });
    const words = pipe(filesToProcess, announce, emitWords, logProgress());
    await action(words, dst);
    logWithTimestamp('Done "%s"', rel(dst));
}
/**
 * Build the absolute output filename for a target:
 * `<directory>/<name>.txt|.trie` with an optional `.gz` suffix.
 */
function resolveTarget(name, directory, useTrie, useGzCompress) {
    const baseExt = useTrie ? '.trie' : '.txt';
    const gzExt = useGzCompress ? '.gz' : '';
    return path.resolve(directory, `${name}${baseExt}${gzExt}`);
}
/**
 * Expand the configured sources into an async stream of file references,
 * resolving every filename to an absolute path.
 * Supported source shapes: bare path, file source object, or a list file
 * whose entries are resolved relative to the list file's directory.
 */
function readSourceList(sources, rootDir) {
    async function* expandSources() {
        for (const source of sources) {
            if (isFilePath(source)) {
                yield { filename: path.resolve(rootDir, source) };
            } else if (isFileSource(source)) {
                yield { ...source, filename: path.resolve(rootDir, source.filename) };
            } else if (isFileListSource(source)) {
                const { listFile, ...sourceOptions } = source;
                const absListFile = path.resolve(rootDir, listFile);
                const listFileDir = path.dirname(absListFile);
                for (const filename of await readFileList(absListFile)) {
                    yield { ...sourceOptions, filename: path.resolve(listFileDir, filename) };
                }
            }
        }
    }
    return expandSources();
}
/** Read a list file and return its non-empty, trimmed lines. */
async function readFileList(fileList) {
    const content = await readTextFile(fileList);
    const lines = content.split('\n').map((line) => line.trim());
    return lines.filter((line) => line.length > 0);
}
/**
 * Open a single source file and return `{ src, words }` where `words` is the
 * stream of words produced by the reader.
 * Per-source settings win over the shared `sourceOptions`; both default to false.
 */
async function readFileSource(fileSource, sourceOptions) {
    const { filename, maxDepth, storeSplitWordsAsCompounds, minCompoundLength } = fileSource;
    const keepRawCase = fileSource.keepRawCase !== undefined ? fileSource.keepRawCase : sourceOptions.keepRawCase || false;
    const split = fileSource.split !== undefined ? fileSource.split : sourceOptions.split || false;
    // 'legacy' is a special split mode handled by the reader itself.
    const legacy = split === 'legacy';
    const allowedSplitWords = await createAllowedSplitWordsFromFiles(fileSource.allowedSplitWords || sourceOptions.allowedSplitWords);
    const readerOptions = {
        maxDepth,
        legacy,
        splitWords: legacy ? false : split,
        keepCase: keepRawCase,
        allowedSplitWords,
        storeSplitWordsAsCompounds,
        minCompoundLength,
    };
    logWithTimestamp(`Reading ${path.basename(filename)}`);
    const words = await streamSourceWordsFromFile(filename, readerOptions);
    logWithTimestamp(`Done reading ${path.basename(filename)}`);
    return { src: filename, words };
}
/**
 * Normalize a target name into a safe filename stem:
 * strip a trailing dictionary extension (.txt/.dic/.aff/.trie, optionally .gz),
 * then replace disallowed characters with '_'.
 */
function normalizeTargetName(name) {
    const stripped = name.replace(/((\.txt|\.dic|\.aff|\.trie)(\.gz)?)?$/, '');
    return stripped.replaceAll(/[^\p{L}\p{M}.\w\\/-]/gu, '_');
}
/**
 * Create a pass-through pipeline stage that logs a progress message every
 * `freq` words (default 100 000) while yielding values unchanged.
 */
function logProgress(freq = 100_000) {
    function* countWords(iter) {
        let count = 0;
        for (const value of iter) {
            count += 1;
            if (count % freq === 0) {
                logWithTimestamp('Progress: Words Processed - %s', count.toLocaleString());
            }
            yield value;
        }
    }
    return countWords;
}
/**
 * @param excludeWordsFrom - List of files to read words from.
 * @returns a function that returns true if the word is not in the exclude list.
 */
async function createExcludeFilter(excludeWordsFrom) {
    if (!excludeWordsFrom?.length) {
        return () => true;
    }
    const excludeWords = await createWordsCollectionFromFiles(excludeWordsFrom);
    // Second arg: presumably toggles case-sensitive lookup when the word
    // contains lower-case letters — confirm against WordsCollection.has.
    return (word) => !excludeWords.has(word, word.toUpperCase() !== word);
}
/**
 * @param includeWordsFrom - List of files to read words from.
 * @returns a function that returns true if the word is in the include list.
 */
async function createIncludeFilter(includeWordsFrom) {
    if (!includeWordsFrom?.length) {
        return () => true;
    }
    const includeWords = await createWordsCollectionFromFiles(includeWordsFrom);
    // Second arg: presumably toggles case-sensitive lookup when the word
    // contains lower-case letters — confirm against WordsCollection.has.
    return (word) => includeWords.has(word, word.toUpperCase() !== word);
}
/**
 * @param excludeWordsMatchingRegex - List of regex patterns to exclude.
 * @returns a function that returns true if the word does not match any of the regex patterns.
 */
function createExcludeRegexFilter(excludeWordsMatchingRegex) {
    if (!excludeWordsMatchingRegex || !excludeWordsMatchingRegex.length)
        return () => true;
    // Compile each pattern once; warn about (and drop) patterns that fail to compile.
    const regexes = excludeWordsMatchingRegex
        .map((a) => stringToRegExp(a))
        .filter((a, i) => {
        if (a)
            return true;
        console.warn('Invalid regex: "%s"', excludeWordsMatchingRegex[i]);
        return false;
    });
    return (word) => !regexes.some((r) => {
        // Bug fix: a pattern compiled with the `g` or `y` flag is stateful —
        // `.test()` advances `lastIndex`, so reusing the regex across words
        // would intermittently skip matches. Reset before every test.
        r.lastIndex = 0;
        return r.test(word);
    });
}
//# sourceMappingURL=compile.js.map