UNPKG

@cspell/cspell-tools

Version:
263 lines 11.4 kB
import * as path from 'node:path';
import { pipeAsync, toArray } from '@cspell/cspell-pipe';
import { opAwaitAsync, opMapAsync } from '@cspell/cspell-pipe/operators';
import { opConcatMap, opMap, pipe } from '@cspell/cspell-pipe/sync';
import { isFileListSource, isFilePath, isFileSource } from '../config/index.js';
import { checkShasumFile, updateChecksumForFiles } from '../shasum/index.js';
import { stringToRegExp } from '../util/textRegex.js';
import { createAllowedSplitWordsFromFiles, createWordsCollectionFromFiles } from './createWordsCollection.js';
import { logWithTimestamp } from './logWithTimestamp.js';
import { readTextFile } from './readers/readTextFile.js';
import { streamSourceWordsFromFile } from './streamSourceWordsFromFile.js';
import { compileTrie, compileWordList } from './wordListCompiler.js';
import { normalizeTargetWords } from './wordListParser.js';

/**
 * Compile every target in the request, optionally skipping targets whose
 * inputs are unchanged (conditional build via a checksum file).
 *
 * @param request - compile request; provides `targets`, `rootDir`, per-request
 *   defaults (`sort`, `generateNonStrict`, `removeDuplicates`), and optional
 *   `checksumFile` / `dictionaryDirectives`.
 * @param options - optional; may carry `cwd`, `conditionalBuild`, and a
 *   `filter(target)` predicate to select which targets to build.
 * @returns resolves when all targets are compiled and the checksum file
 *   (if any) has been updated.
 */
export async function compile(request, options) {
    const { targets } = request;
    const rootDir = path.resolve(request.rootDir || '.');
    const cwd = options?.cwd;
    // Request-level defaults that each target may override.
    const sharedTargetOptions = {
        sort: request.sort,
        generateNonStrict: request.generateNonStrict,
        removeDuplicates: request.removeDuplicates,
    };
    const conditional = options?.conditionalBuild || false;
    // `checksumFile` may be an explicit path, or implied by conditional mode.
    const checksumFile = resolveChecksumFile(request.checksumFile || conditional, rootDir);
    const dictionaryDirectives = request.dictionaryDirectives;
    const collectedDeps = new Set();

    for (const target of targets) {
        // A missing filter keeps everything.
        const keep = options?.filter?.(target) ?? true;
        if (!keep) continue;
        // Target settings win over request-level defaults.
        const adjustedTarget = { ...sharedTargetOptions, ...target };
        const deps = await compileTarget(adjustedTarget, request, {
            rootDir,
            cwd,
            conditional,
            checksumFile,
            dictionaryDirectives,
        });
        for (const dep of deps) {
            collectedDeps.add(dep);
        }
    }

    if (checksumFile && collectedDeps.size) {
        logWithTimestamp('%s', `Update checksum: ${checksumFile}`);
        await updateChecksumForFiles(checksumFile, [...collectedDeps], { root: path.dirname(checksumFile) });
    }
    logWithTimestamp(`Complete.`);
    return;
}

/**
 * Resolve the checksum file location.
 *
 * @param checksumFile - a path, or a truthy value meaning "use the default
 *   './checksum.txt'", or falsy meaning "no checksum file".
 * @param root - directory the path is resolved against.
 * @returns absolute path to the checksum file, or `undefined`.
 */
function resolveChecksumFile(checksumFile, root) {
    const cFilename =
        (typeof checksumFile === 'string' && checksumFile) || (checksumFile && './checksum.txt') || undefined;
    return cFilename ? path.resolve(root, cFilename) : undefined;
}

/**
 * Compile a single dictionary target: read/merge sources, apply word filters,
 * and write either a word list or a trie file.
 *
 * @param target - target definition (name, format, sources, exclusions, ...).
 * @param options - the original compile request (supplies source defaults).
 * @param compileOptions - resolved settings: `rootDir`, `cwd`, `conditional`,
 *   `checksumFile`, `dictionaryDirectives`.
 * @returns list of file dependencies (relative to the checksum root) used to
 *   build this target; empty when the target is skipped as unchanged.
 */
export async function compileTarget(target, options, compileOptions) {
    logWithTimestamp(`Start compile: ${target.name}`);
    const { rootDir, cwd, checksumFile, conditional } = compileOptions;
    const {
        format,
        sources,
        trieBase,
        sort = true,
        generateNonStrict = false,
        excludeWordsFrom = [],
        excludeWordsNotFoundIn = [],
        excludeWordsMatchingRegex,
    } = target;
    const targetDirectory = path.resolve(rootDir, target.targetDirectory ?? cwd ?? process.cwd());
    const dictionaryDirectives = target.dictionaryDirectives ?? compileOptions.dictionaryDirectives;
    const removeDuplicates = target.removeDuplicates ?? false;

    // A word is kept only if it passes all three filters.
    const excludeFromFilter = await createExcludeFilter(excludeWordsFrom);
    const includeFromFilter = await createIncludeFilter(excludeWordsNotFoundIn);
    const excludeRegexFilter = createExcludeRegexFilter(excludeWordsMatchingRegex);
    const excludeFilter = (word) =>
        excludeFromFilter(word) && includeFromFilter(word) && excludeRegexFilter(word);

    const generateNonStrictTrie = target.generateNonStrict ?? true;
    const name = normalizeTargetName(target.name);
    const useTrie = format.startsWith('trie');
    const filename = resolveTarget(name, targetDirectory, useTrie, target.compress ?? false);

    const filesToProcess = await toArray(
        pipeAsync(
            readSourceList(sources, rootDir),
            opMapAsync((src) => readFileSource(src, options)),
            opAwaitAsync(),
        ),
    );

    const normalizer = normalizeTargetWords({
        // Tries must always be sorted.
        sort: useTrie || sort,
        generateNonStrict,
        filter: excludeFilter,
        dictionaryDirectives,
    });

    const checksumRoot = (checksumFile && path.dirname(checksumFile)) || rootDir;
    const deps = [
        ...calculateDependencies(
            filename,
            filesToProcess,
            [...excludeWordsFrom, ...excludeWordsNotFoundIn],
            checksumRoot,
        ),
    ];

    // Conditional build: skip the target when all dependencies match the
    // recorded checksums. Any checksum read failure falls through to a build.
    if (conditional && checksumFile) {
        const check = await checkShasumFile(checksumFile, deps, checksumRoot).catch(() => undefined);
        if (check?.passed) {
            logWithTimestamp(`Skip ${target.name}, nothing changed.`);
            return [];
        }
    }

    const action = useTrie
        ? async (words, dst) =>
              compileTrie(pipe(words, normalizer), dst, {
                  base: trieBase,
                  sort: false, // the normalizer already sorted the words
                  trie3: format === 'trie3',
                  trie4: format === 'trie4',
                  generateNonStrict: generateNonStrictTrie,
                  dictionaryDirectives: undefined,
              })
        : async (words, dst) =>
              compileWordList(pipe(words, normalizer), dst, {
                  sort,
                  generateNonStrict,
                  dictionaryDirectives,
                  removeDuplicates,
              });

    await processFiles(action, filesToProcess, filename);
    logWithTimestamp(`Done compile: ${target.name}`);
    return deps;
}

/**
 * Collect the set of files this target depends upon, relative to `rootDir`.
 * For Hunspell pairs, adding either `.aff` or `.dic` also adds its sibling.
 *
 * @returns a Set of relative paths (target file, exclude files, sources).
 */
function calculateDependencies(targetFile, filesToProcess, excludeFiles, rootDir) {
    const dependencies = new Set();

    function addDependency(filename) {
        const relPath = path.relative(rootDir, filename);
        dependencies.add(relPath);
        dependencies.add(relPath.replace(/\.aff$/, '.dic'));
        dependencies.add(relPath.replace(/\.dic$/, '.aff'));
    }

    addDependency(targetFile);
    excludeFiles?.forEach((f) => addDependency(f));
    filesToProcess.forEach((f) => addDependency(f.src));
    return dependencies;
}

/** Path relative to the current working directory — used for log output. */
function rel(filePath) {
    return path.relative(process.cwd(), filePath);
}

/**
 * Stream the words of every source file through the compile `action`,
 * logging progress, and write the merged result to `mergeTarget`.
 */
async function processFiles(action, filesToProcess, mergeTarget) {
    const dst = mergeTarget;
    const words = pipe(
        filesToProcess,
        opMap((ftp) => {
            logWithTimestamp('Process "%s" to "%s"', rel(ftp.src), rel(dst));
            return ftp;
        }),
        opConcatMap(function* (ftp) {
            yield* ftp.words;
            logWithTimestamp('Done processing %s', rel(ftp.src));
        }),
        logProgress(),
    );
    await action(words, dst);
    logWithTimestamp('Done "%s"', rel(dst));
}

/**
 * Compute the absolute output filename for a target:
 * `.trie` for trie formats, `.txt` otherwise, plus `.gz` when compressed.
 */
function resolveTarget(name, directory, useTrie, useGzCompress) {
    const ext = (useTrie ? '.trie' : '.txt') + (useGzCompress ? '.gz' : '');
    return path.resolve(directory, name + ext);
}

/**
 * Expand the configured sources into an async iterable of file entries.
 * Plain paths and file sources yield one entry each; a file-list source is
 * read and each listed file is resolved relative to the list file's folder.
 */
function readSourceList(sources, rootDir) {
    async function* expandSources() {
        for (const src of sources) {
            if (isFilePath(src)) {
                yield { filename: path.resolve(rootDir, src) };
                continue;
            }
            if (isFileSource(src)) {
                yield { ...src, filename: path.resolve(rootDir, src.filename) };
                continue;
            }
            if (isFileListSource(src)) {
                const { listFile, ...rest } = src;
                const absListFile = path.resolve(rootDir, listFile);
                const listFileDir = path.dirname(absListFile);
                const files = await readFileList(absListFile);
                for (const filename of files) {
                    yield { ...rest, filename: path.resolve(listFileDir, filename) };
                }
            }
        }
    }
    return expandSources();
}

/**
 * Read a file-list file: one filename per line, trimmed, blanks dropped.
 *
 * @returns array of filenames as written in the list file.
 */
async function readFileList(fileList) {
    const content = await readTextFile(fileList);
    return content
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => !!line);
}

/**
 * Open one source file as a word stream, applying per-source reader options
 * (falling back to the request-level defaults in `sourceOptions`).
 *
 * @returns `{ src, words }` — the absolute source path and its word iterable.
 */
async function readFileSource(fileSource, sourceOptions) {
    const {
        filename,
        keepRawCase = sourceOptions.keepRawCase || false,
        split = sourceOptions.split || false,
        maxDepth,
        storeSplitWordsAsCompounds,
        minCompoundLength,
    } = fileSource;
    // 'legacy' is a split mode of its own, not a boolean split flag.
    const legacy = split === 'legacy';
    const splitWords = legacy ? false : split;
    const allowedSplitWords = await createAllowedSplitWordsFromFiles(
        fileSource.allowedSplitWords || sourceOptions.allowedSplitWords,
    );
    const readerOptions = {
        maxDepth,
        legacy,
        splitWords,
        keepCase: keepRawCase,
        allowedSplitWords,
        storeSplitWordsAsCompounds,
        minCompoundLength,
    };
    logWithTimestamp(`Reading ${path.basename(filename)}`);
    const stream = await streamSourceWordsFromFile(filename, readerOptions);
    logWithTimestamp(`Done reading ${path.basename(filename)}`);
    return { src: filename, words: stream };
}

/**
 * Normalize a target name into a safe base filename: strip a known
 * dictionary extension (optionally gzipped) and replace disallowed
 * characters with '_'.
 */
function normalizeTargetName(name) {
    return name
        .replace(/((\.txt|\.dic|\.aff|\.trie)(\.gz)?)?$/, '')
        .replaceAll(/[^\p{L}\p{M}.\w\\/-]/gu, '_');
}

/**
 * Create a pass-through generator that logs a progress line every `freq`
 * words (default 100,000) while yielding the words unchanged.
 */
function logProgress(freq = 100_000) {
    function* progress(iter) {
        let count = 0;
        for (const v of iter) {
            ++count;
            if (count % freq === 0) {
                logWithTimestamp('Progress: Words Processed - %s', count.toLocaleString());
            }
            yield v;
        }
    }
    return progress;
}

/**
 * @param excludeWordsFrom - List of files to read words from.
 * @returns a function that returns true if the word is not in the exclude list.
 */
async function createExcludeFilter(excludeWordsFrom) {
    if (!excludeWordsFrom || !excludeWordsFrom.length) return () => true;
    const excludeWords = await createWordsCollectionFromFiles(excludeWordsFrom);
    // Second argument: ignore case unless the word is all upper-case.
    return (word) => !excludeWords.has(word, word.toUpperCase() !== word);
}

/**
 * @param includeWordsFrom - List of files to read words from.
 * @returns a function that returns true if the word is in the include list.
 */
async function createIncludeFilter(includeWordsFrom) {
    if (!includeWordsFrom || !includeWordsFrom.length) return () => true;
    const includeWords = await createWordsCollectionFromFiles(includeWordsFrom);
    return (word) => includeWords.has(word, word.toUpperCase() !== word);
}

/**
 * @param excludeWordsMatchingRegex - List of regex patterns to exclude.
 * @returns a function that returns true if the word does not match any of the regex patterns.
 */
function createExcludeRegexFilter(excludeWordsMatchingRegex) {
    if (!excludeWordsMatchingRegex || !excludeWordsMatchingRegex.length) return () => true;
    const regexes = excludeWordsMatchingRegex
        .map((pattern) => stringToRegExp(pattern))
        .filter((regex, i) => {
            if (regex) return true;
            // Keep going, but warn about patterns that failed to parse.
            console.warn('Invalid regex: "%s"', excludeWordsMatchingRegex[i]);
            return false;
        });
    return (word) => !regexes.some((r) => r.test(word));
}
//# sourceMappingURL=compile.js.map