// cspell
// Version:
// A Spelling Checker for Code!
// 134 lines (118 loc) • 5.45 kB
// text/typescript
import * as Text from './util/text';
import * as TextRange from './util/TextRange';
import { SpellingDictionary } from './SpellingDictionary';
import { Sequence } from 'gensequence';
import * as RxPat from './Settings/RegExpPatterns';
/**
 * Options accepted by `validateText`.
 * Every field is optional; `validateText` substitutes a default for each one it reads.
 */
export interface ValidationOptions extends IncludeExcludeOptions {
/** Cap on the number of problems returned; defaults to `defaultMaxNumberOfProblems`. */
maxNumberOfProblems?: number;
/**
 * Duplicate threshold (compared case-insensitively): a word is reported only while
 * its running count is strictly below this value. Defaults to `defaultMaxDuplicateProblems`.
 */
maxDuplicateProblems?: number;
/** Words shorter than this are skipped unless they are flagged; defaults to `defaultMinWordLength`. */
minWordLength?: number;
// Words to always flag as an error: they bypass the minimum-length and dictionary checks (exact-case match).
flagWords?: string[];
/** Words never reported; matched case-insensitively. */
ignoreWords?: string[];
// NOTE(review): `words` and `userWords` are not read by the validation code in this file;
// presumably they are consumed where the dictionary is built — confirm against callers.
words?: string[];
userWords?: string[];
/** When true, forwarded to the dictionary lookup; when false, the dictionary's own setting applies. */
allowCompoundWords?: boolean;
}
/**
 * Patterns that decide which parts of a text are checked.
 * Consumed by `calcTextInclusionRanges`.
 */
export interface IncludeExcludeOptions {
/** Patterns whose matches are removed from the included ranges. */
ignoreRegExpList?: (RegExp|string)[];
/** Patterns whose matches are checked; when empty or missing, `'.*'` is used instead. */
includeRegExpList?: (RegExp|string)[];
}
/**
 * Accumulator carried between words while `validateText` walks the
 * extracted words against the inclusion ranges.
 */
export interface WordRangeAcc {
/** The current word; trimmed to the range when it only partially overlaps it. */
textOffset: Text.TextOffset;
/** Whether the word overlaps the current inclusion range. */
isIncluded: boolean;
/** Index of the current inclusion range, so the next word resumes from here. */
rangePos: number;
}
/** Default cap on the number of problems `validateText` returns. */
export const defaultMaxNumberOfProblems = 200;
/** Default duplicate threshold used by `validateText` (a word is reported while its count is below this). */
export const defaultMaxDuplicateProblems = 5;
/** Default minimum length a non-flagged word must have to be checked. */
export const defaultMinWordLength = 4;
// NOTE(review): not referenced in this file; presumably the minimum length used when splitting words — confirm against callers.
export const minWordSplitLen = 3;
/**
 * Find the spelling problems in `text`.
 *
 * Words are extracted with `Text.extractWordsFromCode`, restricted to the
 * ranges produced by `calcTextInclusionRanges`, and then reported when they
 * are either flagged or missing from the dictionary.
 *
 * @param text - the text to check.
 * @param dict - dictionary used to look words up.
 * @param options - limits, flagged / ignored words and include / exclude patterns.
 * @returns the offending words with their offsets, at most `maxNumberOfProblems` of them.
 */
export function validateText(
    text: string,
    dict: SpellingDictionary,
    options: ValidationOptions
): Sequence<Text.TextOffset> {
    const {
        maxNumberOfProblems = defaultMaxNumberOfProblems,
        maxDuplicateProblems = defaultMaxDuplicateProblems,
        minWordLength = defaultMinWordLength,
        flagWords = [],
        ignoreWords = [],
        allowCompoundWords = false,
    } = options;

    const flagged = new Set(flagWords);
    const timesReported = new Map<string, number>();
    const ranges = calcTextInclusionRanges(text, options);
    const ignored = new Set(ignoreWords.map(w => w.toLowerCase()));

    // Advance to the range that may contain `word` and trim the word to that range.
    function clipToRanges(acc: WordRangeAcc, word: Text.TextOffset): WordRangeAcc {
        const wordStart = word.offset;
        const wordEnd = word.offset + word.text.length;

        // Skip every range that ends at or before the start of this word.
        let rangePos = acc.rangePos;
        while (ranges[rangePos] && ranges[rangePos].endPos <= wordStart) {
            rangePos += 1;
        }

        const range = ranges[rangePos];
        const isIncluded = range && range.startPos < wordEnd;
        const needsClipping = isIncluded && (range.endPos < wordEnd || range.startPos > wordStart);
        if (!needsClipping) {
            return { rangePos, isIncluded, textOffset: word };
        }

        // Only part of the word lies inside the range: keep just that part.
        const offset = Math.max(range.startPos, wordStart);
        const clipEnd = Math.min(range.endPos, wordEnd);
        const clipped = word.text.slice(offset - wordStart, clipEnd - wordStart);
        return { rangePos, isIncluded, textOffset: { ...word, text: clipped, offset } };
    }

    // Words that should never be reported, whatever the dictionary says.
    function isNoise(word: Text.TextOffset): boolean {
        // Remove anything that is in the ignore list.
        if (ignored.has(word.text.toLowerCase())) {
            return true;
        }
        // Filter out any hex numbers.
        if (RxPat.regExHexDigits.test(word.text)) {
            return true;
        }
        // Filter out any repeated characters like xxxxxxxxxx.
        return RxPat.regExRepeatedChar.test(word.text);
    }

    // Count occurrences (case-insensitively) and stop reporting once there are too many.
    function underDuplicateLimit(word: Text.TextOffset): boolean {
        const key = word.text.toLowerCase();
        const count = (timesReported.get(key) || 0) + 1;
        timesReported.set(key, count);
        return count < maxDuplicateProblems;
    }

    const seed: WordRangeAcc = { textOffset: { text: '', offset: 0 }, isIncluded: false, rangePos: 0 };

    return Text.extractWordsFromCode(text)
        // Drop any words that are NOT in the include ranges.
        .scan<WordRangeAcc>(clipToRanges, seed)
        .filter(entry => entry.isIncluded)
        .map(entry => ({ ...entry.textOffset, isFlagged: flagged.has(entry.textOffset.text) }))
        // Short words are only interesting when they are explicitly flagged.
        .filter(word => word.isFlagged || word.text.length >= minWordLength)
        .map(word => ({
            ...word,
            isFound: isWordValid(dict, word, text, allowCompoundWords),
        }))
        .filter(word => word.isFlagged || !word.isFound)
        .filter(word => !isNoise(word))
        .filter(underDuplicateLimit)
        .take(maxNumberOfProblems);
}
/**
 * Work out which ranges of `text` should be checked.
 *
 * The result is every range matched by the include patterns minus every
 * range matched by the ignore patterns. When no usable include pattern is
 * supplied, `'.*'` is used as the include pattern.
 *
 * @param text - the text to search.
 * @param options - the include / ignore pattern lists.
 * @returns the ranges left after the ignored ranges are removed.
 */
export function calcTextInclusionRanges(
    text: string,
    options: IncludeExcludeOptions
): TextRange.MatchRange[] {
    const { ignoreRegExpList = [], includeRegExpList = [] } = options;

    // Empty / falsy entries are not usable patterns.
    const usablePatterns = includeRegExpList.filter(Boolean);
    const includePatterns = usablePatterns.length > 0 ? usablePatterns : ['.*'];

    const included = TextRange.findMatchingRangesForPatterns(includePatterns, text);
    const ignored = TextRange.findMatchingRangesForPatterns(ignoreRegExpList, text);
    return TextRange.excludeRanges(included, ignored);
}
/**
 * Check whether the word at `wo` is known to the dictionary.
 *
 * The word is looked up as-is first. If that fails and the character just
 * before the word in `text` is a backslash, the lookup is retried without
 * the word's first letter.
 *
 * @param dict - dictionary to consult.
 * @param wo - the word and its offset within `text`.
 * @param text - the full text the word was taken from.
 * @param allowCompounds - forwarded to `hasWordCheck`.
 */
export function isWordValid(dict: SpellingDictionary, wo: Text.TextOffset, text: string, allowCompounds: boolean) {
    const foundAsIs = hasWordCheck(dict, wo.text, allowCompounds);
    if (foundAsIs) {
        return foundAsIs;
    }
    // Drop the first letter if it is preceded by a '\'.
    const followsBackslash = text[wo.offset - 1] === '\\';
    return followsBackslash && hasWordCheck(dict, wo.text.slice(1), allowCompounds);
}
/**
 * Look a word up in the dictionary after removing every backslash from it.
 *
 * @param dict - dictionary to consult.
 * @param word - the word to look up; backslashes are removed before the lookup.
 * @param allowCompounds - passed to the dictionary only when true.
 * @returns whatever `dict.has` returns for the cleaned word.
 */
export function hasWordCheck(dict: SpellingDictionary, word: string, allowCompounds: boolean) {
    const cleaned = word.split('\\').join('');
    if (allowCompounds) {
        return dict.has(cleaned, allowCompounds);
    }
    // Leave the argument off when it is false so the dictionary can apply its own compound setting.
    return dict.has(cleaned);
}