cspell
Version:
A Spelling Checker for Code!
211 lines (174 loc) • 6.81 kB
text/typescript
import * as XRegExp from 'xregexp';
import {Observable, from} from 'rxjs';
import {concatMap} from 'rxjs/operators';
import {scanMap, Sequence, sequenceFromRegExpMatch } from 'gensequence';
import {binarySearch} from './search';
// CSpell:ignore ings ning gimuy tsmerge
/**
 * A fragment of text paired with the position at which it was found.
 */
export interface TextOffset {
/** The fragment itself (for regex-derived values, the full match). */
text: string;
/** Position of `text` within the string it was taken from, plus any base offset applied by the producer. */
offset: number;
}
/**
 * A `TextOffset` enriched with information about the document it belongs to.
 * Values of this shape are produced by `calculateTextDocumentOffsets`.
 */
export interface TextDocumentOffset extends TextOffset {
/** Identifier of the document, when one is known. */
uri?: string;
/** The complete document text the offset refers to. */
doc: string;
/** Line position derived from the newline positions in `doc`. NOTE(review): presumably 1-based; depends on `binarySearch` semantics - confirm. */
row: number;
/** Distance of `offset` from the line break that precedes it (or from position -1 for the first line). */
col: number;
}
// One line of text including its terminating LF (with optional CR).
// Text after the last LF is not matched by this pattern.
const regExLines = /.*\r?\n/g;
// const regExIdentifiers = XRegExp('(?:\\p{L}|[0-9_\'])+', 'gi');
// A run of upper-case letters, an optional backslash and apostrophe, then one of the lower-case
// suffixes s/ing/ies/es/ings/ed/ning, provided no lower-case letter follows (e.g. "ERRORs").
const regExUpperSOrIng = XRegExp('(\\p{Lu}+\\\\?[\'’]?(?:s|ing|ies|es|ings|ed|ning))(?!\\p{Ll})', 'g');
// Boundary between a lower-case letter and the upper-case letter that follows it (camel hump).
const regExSplitWords = XRegExp('(\\p{Ll})(\\p{Lu})', 'g');
// Boundary between an upper-case letter and a following upper+lower pair (e.g. "HTML|Parser").
const regExSplitWords2 = XRegExp('(\\p{Lu})(\\p{Lu}\\p{Ll})', 'g');
// A word: two or more letters, where inner letters may be preceded by an (optionally
// backslash-escaped) apostrophe, or else a single letter.
const regExWords = XRegExp("\\p{L}(?:\\\\?['’]\\p{L}|\\p{L})+|\\p{L}", 'g');
// Characters from the Hiragana, Han and Katakana scripts; these are blanked out of extracted words.
const regExIgnoreCharacters = XRegExp('\\p{Hiragana}|\\p{Han}|\\p{Katakana}', 'g');
// Whole string is one upper-case letter followed by one or more lower-case letters.
const regExFirstUpper = XRegExp('^\\p{Lu}\\p{Ll}+$');
// Whole string consists of one or more upper-case letters.
const regExAllUpper = XRegExp('^\\p{Lu}+$');
// Whole string consists of one or more lower-case letters.
const regExAllLower = XRegExp('^\\p{Ll}+$');
// A string written as a regex literal: /pattern/flags. Captures the pattern and the flags
// (only the flag letters g, i, m, u, y are recognised).
const regExMatchRegExParts = /^\/(.*)\/([gimuy]*)$/;
/**
 * Observable flavour of `splitCamelCaseWordWithOffset`: emits each piece of the
 * camelCase word, in order, together with its offset.
 */
export function splitCamelCaseWordWithOffsetRx(wo: TextOffset): Observable<TextOffset> {
    const pieces = splitCamelCaseWordWithOffset(wo);
    return from(pieces);
}
/**
 * Split a camelCase word into its pieces, giving each piece the offset at which
 * it starts. The first piece starts at `wo.offset`; every later piece starts
 * where the previous one ended.
 */
export function splitCamelCaseWordWithOffset(wo: TextOffset): Array<TextOffset> {
    const seed: TextOffset = { text: '', offset: wo.offset };
    // Stateful mapper: remembers the previously emitted piece so the next one can be placed right after it.
    const placeAfterPrevious = scanMap<string, TextOffset>(
        (previous, text) => ({ text, offset: previous.offset + previous.text.length }),
        seed
    );
    return splitCamelCaseWord(wo.text).map(placeAfterPrevious);
}
/**
 * Break a camelCase / PascalCase word into its component words.
 *
 * Upper-case runs that carry a plural or verb suffix (e.g. "ERRORs") are first
 * normalised so that only their first letter stays upper-case; the word is then
 * cut at every lower-to-upper boundary and at the end of every acronym.
 */
export function splitCamelCaseWord(word: string): string[] {
    const separator = '_<^*_*^>_';
    const insertSeparator = '$1' + separator + '$2';
    const normalized = word.replace(
        regExUpperSOrIng,
        run => run.charAt(0) + run.slice(1).toLowerCase()
    );
    // Mark lower|Upper boundaries first, then Upper|UpperLower (acronym end) boundaries.
    const marked = [regExSplitWords, regExSplitWords2].reduce(
        (text, boundary) => XRegExp.replace(text, boundary, insertSeparator),
        normalized
    );
    return XRegExp.split(marked, separator);
}
/**
 * Produce a sequence over the matches of `reg` in `text`, so that callers can
 * iterate, map and filter the regular expression results.
 */
export function match(reg: RegExp, text: string): Sequence<RegExpExecArray> {
    const matches = sequenceFromRegExpMatch(reg, text);
    return matches;
}
/**
 * Match `reg` against a plain string and report every match as a `TextOffset`
 * whose offset is relative to the start of `text`.
 */
export function matchStringToTextOffset(reg: RegExp, text: string) {
    const wholeText: TextOffset = { text, offset: 0 };
    return matchToTextOffset(reg, wholeText);
}
/**
 * Match `reg` against `text.text` and report every match as a `TextOffset`.
 * Each reported offset is the match index shifted by `text.offset`.
 */
export function matchToTextOffset(reg: RegExp, text: TextOffset): Sequence<TextOffset> {
    const shiftByBase = offsetMap(text.offset);
    const found = match(reg, text.text);
    return found.map(hit => shiftByBase({ text: hit[0], offset: hit.index }));
}
/**
 * Split `text` into lines, each reported with the offset at which it starts.
 * Lines are whatever `regExLines` matches, line terminator included.
 */
export function extractLinesOfText(text: string): Sequence<TextOffset> {
    const lines = matchStringToTextOffset(regExLines, text);
    return lines;
}
/**
 * Observable flavour of `extractLinesOfText`: emits every line of `text` in order.
 */
export function extractLinesOfTextRx(text: string): Observable<TextOffset> {
    const lines = extractLinesOfText(text);
    return from(lines);
}
/**
 * Observable flavour of `extractWordsFromText`: emits every whole word found
 * in `text`, in order, together with its offset.
 */
export function extractWordsFromTextRx(text: string): Observable<TextOffset> {
    const words = extractWordsFromText(text);
    return from(words);
}
/**
 * Extract out whole words from a string of text.
 *
 * Characters that match `\p{L}` but are not spell-checkable letters (Han,
 * Hiragana, Katakana) are blanked out of each word. Ignored characters at the
 * edges of a word are dropped, and the reported offset is advanced past any
 * that were dropped from the front, so that `offset` always points at the
 * first character of the returned `text`. Ignored characters in the middle of
 * a word remain as spaces. Words that consist solely of ignored characters are
 * omitted.
 *
 * @param text - the text to scan.
 * @returns a sequence of words with their offsets into `text`.
 */
export function extractWordsFromText(text: string): Sequence<TextOffset> {
    // Work on a private copy of the regex so iteration state is never shared.
    const reg = XRegExp(regExWords);
    return matchStringToTextOffset(reg, text)
        .map(wo => {
            // Replace each ignored character with same-length padding so positions stay aligned.
            const blanked = XRegExp.replace(
                wo.text,
                regExIgnoreCharacters,
                (ignored: string) => ' '.repeat(ignored.length)
            );
            const word = blanked.trim();
            // Everything before the first non-blank character is padding, so the first
            // occurrence of `word` sits exactly at the number of leading blanks
            // (and at 0 when `word` is empty, which is filtered out below).
            const leadingBlanks = blanked.indexOf(word);
            return { text: word, offset: wo.offset + leadingBlanks };
        })
        .filter(wo => !!wo.text);
}
/**
 * Observable flavour of `extractWordsFromCode`: emits every word found in
 * `text`, with camelCase words broken into their pieces, preserving order.
 */
export function extractWordsFromCodeRx(text: string): Observable<TextOffset> {
    const words = extractWordsFromTextRx(text);
    const splitCamelCase = concatMap((wo: TextOffset) => splitCamelCaseWordWithOffsetRx(wo));
    return words.pipe(splitCamelCase);
}
/**
 * Extract the words from source code: whole words are located first and each
 * one is then broken into its camelCase pieces.
 */
export function extractWordsFromCode(text: string): Sequence<TextOffset> {
    const words = extractWordsFromText(text);
    return words.concatMap(wo => splitCamelCaseWordWithOffset(wo));
}
/**
 * True when `word` is non-empty and made up entirely of upper-case letters.
 */
export function isUpperCase(word: string) {
    const found = word.match(regExAllUpper);
    return found !== null;
}
/**
 * True when `word` is non-empty and made up entirely of lower-case letters.
 */
export function isLowerCase(word: string) {
    const found = word.match(regExAllLower);
    return found !== null;
}
/**
 * True when the first character of `word` is an upper-case letter.
 * An empty word yields false.
 */
export function isFirstCharacterUpper(word: string) {
    const first = word.charAt(0);
    return isUpperCase(first);
}
/**
 * True when the first character of `word` is a lower-case letter.
 * An empty word yields false.
 */
export function isFirstCharacterLower(word: string) {
    const first = word.charAt(0);
    return isLowerCase(first);
}
/**
 * Upper-case the first character of `word`, leaving the rest untouched.
 *
 * The first character is taken as a whole Unicode code point, so letters
 * outside the Basic Multilingual Plane (stored as a surrogate pair) are
 * converted correctly instead of being split in half.
 *
 * @param word - the word to convert; may be empty.
 * @returns `word` with its first character upper-cased.
 */
export function ucFirst(word: string) {
    const codePoint = word.codePointAt(0);
    if (codePoint === undefined) {
        // Empty string: nothing to convert.
        return word;
    }
    const first = String.fromCodePoint(codePoint);
    // `first.length` is 2 for a surrogate pair, 1 otherwise.
    return first.toUpperCase() + word.slice(first.length);
}
/**
 * Lower-case the first character of `word`, leaving the rest untouched.
 *
 * The first character is taken as a whole Unicode code point, so letters
 * outside the Basic Multilingual Plane (stored as a surrogate pair) are
 * converted correctly instead of being split in half.
 *
 * @param word - the word to convert; may be empty.
 * @returns `word` with its first character lower-cased.
 */
export function lcFirst(word: string) {
    const codePoint = word.codePointAt(0);
    if (codePoint === undefined) {
        // Empty string: nothing to convert.
        return word;
    }
    const first = String.fromCodePoint(codePoint);
    // `first.length` is 2 for a surrogate pair, 1 otherwise.
    return first.toLowerCase() + word.slice(first.length);
}
/**
 * Convert a snake_case word by upper-casing the first character of every
 * underscore-separated segment and joining the segments without a separator.
 */
export function snakeToCamel(word: string) {
    const segments = word.split('_');
    return segments.reduce((joined, segment) => joined + ucFirst(segment), '');
}
/**
 * Convert a camelCase word to snake_case: split it into its component words,
 * join them with underscores and lower-case the result.
 */
export function camelToSnake(word: string) {
    const parts = splitCamelCaseWord(word);
    const joined = parts.join('_');
    return joined.toLowerCase();
}
/**
 * Re-case `word` so that it follows the casing pattern of `example`.
 *
 * Patterns are tried in order; the first that fits `example` wins:
 *  1. Capitalised (one upper then lower) - capitalise `word`, lower-casing the rest.
 *  2. All lower-case                     - lower-case `word`.
 *  3. All upper-case                     - upper-case `word`.
 *  4. First character upper-case         - upper-case only the first character of `word`.
 *  5. First character lower-case         - lower-case only the first character of `word`.
 * If none fits, `word` is returned unchanged.
 */
export function matchCase(example: string, word: string): string {
    const exampleFits = (shape: RegExp) => example.match(shape) !== null;

    if (exampleFits(regExFirstUpper)) {
        const head = word.slice(0, 1);
        const tail = word.slice(1);
        return head.toUpperCase() + tail.toLowerCase();
    }
    if (exampleFits(regExAllLower)) {
        return word.toLowerCase();
    }
    if (exampleFits(regExAllUpper)) {
        return word.toUpperCase();
    }
    // Mixed-case example: only the first character of `word` is adjusted.
    return isFirstCharacterUpper(example) ? ucFirst(word)
        : isFirstCharacterLower(example) ? lcFirst(word)
        : word;
}
/** Anything that carries a numeric `offset`. */
interface OffsetMap {
    offset: number;
}

/**
 * Build a mapper that returns a shallow copy of its argument with `offset`
 * increased by the given amount. The argument itself is not modified.
 */
function offsetMap(offset: number) {
    return function shiftOffset<T extends OffsetMap>(xo: T) {
        const shifted = { ...(xo as Object), offset: xo.offset + offset };
        return shifted as T;
    };
}
/**
 * Turn a pattern into a `RegExp`.
 *
 * - A `RegExp` is returned as is.
 * - A string written as a regex literal (`/pattern/flags`) is compiled with its
 *   own flags plus `forceFlags`.
 * - Any other string is compiled as a regex source with `defaultFlags` plus `forceFlags`.
 *
 * Duplicate flags are collapsed and anything other than g, i, m, u, y is discarded.
 * Returns `undefined` when the pattern is empty or cannot be compiled.
 */
export function stringToRegExp(pattern: string | RegExp, defaultFlags = 'gim', forceFlags = 'g') {
    if (pattern instanceof RegExp) {
        return pattern;
    }
    try {
        const literalParts = pattern.match(regExMatchRegExParts);
        const source = literalParts ? literalParts[1] : pattern;
        const requestedFlags = literalParts ? literalParts[2] : defaultFlags;
        if (!source) {
            return undefined;
        }
        // Make sure the flags are unique, then keep only the supported ones.
        const uniqueFlags = [...new Set(forceFlags + requestedFlags)].join('');
        return new RegExp(source, uniqueFlags.replace(/[^gimuy]/g, ''));
    } catch (e) {
        // Deliberate best effort: a pattern that cannot be compiled yields undefined.
    }
    return undefined;
}
/**
 * Attach document information (uri, full text, row and column) to each word offset.
 *
 * The positions of all line feeds in `doc` are collected once, bracketed by -1
 * at the front and `doc.length` at the end; each offset is then located in that
 * list with `binarySearch`. The column is the distance from the preceding entry.
 */
export function calculateTextDocumentOffsets(uri: string, doc: string, wordOffsets: TextOffset[]): TextDocumentOffset[] {
    const lineFeedPositions = match(/\n/g, doc).map(found => found.index);
    const lines = [-1, ...lineFeedPositions, doc.length];

    const locate = (offset: number): [number, number] => {
        const row = binarySearch(lines, offset);
        const precedingBreak = lines[Math.max(0, row - 1)];
        return [row, offset - precedingBreak];
    };

    return wordOffsets.map(wo => {
        const [row, col] = locate(wo.offset);
        return { ...wo, row, col, doc, uri };
    });
}