monaco-editor-core
Version:
A browser based code editor
186 lines (185 loc) • 8.97 kB
JavaScript
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Range } from '../core/range.js';
import { Searcher } from '../model/textModelSearch.js';
import * as strings from '../../../base/common/strings.js';
import { assertNever } from '../../../base/common/assert.js';
import { DEFAULT_WORD_REGEXP, getWordAtText } from '../core/wordHelper.js';
/**
 * Static helpers that locate characters the unicode highlighter should flag
 * and explain why a given character is flagged.
 */
export class UnicodeTextModelHighlighter {
    /**
     * Scans lines of `model` and collects the ranges of characters that
     * `CodePointHighlighter.shouldHighlightNonBasicASCII` reports a reason for.
     *
     * @param model Object providing `getLineCount()` and `getLineContent(lineNumber)`.
     * @param options Highlighter options, forwarded to `CodePointHighlighter`.
     * @param range Optional; only `startLineNumber` and `endLineNumber` are read.
     *   When omitted, lines 1 through `model.getLineCount()` are scanned.
     * @returns An object with:
     *   - `ranges`: one `Range` per reported character, on a single line, with
     *     columns equal to the 0-based string offsets plus one;
     *   - `hasMore`: `true` when scanning stopped because the result cap was hit;
     *   - `ambiguousCharacterCount`, `invisibleCharacterCount`,
     *     `nonBasicAsciiCharacterCount`: per-reason counters.
     */
    static computeUnicodeHighlights(model, options, range) {
        // Upper bound on the number of ranges returned by a single call.
        const MAX_RESULT_LENGTH = 1000;
        const startLine = range ? range.startLineNumber : 1;
        const endLine = range ? range.endLineNumber : model.getLineCount();
        const codePointHighlighter = new CodePointHighlighter(options);
        const candidates = codePointHighlighter.getCandidateCodePoints();
        let regex;
        if (candidates === 'allNonBasicAscii') {
            regex = new RegExp('[^\\t\\n\\r\\x20-\\x7E]', 'g');
        }
        else {
            regex = new RegExp(`${buildRegExpCharClassExpr(Array.from(candidates))}`, 'g');
        }
        const searcher = new Searcher(null, regex);
        const ranges = [];
        let hasMore = false;
        let m;
        let ambiguousCharacterCount = 0;
        let invisibleCharacterCount = 0;
        let nonBasicAsciiCharacterCount = 0;
        forLoop: for (let lineNumber = startLine, lineCount = endLine; lineNumber <= lineCount; lineNumber++) {
            const lineContent = model.getLineContent(lineNumber);
            const lineLength = lineContent.length;
            // Reset regex to search from the beginning
            searcher.reset(0);
            do {
                m = searcher.next(lineContent);
                if (m) {
                    let startIndex = m.index;
                    let endIndex = m.index + m[0].length;
                    // Extend range to entire code point.
                    // The regex has no `u` flag, so it can match one half of a surrogate pair.
                    if (startIndex > 0) {
                        const charCodeBefore = lineContent.charCodeAt(startIndex - 1);
                        if (strings.isHighSurrogate(charCodeBefore)) {
                            startIndex--;
                        }
                    }
                    // The code unit that would complete the pair lives at `endIndex`, so it
                    // exists whenever `endIndex < lineLength` (this also covers a pair that
                    // is the very last character of the line).
                    if (endIndex < lineLength) {
                        const lastCharCode = lineContent.charCodeAt(endIndex - 1);
                        if (strings.isHighSurrogate(lastCharCode)) {
                            endIndex++;
                        }
                    }
                    const str = lineContent.substring(startIndex, endIndex);
                    let word = getWordAtText(startIndex + 1, DEFAULT_WORD_REGEXP, lineContent, 0);
                    if (word && word.endColumn <= startIndex + 1) {
                        // The word does not include the problematic character, ignore the word
                        word = null;
                    }
                    const highlightReason = codePointHighlighter.shouldHighlightNonBasicASCII(str, word ? word.word : null);
                    if (highlightReason !== 0 /* SimpleHighlightReason.None */) {
                        if (highlightReason === 3 /* SimpleHighlightReason.Ambiguous */) {
                            ambiguousCharacterCount++;
                        }
                        else if (highlightReason === 2 /* SimpleHighlightReason.Invisible */) {
                            invisibleCharacterCount++;
                        }
                        else if (highlightReason === 1 /* SimpleHighlightReason.NonBasicASCII */) {
                            nonBasicAsciiCharacterCount++;
                        }
                        else {
                            assertNever(highlightReason);
                        }
                        // Note: the counters above are updated before the cap is checked, so they
                        // include the character that triggered `hasMore`.
                        if (ranges.length >= MAX_RESULT_LENGTH) {
                            hasMore = true;
                            break forLoop;
                        }
                        ranges.push(new Range(lineNumber, startIndex + 1, lineNumber, endIndex + 1));
                    }
                }
            } while (m);
        }
        return {
            ranges,
            hasMore,
            ambiguousCharacterCount,
            invisibleCharacterCount,
            nonBasicAsciiCharacterCount
        };
    }
    /**
     * Explains why a single character would be highlighted under `options`.
     * The character is evaluated without any surrounding word context.
     *
     * @param char String whose first code point is evaluated.
     * @param options Highlighter options; `allowedLocales` must be iterable.
     * @returns `null` when there is no reason to highlight; otherwise an object
     *   whose `kind` is 1 (invisible), 2 (non basic ASCII) or 0 (ambiguous).
     *   The ambiguous result also carries `confusableWith` (the primary
     *   confusable as a string) and `notAmbiguousInLocales` (the locales that,
     *   when added to `options.allowedLocales`, make the code point unambiguous).
     */
    static computeUnicodeHighlightReason(char, options) {
        const codePointHighlighter = new CodePointHighlighter(options);
        const reason = codePointHighlighter.shouldHighlightNonBasicASCII(char, null);
        switch (reason) {
            case 0 /* SimpleHighlightReason.None */:
                return null;
            case 2 /* SimpleHighlightReason.Invisible */:
                return { kind: 1 /* UnicodeHighlighterReasonKind.Invisible */ };
            case 3 /* SimpleHighlightReason.Ambiguous */: {
                const codePoint = char.codePointAt(0);
                const primaryConfusable = codePointHighlighter.ambiguousCharacters.getPrimaryConfusable(codePoint);
                const notAmbiguousInLocales = strings.AmbiguousCharacters.getLocales().filter((l) => !strings.AmbiguousCharacters.getInstance(new Set([...options.allowedLocales, l])).isAmbiguous(codePoint));
                return { kind: 0 /* UnicodeHighlighterReasonKind.Ambiguous */, confusableWith: String.fromCodePoint(primaryConfusable), notAmbiguousInLocales };
            }
            case 1 /* SimpleHighlightReason.NonBasicASCII */:
                return { kind: 2 /* UnicodeHighlighterReasonKind.NonBasicAscii */ };
        }
    }
}
/**
 * Builds the source text of a regular-expression character class (`[...]`)
 * containing the characters for the given code points.
 *
 * @param codePoints Array of numeric code points.
 * @param flags Unused by this function.
 * @returns The character class source; the member characters are passed
 *   through `strings.escapeRegExpCharacters` before being bracketed.
 */
function buildRegExpCharClassExpr(codePoints, flags) {
    const members = codePoints.reduce((text, codePoint) => text + String.fromCodePoint(codePoint), '');
    const escapedMembers = strings.escapeRegExpCharacters(members);
    return `[${escapedMembers}]`;
}
/**
 * Decides, per character, whether and why it should be highlighted,
 * based on the supplied highlighter options.
 */
class CodePointHighlighter {
    /**
     * @param options Highlighter options. Reads `allowedCodePoints` and
     *   `allowedLocales` here; `nonBasicASCII`, `invisibleCharacters` and
     *   `ambiguousCharacters` are consulted by the methods below.
     */
    constructor(options) {
        this.options = options;
        this.allowedCodePoints = new Set(options.allowedCodePoints);
        this.ambiguousCharacters = strings.AmbiguousCharacters.getInstance(new Set(options.allowedLocales));
    }
    /**
     * Returns the code points worth searching for.
     *
     * @returns The string `'allNonBasicAscii'` when `options.nonBasicASCII` is
     *   set; otherwise a `Set` of the enabled invisible and confusable code
     *   points, minus the explicitly allowed ones.
     */
    getCandidateCodePoints() {
        if (this.options.nonBasicASCII) {
            return 'allNonBasicAscii';
        }
        const candidates = new Set();
        if (this.options.invisibleCharacters) {
            for (const invisible of strings.InvisibleCharacters.codePoints) {
                if (isAllowedInvisibleCharacter(String.fromCodePoint(invisible))) {
                    continue;
                }
                candidates.add(invisible);
            }
        }
        if (this.options.ambiguousCharacters) {
            for (const confusable of this.ambiguousCharacters.getConfusableCodePoints()) {
                candidates.add(confusable);
            }
        }
        // Explicitly allowed code points are never candidates.
        for (const allowed of this.allowedCodePoints) {
            candidates.delete(allowed);
        }
        return candidates;
    }
    /**
     * Classifies `character` (its first code point) as a highlight reason.
     *
     * @param character String whose first code point is evaluated.
     * @param wordContext The word containing the character, or a falsy value
     *   when there is none.
     * @returns 0 (none), 1 (non basic ASCII), 2 (invisible) or 3 (ambiguous).
     */
    shouldHighlightNonBasicASCII(character, wordContext) {
        const target = character.codePointAt(0);
        if (this.allowedCodePoints.has(target)) {
            return 0 /* SimpleHighlightReason.None */;
        }
        if (this.options.nonBasicASCII) {
            return 1 /* SimpleHighlightReason.NonBasicASCII */;
        }
        let wordHasAscii = false;
        let wordHasPlainNonAscii = false;
        if (wordContext) {
            for (const wordChar of wordContext) {
                const wordCodePoint = wordChar.codePointAt(0);
                if (strings.isBasicASCII(wordChar)) {
                    wordHasAscii = true;
                    continue;
                }
                const isConfusable = this.ambiguousCharacters.isAmbiguous(wordCodePoint);
                if (!isConfusable && !strings.InvisibleCharacters.isInvisibleCharacter(wordCodePoint)) {
                    // A non-ASCII character that is neither confusable nor invisible.
                    wordHasPlainNonAscii = true;
                }
            }
        }
        // A word with no ASCII at all but with an obviously non-ASCII character
        // is left alone; mixing with ASCII is what makes a character suspicious.
        if (wordHasPlainNonAscii && !wordHasAscii) {
            return 0 /* SimpleHighlightReason.None */;
        }
        // TODO check for emojis
        if (this.options.invisibleCharacters &&
            !isAllowedInvisibleCharacter(character) &&
            strings.InvisibleCharacters.isInvisibleCharacter(target)) {
            return 2 /* SimpleHighlightReason.Invisible */;
        }
        if (this.options.ambiguousCharacters && this.ambiguousCharacters.isAmbiguous(target)) {
            return 3 /* SimpleHighlightReason.Ambiguous */;
        }
        return 0 /* SimpleHighlightReason.None */;
    }
}
/**
 * Tells whether `character` is one of the invisible characters that are
 * always acceptable: a space, a line feed or a tab.
 *
 * @param character The value to test (compared by strict equality).
 * @returns `true` for `' '`, `'\n'` or `'\t'`; `false` otherwise.
 */
function isAllowedInvisibleCharacter(character) {
    switch (character) {
        case ' ':
        case '\n':
        case '\t':
            return true;
        default:
            return false;
    }
}