UNPKG

@ckeditor/ckeditor5-utils

Version:

Miscellaneous utilities used by CKEditor 5.

ckeditor.com/ckeditor-5

ckeditor/ckeditor5

86 lines (85 loc) • 3.32 kB

JavaScript

/**
 * @license Copyright (c) 2003-2025, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-licensing-options
 */
/**
 * Set of utils to handle unicode characters.
 *
 * @module utils/unicode
 */
/**
 * Checks whether given `character` is a combining mark.
 *
 * Only a single UTF-16 code unit from one of the ranges U+0300-U+036F, U+1AB0-U+1AFF, U+1DC0-U+1DFF,
 * U+20D0-U+20FF or U+FE20-U+FE2F is recognized. Empty, missing and longer values yield `false`.
 *
 * @param character Character to check.
 * @returns `true` if `character` is exactly one code unit from the ranges listed above.
 */
export function isCombiningMark(character) {
	// eslint-disable-next-line no-misleading-character-class
	return !!character && character.length === 1 && /[\u0300-\u036f\u1ab0-\u1aff\u1dc0-\u1dff\u20d0-\u20ff\ufe20-\ufe2f]/.test(character);
}

/**
 * Checks whether given `character` is a high half of surrogate pair.
 *
 * Using UTF-16 terminology, a surrogate pair encodes a single code point with two 16-bit code units: a high
 * surrogate (U+D800-U+DBFF) followed by a low surrogate (U+DC00-U+DFFF).
 *
 * @param character Character to check.
 * @returns `true` if `character` is exactly one code unit in the U+D800-U+DBFF range.
 */
export function isHighSurrogateHalf(character) {
	return !!character && character.length === 1 && /[\ud800-\udbff]/.test(character);
}

/**
 * Checks whether given `character` is a low half of surrogate pair.
 *
 * Using UTF-16 terminology, a surrogate pair encodes a single code point with two 16-bit code units: a high
 * surrogate (U+D800-U+DBFF) followed by a low surrogate (U+DC00-U+DFFF).
 *
 * @param character Character to check.
 * @returns `true` if `character` is exactly one code unit in the U+DC00-U+DFFF range.
 */
export function isLowSurrogateHalf(character) {
	return !!character && character.length === 1 && /[\udc00-\udfff]/.test(character);
}

/**
 * Checks whether given offset in a string is inside a surrogate pair (between two surrogate halves).
 *
 * @param string String to check.
 * @param offset Offset to check.
 * @returns `true` if the code unit before `offset` is a high surrogate and the code unit at `offset` is a low one.
 */
export function isInsideSurrogatePair(string, offset) {
	// `charAt()` returns an empty string for out-of-range indexes, which both helpers reject,
	// so offsets at the string boundaries simply yield `false`.
	return isHighSurrogateHalf(string.charAt(offset - 1)) && isLowSurrogateHalf(string.charAt(offset));
}

/**
 * Checks whether given offset in a string is between base character and combining mark or between two combining marks.
 *
 * @param string String to check.
 * @param offset Offset to check.
 * @returns `true` if the code unit at `offset` is a combining mark.
 */
export function isInsideCombinedSymbol(string, offset) {
	return isCombiningMark(string.charAt(offset));
}

// Built once at module evaluation. `buildEmojiRegexp()` is a function declaration, so it is hoisted
// and can be called before its definition below.
const EMOJI_PATTERN = /* #__PURE__ */ buildEmojiRegexp();

/**
 * Checks whether given offset in a string is inside multi-character emoji sequence.
 *
 * An offset is "inside" a sequence when it lies strictly between the start and the end of a match,
 * so offsets exactly at a sequence boundary yield `false`.
 *
 * @param string String to check. It is converted with `String()` before matching.
 * @param offset Offset to check.
 * @returns `true` if `offset` falls strictly inside a matched emoji sequence.
 */
export function isInsideEmojiSequence(string, offset) {
	// `matchAll()` iterates over a clone of the regexp, so the shared global pattern's `lastIndex` stays untouched.
	for (const match of String(string).matchAll(EMOJI_PATTERN)) {
		// Matches are yielded in ascending order of `index`. Once one starts at or after `offset`,
		// neither it nor any later match can contain the offset.
		if (match.index >= offset) {
			return false;
		}

		if (offset < match.index + match[0].length) {
			return true;
		}
	}

	return false;
}

/**
 * Builds the global, unicode-aware regular expression used to find emoji sequences in a string.
 *
 * The pattern matches either a pair of regional indicators (a flag) or one or more emoji joined with
 * the zero width joiner (U+200D).
 *
 * @returns A new `RegExp` created with the `u` and `g` flags.
 */
function buildEmojiRegexp() {
	const parts = [
		// Emoji Tag Sequence (ETS)
		/\p{Emoji}[\u{E0020}-\u{E007E}]+\u{E007F}/u,
		// Emoji Keycap Sequence
		/\p{Emoji}\u{FE0F}?\u{20E3}/u,
		// Emoji Presentation Sequence
		/\p{Emoji}\u{FE0F}/u,
		// Single-Character Emoji / Emoji Modifier Sequence
		/(?=\p{General_Category=Other_Symbol})\p{Emoji}\p{Emoji_Modifier}*/u
	];
	const flagSequence = /\p{Regional_Indicator}{2}/u.source;
	const emoji = `(?:${ parts.map(part => part.source).join('|') })`;
	// `\u{200D}` is the zero width joiner that glues several emoji into a single sequence.
	const sequence = `${ flagSequence }|${ emoji }(?:\u{200D}${ emoji })*`;

	return new RegExp(sequence, 'ug');
}