UNPKG

compare-utf8

Version:

Compares JS strings using UTF-8 bitwise semantics

138 lines (122 loc) 2.96 kB
// @ts-check

// Scratch buffers handed to utf8Bytes by compareUTF8. They are allocated once
// and overwritten on every mismatch so that a comparison never allocates.
// Four slots are enough: utf8Bytes writes at most 4 bytes per code point.
const newScratchBuffer = () => Array.from({ length: 4 }, () => 0);
const aBytes = newScratchBuffer();
const bBytes = newScratchBuffer();

/**
 * Compares two JavaScript strings as if they were UTF-8 encoded byte arrays.
 *
 * The strings are walked code point by code point. At the first position
 * where the code points differ, the result is the difference between the
 * first differing bytes of their UTF-8 encodings. If one string is a prefix
 * of the other, the result is the difference of their UTF-16 lengths.
 *
 * Only the sign of the result is meaningful to a sort comparator: negative
 * when `a` sorts before `b`, zero when they are equal, positive otherwise.
 *
 * Note: an unpaired surrogate is not replaced; `codePointAt` yields its raw
 * value (0xD800-0xDFFF) and it is encoded by `utf8Bytes` as a 3-byte sequence.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number}
 */
export function compareUTF8(a, b) {
  const aLength = a.length;
  const bLength = b.length;
  const length = Math.min(aLength, bLength);

  // `i` is a UTF-16 index. Both strings are identical up to `i`, so advancing
  // by the width of `a`'s code point keeps the two cursors aligned.
  for (let i = 0; i < length; ) {
    const aCodePoint = /** @type {number} */ (a.codePointAt(i));
    const bCodePoint = /** @type {number} */ (b.codePointAt(i));

    if (aCodePoint !== bCodePoint) {
      // Code points below 0x80 are represented the same way in UTF-8 as in
      // UTF-16, so they can be compared directly without encoding.
      if (aCodePoint < 0x80 && bCodePoint < 0x80) {
        return aCodePoint - bCodePoint;
      }

      // Encode both code points into the scratch buffers and compare bytes.
      const aByteCount = utf8Bytes(aCodePoint, aBytes);
      const bByteCount = utf8Bytes(bCodePoint, bBytes);
      return compareArrays(aBytes, aByteCount, bBytes, bByteCount);
    }

    i += utf16LengthForCodePoint(aCodePoint);
  }

  // One string is a prefix of the other (or they are equal).
  return aLength - bLength;
}

/**
 * Lexicographically compares the first `aLength` entries of `a` with the
 * first `bLength` entries of `b`.
 *
 * @param {number[]} a
 * @param {number} aLength Number of leading entries of `a` to consider.
 * @param {number[]} b
 * @param {number} bLength Number of leading entries of `b` to consider.
 * @returns {number} Difference of the first differing entries, or
 *   `aLength - bLength` when the shorter range is a prefix of the longer.
 */
function compareArrays(a, aLength, b, bLength) {
  const length = Math.min(aLength, bLength);
  for (let i = 0; i < length; i++) {
    const aValue = a[i];
    const bValue = b[i];
    if (aValue !== bValue) {
      return aValue - bValue;
    }
  }
  return aLength - bLength;
}

/**
 * Returns how many UTF-16 code units are needed for a code point: 2 for
 * values above 0xFFFF (a surrogate pair), otherwise 1.
 *
 * @param {number} aCodePoint
 * @returns {number}
 */
export function utf16LengthForCodePoint(aCodePoint) {
  return aCodePoint > 0xffff ? 2 : 1;
}

/**
 * Writes the UTF-8 encoding of `codePoint` into the start of `bytes`.
 *
 * @param {number} codePoint
 * @param {number[]} bytes Output buffer; entries 0..n-1 are overwritten.
 * @returns {number} The number of bytes written (1 to 4).
 * @throws {Error} If `codePoint` is not less than or equal to 0x10FFFF.
 */
function utf8Bytes(codePoint, bytes) {
  if (codePoint < 0x80) {
    bytes[0] = codePoint;
    return 1;
  }

  // `count` is the number of continuation bytes; `offset` is the marker bits
  // of the lead byte for a sequence of that length.
  let count;
  let offset;
  if (codePoint <= 0x07ff) {
    count = 1;
    offset = 0xc0;
  } else if (codePoint <= 0xffff) {
    count = 2;
    offset = 0xe0;
  } else if (codePoint <= 0x10ffff) {
    count = 3;
    offset = 0xf0;
  } else {
    throw new Error("Invalid code point");
  }

  // Lead byte: the marker plus the highest-order payload bits.
  bytes[0] = (codePoint >> (6 * count)) + offset;

  // Continuation bytes: 6 payload bits each, most significant group first.
  let i = 1;
  for (; count > 0; count--) {
    const temp = codePoint >> (6 * (count - 1));
    bytes[i++] = 0x80 | (temp & 0x3f);
  }
  return i;
}

/**
 * @param {string} a
 * @param {string} b
 * @returns {boolean} Whether `a` sorts strictly after `b` per `compareUTF8`.
 */
export function greaterThan(a, b) {
  return compareUTF8(a, b) > 0;
}

/**
 * @param {string} a
 * @param {string} b
 * @returns {boolean} Whether `a` sorts after or equal to `b` per `compareUTF8`.
 */
export function greaterThanEq(a, b) {
  return compareUTF8(a, b) >= 0;
}

/**
 * @param {string} a
 * @param {string} b
 * @returns {boolean} Whether `a` sorts strictly before `b` per `compareUTF8`.
 */
export function lessThan(a, b) {
  return compareUTF8(a, b) < 0;
}

/**
 * @param {string} a
 * @param {string} b
 * @returns {boolean} Whether `a` sorts before or equal to `b` per `compareUTF8`.
 */
export function lessThanEq(a, b) {
  return compareUTF8(a, b) <= 0;
}