mora-scripts
Version:
Some collection scripts by myself
53 lines (50 loc) • 1.75 kB
JavaScript
/**
* Creates an array containing the numeric code points of each Unicode
* character in the string. While JavaScript uses UCS-2 internally,
* this function will convert a pair of surrogate halves (each of which
* UCS-2 exposes as separate characters) into a single code point,
* matching UTF-16.
* @see `punycode.ucs2.encode`
* @see <https://mathiasbynens.be/notes/javascript-encoding>
* @memberOf punycode.ucs2
* @name decode
* @param {String} string The Unicode input string (UCS-2).
* @returns {Array} The new array of code points.
*/
function ucs2decode(string) {
var output = []
var counter = 0
var length = string.length
/* istanbul ignore next */
while (counter < length) {
var value = string.charCodeAt(counter++)
if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
// It's a high surrogate, and there is a next character.
var extra = string.charCodeAt(counter++)
if ((extra & 0xFC00) === 0xDC00) { // Low surrogate.
output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000)
} else {
// It's an unmatched surrogate; only append this code unit, in case the
// next code unit is the high surrogate of a surrogate pair.
output.push(value)
counter--
}
} else {
output.push(value)
}
}
return output
}
/**
* Creates a string based on an array of numeric code points.
* @see `punycode.ucs2.decode`
* @memberOf punycode.ucs2
* @name encode
* @param {Array} codePoints The array of numeric code points.
* @returns {String} The new Unicode string (UCS-2).
*/
function ucs2encode(codePoints) {
return String.fromCodePoint.apply(String, codePoints)
}
exports.ucs2decode = ucs2decode
exports.ucs2encode = ucs2encode