surrog8
Version:
Surrogate pair converter with dev-friendly source code.
158 lines (132 loc) • 5.51 kB
JavaScript
/* -*- coding: UTF-8, tab-width: 2 -*- */
/*jslint indent: 2, maxlen: 80, browser: true */
/*globals define:true*/
(function unifiedExport(moduleName, factory, exports, backup) {
'use strict';
if (('function' === typeof define) && define.amd) { return define(factory); }
if (('object' === typeof module) && module && module.exports) {
module.exports = factory();
return;
}
if (('object' === typeof window) && window.navigator) {
exports = factory();
backup = window[moduleName];
exports.noConflict = function (key) {
window[moduleName] = backup;
if (key) { window[key] = exports; }
return exports;
};
window[moduleName] = exports;
}
}('surrog8', function () {/*require, exports, module*/
'use strict';
var sg, c;
sg = function surrog8(cpnOrStr) {
if ('string' === typeof cpnOrStr) { return sg.ord(cpnOrStr); }
if ('number' === typeof cpnOrStr) { return sg.chr(cpnOrStr); }
throw new TypeError('surrog8 needs a number for chr or a string for ord.');
};
//==BEGIN consts==
sg.consts = c = {
highSrgStart: 0xD800,
highSrgEnd: 0xDBFF,
lowSrgStart: 0xDC00,
lowSrgEnd: 0xDFFF,
overFFFFh: 0x10000,
rxAllHighSrg: /[\uD800-\uDBFF]/g,
rxAllLowSrg: /[\uDC00-\uDFFF]/g,
rxAllPairs: /[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
};
c.lowSrgCnt = c.lowSrgEnd + 1 - c.lowSrgStart; // 1024 = 0x400
// trivia:
c.highSrgCnt = c.highSrgEnd + 1 - c.highSrgStart; // 1024
c.maxPairCnt = c.highSrgCnt * c.lowSrgCnt; // 1048576 = 0x100000
c.expansionFactor = c.maxPairCnt / c.overFFFFh; // 16 // ^654321
//==ENDOF consts==
sg.isSurrogateChar = function (cNum) {
if ('string' === typeof cNum) { cNum = cNum.charCodeAt(0); }
if ('number' !== typeof cNum) { return false; }
if ((cNum >= c.highSrgStart) && (cNum <= c.highSrgEnd)) { return 1; }
if ((cNum >= c.lowSrgStart) && (cNum <= c.lowSrgEnd)) { return 2; }
return false;
};
sg.ordShim = function surrog8_ord(surrogatePairStr, pos) {
var cNum1 = surrogatePairStr.charCodeAt(pos), cNum2, codePointNumber;
if (sg.isSurrogateChar(cNum1) !== 1) { return cNum1; }
cNum2 = surrogatePairStr.charCodeAt(pos + 1);
if (sg.isSurrogateChar(cNum2) !== 2) { return cNum1; }
/* strip their surrogate range offsets */
cNum1 -= c.highSrgStart;
cNum2 -= c.lowSrgStart;
/* shift the high number part into the range it represents */
cNum1 *= c.lowSrgCnt;
/* compose original CPN */
codePointNumber = cNum1 + cNum2 + c.overFFFFh;
return codePointNumber;
};
sg.ord = (((typeof String.prototype.codePointAt) === 'function')
? function ord(s, i) { return String(s).codePointAt(i); } : sg.chrShim);
sg.chrShim = function surrog8_chr(codePointNumber) {
var cNum1, cNum2, shifted;
if (arguments.length > 1) {
cNum1 = '';
for (cNum2 = 0; cNum2 < arguments.length; cNum2 += 1) {
cNum1 += surrog8_chr(arguments[cNum2]);
}
return cNum1;
}
if (codePointNumber < c.overFFFFh) {
/* CPN is in low range so we don't need a surrogate pair. */
return String.fromCharCode(codePointNumber);
}
/* shift codepoint numbers to start counting in high range */
shifted = codePointNumber - c.overFFFFh;
/* split the (possibly large) code point number into smaller numbers */
cNum1 = Math.floor(shifted / c.lowSrgCnt);
cNum2 = (shifted % c.lowSrgCnt);
/* add both CPN parts into surrogate range */
cNum1 += c.highSrgStart;
cNum2 += c.lowSrgStart;
return String.fromCharCode(cNum1, cNum2);
};
sg.chr = (((typeof String.fromCodePoint) === 'function')
? String.fromCodePoint.bind(String) : sg.chrShim);
sg.uHHHH = function surrog8_uHHHH(str) {
if ('number' === typeof str) { str = sg.chr(str); }
return String(str).replace(surrog8_uHHHH.unsafe, surrog8_uHHHH.escape);
};
sg.uHHHH.unsafe = /[\x00-\x1F\x7F-\uFFFF]/g;
sg.uHHHH.escape = function (cNum) {
if ('number' !== typeof cNum) { cNum = String(cNum).charCodeAt(0); }
cNum = cNum.toString(16).toUpperCase();
return ('\\u' + '0000'.substr(cNum.length, 4) + cNum);
};
sg.lpad = function (data, minlen, padding) {
if (data.length >= (minlen || 0)) { return data; }
return ((0).toFixed(minlen).replace(/\S/g, (padding || '0')
).substr(0, minlen - data.length) + data);
};
sg.esc = function hexEscape(data, opts) {
var escFunc;
if (!opts) { opts = {}; }
if ('number' === typeof data) {
data = data.toString(Math.abs(opts.base || 10));
data = ((opts.base < 0) ? data.toLowerCase() : data.toUpperCase());
if (opts.minlen) { data = sg.lpad(data, opts.minlen, opts.padding); }
return ((opts.prefix || '') + data + (opts.suffix || ''));
}
data = String(data);
escFunc = function (pair) { return sg.esc(sg.ord(pair), opts); };
if (opts.preEscape) { data = data.replace(opts.preEscape, escFunc); }
data = data.replace(c.rxAllPairs, escFunc);
data = data.replace(sg.uHHHH.unsafe, escFunc);
return data;
};
sg.css = function cssEscape(data) { return sg.esc(data, sg.css.opts); };
sg.css.opts = { prefix: '\\', base: -16, suffix: ' ', preEscape: /\\|'|"/g,
minlen: 2, /* "\n" -> "\0a" avoids confusion with "\a". */
};
sg.xml = function xmlEscape(data) { return sg.esc(data, sg.xml.opts); };
sg.xml.opts = { prefix: '&#', base: 10, suffix: ';', preEscape: /[&<>'"]/g };
return sg;
}));