transliteration
Version:
Unicode to ASCII transliteration / slugify module. Works in node.js, web browser and command line.
131 lines (115 loc) • 3.67 kB
JavaScript
;
exports.__esModule = true;
exports.replaceStr = undefined;
var _utils = require('./utils');
// Cache of transliteration tables. transliterate() keys it by the high byte
// of a code unit (ord >> 8) and indexes each table by the low byte (ord & 0xff).
// Tables are loaded on first use, or supplied wholesale via setCodemap().
var codemap = {};
// Baseline option values that every merged option set starts from.
var defaultOptions = {
// Text emitted for characters that have no mapping (or lie above U+FFFF).
unknown: '[?]',
// [pattern, replacement] pairs applied to the input before transliteration.
replace: [],
// [pattern, replacement] pairs applied to the transliterated output.
replaceAfter: [],
// Separators the input is split on; they are re-inserted into the output unchanged.
ignore: []
};
// Options stored by transliterate.config(); used when transliterate() is
// called without an options argument.
var configOptions = {};
/**
 * Apply a list of search/replace rules to a string, in order.
 *
 * Each rule is a two-element array `[pattern, replacement]`:
 *  - a RegExp pattern is applied globally (a `g` flag is added if missing);
 *  - a string pattern is matched literally, everywhere in the string;
 *  - any other pattern type is skipped.
 *
 * The rules passed in are never modified.
 *
 * @param {string} str The string to run the replacements on
 * @param {Array} replace Rules to apply (an array or any other iterable)
 * @returns {string} The string after all rules have been applied
 */
/* istanbul ignore next */
var replaceStr = exports.replaceStr = function replaceStr(str, replace) {
  // Accept plain arrays as well as other iterables without ES2015 syntax.
  var rules = Array.isArray(replace) ? replace : Array.from(replace);
  for (var i = 0; i < rules.length; i++) {
    var rule = rules[i];
    var pattern = rule[0];
    if (pattern instanceof RegExp) {
      if (!pattern.global) {
        // Rebuild from `source`; stringifying the regex and stripping the
        // slashes would corrupt the pattern (and keep the flags suffix).
        pattern = new RegExp(pattern.source, pattern.flags + 'g');
      }
    } else if (typeof pattern === 'string') {
      pattern = new RegExp((0, _utils.escapeRegExp)(pattern), 'g');
    } else {
      // Unsupported pattern type: ignore this rule.
      continue;
    }
    str = str.replace(pattern, rule[1]);
  }
  return str;
};
/**
 * Transliterate a string using the per-block code maps.
 *
 * Steps: split on (and preserve) the `ignore` entries, apply the `replace`
 * rules, map every code unit through `codemap`, then apply the
 * `replaceAfter` rules. Characters above U+FFFF, or with no mapping, are
 * emitted as `options.unknown`.
 *
 * When `options` is given it is merged over the defaults; otherwise the
 * options stored through `transliterate.config()` are used.
 *
 * @param {string} str The string which is being transliterated
 * @param {object} options options
 * @returns {string} The transliterated string
 */
/* istanbul ignore next */
var transliterate = function transliterate(str, options) {
  var opt = options ? (0, _utils.mergeOptions)(defaultOptions, options) : (0, _utils.mergeOptions)(defaultOptions, configOptions);
  str = String(str);

  if (opt.ignore instanceof Array && opt.ignore.length > 0) {
    // Peel off the first ignore entry: split on it, transliterate each piece
    // with the remaining entries, and stitch the pieces back together with
    // the untouched separator. The recursion handles the other entries.
    var separator = opt.ignore[0];
    var remainingIgnore = opt.ignore.slice(1);
    var pieces = str.split(separator);
    var result = [];
    for (var j = 0; j < pieces.length; j++) {
      result.push(transliterate(pieces[j], (0, _utils.mergeOptions)(opt, { ignore: remainingIgnore })));
    }
    return result.join(separator);
  }

  str = replaceStr(str, opt.replace);

  var decoded = (0, _utils.ucs2decode)((0, _utils.fixChineseSpace)(str));
  // Accept plain arrays as well as other iterables without ES2015 syntax.
  var codePoints = Array.isArray(decoded) ? decoded : Array.from(decoded);
  var strArrNew = [];
  for (var k = 0; k < codePoints.length; k++) {
    var ord = codePoints[k];
    // These characters are also transliteratable. Will improve it later if needed
    if (ord > 0xffff) {
      strArrNew.push(opt.unknown);
      continue;
    }
    var offset = ord >> 8;
    if (typeof codemap[offset] === 'undefined') {
      try {
        codemap[offset] = require('../../data/x' + offset.toString(16) + '.json');
      } catch (e) {
        // Best effort: a block without a data file maps everything to `unknown`.
        codemap[offset] = [];
      }
    }
    var t = codemap[offset][ord & 0xff];
    strArrNew.push(typeof t === 'undefined' || t === null ? opt.unknown : t);
  }

  // Build the post-processing rules on a copy. Pushing onto opt.replaceAfter
  // could grow an array shared with the default/config/caller options on
  // every call, depending on how mergeOptions copies arrays.
  var replaceAfter = opt.replaceAfter;
  if (strArrNew.length > 1) {
    // Strip one leading space and any trailing spaces from the result.
    replaceAfter = opt.replaceAfter.concat([[/(^ +?)|( +?$)/g, '']]);
  }
  return replaceStr(strArrNew.join(''), replaceAfter);
};
/**
 * Replace the active code map when a truthy one is supplied.
 *
 * @param {object} customCodemap New code map; falsy values leave the current one in place
 * @returns {object} The code map in effect after the call
 */
transliterate.setCodemap = function (customCodemap) {
  if (customCodemap) {
    codemap = customCodemap;
  }
  return codemap;
};
/**
 * Read or set the options used when transliterate() is called without options.
 *
 * @param {object} [options] Options to merge over the defaults and store;
 *   omit (or pass undefined) to read the stored options without changing them
 * @returns {object} The stored options
 */
transliterate.config = function (options) {
  if (options !== undefined) {
    configOptions = (0, _utils.mergeOptions)(defaultOptions, options);
  }
  return configOptions;
};
exports.default = transliterate;