kanji.js
Version:
漢字 search/lookup library for Node.js and browsers
1,658 lines (1,498 loc) • 1.93 MB
JavaScript
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
typeof define === 'function' && define.amd ? define(factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.Kanji = factory());
})(this, (function () { 'use strict';
/**
* Returns detailed type as string (instead of just 'object' for arrays etc)
* @private
* @param {any} value js value
* @returns {String} type of value
* @example
* typeOf({}); // 'object'
* typeOf([]); // 'array'
* typeOf(function() {}); // 'function'
* typeOf(/a/); // 'regexp'
* typeOf(new Date()); // 'date'
* typeOf(null); // 'null'
* typeOf(undefined); // 'undefined'
* typeOf('a'); // 'string'
* typeOf(1); // 'number'
* typeOf(true); // 'boolean'
* typeOf(new Map()); // 'map'
* typeOf(new Set()); // 'map'
*/
function typeOf(value) {
if (value === null) {
return 'null';
}
if (value !== Object(value)) {
return typeof value;
}
return {}.toString
.call(value)
.slice(8, -1)
.toLowerCase();
}
/**
* Checks if input string is empty
* @param {String} input text input
* @return {Boolean} true if no input
*/
function isEmpty(input) {
if (typeOf(input) !== 'string') {
return true;
}
return !input.length;
}
/**
* Takes a character and a unicode range. Returns true if the char is in the range.
* @param {String} char unicode character
* @param {Number} start unicode start range
* @param {Number} end unicode end range
* @return {Boolean}
*/
function isCharInRange(char = '', start, end) {
if (isEmpty(char)) return false;
const code = char.charCodeAt(0);
return start <= code && code <= end;
}
const TO_KANA_METHODS = {
HIRAGANA: 'toHiragana',
KATAKANA: 'toKatakana',
};
const ROMANIZATIONS = {
HEPBURN: 'hepburn',
};
/**
* Default config for WanaKana, user passed options will be merged with these
* @type {DefaultOptions}
* @name defaultOptions
* @property {Boolean} [useObsoleteKana=false] - Set to true to use obsolete characters, such as ゐ and ゑ.
* @example
* toHiragana('we', { useObsoleteKana: true })
* // => 'ゑ'
* @property {Boolean} [passRomaji=false] - Set to true to pass romaji when using mixed syllabaries with toKatakana() or toHiragana()
* @example
* toHiragana('only convert the katakana: ヒラガナ', { passRomaji: true })
* // => "only convert the katakana: ひらがな"
* @property {Object} [convertLongVowelMark=true] - Set to false to prevent conversions of 'ー' to extended vowels with toHiragana()
* @example
* toHiragana('ラーメン', { convertLongVowelMark: false });
* // => 'らーめん
* @property {Boolean} [upcaseKatakana=false] - Set to true to convert katakana to uppercase using toRomaji()
* @example
* toRomaji('ひらがな カタカナ', { upcaseKatakana: true })
* // => "hiragana KATAKANA"
* @property {Boolean|String} [IMEMode=false] - Set to true, 'toHiragana', or 'toKatakana' to handle conversion while it is being typed.
* @property {String} [romanization='hepburn'] - choose toRomaji() romanization map (currently only 'hepburn')
* @property {Object} [customKanaMapping] - custom map will be merged with default conversion
* @example
* toKana('wanakana', { customKanaMapping: { na: 'に', ka: 'Bana' }) };
* // => 'わにBanaに'
* @property {Object} [customRomajiMapping] - custom map will be merged with default conversion
* @example
* toRomaji('つじぎり', { customRomajiMapping: { じ: 'zi', つ: 'tu', り: 'li' }) };
* // => 'tuzigili'
*/
const DEFAULT_OPTIONS = {
useObsoleteKana: false,
passRomaji: false,
upcaseKatakana: false,
IMEMode: false,
convertLongVowelMark: true,
romanization: ROMANIZATIONS.HEPBURN,
};
const LATIN_UPPERCASE_START = 0x41;
const LATIN_UPPERCASE_END = 0x5a;
const HIRAGANA_START = 0x3041;
const HIRAGANA_END = 0x3096;
const KATAKANA_START = 0x30a1;
const KATAKANA_END = 0x30fc;
const KANJI_START = 0x4e00;
const KANJI_END = 0x9faf;
const PROLONGED_SOUND_MARK = 0x30fc;
const KANA_SLASH_DOT = 0x30fb;
const MODERN_ENGLISH = [0x0000, 0x007f];
const HEPBURN_MACRON_RANGES = [
[0x0100, 0x0101], // Ā ā
[0x0112, 0x0113], // Ē ē
[0x012a, 0x012b], // Ī ī
[0x014c, 0x014d], // Ō ō
[0x016a, 0x016b], // Ū ū
];
const SMART_QUOTE_RANGES = [
[0x2018, 0x2019], // ‘ ’
[0x201c, 0x201d], // “ ”
];
const ROMAJI_RANGES = [MODERN_ENGLISH, ...HEPBURN_MACRON_RANGES];
const EN_PUNCTUATION_RANGES = [
[0x20, 0x2f],
[0x3a, 0x3f],
[0x5b, 0x60],
[0x7b, 0x7e],
...SMART_QUOTE_RANGES,
];
var safeIsNaN = Number.isNaN ||
function ponyfill(value) {
return typeof value === 'number' && value !== value;
};
function isEqual(first, second) {
if (first === second) {
return true;
}
if (safeIsNaN(first) && safeIsNaN(second)) {
return true;
}
return false;
}
function areInputsEqual(newInputs, lastInputs) {
if (newInputs.length !== lastInputs.length) {
return false;
}
for (var i = 0; i < newInputs.length; i++) {
if (!isEqual(newInputs[i], lastInputs[i])) {
return false;
}
}
return true;
}
function memoizeOne(resultFn, isEqual) {
if (isEqual === void 0) { isEqual = areInputsEqual; }
var cache = null;
function memoized() {
var newArgs = [];
for (var _i = 0; _i < arguments.length; _i++) {
newArgs[_i] = arguments[_i];
}
if (cache && cache.lastThis === this && isEqual(newArgs, cache.lastArgs)) {
return cache.lastResult;
}
var lastResult = resultFn.apply(this, newArgs);
cache = {
lastResult: lastResult,
lastArgs: newArgs,
lastThis: this,
};
return lastResult;
}
memoized.clear = function clear() {
cache = null;
};
return memoized;
}
var has = Object.prototype.hasOwnProperty;
function find(iter, tar, key) {
for (key of iter.keys()) {
if (dequal(key, tar)) return key;
}
}
function dequal(foo, bar) {
var ctor, len, tmp;
if (foo === bar) return true;
if (foo && bar && (ctor=foo.constructor) === bar.constructor) {
if (ctor === Date) return foo.getTime() === bar.getTime();
if (ctor === RegExp) return foo.toString() === bar.toString();
if (ctor === Array) {
if ((len=foo.length) === bar.length) {
while (len-- && dequal(foo[len], bar[len]));
}
return len === -1;
}
if (ctor === Set) {
if (foo.size !== bar.size) {
return false;
}
for (len of foo) {
tmp = len;
if (tmp && typeof tmp === 'object') {
tmp = find(bar, tmp);
if (!tmp) return false;
}
if (!bar.has(tmp)) return false;
}
return true;
}
if (ctor === Map) {
if (foo.size !== bar.size) {
return false;
}
for (len of foo) {
tmp = len[0];
if (tmp && typeof tmp === 'object') {
tmp = find(bar, tmp);
if (!tmp) return false;
}
if (!dequal(len[1], bar.get(tmp))) {
return false;
}
}
return true;
}
if (ctor === ArrayBuffer) {
foo = new Uint8Array(foo);
bar = new Uint8Array(bar);
} else if (ctor === DataView) {
if ((len=foo.byteLength) === bar.byteLength) {
while (len-- && foo.getInt8(len) === bar.getInt8(len));
}
return len === -1;
}
if (ArrayBuffer.isView(foo)) {
if ((len=foo.byteLength) === bar.byteLength) {
while (len-- && foo[len] === bar[len]);
}
return len === -1;
}
if (!ctor || typeof foo === 'object') {
len = 0;
for (ctor in foo) {
if (has.call(foo, ctor) && ++len && !has.call(bar, ctor)) return false;
if (!(ctor in bar) || !dequal(foo[ctor], bar[ctor])) return false;
}
return Object.keys(bar).length === len;
}
}
return foo !== foo && bar !== bar;
}
/**
* Easy re-use of merging with default options
* @param {Object} opts user options
* @returns user options merged over default options
*/
const mergeWithDefaultOptions = (opts = {}) => Object.assign({}, DEFAULT_OPTIONS, opts);
function applyMapping(string, mapping, convertEnding) {
const root = mapping;
function nextSubtree(tree, nextChar) {
const subtree = tree[nextChar];
if (subtree === undefined) {
return undefined;
}
// if the next child node does not have a node value, set its node value to the input
return Object.assign({ '': tree[''] + nextChar }, tree[nextChar]);
}
function newChunk(remaining, currentCursor) {
// start parsing a new chunk
const firstChar = remaining.charAt(0);
return parse(
Object.assign({ '': firstChar }, root[firstChar]),
remaining.slice(1),
currentCursor,
currentCursor + 1
);
}
function parse(tree, remaining, lastCursor, currentCursor) {
if (!remaining) {
if (convertEnding || Object.keys(tree).length === 1) {
// nothing more to consume, just commit the last chunk and return it
// so as to not have an empty element at the end of the result
return tree[''] ? [[lastCursor, currentCursor, tree['']]] : [];
}
// if we don't want to convert the ending, because there are still possible continuations
// return null as the final node value
return [[lastCursor, currentCursor, null]];
}
if (Object.keys(tree).length === 1) {
return [[lastCursor, currentCursor, tree['']]].concat(
newChunk(remaining, currentCursor)
);
}
const subtree = nextSubtree(tree, remaining.charAt(0));
if (subtree === undefined) {
return [[lastCursor, currentCursor, tree['']]].concat(
newChunk(remaining, currentCursor)
);
}
// continue current branch
return parse(subtree, remaining.slice(1), lastCursor, currentCursor + 1);
}
return newChunk(string, 0);
}
// transform the tree, so that for example hepburnTree['ゔ']['ぁ'][''] === 'va'
// or kanaTree['k']['y']['a'][''] === 'きゃ'
function transform(tree) {
return Object.entries(tree).reduce((map, [char, subtree]) => {
const endOfBranch = typeOf(subtree) === 'string';
// eslint-disable-next-line no-param-reassign
map[char] = endOfBranch ? { '': subtree } : transform(subtree);
return map;
}, {});
}
function getSubTreeOf(tree, string) {
return string.split('').reduce((correctSubTree, char) => {
if (correctSubTree[char] === undefined) {
// eslint-disable-next-line no-param-reassign
correctSubTree[char] = {};
}
return correctSubTree[char];
}, tree);
}
/**
* Creates a custom mapping tree, returns a function that accepts a defaultMap which the newly created customMapping will be merged with and returned
* (customMap) => (defaultMap) => mergedMap
* @param {Object} customMap { 'ka' : 'な' }
* @return {Function} (defaultMap) => defaultMergedWithCustomMap
* @example
* const sillyMap = createCustomMapping({ 'ちゃ': 'time', '茎': 'cookie' });
* // sillyMap is passed defaultMapping to merge with when called in toRomaji()
* toRomaji("It's 茎 ちゃ よ", { customRomajiMapping: sillyMap });
* // => 'It's cookie time yo';
*/
function createCustomMapping(customMap = {}) {
const customTree = {};
if (typeOf(customMap) === 'object') {
Object.entries(customMap).forEach(([roma, kana]) => {
let subTree = customTree;
roma.split('').forEach((char) => {
if (subTree[char] === undefined) {
subTree[char] = {};
}
subTree = subTree[char];
});
subTree[''] = kana;
});
}
return function makeMap(map) {
const mapCopy = JSON.parse(JSON.stringify(map));
function transformMap(mapSubtree, customSubtree) {
if (mapSubtree === undefined || typeOf(mapSubtree) === 'string') {
return customSubtree;
}
return Object.entries(customSubtree).reduce(
(newSubtree, [char, subtree]) => {
// eslint-disable-next-line no-param-reassign
newSubtree[char] = transformMap(mapSubtree[char], subtree);
return newSubtree;
},
mapSubtree
);
}
return transformMap(mapCopy, customTree);
};
}
// allow consumer to pass either function or object as customMapping
function mergeCustomMapping(map, customMapping) {
if (!customMapping) {
return map;
}
return typeOf(customMapping) === 'function'
? customMapping(map)
: createCustomMapping(customMapping)(map);
}
// NOTE: not exactly kunrei shiki, for example ぢゃ -> dya instead of zya, to avoid name clashing
/* eslint-disable */
// prettier-ignore
const BASIC_KUNREI = {
a: 'あ', i: 'い', u: 'う', e: 'え', o: 'お',
k: { a: 'か', i: 'き', u: 'く', e: 'け', o: 'こ', },
s: { a: 'さ', i: 'し', u: 'す', e: 'せ', o: 'そ', },
t: { a: 'た', i: 'ち', u: 'つ', e: 'て', o: 'と', },
n: { a: 'な', i: 'に', u: 'ぬ', e: 'ね', o: 'の', },
h: { a: 'は', i: 'ひ', u: 'ふ', e: 'へ', o: 'ほ', },
m: { a: 'ま', i: 'み', u: 'む', e: 'め', o: 'も', },
y: { a: 'や', u: 'ゆ', o: 'よ' },
r: { a: 'ら', i: 'り', u: 'る', e: 'れ', o: 'ろ', },
w: { a: 'わ', i: 'ゐ', e: 'ゑ', o: 'を', },
g: { a: 'が', i: 'ぎ', u: 'ぐ', e: 'げ', o: 'ご', },
z: { a: 'ざ', i: 'じ', u: 'ず', e: 'ぜ', o: 'ぞ', },
d: { a: 'だ', i: 'ぢ', u: 'づ', e: 'で', o: 'ど', },
b: { a: 'ば', i: 'び', u: 'ぶ', e: 'べ', o: 'ぼ', },
p: { a: 'ぱ', i: 'ぴ', u: 'ぷ', e: 'ぺ', o: 'ぽ', },
v: { a: 'ゔぁ', i: 'ゔぃ', u: 'ゔ', e: 'ゔぇ', o: 'ゔぉ', },
};
const SPECIAL_SYMBOLS$1 = {
'.': '。',
',': '、',
':': ':',
'/': '・',
'!': '!',
'?': '?',
'~': '〜',
'-': 'ー',
'‘': '「',
'’': '」',
'“': '『',
'”': '』',
'[': '[',
']': ']',
'(': '(',
')': ')',
'{': '{',
'}': '}',
};
const CONSONANTS = {
k: 'き',
s: 'し',
t: 'ち',
n: 'に',
h: 'ひ',
m: 'み',
r: 'り',
g: 'ぎ',
z: 'じ',
d: 'ぢ',
b: 'び',
p: 'ぴ',
v: 'ゔ',
q: 'く',
f: 'ふ',
};
const SMALL_Y$1 = { ya: 'ゃ', yi: 'ぃ', yu: 'ゅ', ye: 'ぇ', yo: 'ょ' };
const SMALL_VOWELS = { a: 'ぁ', i: 'ぃ', u: 'ぅ', e: 'ぇ', o: 'ぉ' };
// typing one should be the same as having typed the other instead
const ALIASES = {
sh: 'sy', // sha -> sya
ch: 'ty', // cho -> tyo
cy: 'ty', // cyo -> tyo
chy: 'ty', // chyu -> tyu
shy: 'sy', // shya -> sya
j: 'zy', // ja -> zya
jy: 'zy', // jye -> zye
// exceptions to above rules
shi: 'si',
chi: 'ti',
tsu: 'tu',
ji: 'zi',
fu: 'hu',
};
// xtu -> っ
const SMALL_LETTERS = Object.assign(
{
tu: 'っ',
wa: 'ゎ',
ka: 'ヵ',
ke: 'ヶ',
},
SMALL_VOWELS,
SMALL_Y$1
);
// don't follow any notable patterns
const SPECIAL_CASES = {
yi: 'い',
wu: 'う',
ye: 'いぇ',
wi: 'うぃ',
we: 'うぇ',
kwa: 'くぁ',
whu: 'う',
// because it's not thya for てゃ but tha
// and tha is not てぁ, but てゃ
tha: 'てゃ',
thu: 'てゅ',
tho: 'てょ',
dha: 'でゃ',
dhu: 'でゅ',
dho: 'でょ',
};
const AIUEO_CONSTRUCTIONS = {
wh: 'う',
kw: 'く',
qw: 'く',
q: 'く',
gw: 'ぐ',
sw: 'す',
ts: 'つ',
th: 'て',
tw: 'と',
dh: 'で',
dw: 'ど',
fw: 'ふ',
f: 'ふ',
};
/* eslint-enable */
function createRomajiToKanaMap$1() {
const kanaTree = transform(BASIC_KUNREI);
// pseudo partial application
const subtreeOf = (string) => getSubTreeOf(kanaTree, string);
// add tya, sya, etc.
Object.entries(CONSONANTS).forEach(([consonant, yKana]) => {
Object.entries(SMALL_Y$1).forEach(([roma, kana]) => {
// for example kyo -> き + ょ
subtreeOf(consonant + roma)[''] = yKana + kana;
});
});
Object.entries(SPECIAL_SYMBOLS$1).forEach(([symbol, jsymbol]) => {
subtreeOf(symbol)[''] = jsymbol;
});
// things like うぃ, くぃ, etc.
Object.entries(AIUEO_CONSTRUCTIONS).forEach(([consonant, aiueoKana]) => {
Object.entries(SMALL_VOWELS).forEach(([vowel, kana]) => {
const subtree = subtreeOf(consonant + vowel);
subtree[''] = aiueoKana + kana;
});
});
// different ways to write ん
['n', "n'", 'xn'].forEach((nChar) => {
subtreeOf(nChar)[''] = 'ん';
});
// c is equivalent to k, but not for chi, cha, etc. that's why we have to make a copy of k
kanaTree.c = JSON.parse(JSON.stringify(kanaTree.k));
Object.entries(ALIASES).forEach(([string, alternative]) => {
const allExceptLast = string.slice(0, string.length - 1);
const last = string.charAt(string.length - 1);
const parentTree = subtreeOf(allExceptLast);
// copy to avoid recursive containment
parentTree[last] = JSON.parse(JSON.stringify(subtreeOf(alternative)));
});
function getAlternatives(string) {
return [...Object.entries(ALIASES), ...[['c', 'k']]].reduce(
(list, [alt, roma]) => (string.startsWith(roma) ? list.concat(string.replace(roma, alt)) : list),
[]
);
}
Object.entries(SMALL_LETTERS).forEach(([kunreiRoma, kana]) => {
const last = (char) => char.charAt(char.length - 1);
const allExceptLast = (chars) => chars.slice(0, chars.length - 1);
const xRoma = `x${kunreiRoma}`;
const xSubtree = subtreeOf(xRoma);
xSubtree[''] = kana;
// ltu -> xtu -> っ
const parentTree = subtreeOf(`l${allExceptLast(kunreiRoma)}`);
parentTree[last(kunreiRoma)] = xSubtree;
// ltsu -> ltu -> っ
getAlternatives(kunreiRoma).forEach((altRoma) => {
['l', 'x'].forEach((prefix) => {
const altParentTree = subtreeOf(prefix + allExceptLast(altRoma));
altParentTree[last(altRoma)] = subtreeOf(prefix + kunreiRoma);
});
});
});
Object.entries(SPECIAL_CASES).forEach(([string, kana]) => {
subtreeOf(string)[''] = kana;
});
// add kka, tta, etc.
function addTsu(tree) {
return Object.entries(tree).reduce((tsuTree, [key, value]) => {
if (!key) {
// we have reached the bottom of this branch
// eslint-disable-next-line no-param-reassign
tsuTree[key] = `っ${value}`;
} else {
// more subtrees
// eslint-disable-next-line no-param-reassign
tsuTree[key] = addTsu(value);
}
return tsuTree;
}, {});
}
// have to explicitly name c here, because we made it a copy of k, not a reference
[...Object.keys(CONSONANTS), 'c', 'y', 'w', 'j'].forEach((consonant) => {
const subtree = kanaTree[consonant];
subtree[consonant] = addTsu(subtree);
});
// nn should not be っん
delete kanaTree.n.n;
// solidify the results, so that there there is referential transparency within the tree
return Object.freeze(JSON.parse(JSON.stringify(kanaTree)));
}
let romajiToKanaMap = null;
function getRomajiToKanaTree() {
if (romajiToKanaMap == null) {
romajiToKanaMap = createRomajiToKanaMap$1();
}
return romajiToKanaMap;
}
const USE_OBSOLETE_KANA_MAP = createCustomMapping({
wi: 'ゐ',
we: 'ゑ',
});
function IME_MODE_MAP(map) {
// in IME mode, we do not want to convert single ns
const mapCopy = JSON.parse(JSON.stringify(map));
mapCopy.n.n = { '': 'ん' };
mapCopy.n[' '] = { '': 'ん' };
return mapCopy;
}
/**
* Tests if char is in English unicode uppercase range
* @param {String} char
* @return {Boolean}
*/
function isCharUpperCase(char = '') {
if (isEmpty(char)) return false;
return isCharInRange(char, LATIN_UPPERCASE_START, LATIN_UPPERCASE_END);
}
/**
* Returns true if char is 'ー'
* @param {String} char to test
* @return {Boolean}
*/
function isCharLongDash(char = '') {
if (isEmpty(char)) return false;
return char.charCodeAt(0) === PROLONGED_SOUND_MARK;
}
/**
* Tests if char is '・'
* @param {String} char
* @return {Boolean} true if '・'
*/
function isCharSlashDot(char = '') {
if (isEmpty(char)) return false;
return char.charCodeAt(0) === KANA_SLASH_DOT;
}
/**
* Tests a character. Returns true if the character is [Hiragana](https://en.wikipedia.org/wiki/Hiragana).
* @param {String} char character string to test
* @return {Boolean}
*/
function isCharHiragana(char = '') {
if (isEmpty(char)) return false;
if (isCharLongDash(char)) return true;
return isCharInRange(char, HIRAGANA_START, HIRAGANA_END);
}
/**
* Convert [Hiragana](https://en.wikipedia.org/wiki/Hiragana) to [Katakana](https://en.wikipedia.org/wiki/Katakana)
* Passes through any non-hiragana chars
* @private
* @param {String} [input=''] text input
* @return {String} converted text
* @example
* hiraganaToKatakana('ひらがな')
* // => "ヒラガナ"
* hiraganaToKatakana('ひらがな is a type of kana')
* // => "ヒラガナ is a type of kana"
*/
function hiraganaToKatakana(input = '') {
const kata = [];
input.split('').forEach((char) => {
// Short circuit to avoid incorrect codeshift for 'ー' and '・'
if (isCharLongDash(char) || isCharSlashDot(char)) {
kata.push(char);
} else if (isCharHiragana(char)) {
// Shift charcode.
const code = char.charCodeAt(0) + (KATAKANA_START - HIRAGANA_START);
const kataChar = String.fromCharCode(code);
kata.push(kataChar);
} else {
// Pass non-hiragana chars through
kata.push(char);
}
});
return kata.join('');
}
// memoize and deeply compare args so we only recreate when necessary
const createRomajiToKanaMap = memoizeOne(
(IMEMode, useObsoleteKana, customKanaMapping) => {
let map = getRomajiToKanaTree();
map = IMEMode ? IME_MODE_MAP(map) : map;
map = useObsoleteKana ? USE_OBSOLETE_KANA_MAP(map) : map;
if (customKanaMapping) {
map = mergeCustomMapping(map, customKanaMapping);
}
return map;
},
dequal
);
/**
* Convert [Romaji](https://en.wikipedia.org/wiki/Romaji) to [Kana](https://en.wikipedia.org/wiki/Kana), lowercase text will result in [Hiragana](https://en.wikipedia.org/wiki/Hiragana) and uppercase text will result in [Katakana](https://en.wikipedia.org/wiki/Katakana).
* @param {String} [input=''] text
* @param {DefaultOptions} [options=defaultOptions]
* @return {String} converted text
* @example
* toKana('onaji BUTTSUUJI')
* // => 'おなじ ブッツウジ'
* toKana('ONAJI buttsuuji')
* // => 'オナジ ぶっつうじ'
* toKana('座禅‘zazen’スタイル')
* // => '座禅「ざぜん」スタイル'
* toKana('batsuge-mu')
* // => 'ばつげーむ'
* toKana('!?.:/,~-‘’“”[](){}') // Punctuation conversion
* // => '!?。:・、〜ー「」『』[](){}'
* toKana('we', { useObsoleteKana: true })
* // => 'ゑ'
* toKana('wanakana', { customKanaMapping: { na: 'に', ka: 'bana' } });
* // => 'わにbanaに'
*/
function toKana(input = '', options = {}, map) {
let config;
if (!map) {
config = mergeWithDefaultOptions(options);
map = createRomajiToKanaMap(
config.IMEMode,
config.useObsoleteKana,
config.customKanaMapping
);
} else {
config = options;
}
// throw away the substring index information and just concatenate all the kana
return splitIntoConvertedKana(input, config, map)
.map((kanaToken) => {
const [start, end, kana] = kanaToken;
if (kana === null) {
// haven't converted the end of the string, since we are in IME mode
return input.slice(start);
}
const enforceHiragana = config.IMEMode === TO_KANA_METHODS.HIRAGANA;
const enforceKatakana = config.IMEMode === TO_KANA_METHODS.KATAKANA
|| [...input.slice(start, end)].every(isCharUpperCase);
return enforceHiragana || !enforceKatakana
? kana
: hiraganaToKatakana(kana);
})
.join('');
}
/**
*
* @private
* @param {String} [input=''] input text
* @param {DefaultOptions} [options=defaultOptions] toKana options
* @param {Object} [map] custom mapping
* @returns {Array[]} [[start, end, token]]
* @example
* splitIntoConvertedKana('buttsuuji')
* // => [[0, 2, 'ぶ'], [2, 6, 'っつ'], [6, 7, 'う'], [7, 9, 'じ']]
*/
function splitIntoConvertedKana(input = '', options = {}, map) {
const { IMEMode, useObsoleteKana, customKanaMapping } = options;
if (!map) {
map = createRomajiToKanaMap(IMEMode, useObsoleteKana, customKanaMapping);
}
return applyMapping(input.toLowerCase(), map, !IMEMode);
}
/**
* Tests a character. Returns true if the character is [Romaji](https://en.wikipedia.org/wiki/Romaji) (allowing [Hepburn romanisation](https://en.wikipedia.org/wiki/Hepburn_romanization))
* @param {String} char character string to test
* @return {Boolean}
*/
function isCharRomaji(char = '') {
if (isEmpty(char)) return false;
return ROMAJI_RANGES.some(([start, end]) => isCharInRange(char, start, end));
}
/**
* Test if `input` is [Romaji](https://en.wikipedia.org/wiki/Romaji) (allowing [Hepburn romanisation](https://en.wikipedia.org/wiki/Hepburn_romanization))
* @param {String} [input=''] text
* @param {Regexp} [allowed] additional test allowed to pass for each char
* @return {Boolean} true if [Romaji](https://en.wikipedia.org/wiki/Romaji)
* @example
* isRomaji('Tōkyō and Ōsaka')
* // => true
* isRomaji('12a*b&c-d')
* // => true
* isRomaji('あアA')
* // => false
* isRomaji('お願い')
* // => false
* isRomaji('a!b&cーd') // Zenkaku punctuation fails
* // => false
* isRomaji('a!b&cーd', /[!ー]/)
* // => true
*/
function isRomaji(input = '', allowed) {
const augmented = typeOf(allowed) === 'regexp';
return isEmpty(input)
? false
: [...input].every((char) => {
const isRoma = isCharRomaji(char);
return !augmented ? isRoma : isRoma || allowed.test(char);
});
}
/**
* Tests a character. Returns true if the character is [Katakana](https://en.wikipedia.org/wiki/Katakana).
* @param {String} char character string to test
* @return {Boolean}
*/
function isCharKatakana(char = '') {
return isCharInRange(char, KATAKANA_START, KATAKANA_END);
}
/**
* Test if `input` is [Hiragana](https://en.wikipedia.org/wiki/Hiragana)
* @param {String} [input=''] text
* @return {Boolean} true if all [Hiragana](https://en.wikipedia.org/wiki/Hiragana)
* @example
* isHiragana('げーむ')
* // => true
* isHiragana('A')
* // => false
* isHiragana('あア')
* // => false
*/
function isHiragana(input = '') {
if (isEmpty(input)) return false;
return [...input].every(isCharHiragana);
}
/**
* Test if `input` is [Katakana](https://en.wikipedia.org/wiki/Katakana)
* @param {String} [input=''] text
* @return {Boolean} true if all [Katakana](https://en.wikipedia.org/wiki/Katakana)
* @example
* isKatakana('ゲーム')
* // => true
* isKatakana('あ')
* // => false
* isKatakana('A')
* // => false
* isKatakana('あア')
* // => false
*/
function isKatakana(input = '') {
if (isEmpty(input)) return false;
return [...input].every(isCharKatakana);
}
/**
* Tests a character. Returns true if the character is a CJK ideograph (kanji).
* @param {String} char character string to test
* @return {Boolean}
*/
function isCharKanji(char = '') {
return isCharInRange(char, KANJI_START, KANJI_END);
}
/**
* Tests if `input` is [Kanji](https://en.wikipedia.org/wiki/Kanji) ([Japanese CJK ideographs](https://en.wikipedia.org/wiki/CJK_Unified_Ideographs))
* @param {String} [input=''] text
* @return {Boolean} true if all [Kanji](https://en.wikipedia.org/wiki/Kanji)
* @example
* isKanji('刀')
* // => true
* isKanji('切腹')
* // => true
* isKanji('勢い')
* // => false
* isKanji('あAア')
* // => false
* isKanji('🐸')
* // => false
*/
function isKanji(input = '') {
if (isEmpty(input)) return false;
return [...input].every(isCharKanji);
}
/**
* Test if `input` contains a mix of [Romaji](https://en.wikipedia.org/wiki/Romaji) *and* [Kana](https://en.wikipedia.org/wiki/Kana), defaults to pass through [Kanji](https://en.wikipedia.org/wiki/Kanji)
* @param {String} input text
* @param {Object} [options={ passKanji: true }] optional config to pass through kanji
* @return {Boolean} true if mixed
* @example
* isMixed('Abあア'))
* // => true
* isMixed('お腹A')) // ignores kanji by default
* // => true
* isMixed('お腹A', { passKanji: false }))
* // => false
* isMixed('ab'))
* // => false
* isMixed('あア'))
* // => false
*/
function isMixed(input = '', options = { passKanji: true }) {
const chars = [...input];
let hasKanji = false;
if (!options.passKanji) {
hasKanji = chars.some(isKanji);
}
return (chars.some(isHiragana) || chars.some(isKatakana)) && chars.some(isRomaji) && !hasKanji;
}
const isCharInitialLongDash = (char, index) => isCharLongDash(char) && index < 1;
const isCharInnerLongDash = (char, index) => isCharLongDash(char) && index > 0;
const isKanaAsSymbol = (char) => ['ヶ', 'ヵ'].includes(char);
const LONG_VOWELS = {
a: 'あ',
i: 'い',
u: 'う',
e: 'え',
o: 'う',
};
// inject toRomaji to avoid circular dependency between toRomaji <-> katakanaToHiragana
function katakanaToHiragana(
input = '',
toRomaji,
{ isDestinationRomaji, convertLongVowelMark } = {}
) {
let previousKana = '';
return input
.split('')
.reduce((hira, char, index) => {
// Short circuit to avoid incorrect codeshift for 'ー' and '・'
if (
isCharSlashDot(char)
|| isCharInitialLongDash(char, index)
|| isKanaAsSymbol(char)
) {
return hira.concat(char);
}
// Transform long vowels: 'オー' to 'おう'
if (
convertLongVowelMark
&& previousKana
&& isCharInnerLongDash(char, index)
) {
// Transform previousKana back to romaji, and slice off the vowel
const romaji = toRomaji(previousKana).slice(-1);
// However, ensure 'オー' => 'おお' => 'oo' if this is a transform on the way to romaji
if (
isCharKatakana(input[index - 1])
&& romaji === 'o'
&& isDestinationRomaji
) {
return hira.concat('お');
}
return hira.concat(LONG_VOWELS[romaji]);
// Transform all other chars
}
if (!isCharLongDash(char) && isCharKatakana(char)) {
const code = char.charCodeAt(0) + (HIRAGANA_START - KATAKANA_START);
const hiraChar = String.fromCharCode(code);
previousKana = hiraChar;
return hira.concat(hiraChar);
}
// Pass non katakana chars through
previousKana = '';
return hira.concat(char);
}, [])
.join('');
}
let kanaToHepburnMap = null;
/* eslint-disable */
// prettier-ignore
const BASIC_ROMAJI = {
あ:'a', い:'i', う:'u', え:'e', お:'o',
か:'ka', き:'ki', く:'ku', け:'ke', こ:'ko',
さ:'sa', し:'shi', す:'su', せ:'se', そ:'so',
た:'ta', ち:'chi', つ:'tsu', て:'te', と:'to',
な:'na', に:'ni', ぬ:'nu', ね:'ne', の:'no',
は:'ha', ひ:'hi', ふ:'fu', へ:'he', ほ:'ho',
ま:'ma', み:'mi', む:'mu', め:'me', も:'mo',
ら:'ra', り:'ri', る:'ru', れ:'re', ろ:'ro',
や:'ya', ゆ:'yu', よ:'yo',
わ:'wa', ゐ:'wi', ゑ:'we', を:'wo',
ん: 'n',
が:'ga', ぎ:'gi', ぐ:'gu', げ:'ge', ご:'go',
ざ:'za', じ:'ji', ず:'zu', ぜ:'ze', ぞ:'zo',
だ:'da', ぢ:'ji', づ:'zu', で:'de', ど:'do',
ば:'ba', び:'bi', ぶ:'bu', べ:'be', ぼ:'bo',
ぱ:'pa', ぴ:'pi', ぷ:'pu', ぺ:'pe', ぽ:'po',
ゔぁ:'va', ゔぃ:'vi', ゔ:'vu', ゔぇ:'ve', ゔぉ:'vo',
};
/* eslint-enable */
const SPECIAL_SYMBOLS = {
'。': '.',
'、': ',',
':': ':',
'・': '/',
'!': '!',
'?': '?',
'〜': '~',
'ー': '-',
'「': '‘',
'」': '’',
'『': '“',
'』': '”',
'[': '[',
']': ']',
'(': '(',
')': ')',
'{': '{',
'}': '}',
' ': ' ',
};
// んい -> n'i
const AMBIGUOUS_VOWELS = ['あ', 'い', 'う', 'え', 'お', 'や', 'ゆ', 'よ'];
const SMALL_Y = { ゃ: 'ya', ゅ: 'yu', ょ: 'yo' };
const SMALL_Y_EXTRA = { ぃ: 'yi', ぇ: 'ye' };
const SMALL_AIUEO = {
ぁ: 'a',
ぃ: 'i',
ぅ: 'u',
ぇ: 'e',
ぉ: 'o',
};
const YOON_KANA = [
'き',
'に',
'ひ',
'み',
'り',
'ぎ',
'び',
'ぴ',
'ゔ',
'く',
'ふ',
];
const YOON_EXCEPTIONS = {
し: 'sh',
ち: 'ch',
じ: 'j',
ぢ: 'j',
};
const SMALL_KANA = {
っ: '',
ゃ: 'ya',
ゅ: 'yu',
ょ: 'yo',
ぁ: 'a',
ぃ: 'i',
ぅ: 'u',
ぇ: 'e',
ぉ: 'o',
};
// going with the intuitive (yet incorrect) solution where っや -> yya and っぃ -> ii
// in other words, just assume the sokuon could have been applied to anything
const SOKUON_WHITELIST = {
b: 'b',
c: 't',
d: 'd',
f: 'f',
g: 'g',
h: 'h',
j: 'j',
k: 'k',
m: 'm',
p: 'p',
q: 'q',
r: 'r',
s: 's',
t: 't',
v: 'v',
w: 'w',
x: 'x',
z: 'z',
};
function getKanaToHepburnTree() {
if (kanaToHepburnMap == null) {
kanaToHepburnMap = createKanaToHepburnMap();
}
return kanaToHepburnMap;
}
function getKanaToRomajiTree(romanization) {
switch (romanization) {
case ROMANIZATIONS.HEPBURN:
return getKanaToHepburnTree();
default:
return {};
}
}
function createKanaToHepburnMap() {
const romajiTree = transform(BASIC_ROMAJI);
const subtreeOf = (string) => getSubTreeOf(romajiTree, string);
const setTrans = (string, transliteration) => {
subtreeOf(string)[''] = transliteration;
};
Object.entries(SPECIAL_SYMBOLS).forEach(([jsymbol, symbol]) => {
subtreeOf(jsymbol)[''] = symbol;
});
[...Object.entries(SMALL_Y), ...Object.entries(SMALL_AIUEO)].forEach(
([roma, kana]) => {
setTrans(roma, kana);
}
);
// きゃ -> kya
YOON_KANA.forEach((kana) => {
const firstRomajiChar = subtreeOf(kana)[''][0];
Object.entries(SMALL_Y).forEach(([yKana, yRoma]) => {
setTrans(kana + yKana, firstRomajiChar + yRoma);
});
// きぃ -> kyi
Object.entries(SMALL_Y_EXTRA).forEach(([yKana, yRoma]) => {
setTrans(kana + yKana, firstRomajiChar + yRoma);
});
});
Object.entries(YOON_EXCEPTIONS).forEach(([kana, roma]) => {
// じゃ -> ja
Object.entries(SMALL_Y).forEach(([yKana, yRoma]) => {
setTrans(kana + yKana, roma + yRoma[1]);
});
// じぃ -> jyi, じぇ -> je
setTrans(`${kana}ぃ`, `${roma}yi`);
setTrans(`${kana}ぇ`, `${roma}e`);
});
romajiTree['っ'] = resolveTsu(romajiTree);
Object.entries(SMALL_KANA).forEach(([kana, roma]) => {
setTrans(kana, roma);
});
AMBIGUOUS_VOWELS.forEach((kana) => {
setTrans(`ん${kana}`, `n'${subtreeOf(kana)['']}`);
});
// NOTE: could be re-enabled with an option?
// // んば -> mbo
// const LABIAL = [
// 'ば', 'び', 'ぶ', 'べ', 'ぼ',
// 'ぱ', 'ぴ', 'ぷ', 'ぺ', 'ぽ',
// 'ま', 'み', 'む', 'め', 'も',
// ];
// LABIAL.forEach((kana) => {
// setTrans(`ん${kana}`, `m${subtreeOf(kana)['']}`);
// });
return Object.freeze(JSON.parse(JSON.stringify(romajiTree)));
}
function resolveTsu(tree) {
return Object.entries(tree).reduce((tsuTree, [key, value]) => {
if (!key) {
// we have reached the bottom of this branch
const consonant = value.charAt(0);
// eslint-disable-next-line no-param-reassign
tsuTree[key] = Object.keys(SOKUON_WHITELIST).includes(consonant)
? SOKUON_WHITELIST[consonant] + value
: value;
} else {
// more subtrees
// eslint-disable-next-line no-param-reassign
tsuTree[key] = resolveTsu(value);
}
return tsuTree;
}, {});
}
// memoize and deeply compare args so we only recreate when necessary
const createKanaToRomajiMap = memoizeOne(
(romanization, customRomajiMapping) => {
let map = getKanaToRomajiTree(romanization);
if (customRomajiMapping) {
map = mergeCustomMapping(map, customRomajiMapping);
}
return map;
},
dequal
);
/**
* Convert kana to romaji
* @param {String} kana text input
* @param {DefaultOptions} [options=defaultOptions]
* @param {Object} map custom mapping
* @return {String} converted text
* @example
* toRomaji('ひらがな カタカナ')
* // => 'hiragana katakana'
* toRomaji('げーむ ゲーム')
* // => 'ge-mu geemu'
* toRomaji('ひらがな カタカナ', { upcaseKatakana: true })
* // => 'hiragana KATAKANA'
* toRomaji('つじぎり', { customRomajiMapping: { じ: 'zi', つ: 'tu', り: 'li' } });
* // => 'tuzigili'
*/
function toRomaji(input = '', options = {}, map) {
const config = mergeWithDefaultOptions(options);
if (!map) {
map = createKanaToRomajiMap(
config.romanization,
config.customRomajiMapping
);
}
// just throw away the substring index information and simply concatenate all the kana
return splitIntoRomaji(input, config, map)
.map((romajiToken) => {
const [start, end, romaji] = romajiToken;
const makeUpperCase = config.upcaseKatakana && isKatakana(input.slice(start, end));
return makeUpperCase ? romaji.toUpperCase() : romaji;
})
.join('');
}
function splitIntoRomaji(input, options, map) {
if (!map) {
map = createKanaToRomajiMap(
options.romanization,
options.customRomajiMapping
);
}
const config = Object.assign({}, { isDestinationRomaji: true }, options);
return applyMapping(
katakanaToHiragana(input, toRomaji, config),
map,
!options.IMEMode
);
}
/**
* Tests a character. Returns true if the character is considered English punctuation.
* @param {String} char character string to test
* @return {Boolean}
*/
function isCharEnglishPunctuation(char = '') {
if (isEmpty(char)) return false;
return EN_PUNCTUATION_RANGES.some(([start, end]) => isCharInRange(char, start, end));
}
/**
* Convert input to [Hiragana](https://en.wikipedia.org/wiki/Hiragana)
* @param {String} [input=''] text
* @param {DefaultOptions} [options=defaultOptions]
* @return {String} converted text
* @example
* toHiragana('toukyou, オオサカ')
* // => 'とうきょう、 おおさか'
* toHiragana('only カナ', { passRomaji: true })
* // => 'only かな'
* toHiragana('wi')
* // => 'うぃ'
* toHiragana('wi', { useObsoleteKana: true })
* // => 'ゐ'
*/
function toHiragana(input = '', options = {}) {
const config = mergeWithDefaultOptions(options);
if (config.passRomaji) {
return katakanaToHiragana(input, toRomaji, config);
}
if (isMixed(input, { passKanji: true })) {
const convertedKatakana = katakanaToHiragana(input, toRomaji, config);
return toKana(convertedKatakana.toLowerCase(), config);
}
if (isRomaji(input) || isCharEnglishPunctuation(input)) {
return toKana(input.toLowerCase(), config);
}
return katakanaToHiragana(input, toRomaji, config);
}
/**
* Convert input to [Katakana](https://en.wikipedia.org/wiki/Katakana)
* @param {String} [input=''] text
* @param {DefaultOptions} [options=defaultOptions]
* @return {String} converted text
* @example
* toKatakana('toukyou, おおさか')
* // => 'トウキョウ、 オオサカ'
* toKatakana('only かな', { passRomaji: true })
* // => 'only カナ'
* toKatakana('wi')
* // => 'ウィ'
* toKatakana('wi', { useObsoleteKana: true })
* // => 'ヰ'
*/
function toKatakana(input = '', options = {}) {
const mergedOptions = mergeWithDefaultOptions(options);
if (mergedOptions.passRomaji) {
return hiraganaToKatakana(input);
}
if (isMixed(input) || isRomaji(input) || isCharEnglishPunctuation(input)) {
const hiragana = toKana(input.toLowerCase(), mergedOptions);
return hiraganaToKatakana(hiragana);
}
return hiraganaToKatakana(input);
}
const FuzzySet = function(arr, useLevenshtein, gramSizeLower, gramSizeUpper) {
var fuzzyset = {
};
// default options
arr = arr || [];
fuzzyset.gramSizeLower = gramSizeLower || 2;
fuzzyset.gramSizeUpper = gramSizeUpper || 3;
fuzzyset.useLevenshtein = (typeof useLevenshtein !== 'boolean') ? true : useLevenshtein;
// define all the object functions and attributes
fuzzyset.exactSet = {};
fuzzyset.matchDict = {};
fuzzyset.items = {};
// helper functions
var levenshtein = function(str1, str2) {
var current = [], prev, value;
for (var i = 0; i <= str2.length; i++)
for (var j = 0; j <= str1.length; j++) {
if (i && j)
if (str1.charAt(j - 1) === str2.charAt(i - 1))
value = prev;
else
value = Math.min(current[j], current[j - 1], prev) + 1;
else
value = i + j;
prev = current[j];
current[j] = value;
}
return current.pop();
};
// return an edit distance from 0 to 1
var _distance = function(str1, str2) {
if (str1 === null && str2 === null) throw 'Trying to compare two null values';
if (str1 === null || str2 === null) return 0;
str1 = String(str1); str2 = String(str2);
var distance = levenshtein(str1, str2);
if (str1.length > str2.length) {
return 1 - distance / str1.length;
} else {
return 1 - distance / str2.length;
}
};
// u00C0-u00FF is latin characters
// u0621-u064a is arabic letters
// u0660-u0669 is arabic numerals
// TODO: figure out way to do this for more languages
var _nonWordRe = /[^a-zA-Z0-9\u00C0-\u00FF\u0621-\u064A\u0660-\u0669, ]+/g;
var _iterateGrams = function(value, gramSize) {
gramSize = gramSize || 2;
var simplified = '-' + value.toLowerCase().replace(_nonWordRe, '') + '-',
lenDiff = gramSize - simplified.length,
results = [];
if (lenDiff > 0) {
for (var i = 0; i < lenDiff; ++i) {
simplified += '-';
}
}
for (var i = 0; i < simplified.length - gramSize + 1; ++i) {
results.push(simplified.slice(i, i + gramSize));
}
return results;
};
var _gramCounter = function(value, gramSize) {
// return an object where key=gram, value=number of occurrences
gramSize = gramSize || 2;
var result = {},
grams = _iterateGrams(value, gramSize),
i = 0;
for (i; i < grams.length; ++i) {
if (grams[i] in result) {
result[grams[i]] += 1;
} else {
result[grams[i]] = 1;
}
}
return result;
};
// the main functions
fuzzyset.get = function(value, defaultValue, minMatchScore) {
// check for value in set, returning defaultValue or null if none found
if (minMatchScore === undefined) {
minMatchScore = .33;
}
var result = this._get(value, minMatchScore);
if (!result && typeof defaultValue !== 'undefined') {
return defaultValue;
}
return result;
};
fuzzyset._get = function(value, minMatchScore) {
var results = [];
// start with high gram size and if there are no results, go to lower gram sizes
for (var gramSize = this.gramSizeUpper; gramSize >= this.gramSizeLower; --gramSize) {
results = this.__get(value, gramSize, minMatchScore);
if (results && results.length > 0) {
return results;
}
}
return null;
};
fuzzyset.__get = function(value, gramSize, minMatchScore) {
var normalizedValue = this._normalizeStr(value),
matches = {},
gramCounts = _gramCounter(normalizedValue, gramSize),
items = this.items[gramSize],
sumOfSquareGramCounts = 0,
gram,
gramCount,
i,
index,
otherGramCount;
for (gram in gramCounts) {
gramCount = gramCounts[gram];
sumOfSquareGramCounts += Math.pow(gramCount, 2);
if (gram in this.matchDict) {
for (i = 0; i < this.matchDict[gram].length; ++i) {
index = this.matchDict[gram][i][0];
otherGramCount = this.matchDict[gram][i][1];
if (index in matches) {
matches[index] += gramCount * otherGramCount;
} else {
matches[index] = gramCount * otherGramCount;
}
}
}
}
function isEmptyObject(obj) {
for(var prop in obj) {
if(obj.hasOwnProperty(prop))
return false;
}
return true;
}
if (isEmptyObject(matches)) {
return null;
}
var vectorNormal = Math.sqrt(sumOfSquareGramCounts),
results = [],
matchScore;
// build a results list of [score, str]
for (var matchIndex in matches) {
matchScore = matches[matchIndex];
results.push([matchScore / (vectorNormal * items[matchIndex][0]), items[matchIndex][1]]);
}
var sortDescending = function(a, b) {
if (a[0] < b[0]) {
return 1;
} else if (a[0] > b[0]) {
return -1;
} else {
return 0;
}
};
results.sort(sortDescending);
if (this.useLevenshtein) {
var newResults = [],
endIndex = Math.min(50, results.length);
// truncate somewhat arbitrarily to 50
for (var i = 0; i < endIndex; ++i) {
newResults.push([_distance(results[i][1], normalizedValue), results[i][1]]);
}
results = newResults;
results.sort(sortDescending);
}
newResults = [];
results.forEach(function(scoreWordPair) {
if (scoreWordPair[0] >= minMatchScore) {
newResults.push([scoreWordPair[0], this.exactSet[scoreWordPair[1]]]);
}
}.bind(this));
return newResults;
};
fuzzyset.add = function(value) {
var normalizedValue = this._normalizeStr(value);
if (normalizedValue in this.exactSet) {
return false;
}
var i = this.gramSizeLower;
for (i; i < this.gramSizeUpper + 1; ++i) {
this._add(value, i);
}
};
fuzzyset._add = function(value, gramSize) {
var normalizedValue = this._normalizeStr(value),
items = this.items[gramSize] || [],
index = items.length;
items.push(0);
var gramCounts = _gramCounter(normalizedValue, gramSize),
sumOfSquareGramCounts = 0,
gram, gramCount;
for (gram in gramCounts) {
gramCount = gramCounts[gram];
sumOfSquareGramCounts += Math.pow(gramCount, 2);
if (gram in this.matchDict) {
this.matchDict[gram].push([index, gramCount]);
} else {
this.matchDict[gram] = [[index, gramCount]];
}
}
var vectorNormal = Math.sqrt(sumOfSquareGramCounts);
items[index] = [vectorNormal, normalizedValue];
this.items[gramSize] = items;
this.exactSet[normalizedValue] = value;
};
fuzzyset._normalizeStr = function(str) {
if (Object.prototype.toString.call(str) !== '[object String]') throw 'Must use a string as argument to FuzzySet functions';
return str.toLowerCase();
};
// return length of items in set
fuzzyset.length = function() {
var count = 0,
prop;
for (prop in this.exactSet) {
if (this.exactSet.hasOwnProperty(prop)) {
count += 1;
}
}
return count;
};
// return is set is empty
fuzzyset.isEmpty = function() {
for (var prop in this.exactSet) {
if (this.exactSet.hasOwnProperty(prop)) {
return false;
}
}
return true;
};
// return list of values loaded into set
fuzzyset.values = function() {
var values = [],
prop;
for (prop in this.exactSet) {
if (this.exactSet.hasOwnProperty(prop)) {
values.push(this.exactSet[prop]);
}
}
return values;
};
// initialization
var i = fuzzyset.gramSizeLower;
for (i; i < fuzzyset.gramSizeUpp