// react-use-kana
// Version:
// A tiny React hook for building better Japanese forms.
// 185 lines (178 loc) • 8.6 kB
// JavaScript
import { useState } from 'react';
/*! *****************************************************************************
Copyright (c) Microsoft Corporation.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
***************************************************************************** */
/**
 * Copies the own enumerable properties of every source argument onto the
 * first argument and returns that first argument.
 *
 * The first call rebinds `__assign` to `Object.assign` when it exists, or to
 * the hand-written fallback otherwise, so the feature check happens only once.
 */
let __assign = function (...args) {
  const copyOwnProperties = function __assign(target, ...sources) {
    for (const source of sources) {
      for (const key in source) {
        if (Object.prototype.hasOwnProperty.call(source, key)) {
          target[key] = source[key];
        }
      }
    }
    return target;
  };
  __assign = Object.assign || copyOwnProperties;
  return __assign.apply(this, args);
};
// Runs of characters this module counts as kana: the space in the class,
// hiragana, the long-vowel mark and the explicitly listed voiced characters.
const KANA_REGEX = /([ ぁあ-んーわ゙ゐ゙ゔゑ゙を゙]+)/g; // eslint-disable-line no-misleading-character-class
// Runs of characters that are NOT in the kana class above.
const NON_KANA_REGEX = /([^ ぁあ-んーわ゙ゐ゙ゔゑ゙を゙]+)/g; // eslint-disable-line no-misleading-character-class
// Runs of spaces.
const SPACE_REGEX = /([ ]+)/g;
// Runs of lowercase ASCII letters.
const ALPHABET_USED_DURING_INPUT_REGEX = /[a-z]+/g;

/** True when `value` contains at least one character from the kana class. */
const isKana = (value) => value.match(KANA_REGEX) !== null;

/** True when `value` contains at least one character outside the kana class. */
const isNonKana = (value) => value.match(NON_KANA_REGEX) !== null;
// Break the input into runs of kana, runs of spaces and runs of anything else.
const splitIntoCharGroups = (value) => {
  const charGroups = [];
  for (const kanaOrOther of value.split(KANA_REGEX)) {
    for (const segment of kanaOrOther.split(SPACE_REGEX)) {
      // Strip the lowercase letters used during input, e.g. '山d' => '山'
      const cleaned = segment.replace(ALPHABET_USED_DURING_INPUT_REGEX, '');
      // Skip segments that ended up empty
      if (cleaned) {
        charGroups.push(cleaned);
      }
    }
  }
  return charGroups;
};
/**
 * Compares two lists of char groups.
 *
 * @param {string[]} from - The earlier list.
 * @param {string[]} to - The later list.
 * @returns {{ added: string[], removed: string[] }} `added` holds the entries
 *   of `to` that do not occur in `from`; `removed` holds the entries of `from`
 *   that do not occur in `to`. Order and duplicates are kept.
 */
const extractDiff = (from, to) => {
  const onlyIn = (items, reference) => items.filter((item) => !reference.includes(item));
  return {
    added: onlyIn(to, from),
    removed: onlyIn(from, to),
  };
};
/**
 * Derives `{ nonKana: kana }` reading pairs from the char groups that changed
 * between two consecutive inputs.
 *
 * What's the No.?
 * Conversion pattern: See details in https://docs.google.com/spreadsheets/d/13kMl3XQ2SG9BQTYaP-lUeVPu5I6u7Xi8Gw3H6ErYqyM/edit?usp=sharing
 *
 * @param {{ added: string[], removed: string[] }} diff - Groups present only in
 *   the new input (`added`) and only in the old input (`removed`).
 * @returns {Object<string, string>} Map from non-kana text to the kana it
 *   stands for; an empty object when no pair can be derived.
 */
const extractPairFromDiff = ({ added, removed }) => {
  if (
    added.length === 0 || // No. 1, 4, 5, 11
    removed.length === 0 || // No. 1, 2, 3, 6
    added.every(isKana) || // No. 3, 9, 10, 15
    removed.every(isNonKana) // No. 4, 7, 9, 10
  ) {
    // For No. 7 (from nonKana to nonKana), it's going to fallback to No. 14
    return {};
  }

  const pairs = {};

  if (removed.some(isNonKana) && removed.some(isKana) && added.every(isNonKana)) {
    // No. 14 (from mixed of kana and nonKana, to nonKana)
    // given
    //   added: ['山田']
    //   removed: ['山', 'だ']
    // then
    //   pair is { '田': 'だ' }
    let remainingAdded = added.join('');
    for (let i = 0; i < removed.length; i += 1) {
      const removedChars = removed[i];
      if (isNonKana(removedChars)) {
        const position = remainingAdded.indexOf(removedChars);
        if (position > 0) {
          // The text in front of the unchanged nonKana replaced the previous kana group
          pairs[remainingAdded.slice(0, position)] = removed[i - 1];
        }
        if (position >= 0) {
          // Step over the WHOLE matched group, not just its first character
          remainingAdded = remainingAdded.slice(position + removedChars.length);
        }
      } else if (isKana(removedChars) && i === removed.length - 1) {
        // If the last string is kana, couple it with the remaining nonKana
        pairs[remainingAdded] = removedChars;
      }
    }
    return pairs;
  }

  // No. 8 (from kana to nonKana)
  // No. 13 (from kana to mixed of kana and nonKana)
  // given
  //   added: ['山', 'だ']
  //   removed: ['やまだ']
  // then
  //   pair is { '山': 'やま' }
  let remainingRemoved = removed.join('');
  for (let i = 0; i < added.length; i += 1) {
    const addedChars = added[i];
    if (isKana(addedChars)) {
      const position = remainingRemoved.indexOf(addedChars);
      if (position > 0) {
        // The kana in front of the unchanged kana is the reading of the previous nonKana group
        pairs[added[i - 1]] = remainingRemoved.slice(0, position);
      }
      if (position >= 0) {
        // Step over the WHOLE matched group, not just its first character
        remainingRemoved = remainingRemoved.slice(position + addedChars.length);
      }
    } else if (isNonKana(addedChars) && i === added.length - 1) {
      // If the last string is nonKana, couple it with the remaining kana
      pairs[addedChars] = remainingRemoved;
    }
  }
  return pairs;
};
/**
 * Tries each candidate list of char groups in order and returns the first
 * non-empty set of nonKana/kana pairs found against the current input.
 *
 * @param {Array<string[]>} charGroupsCandidates - Earlier char-group lists to compare against, most preferred first.
 * @param {string[]} currentCharGroups - Char groups of the latest input.
 * @param {Function} setLastConvertedCharGroups - Called with the candidate that produced a pair.
 * @returns {Object<string, string>} The pairs found, or an empty object.
 */
const findPair = (charGroupsCandidates, currentCharGroups, setLastConvertedCharGroups) => {
  for (const candidate of charGroupsCandidates) {
    if (!candidate) {
      return {};
    }
    const pair = extractPairFromDiff(extractDiff(candidate, currentCharGroups));
    if (Object.keys(pair).length !== 0) {
      // Remember the groups a pair was derived from, for later comparison.
      // This covers a user converting kana to non-kana and then to another non-kana,
      // e.g. 'た' => '多' => '田': 'た' has to be kept while pairing { '多': 'た' }
      // so that { '田': 'た' } can be paired on the next change.
      setLastConvertedCharGroups(candidate);
      return pair;
    }
    // No pair against this candidate: fall through and retry with the next one
    // (the caller passes `lastConvertedCharGroups` as the second candidate).
  }
  return {};
};
/**
 * Replaces every hiragana character (U+3041 to U+3096) in `str` with the
 * character 0x60 code units above it, which is its katakana counterpart.
 * All other characters are returned unchanged.
 */
const hiraganaToKatatana = (str) => {
  const shiftToKatakana = (hiraganaChar) => {
    const katakanaCode = hiraganaChar.charCodeAt(0) + 0x60;
    return String.fromCharCode(katakanaCode);
  };
  return str.replace(/[\u3041-\u3096]/g, shiftToKatakana);
};
/**
 * Builds the kana reading of the current input.
 *
 * Each char group has its known non-kana substrings replaced by their kana
 * from `kanaMap`; groups that contain no kana afterwards are dropped and the
 * remaining groups are concatenated.
 *
 * @param {Object<string, string>} kanaMap - Known pairs, non-kana text => kana.
 * @param {string[]} charGroups - Char groups of the current input.
 * @param {string} kanaType - 'katakana' converts the result to katakana; any other value returns it as is.
 * @returns {string} The kana string.
 */
const convertCharGroupsToKana = (kanaMap, charGroups, kanaType) => {
  const knownNonKanas = Object.keys(kanaMap)
    // An empty key would match at every position, and a non-string reading
    // cannot be inserted, so neither is usable for replacement.
    .filter((nonKana) => nonKana !== '' && typeof kanaMap[nonKana] === 'string')
    // Longest keys first, so that '山田' is applied before '山' can break it up.
    .sort((a, b) => b.length - a.length);

  const hiragana = charGroups
    .map((chars) =>
      knownNonKanas.reduce(
        // split/join replaces EVERY occurrence ('多多' => 'たた') and inserts the
        // kana literally, with no `$` replacement patterns being interpreted.
        (memo, nonKana) => memo.split(nonKana).join(kanaMap[nonKana]),
        chars,
      ),
    )
    .filter(isKana)
    .join('');

  return kanaType === 'katakana' ? hiraganaToKatatana(hiragana) : hiragana;
};
/**
 * React hook that tracks the kana reading of a text input.
 *
 * @param {{ kanaType?: string }} [options] - `kanaType` defaults to 'hiragana'.
 * @returns {{ kana: string, setKanaSource: Function }} `kana` is the current
 *   reading; `setKanaSource(value)` must be fed the latest input value.
 */
const useKana = ({ kanaType = 'hiragana' } = {}) => {
  // used by library users
  const [kana, setKana] = useState('');
  // library internal use
  const [previousCharGroups, setPreviousCharGroups] = useState([]);
  const [lastConvertedCharGroups, setLastConvertedCharGroups] = useState([]);
  const [kanaMap, setKanaMap] = useState({});

  const setKanaSource = (value) => {
    if (value === '') {
      // An empty input resets everything
      setKana('');
      setKanaMap({});
      setPreviousCharGroups([]);
      setLastConvertedCharGroups([]);
      return;
    }

    const currentCharGroups = splitIntoCharGroups(value);
    // Pairs of non-kana and kana discovered by this change, if any
    const newPairs = findPair(
      [previousCharGroups, lastConvertedCharGroups],
      currentCharGroups,
      setLastConvertedCharGroups,
    );
    // New kana map: the pairs known so far plus the ones just found
    const latestKanaMap = __assign(__assign({}, kanaMap), newPairs);

    setKana(convertCharGroupsToKana(latestKanaMap, currentCharGroups, kanaType));
    setKanaMap(latestKanaMap);
    setPreviousCharGroups(currentCharGroups);
  };

  return { kana, setKanaSource };
};
export { useKana };