unicode-regex
Version:
Regular expressions for matching Unicode categories
26 lines (25 loc) • 1.03 kB
JavaScript
import { Charset } from 'regexp-util';
import * as data from './data.generated/index.js';
/**
 * Build a charset from a map of categories to sub category names.
 *
 * Sub categories listed under the same category are combined with `union`;
 * the resulting per-category charsets are then combined with `intersect`,
 * so a code point has to satisfy every listed category.
 *
 * @param {Object} categories - Maps each category name to a non-empty array
 *   of sub category names.
 * @returns The charset obtained by intersecting the per-category charsets.
 * @throws {Error} If `categories` has no keys, or if any category maps to
 *   `undefined`, `null`, or an empty array.
 */
export default function unicode(categories) {
  const entries = Object.entries(categories);
  if (entries.length === 0) {
    throw new Error(`Expected at least one category, but received 0.`);
  }

  // `== null` deliberately matches both `undefined` and `null`, so a nulled-out
  // category is reported as "no sub categories" instead of crashing on `.length`.
  const hasEmptyCategory = entries.some(
    ([, subCategories]) => subCategories == null || subCategories.length === 0,
  );
  if (hasEmptyCategory) {
    throw new Error(`Expected at least one sub category, but received 0.`);
  }

  // All validation happens above, before any charset is built.
  const charsets = entries.map(([category, subCategories]) => {
    const subCharsets = subCategories.map((subCategory) => getCharset(category, subCategory));
    return new Charset().union(...subCharsets);
  });

  // No initial value is needed: `entries` is guaranteed non-empty at this point.
  return charsets.reduce((a, b) => a.intersect(b));
}
/**
 * Look up the generated data for one sub category and wrap it in a Charset.
 *
 * @param {string} category - Key into the generated `data` module.
 * @param {string} subCategory - Key into that category's data.
 * @returns The result of `new Charset().union(...)` applied to the stored
 *   charset inputs for the requested sub category.
 * @throws {TypeError} If `data` has no entry for `category`, or that entry
 *   has no entry for `subCategory`.
 */
function getCharset(category, subCategory) {
  const categoryData = data[category];
  if (categoryData == null) {
    // Fail with the offending name rather than "cannot read properties of undefined".
    throw new TypeError(`Unknown category "${String(category)}".`);
  }

  const charsetInputs = categoryData[subCategory];
  if (charsetInputs == null) {
    // Spreading a missing entry would otherwise raise an opaque "not iterable" error.
    throw new TypeError(
      `Unknown sub category "${String(subCategory)}" for category "${String(category)}".`,
    );
  }

  return new Charset().union(...charsetInputs);
}