UNPKG

unicode-segmenter

Version:

A lightweight implementation of the Unicode Text Segmentation (UAX #29)

114 lines (103 loc) 3.12 kB
// @ts-check import { graphemeSegments } from './grapheme.js'; const p_locale = Symbol(); const p_granularity = Symbol(); /** * Adapter for `Intl.Segmenter` API * * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter * * @implements {Intl.Segmenter} */ export class Segmenter { /** * @param {string} [locale] * @param {Intl.SegmenterOptions} [options={}] */ constructor(locale, options = {}) { let { granularity = 'grapheme', } = options; switch (granularity) { case 'grapheme': break; case 'word': throw new TypeError('Unicode "word" segmenter is currently not implemented'); case 'sentence': throw new TypeError('Unicode "sentence" segmenter is currently not implemented'); default: throw new RangeError(`Value ${granularity} out of range for Intl.Segmenter options property granularity`); } /** @type {string} */ this[p_locale] = locale || 'en'; /** @type {Intl.ResolvedSegmenterOptions["granularity"]} */ this[p_granularity] = granularity; } /** * Impelements {@link Intl.Segmenter.segment} * * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/segment * * @param {string} input * @return {SegmentsAdapter} */ segment(input) { return new SegmentsAdapter(input); } /** * Impelements {@link Intl.Segmenter.resolvedOptions} * * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/resolvedOptions * @return {Intl.ResolvedSegmenterOptions} */ resolvedOptions() { return { locale: this[p_locale], granularity: this[p_granularity], }; } } /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/segment/Segments * @implements {Intl.Segments} */ class SegmentsAdapter { /** * @param {string} input */ constructor(input) { /** @type {string} */ this.input = input; } /** * @return {Intl.SegmentIterator<Intl.SegmentData>} */ *[Symbol.iterator]() { // only grapheme segmenter is currently provided for (let { segment, index, input } of graphemeSegments(this.input)) { yield { segment, index, input }; } } /** * Impelements {@link Intl.Segments.containing} * * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter/segment/Segments/containing * * @param {number} [codeUnitIndex=0] * @return {Intl.SegmentData} A resolved segment data */ containing(codeUnitIndex = 0) { let offset = 0; // only grapheme segmenter is currently provided for (let x of graphemeSegments(this.input)) { offset += x.segment.length; if (codeUnitIndex < offset) { return x; } } // FIXME mistyped upstream // See https://github.com/microsoft/TypeScript/pull/58084 // @ts-ignore `Segments.prototype.containing()` can actually returns `undefined`. return undefined; } }