UNPKG

@readium/shared

Version:

Shared models to be used across other Readium projects and implementations in TypeScript

40 lines (39 loc) 1.41 kB
import { Language } from "../Language";
import { Tokenizer } from "./Tokenizer";

/**
 * A single token produced by a [TextTokenizer], expressed as a pair of numbers.
 * NOTE(review): presumably `[start, end]` offsets into the tokenized string;
 * whether `end` is inclusive or exclusive is not visible from these
 * declarations — confirm against the implementation.
 */
export type Range = [number, number];

/**
 * A tokenizer splitting a String into range tokens (e.g. words, sentences, etc.).
 */
export type TextTokenizer = Tokenizer<string, Range>;

/**
 * A text token unit which can be used with a [TextTokenizer].
 */
export declare enum TextUnit {
    Word = "word",
    Sentence = "sentence",
    Paragraph = "paragraph"
}

/**
 * Factory returning a [TextTokenizer] for the given language and unit.
 * NOTE(review): the selection logic is not visible here; presumably it picks
 * [IntlTextTokenizer] when Intl.Segmenter is available and falls back to
 * [NaiveTextTokenizer] otherwise — confirm against the implementation.
 *
 * @param language Language of the text to tokenize, or `null`.
 * @param unit Granularity of the tokens to produce.
 * @returns A tokenizer yielding [Range] tokens.
 */
export declare const DefaultTextContentTokenizer: (language: Language | null, unit: TextUnit) => TextTokenizer;

/**
 * A [TextTokenizer] using the Intl.Segmenter API.
 * Very aware of language-specific rules since it uses ICU behind the scenes.
 */
export declare class IntlTextTokenizer implements TextTokenizer {
    private unit;
    private segmenter;
    /**
     * @param language Language of the text to tokenize, or `null`.
     * @param unit Granularity of the tokens to produce.
     */
    constructor(language: Language | null, unit: TextUnit);
    /**
     * Splits `data` into tokens of the configured unit.
     *
     * @param data Text to tokenize.
     * @returns The ranges of the tokens found in `data`.
     */
    tokenize(data: string): Range[];
}

/**
 * A [TextTokenizer] using a naive approach to splitting text into tokens.
 * This is a fallback for browsers that don't support Intl.Segmenter.
 * It works mainly on English and similar languages. Don't use unless necessary.
 *
 * Declared as implementing [TextTokenizer], like [IntlTextTokenizer], whose
 * public surface it mirrors.
 */
export declare class NaiveTextTokenizer implements TextTokenizer {
    private unit;
    private tokenizer;
    private isEnglish;
    /**
     * @param language Language of the text to tokenize, or `null`.
     * @param unit Granularity of the tokens to produce.
     */
    constructor(language: Language | null, unit: TextUnit);
    /**
     * Splits `data` into tokens of the configured unit.
     *
     * @param data Text to tokenize.
     * @returns The ranges of the tokens found in `data`.
     */
    tokenize(data: string): Range[];
}

/**
 * Maps a token's text to a string, or to `null`.
 * NOTE(review): judging by the name, this presumably returns a form of the
 * token suitable for speech and `null` when there is nothing to speak —
 * the actual rules are not visible in this declaration; confirm.
 *
 * @param token Text of the token.
 * @returns A string, or `null`.
 */
export declare const speakableToken: (token: string) => string | null;