// punkt
// A port of NLTK's Punkt sentence tokenizer to JS.
import PunktLanguageVars from "./languageVars.js";
import PunktParameters from "./parameters.js";
export { PunktLanguageVars, PunktParameters };
/**
 * Type declaration for the `PunktTokenizer` class exported by this module.
 *
 * Only the constructor and `tokenize` are public. Every other member is
 * declared `private` and appears here by name only, so its parameter and
 * return types cannot be read from this declaration; consult the
 * implementation for those.
 */
export declare class PunktTokenizer {
// Private instance state. NOTE(review): presumably holds the two constructor
// arguments of the same names — confirm against the implementation.
private params;
private langVars;
/**
 * Creates a tokenizer.
 *
 * @param params - A `PunktParameters` instance (type imported from "./parameters.js").
 * @param langVars - Optional `PunktLanguageVars` instance (type imported from
 *   "./languageVars.js"). What is used when it is omitted is not visible in
 *   this declaration.
 */
constructor(params: PunktParameters, langVars?: PunktLanguageVars);
// Private helpers: names only; signatures are not part of this declaration.
private tokenizeWords;
private annotateFirstPass;
private firstPassAnnotation;
private spanTokenize;
private sentencesFromText;
private getLastWhitespaceIndex;
private matchPotentialEndContexts;
private slicesFromText;
private realignBoundaries;
private textContainsSentbreak;
private annotateTokens;
private annotateSecondPass;
private secondPassAnnotation;
private orthoHeuristic;
/**
 * Tokenizes `text` and returns the result as an array of strings.
 * NOTE(review): presumably one array entry per sentence — confirm against the
 * implementation.
 *
 * @param text - The input text.
 * @param realignBoundaries - Optional flag. Its default value and exact effect
 *   are not visible here; presumably it controls whether the private
 *   `realignBoundaries` step is applied — TODO confirm.
 * @returns The resulting strings.
 */
tokenize(text: string, realignBoundaries?: boolean): string[];
}
//# sourceMappingURL=index.d.ts.map