UNPKG

sentence-splitter

Version:
69 lines (62 loc) 1.87 kB
import { SourceCode } from "./SourceCode.js"; import { AbstractParser } from "./AbstractParser.js"; export const DefaultOptions = { separatorCharacters: [ ".", // period ".", // (ja) zenkaku-period "。", // (ja) 句点 "?", // question mark "!", // exclamation mark "?", // (ja) zenkaku question mark "!" // (ja) zenkaku exclamation mark ] }; export interface SeparatorParserOptions { /** * Recognize each characters as separator * Example [".", "!", "?"] */ separatorCharacters?: string[]; } /** * Separator parser */ export class SeparatorParser implements AbstractParser { private separatorCharacters: string[]; constructor(readonly options?: SeparatorParserOptions) { this.separatorCharacters = options && options.separatorCharacters ? options.separatorCharacters : DefaultOptions.separatorCharacters; } test(sourceCode: SourceCode) { if (sourceCode.isInContext()) { return false; } if (sourceCode.isInContextRange()) { return false; } const firstChar = sourceCode.read(); const nextChar = sourceCode.read(1); if (!firstChar) { return false; } if (!this.separatorCharacters.includes(firstChar)) { return false; } // Need space after period // Example: "This is a pen. This is not a pen." // It will avoid false-position like `1.23` if (firstChar === ".") { if (nextChar) { return /[\s\t\r\n]/.test(nextChar); } else { return true; } } return true; } seek(sourceCode: SourceCode): void { while (this.test(sourceCode)) { sourceCode.peek(); } } }