UNPKG

punkt

Version:

A port of NLTK's Punkt sentence tokenizer to JS.

101 lines 4.04 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); class PunktParameters { constructor(getOrParams) { this.initialized = false; this.abbreviationsV = []; this.collocationsV = new Map(); this.orthoContextV = new Map(); this.sentStartersV = []; if (typeof getOrParams === "function") { this.get = getOrParams; } else { const params = getOrParams; this.initialized = true; this.get = () => Promise.resolve(""); this.abbreviationsV = params.abbreviations; if (params.collocations instanceof Map) { this.collocationsV = params.collocations; } else { for (const key of Object.keys(params.collocations)) { this.collocationsV.set(key, params.collocations[key]); } } if (params.orthoContext instanceof Map) { this.orthoContextV = params.orthoContext; } else { for (const key of Object.keys(params.orthoContext)) { this.orthoContextV.set(key, params.orthoContext[key]); } } this.sentStartersV = params.sentStarters; } } init() { return __awaiter(this, void 0, void 0, function* () { if (this.initialized) return; const sources = yield Promise.all([ this.get("abbrev_types.txt"), this.get("collocations.tab"), this.get("ortho_context.tab"), this.get("sent_starters.txt"), ]); this.abbreviationsV = sources[0] .split("\n") .map(line => line.trim()) .filter(line => line.length > 0); this.collocationsV = new Map(sources[1] .split("\n") .map(line => line.trim()) .filter(line => line.length > 0) .map(line => line.split("\t")) .map(line => [line[0], line[1]])); this.orthoContextV = new Map(sources[2] .split("\n") .map(line => line.trim()) .filter(line => line.length > 0) .map(line => line.split("\t")) .map(line => [line[0], parseInt(line[1])])); this.sentStartersV = sources[3] .split("\n") .map(line => line.trim()) .filter(line => line.length > 0); this.initialized = true; }); } get abbreviations() { if (!this.initialized) throw new Error("PunktParameters not initialized"); return this.abbreviationsV; } get collocations() { if (!this.initialized) throw new Error("PunktParameters not initialized"); return this.collocationsV; } get orthoContext() { if (!this.initialized) throw new Error("PunktParameters not initialized"); return this.orthoContextV; } get sentStarters() { if (!this.initialized) throw new Error("PunktParameters not initialized"); return this.sentStartersV; } } exports.default = PunktParameters; //# sourceMappingURL=parameters.js.map