punkt
Version:
A port of NLTK's Punkt sentence tokenizer to JS.
101 lines • 4.04 kB
JavaScript
"use strict";
/**
 * Compiler-emitted helper that drives a generator function as an asynchronous
 * routine. An already-defined `this.__awaiter` is reused when present.
 *
 * @param {*} thisArg - `this` value the generator function is applied with.
 * @param {*} _arguments - Argument list forwarded to the generator function (defaults to `[]`).
 * @param {Function} P - Promise constructor to use; falls back to the global `Promise` when falsy.
 * @param {Function} generator - Generator function whose yielded values are awaited.
 * @returns {Promise} Resolves with the generator's return value; rejects with any error
 *   thrown while advancing the generator.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap anything that is not already an instance of P in a P resolved with that value.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a fulfilled value; an exception thrown by the generator rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw a rejection reason into the generator so its own try/catch can observe it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finished: resolve with the return value. Otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (rebinding `generator` to the iterator) and run it up to its first yield.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Flag the CommonJS `exports` object with `__esModule: true`.
// NOTE(review): presumably consumed by ES-module interop helpers in importing code — confirm.
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Splits raw file text into trimmed, non-empty lines.
 * Trimming also removes a trailing "\r", so CRLF input is handled.
 *
 * @param {string} text - Raw file contents.
 * @returns {string[]} Trimmed lines with empty ones removed.
 */
function punktNonEmptyLines(text) {
    return text
        .split("\n")
        .map(line => line.trim())
        .filter(line => line.length > 0);
}

/**
 * Parses tab-separated text into `[firstColumn, secondColumn]` pairs.
 * Rows without a second column are skipped so no `undefined` values are produced;
 * columns beyond the second are ignored.
 *
 * @param {string} text - Raw file contents.
 * @returns {Array<[string, string]>} One pair per well-formed row.
 */
function punktTabPairs(text) {
    return punktNonEmptyLines(text)
        .map(line => line.split("\t"))
        .filter(columns => columns.length > 1)
        .map(columns => [columns[0], columns[1]]);
}

/**
 * Normalizes a Map-or-plain-object argument to a Map.
 * A Map is returned as-is (same reference); a plain object has its own enumerable
 * keys copied; `null`/`undefined` yields an empty Map.
 *
 * @param {Map|Object|null|undefined} source - Value supplied by the caller.
 * @returns {Map} The resulting Map.
 */
function punktToMap(source) {
    if (source instanceof Map) {
        return source;
    }
    const result = new Map();
    if (source != null) {
        for (const key of Object.keys(source)) {
            result.set(key, source[key]);
        }
    }
    return result;
}

/**
 * Holds the four parameter collections used by the tokenizer: abbreviations,
 * collocations, orthographic context and sentence starters.
 *
 * The instance is either
 *  - constructed from a loader function, in which case `init()` must complete
 *    before any getter is read, or
 *  - constructed from a ready-made parameters object, in which case it is
 *    usable immediately.
 */
class PunktParameters {
    /**
     * @param {Function|Object} getOrParams - Either a loader `(fileName) => Promise<string>`
     *   used later by `init()`, or an object with optional `abbreviations` (array),
     *   `collocations` (Map or plain object), `orthoContext` (Map or plain object) and
     *   `sentStarters` (array). Missing fields default to empty collections.
     * @throws {TypeError} When the argument is neither a function nor a non-null object.
     */
    constructor(getOrParams) {
        this.initialized = false;
        this.abbreviationsV = [];
        this.collocationsV = new Map();
        this.orthoContextV = new Map();
        this.sentStartersV = [];
        if (typeof getOrParams === "function") {
            this.get = getOrParams;
        }
        else {
            if (getOrParams === null || typeof getOrParams !== "object") {
                throw new TypeError("PunktParameters expects a loader function or a parameters object");
            }
            const params = getOrParams;
            this.initialized = true;
            // Nothing is ever loaded in this mode; keep a harmless loader so `get` is always callable.
            this.get = () => Promise.resolve("");
            if (params.abbreviations != null) {
                this.abbreviationsV = params.abbreviations;
            }
            this.collocationsV = punktToMap(params.collocations);
            this.orthoContextV = punktToMap(params.orthoContext);
            if (params.sentStarters != null) {
                this.sentStartersV = params.sentStarters;
            }
        }
    }
    /**
     * Loads and parses the four parameter files through the loader given to the
     * constructor. Does nothing when the instance is already initialized.
     *
     * Files requested: "abbrev_types.txt", "collocations.tab", "ortho_context.tab",
     * "sent_starters.txt". The `.tab` files are read as tab-separated key/value rows;
     * rows without a value, and ortho-context rows whose value is not an integer,
     * are skipped.
     *
     * @returns {Promise<void>} Resolves once all collections are populated; rejects
     *   if any loader call fails (no state is modified in that case).
     */
    init() {
        return __awaiter(this, void 0, void 0, function* () {
            if (this.initialized) {
                return;
            }
            const sources = yield Promise.all([
                this.get("abbrev_types.txt"),
                this.get("collocations.tab"),
                this.get("ortho_context.tab"),
                this.get("sent_starters.txt"),
            ]);
            this.abbreviationsV = punktNonEmptyLines(sources[0]);
            this.collocationsV = new Map(punktTabPairs(sources[1]));
            this.orthoContextV = new Map(punktTabPairs(sources[2])
                // Explicit radix: never let a prefix such as "0x" switch the base.
                .map(pair => [pair[0], parseInt(pair[1], 10)])
                // Drop rows whose value did not parse instead of storing NaN.
                .filter(pair => !Number.isNaN(pair[1])));
            this.sentStartersV = punktNonEmptyLines(sources[3]);
            this.initialized = true;
        });
    }
    /**
     * @returns {string[]} The abbreviation list.
     * @throws {Error} When read before initialization.
     */
    get abbreviations() {
        if (!this.initialized) {
            throw new Error("PunktParameters not initialized");
        }
        return this.abbreviationsV;
    }
    /**
     * @returns {Map} The collocation map.
     * @throws {Error} When read before initialization.
     */
    get collocations() {
        if (!this.initialized) {
            throw new Error("PunktParameters not initialized");
        }
        return this.collocationsV;
    }
    /**
     * @returns {Map} The orthographic-context map.
     * @throws {Error} When read before initialization.
     */
    get orthoContext() {
        if (!this.initialized) {
            throw new Error("PunktParameters not initialized");
        }
        return this.orthoContextV;
    }
    /**
     * @returns {string[]} The sentence-starter list.
     * @throws {Error} When read before initialization.
     */
    get sentStarters() {
        if (!this.initialized) {
            throw new Error("PunktParameters not initialized");
        }
        return this.sentStartersV;
    }
}
// Expose the class as this module's default export.
exports.default = PunktParameters;
//# sourceMappingURL=parameters.js.map