// wordmap
// Version:
// Multi-Lingual Word Alignment Prediction
// 97 lines (96 loc) • 3.44 kB
// JavaScript
;
// Tag the CommonJS `exports` object with a non-enumerable `__esModule: true`
// property. This is the marker ES-module-to-CommonJS transpilers emit
// (presumably TypeScript here — confirm against the build setup) so that
// importers can recognise `exports.default` as the module's default export.
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Generates random fake data: lower-case words, space-separated phrases,
 * lexicons (source phrase -> list of translations) and sentence pairs built
 * from a lexicon.
 *
 * All randomness comes from `Math.random()`, so output is not reproducible
 * unless `Math.random` is replaced by the caller.
 */
class CorpusFaker {
    /**
     * @param {number} [n=3] - Maximum number of words in each phrase produced
     *     by {@link CorpusFaker#lexiconEntry}.
     */
    constructor(n = 3) {
        this.n = n;
    }

    /**
     * Returns a random integer in the inclusive range `[min, max]`.
     *
     * @param {number} min - Lowest value that may be returned.
     * @param {number} max - Highest value that may be returned.
     * @returns {number} An integer between `min` and `max`, both included.
     */
    static random(min, max) {
        // The span must be (max - min + 1); multiplying by `max` alone is only
        // correct when `min` is 1 and otherwise misses or overshoots `max`.
        return Math.floor(Math.random() * (max - min + 1)) + min;
    }

    /**
     * Returns the lower-case latin letter at the given zero-based position.
     *
     * @param {number} index - Position in the alphabet (0 = "a", 25 = "z").
     * @returns {string|undefined} The letter, or `undefined` when `index` is
     *     outside 0..25.
     */
    static character(index) {
        return "abcdefghijklmnopqrstuvwxyz"[index];
    }

    /**
     * Returns one random lower-case letter from "a" to "z".
     *
     * @returns {string} A single character.
     */
    static randomCharacter() {
        // Reference the class explicitly (like the sibling helpers) so the
        // method still works when it is detached from the class.
        return CorpusFaker.character(CorpusFaker.random(0, 25));
    }

    /**
     * Builds a random word of 1 to `maxLength` lower-case letters.
     *
     * @param {number} [maxLength=5] - Maximum number of letters.
     * @returns {string} The generated word.
     */
    static randomWord(maxLength = 5) {
        const letters = [];
        const length = CorpusFaker.random(1, maxLength);
        for (let i = 0; i < length; i++) {
            letters.push(CorpusFaker.randomCharacter());
        }
        return letters.join("");
    }

    /**
     * Builds a random phrase of 1 to `maxWords` words separated by single
     * spaces.
     *
     * @param {number} maxWords - Maximum number of words in the phrase.
     * @param {number} maxWordLength - Maximum number of letters per word.
     * @returns {string} The generated phrase.
     * @throws {Error} If `maxWords` or `maxWordLength` is undefined.
     */
    static randomPhrase(maxWords, maxWordLength) {
        if (typeof maxWords === "undefined") {
            throw new Error("Must pass in maxWords.");
        }
        if (typeof maxWordLength === "undefined") {
            throw new Error("Must pass in maxWordLength.");
        }
        const words = [];
        const length = CorpusFaker.random(1, maxWords);
        for (let i = 0; i < length; i++) {
            words.push(CorpusFaker.randomWord(maxWordLength));
        }
        return words.join(" ");
    }

    /**
     * Builds one sentence pair from a lexicon.
     *
     * Picks 1 to `maxPhrases` random source phrases (repeats allowed) and, for
     * each, one random translation. The source side keeps pick order; the
     * target side is sorted with the default string ordering. Both sides are
     * terminated with a "." token.
     *
     * @param {number} maxPhrases - Maximum number of phrases per sentence.
     * @param {Object<string, string[]>} lexicon - Map from source phrase to
     *     its candidate translations.
     * @returns {string[]} A two-element array `[source, target]`.
     * @throws {Error} If `maxPhrases` is undefined, or `lexicon` is not a
     *     non-null object, or `lexicon` has no entries.
     */
    static lexiconSentencePair(maxPhrases, lexicon) {
        if (typeof maxPhrases === "undefined") {
            throw new Error("Must pass in maxPhrases.");
        }
        // `typeof null` is "object", so null needs its own check.
        if (typeof lexicon !== "object" || lexicon === null) {
            throw new Error("Must pass in lexicon.");
        }
        // The key list does not change while sampling; compute it once.
        const sourcePhrases = Object.keys(lexicon);
        if (sourcePhrases.length === 0) {
            throw new Error("Lexicon must contain at least one entry.");
        }
        const sourceArray = [];
        const targetArray = [];
        const times = CorpusFaker.random(1, maxPhrases);
        for (let i = 0; i < times; i++) {
            const randomSourcePhrase = sourcePhrases[Math.floor(Math.random() * sourcePhrases.length)];
            const targetTranslations = lexicon[randomSourcePhrase];
            const randomTargetTranslation = targetTranslations[Math.floor(Math.random() * targetTranslations.length)];
            sourceArray.push(randomSourcePhrase);
            targetArray.push(randomTargetTranslation);
        }
        // Sorting once after the loop yields the same order as re-sorting
        // after every push, without the repeated work.
        targetArray.sort();
        sourceArray.push(".");
        targetArray.push(".");
        return [sourceArray.join(" "), targetArray.join(" ")];
    }

    /**
     * Generates a corpus of sentence pairs drawn from a lexicon.
     *
     * @param {number} length - Number of sentence pairs to generate.
     * @param {Object<string, string[]>} lexicon - Lexicon to sample from.
     * @param {number} [maxPhrases=5] - Maximum number of phrases per sentence.
     * @returns {string[][]} `length` pairs of `[source, target]` sentences.
     * @throws {Error} See {@link CorpusFaker.lexiconSentencePair}.
     */
    lexiconCorpusGenerate(length, lexicon, maxPhrases = 5) {
        const lines = [];
        for (let i = 0; i < length; i++) {
            lines.push(CorpusFaker.lexiconSentencePair(maxPhrases, lexicon));
        }
        return lines;
    }

    /**
     * Generates a random lexicon.
     *
     * Entries are keyed by their source phrase, so a later entry with the
     * same phrase overwrites an earlier one and the result may contain fewer
     * than `entryCount` keys.
     *
     * @param {number} entryCount - Number of entries to generate.
     * @param {number} [maxTranslations=3] - Maximum translations per entry.
     * @returns {Object<string, string[]>} Map from source phrase to its
     *     translations.
     * @throws {Error} If `entryCount` is undefined.
     */
    lexicon(entryCount, maxTranslations = 3) {
        if (typeof entryCount === "undefined") {
            throw new Error("Must pass in entryCount.");
        }
        const lexicon = {};
        for (let i = 0; i < entryCount; i++) {
            const entry = this.lexiconEntry(maxTranslations);
            lexicon[entry[0]] = entry[1];
        }
        return lexicon;
    }

    /**
     * Generates one lexicon entry: a random source phrase and 1 to
     * `maxTranslations` random translation phrases. Each phrase has at most
     * `this.n` words of at most 7 letters.
     *
     * @param {number} maxTranslations - Maximum number of translations.
     * @returns {Array} A two-element array `[phrase, translations]` where
     *     `translations` is an array of strings.
     * @throws {Error} If `maxTranslations` is undefined.
     */
    lexiconEntry(maxTranslations) {
        if (typeof maxTranslations === "undefined") {
            throw new Error("Must pass in maxTranslations.");
        }
        const word = CorpusFaker.randomPhrase(this.n, 7);
        const translations = [];
        const translationCount = CorpusFaker.random(1, maxTranslations);
        for (let i = 0; i < translationCount; i++) {
            translations.push(CorpusFaker.randomPhrase(this.n, 7));
        }
        return [word, translations];
    }
}
// Expose the CorpusFaker class as this CommonJS module's `default` export.
exports.default = CorpusFaker;