ipa-jfk
Version:
IPA narrow transcription of English words in New York City accent
216 lines (207 loc) • 6.87 kB
JavaScript
/* Copyright (C) 2020-2021 b1f6c1c4
*
* This file is part of IPA-JFK.
*
* IPA-JFK is free software: you can redistribute it and/or modify it under the
* terms of the GNU Affero General Public License as published by the Free
* Software Foundation, version 3.
*
* IPA-JFK is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
* more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with IPA-JFK. If not, see <https://www.gnu.org/licenses/>.
*/
// Vowel phonemes that may absorb a following /r/. 'ER' is deliberately left
// out of the list (it is commented out in the literal below).
const maybeRhotic = ['AA', 'EH', /* 'ER', */ 'IH', 'AO', 'UH'];

// Merge each "candidate vowel + R" pair into a single vowel entry.
//
// phs: array of phoneme objects; the fields read here are `phoneme` and
//      `isVowel`. A falsy `phs` is returned unchanged.
// Returns a new array. A vowel listed in `maybeRhotic` that is immediately
// followed by the consonant 'R' is replaced by a copy carrying
// `property: 'rhotic'`, and that 'R' entry is omitted from the output.
// Cure-force merger: a rhotic 'UH' that does not directly follow 'Y'
// additionally gets `pho: 'O'`.
// Every other phoneme is passed through by reference, in order.
function rhoticize(phs) {
  if (!phs) return phs;
  const res = [];
  // Candidate vowel that has been seen but not yet emitted; what happens to it
  // depends on the next phoneme.
  let pending = null;
  // Whether `pending` directly follows the consonant 'Y'.
  let pendingAfterY = false;
  // The phoneme handled in the previous iteration (null at the start).
  let prev = null;
  for (const p of phs) {
    if (pending && !p.isVowel && p.phoneme === 'R') {
      // cure-force merger
      if (pending.phoneme === 'UH' && !pendingAfterY) {
        res.push({ ...pending, pho: 'O', property: 'rhotic' });
      } else {
        res.push({ ...pending, property: 'rhotic' });
      }
      pending = null;
    } else {
      if (pending) {
        // The candidate was not followed by R: emit it untouched.
        res.push(pending);
        pending = null;
      }
      // Only vowels listed in maybeRhotic may become the next candidate; any
      // other vowel is emitted as-is so a later R cannot attach to it.
      if (p.isVowel && maybeRhotic.includes(p.phoneme)) {
        pending = p;
        // Look at the immediate predecessor only, so a 'Y' separated from
        // this vowel by other vowels does not count.
        pendingAfterY = prev !== null && !prev.isVowel && prev.phoneme === 'Y';
      } else {
        res.push(p);
      }
    }
    prev = p;
  }
  if (pending) res.push(pending);
  return res;
}
// Words whose /ae/ is forced tense, checked before the phonological rules.
const aeTense = [
  // The two exceptions:
  "CAN'T", 'AVENUE',
];
// A following consonant from this list makes /ae/ tense.
const aeTensePhonemes = ['M', 'N', 'NG', 'B', 'D', 'JH', 'G', 'F', 'TH', 'S', 'SH'];
// Words whose /ae/ is forced lax; this list is consulted before aeTense.
const aeLax = [
  // Function words with simple coda:
  'CAN', 'HAVE', 'HAD', 'HAS', 'AT', 'THAT', 'AS', 'AND', 'AN', 'ANY', 'AM',
  // Loanwords, names, abbreviations:
  // We rely on aeHint for this
];
// A following consonant from this list makes /ae/ lax.
const aeLaxPhonemes = ['P', 'T', 'CH', 'K', 'V', 'DH', 'Z', 'ZH', 'L'];

// aeHint is a comma-separated list of integers:
//    0: every /ae/ is lax
//    k > 0: the k-th /ae/ counted from the end is tense (1 = last)
//    k < 0: the |k|-th /ae/ counted from the end is lax (-1 = last)
//    entries are applied left to right, e.g. "1,-2" combines 1 and -2
//
// Turn an aeHint string into an array indexed by /ae/ occurrence (in word
// order). An entry is true (tense), false (lax) or missing (no override).
function parseAeHint(phs, aeHint) {
  const overrides = [];
  if (!aeHint) return overrides;
  let aeCount = 0;
  for (const { phoneme } of phs) {
    if (phoneme === 'AE') aeCount += 1;
  }
  for (const token of aeHint.split(',')) {
    if (!token) continue;
    const n = +token;
    if (n === 0) {
      for (let k = 0; k < aeCount; k++) overrides[k] = false;
    } else if (n < 0) {
      overrides[aeCount + n] = false;
    } else if (n > 0) {
      overrides[aeCount - n] = true;
    }
  }
  return overrides;
}

// Decide tense (true) / lax (false) for the /ae/ at phs[pi] when no hint
// applies. Word lists win over the phoneme-context rules.
function decideAeTense(phs, pi, word) {
  if (aeLax.includes(word)) return false;
  if (aeTense.includes(word)) return true;
  const lastIdx = phs.length - 1;
  if (pi === lastIdx) return false; // End of word
  const follower = phs[pi + 1];
  if (follower.isVowel) return false; // End of syllable
  // Before a pre-vocalic consonant
  if (pi < lastIdx - 1 && phs[pi + 2].isVowel) return false;
  // Otherwise the class of the following consonant decides.
  if (aeTensePhonemes.includes(follower.phoneme)) return true;
  if (aeLaxPhonemes.includes(follower.phoneme)) return false;
  console.error('Warning: cannot decide /ae/-raising of ', word);
  return false;
}

// Per-phoneme builders for every vowel except 'AE'. Each receives the phoneme
// object and its index and returns the annotated copy. The position of `...p`
// inside each literal decides whether fields already on `p` win or lose.
const vowelShapers = new Map([
  ['AY', (p) => ({ property: 'tense', ...p, pho: 'aI' })],
  ['AW', (p) => ({ property: 'tense', ...p, pho: 'aU' })],
  ['AA', (p) => ({ property: 'tense', ...p, pho: 'A' })],
  ['AH', (p) => ({ property: 'lax', ...p, pho: p.stress ? '2' : '@', weak: !p.stress })],
  ['EY', (p) => ({ property: 'tense', ...p, pho: 'eI' })],
  ['EH', (p) => ({ property: 'lax', ...p, pho: 'e' })],
  ['ER', (p) => ({ property: 'rhotic', pho: p.stress ? '3r' : '@', weak: !p.stress, ...p })],
  ['IY', (p) => ({ property: 'tense', ...p, pho: 'i', weak: !p.stress })],
  // An IH that already carries a property gets pho 'i' and is never weak.
  ['IH', (p) => ({ property: 'lax', ...p, pho: p.property ? 'i' : 'I', weak: !p.property && !p.stress })],
  // OW is reduced only when it is both word-initial and unstressed.
  ['OW', (p, pi) => (pi || p.stress
    ? { property: 'tense', ...p, pho: 'oU' }
    : { property: 'lax', ...p, pho: 'U', weak: true })],
  ['OY', (p) => ({ property: 'tense', ...p, pho: 'OI' })],
  // Note: Due to cot-thought merger, tense AO and lax AO are identical.
  ['AO', (p) => ({ property: 'tense', ...p, pho: 'O' })],
  ['UW', (p) => ({ property: 'tense', ...p, pho: 'u', weak: !p.stress })],
  // Note: Due to cure-force merger, rhotic UH (not after /j/) becomes rhotic /O/;
  // a `pho` / `property` already present on p is kept.
  ['UH', (p) => ({ property: 'lax', pho: 'U', ...p })],
]);

// Annotate every vowel with `property` and `pho` (and `weak` for some vowels).
//
// phs:    array of phoneme objects (reads `phoneme`, `isVowel`, `stress`,
//         `property`); a falsy value is returned unchanged.
// word:   the word being transcribed, compared against aeLax / aeTense.
// aeHint: optional override string, format described above parseAeHint.
// Returns a new array; phonemes without a rule are passed through by reference.
function tensing(phs, word, aeHint) {
  if (!phs) return phs;
  const overrides = parseAeHint(phs, aeHint);
  const out = [];
  for (let idx = 0; idx < phs.length; idx += 1) {
    const ph = phs[idx];
    if (ph.phoneme === 'AE') {
      // Consume one override slot per /ae/, whether or not it is filled.
      const forced = overrides.shift();
      const tense = forced !== undefined ? forced : decideAeTense(phs, idx, word);
      out.push({ property: tense ? 'tense' : 'lax', ...ph, pho: 'ae' });
      continue;
    }
    const shaper = vowelShapers.get(ph.phoneme);
    out.push(shaper ? shaper(ph, idx) : ph);
  }
  return out;
}
// Consonants that nasalize the vowel directly before them.
const nasalPhonemes = ['M', 'N', 'NG'];

// Flag each vowel with `nasalized`: true when the very next phoneme is one of
// `nasalPhonemes`, false otherwise (including for a word-final vowel).
//
// phs: array of phoneme objects (reads `isVowel` and `phoneme`); a falsy value
//      is returned unchanged.
// Returns a new array: vowels are shallow copies with the extra `nasalized`
// field, non-vowels are passed through by reference.
function nasalize(phs) {
  if (!phs) return phs;
  const lastIdx = phs.length - 1;
  return Array.from({ length: phs.length }, (_, idx) => {
    const ph = phs[idx];
    if (!ph.isVowel) return ph;
    const nasalized = idx < lastIdx && nasalPhonemes.includes(phs[idx + 1].phoneme);
    return { ...ph, nasalized };
  });
}
// Add `nasalized` and `length` to every vowel.
//
// phs: array of phoneme objects as annotated by the earlier passes (reads
//      `isVowel`, `phoneme`, `pho`, `property`, `weak`, `phono`); a falsy value
//      is returned unchanged.
// Returns a new array: vowels are shallow copies with the two extra fields,
// non-vowels are passed through by reference.
//
// `nasalized` is true when the next phoneme is listed in `nasalPhonemes`.
// `length` starts at 1, or 0.5 when the previous output entry has length 1.5,
// and is then overridden by the following `pho`-specific rules:
//    'A'  with property 'tense'          -> 2
//    '3r'                                -> 2
//    'i'  with property 'tense'          -> 1.5 if it is weak, a phoneme follows,
//                                           that phoneme is weak and its pho is
//                                           not 'I'; otherwise 2
//    '@'  right after an 'N' whose phono is 'nucleus' -> 0.7
function nasalizeAndLengthen(phs) {
  if (!phs) return phs;
  const out = [];
  const lastIdx = phs.length - 1;
  for (let idx = 0; idx <= lastIdx; idx += 1) {
    const ph = phs[idx];
    if (ph.isVowel) {
      const hasNext = idx < lastIdx;
      const nasalized = hasNext && nasalPhonemes.includes(phs[idx + 1].phoneme);

      // `out` grows in lockstep with `phs`, so out[idx - 1] is the already
      // processed predecessor (consonants carry no `length`).
      let length = idx > 0 && out[idx - 1].length === 1.5 ? 0.5 : 1;

      if (ph.pho === 'A') {
        if (ph.property === 'tense') length = 2;
      } else if (ph.pho === '3r') {
        length = 2;
      } else if (ph.pho === 'i') {
        if (ph.property === 'tense') {
          const shortened = hasNext
            && phs[idx + 1].pho !== 'I'
            && ph.weak
            && phs[idx + 1].weak;
          length = shortened ? 1.5 : 2;
        }
      } else if (ph.pho === '@') {
        // NOTE(review): `phono === 'nucleus'` on an 'N' presumably marks a
        // syllabic /n/ set by another module - confirm.
        if (idx > 0 && phs[idx - 1].phoneme === 'N' && phs[idx - 1].phono === 'nucleus') {
          length = 0.7;
        }
      }

      out.push({ ...ph, nasalized, length });
    } else {
      out.push(ph);
    }
  }
  return out;
}
// Full vowel pipeline: rhoticize, then tensing (which takes the word and the
// optional aeHint), then nasalizeAndLengthen. Returns the last stage's result.
export default (phs, word, aeHint) =>
  nasalizeAndLengthen(tensing(rhoticize(phs), word, aeHint));