UNPKG

ipa-jfk

Version:

IPA narrow transcription of English words in New York City accent

323 lines (314 loc) 10.7 kB
/* Copyright (C) 2020-2021 b1f6c1c4
 *
 * This file is part of IPA-JFK.
 *
 * IPA-JFK is free software: you can redistribute it and/or modify it under the
 * terms of the GNU Affero General Public License as published by the Free
 * Software Foundation, version 3.
 *
 * IPA-JFK is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for
 * more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with IPA-JFK.  If not, see <https://www.gnu.org/licenses/>.
 */

/**
 * Consonant clusters accepted as a syllable onset.
 *
 * Each entry is an array of phoneme symbols in pronunciation order; the empty
 * array stands for "no onset". The table is consulted by exact, element-wise
 * comparison, so the order of entries is irrelevant but the order of symbols
 * inside an entry is not.
 *
 * NOTE(review): ['D', 'R'] is not listed although ['T', 'R'] is, and badSplit
 * elsewhere in this file penalises splitting D from R -- confirm whether the
 * omission is intentional.
 */
const onset = [
  [],
  // All single consonant phonemes except /N/ (the velar nasal, spelled 'NG'
  // in this table; plain 'N' is listed):
  ['M'], ['N'], ['P'], ['B'], ['T'], ['D'], ['K'], ['G'],
  ['F'], ['V'], ['TH'], ['DH'], ['S'], ['Z'], ['SH'], ['ZH'], ['HH'],
  ['L'], ['R'], ['Y'], ['W'], ['CH'], ['JH'],
  // Stop plus approximant other than /j/:
  ['P', 'L'], ['B', 'L'], ['K', 'L'], ['G', 'L'],
  ['P', 'R'], ['B', 'R'], ['T', 'R'], ['K', 'R'], ['G', 'R'],
  ['T', 'W'], ['D', 'W'], ['G', 'W'], ['K', 'W'], ['P', 'W'],
  // Voiceless fricative or /v/ plus approximant other than /j/:
  ['F', 'L'], ['S', 'L'], ['TH', 'L'],
  ['F', 'R'], ['TH', 'R'], ['SH', 'R'],
  ['HH', 'W'], ['S', 'W'], ['TH', 'W'], ['V', 'W'],
  // Consonant plus /j/ (before /u:/ or its modified/reduced forms):
  ['P', 'Y'], ['B', 'Y'], ['T', 'Y'], ['D', 'Y'], ['K', 'Y'], ['G', 'Y'],
  ['M', 'Y'], ['N', 'Y'], ['F', 'Y'], ['V', 'Y'], ['TH', 'Y'],
  ['S', 'Y'], ['Z', 'Y'], ['HH', 'Y'], ['L', 'Y'],
  // /s/ plus voiceless stop:
  ['S', 'P'], ['S', 'T'], ['S', 'K'],
  // /s/ plus nasal other than /N/ (the velar nasal, 'NG'):
  ['S', 'M'], ['S', 'N'],
  // /s/ plus voiceless fricative:
  ['S', 'F'], ['S', 'TH'],
  // /s/ plus voiceless stop plus approximant:
  ['S', 'P', 'L'], ['S', 'K', 'L'],
  ['S', 'P', 'R'], ['S', 'T', 'R'], ['S', 'K', 'R'],
  ['S', 'K', 'W'],
  ['S', 'M', 'Y'], ['S', 'P', 'Y'], ['S', 'T', 'Y'], ['S', 'K', 'Y'],
  // /s/ plus voiceless fricative plus approximant:
  ['S', 'F', 'R'],
];
/**
 * Consonant clusters accepted as a syllable coda.
 *
 * Same layout as the onset table: each entry is an array of phoneme symbols
 * in pronunciation order and the empty array stands for "no coda".
 *
 * NOTE(review): ['K', 'S'] is not listed as a two-consonant coda although
 * ['L', 'K', 'S'], ['NG', 'K', 'S'] and ['K', 'S', 'T'] are -- confirm
 * whether that is intentional.
 */
const coda = [
  [],
  // The single consonant phonemes except /h/, /w/, /j/ and, in non-rhotic
  // varieties, /r/:
  ['M'], ['N'], ['NG'], ['P'], ['B'], ['T'], ['D'], ['K'], ['G'],
  ['F'], ['V'], ['TH'], ['DH'], ['S'], ['Z'], ['SH'], ['ZH'],
  ['L'], ['R'], ['CH'], ['JH'],
  // Lateral approximant plus stop or affricate:
  ['L', 'P'], ['L', 'B'], ['L', 'T'], ['L', 'D'],
  ['L', 'CH'], ['L', 'JH'], ['L', 'K'],
  // In rhotic varieties, /r/ plus stop or affricate:
  ['R', 'P'], ['R', 'B'], ['R', 'T'], ['R', 'D'],
  ['R', 'CH'], ['R', 'JH'], ['R', 'K'], ['R', 'G'],
  // Lateral approximant + fricative:
  ['L', 'F'], ['L', 'V'], ['L', 'TH'], ['L', 'S'], ['L', 'Z'], ['L', 'SH'],
  // In rhotic varieties, /r/ + fricative:
  ['R', 'F'], ['R', 'V'], ['R', 'TH'], ['R', 'S'], ['R', 'Z'], ['R', 'SH'],
  // Lateral approximant + nasal:
  ['L', 'M'], ['L', 'N'],
  // In rhotic varieties, /r/ + nasal or lateral:
  ['R', 'M'], ['R', 'N'], ['R', 'L'],
  // Nasal + homorganic stop or affricate:
  ['M', 'P'], ['N', 'T'], ['N', 'D'], ['N', 'CH'], ['N', 'JH'], ['NG', 'K'],
  // Nasal + fricative:
  ['M', 'F'], ['M', 'TH'], ['N', 'TH'], ['N', 'S'], ['N', 'Z'], ['NG', 'TH'],
  // Voiceless fricative plus voiceless stop:
  ['F', 'T'], ['S', 'P'], ['S', 'T'], ['S', 'K'],
  // Two voiceless fricatives:
  ['F', 'TH'],
  // Two voiceless stops:
  ['P', 'T'], ['K', 'T'],
  // Stop plus voiceless fricative:
  ['P', 'TH'], ['P', 'S'], ['T', 'TH'], ['T', 'S'], ['D', 'TH'], ['D', 'S'],
  // Lateral approximant + two consonants:
  ['L', 'P', 'T'], ['L', 'P', 'S'], ['L', 'F', 'TH'], ['L', 'T', 'S'],
  ['L', 'S', 'T'], ['L', 'K', 'T'], ['L', 'K', 'S'],
  // In rhotic varieties, /r/ + two consonants:
  ['R', 'M', 'TH'], ['R', 'P', 'T'], ['R', 'P', 'S'], ['R', 'T', 'S'],
  ['R', 'S', 'T'], ['R', 'K', 'T'],
  // Nasal + homorganic stop + stop or fricative:
  ['M', 'P', 'T'], ['M', 'P', 'S'], ['N', 'D', 'TH'],
  ['NG', 'K', 'T'], ['NG', 'K', 'S'], ['NG', 'K', 'TH'],
  // Three obstruents:
  ['K', 'S', 'TH'], ['K', 'S', 'T'],
];

/**
 * Adjacent consonant pairs that a syllable boundary should not separate.
 * syllabify() applies a prohibitive score penalty to any split position that
 * falls between the two members of one of these pairs.
 */
const badSplit = [
  ['S', 'P'], ['S', 'T'], ['S', 'K'],
  ['T', 'S'], ['D', 'Z'],
  ['T', 'R'], ['D', 'R'],
  ['HH', 'Y'],
];

/**
 * Array.prototype.includesX(valueToFind): true when `this` (an array of
 * sequences) contains a sequence with the same length as `valueToFind` whose
 * elements are all strictly equal to it, position by position.
 *
 * Kept on Array.prototype because the rest of this file (and possibly other
 * callers) invoke it as a method. It is defined non-enumerable so that it
 * does not appear in `for...in` loops over arbitrary arrays; writable and
 * configurable match what a plain assignment would have produced.
 */
Object.defineProperty(Array.prototype, 'includesX', {
  value: function includesX(valueToFind) {
    for (const o of this) {
      if (o.length === valueToFind.length) {
        let i;
        for (i = 0; i < valueToFind.length; i++) {
          if (o[i] !== valueToFind[i]) break;
        }
        if (i === valueToFind.length) return true;
      }
    }
    return false;
  },
  writable: true,
  configurable: true,
  enumerable: false,
});

/**
 * Assign every phoneme of a word to a syllable position.
 *
 * The word is cut into consonant clusters separated by vowels. For each
 * cluster a split index is chosen: the consonants before it are tagged
 * `phono: 'coda'`, the ones from it onwards `phono: 'onset'`, and vowels are
 * tagged `phono: 'nucleus'`. Consonants additionally receive the `stress` of
 * the vowel that follows their cluster (`false` for the word-final cluster).
 *
 * @param {Array<Object>} phs - Phonemes in order. Each item is read for
 *   `isVowel`, `phoneme` and (on vowels) `stress`. A falsy value is returned
 *   unchanged.
 * @param {string} [syllableHint] - Whitespace-separated numbers. The k-th
 *   number (0-based) is used verbatim as the split index of the cluster that
 *   follows the (k+1)-th vowel, bypassing all heuristics for that cluster.
 * @returns {Array<Object>} New array of shallow copies of the input phonemes
 *   with `phono` (and, on consonants, `stress`) set.
 */
function syllabify(phs, syllableHint) {
  if (!phs) return phs;

  const stops = ['P', 'B', 'T', 'D', 'K', 'G'];
  const stopsAndAffricates = ['P', 'B', 'T', 'D', 'K', 'G', 'CH', 'JH'];
  const voicelessStops = ['P', 'T', 'K'];
  const afterVoicelessStop = ['F', 'V', 'TH', 'DH', 'S', 'Z', 'SH', 'JH'];
  const neverCodaAfterER = ['NG', 'DH', 'ZH', 'HH', 'R', 'Y', 'W'];

  // Interleave clusters and vowels: even slots hold an array of consonants
  // (possibly empty), odd slots hold the vowel between two clusters.
  const ints = [];
  let state = [];
  for (const p of phs) {
    if (p.isVowel) {
      ints.push(state);
      ints.push(p);
      state = [];
    } else {
      state.push(p);
    }
  }
  ints.push(state);

  const syllableHints = syllableHint
    ? syllableHint.trim().split(/\s+/).map((h) => +h)
    : [];

  // splits[i / 2] is the chosen split index for the cluster at ints[i].
  const splits = [];
  for (let i = 0; i < ints.length; i += 2) {
    const int = ints[i];
    const hint = syllableHints[i / 2 - 1];
    let maybe;
    if (hint !== undefined) {
      maybe = [hint];
    } else if (int.length === 0) {
      maybe = [0];
    } else if (i === 0) {
      // Word-initial cluster: everything is onset.
      maybe = [0];
    } else if (i === ints.length - 1) {
      // Word-final cluster: everything is coda.
      maybe = [int.length];
    } else if (ints[i - 1].phoneme === 'ER'
      && neverCodaAfterER.includes(int[0].phoneme)) {
      maybe = [0];
    } else if (int.length === 1 && int[0].phoneme === 'N'
      && ints[i - 1].phoneme === 'AH' && !ints[i - 1].stress
      && ints[i + 1].phoneme === 'AH' && !ints[i + 1].stress) {
      // A lone N between two unstressed AH goes to the preceding syllable.
      maybe = [1];
    } else {
      // Every position that yields a legal coda followed by a legal onset.
      maybe = [];
      for (let j = 0; j <= int.length; j++) {
        if (coda.includesX(int.slice(0, j).map((x) => x.phoneme))
          && onset.includesX(int.slice(j).map((x) => x.phoneme))) {
          maybe.push(j);
        }
      }
    }

    if (maybe.length === 1) {
      splits[i / 2] = maybe[0];
      continue;
    }

    // Only intervocalic clusters reach this point, so both neighbours exist.
    // `stress * 2 % 3` maps 0 -> 0, 1 -> 2, 2 -> 1.
    const stressD = (ints[i + 1].stress * 2 % 3) - (ints[i - 1].stress * 2 % 3);

    if (!maybe.length) {
      console.error('Warning: no possible splitting in ', int.map((n) => n.phoneme).join(' '));
      maybe = [];
      for (let j = 0; j <= int.length; j++) {
        maybe.push(j);
      }
    }

    // Score each candidate; the base score prefers a split near the middle
    // of the cluster, shifted according to the stress difference.
    maybe = maybe.map((pos) => {
      let fit = -Math.abs(pos - int.length / 2 + 0.3 * (stressD + 0.1));
      if (!pos && ints[i - 1].phoneme === 'ER') {
        fit += 1.2;
      }
      if (pos === int.length && ints[i + 1].phoneme === 'ER') {
        fit += 1.1;
      }
      if (pos && pos < int.length) {
        const before = int[pos - 1].phoneme;
        const after = int[pos].phoneme;
        if (badSplit.includesX([before, after])) {
          // Large enough to outweigh every other term.
          fit -= 114514;
        }
        if (after === 'Y') {
          fit -= 1.5;
        }
        if (stops.includes(before) && stopsAndAffricates.includes(after)) {
          fit -= 3.0;
        }
        if (voicelessStops.includes(before) && afterVoicelessStop.includes(after)) {
          fit -= 2.5;
        }
      }
      return { pos, fit };
    });
    maybe.sort(({ fit: l }, { fit: r }) => r - l);
    splits[i / 2] = maybe[0].pos;
  }

  const res = [];
  for (let i = 0; i < ints.length; i += 2) {
    const split = splits[i / 2];
    const prv = ints[i].slice(0, split);
    const nxt = ints[i].slice(split);
    const vow = i < ints.length - 1 && ints[i + 1];
    const stress = vow && vow.stress;
    for (const p of prv) {
      res.push({ ...p, phono: 'coda', stress });
    }
    for (const p of nxt) {
      res.push({ ...p, phono: 'onset', stress });
    }
    if (vow) {
      res.push({ ...vow, phono: 'nucleus' });
    }
  }
  return res;
}

/**
 * Replace an unstressed AH followed by a coda L, M or N with that consonant
 * acting as the syllable nucleus.
 *
 * The AH is dropped and the consonant is emitted with `phono: 'nucleus'` and
 * `stress: 0` when the phoneme before the AH is
 *   - for L: tagged 'coda' or 'nucleus';
 *   - for M / N: tagged 'nucleus', or tagged 'coda' and not itself M, N or NG.
 * In every other situation the input phonemes pass through unchanged.
 *
 * @param {Array<Object>} phs - Output of syllabify(); falsy values are
 *   returned unchanged.
 * @returns {Array<Object>} New array; untouched phonemes are the same object
 *   references as in `phs`.
 */
function syllablicize(phs) {
  if (!phs) return phs;
  const res = [];
  for (let pi = 0; pi < phs.length; pi++) {
    const p = phs[pi];
    if (!(p.phoneme === 'AH' && !p.stress && pi < phs.length - 1 && phs[pi + 1].phono === 'coda')) {
      res.push(p);
      continue;
    }
    const next = phs[pi + 1];
    const prev = pi > 0 ? phs[pi - 1] : undefined;
    let sy = false;
    if (['L'].includes(next.phoneme)) {
      if (prev && (prev.phono === 'coda' || prev.phono === 'nucleus')) {
        sy = true;
      }
    } else if (['M', 'N'].includes(next.phoneme)) {
      if (prev && ((prev.phono === 'coda' && !['M', 'N', 'NG'].includes(prev.phoneme))
        || prev.phono === 'nucleus')) {
        sy = true;
      }
    }
    if (sy) {
      res.push({ ...next, phono: next.phono === 'coda' ? 'nucleus' : 'onset', stress: 0 });
      pi++; // the consonant has been consumed together with the AH
    } else {
      res.push(p);
    }
  }
  return res;
}

/**
 * Copy of an ER phoneme re-tagged as a non-vowel /r/ occupying the nucleus.
 */
function toNuclearR(p) {
  return { ...p, isVowel: false, weak: undefined, pho: 'r', phono: 'nucleus' };
}

/**
 * Resolve every ER phoneme into a vowel, a nuclear /r/, or a vowel followed
 * by a linking onset /r/, depending on what follows it.
 *
 * @param {Array<Object>} phs - Phonemes (normally the output of
 *   syllablicize()); falsy values are returned unchanged.
 * @returns {Array<Object>} New array with ER phonemes rewritten as described
 *   by the numbered cases in the body.
 */
function rPhoneme(phs) {
  if (!phs) return phs;
  const res = [];
  for (let pi = 0; pi < phs.length; pi++) {
    const p = phs[pi];
    if (p.phoneme !== 'ER') {
      res.push(p);
      continue;
    }
    // Case 0: @r [+ C], 3r [+ C]
    if (!(pi < phs.length - 1 && phs[pi + 1].isVowel)) {
      res.push(p.stress ? p : toNuclearR(p));
      continue;
    }
    const next = phs[pi + 1];
    // Case 1: @r + @r
    if (!p.stress && !next.stress && next.phoneme === 'ER') {
      // t selects which of the two ERs turns into a nuclear /r/:
      // 1 -> the first, 2 -> neither, 3 -> the second.
      let t;
      if (!pi) {
        t = undefined;
      } else if (phs[pi - 1].isVowel) {
        t = undefined;
      } else if (['M', 'NG', 'G'].includes(phs[pi - 1].phoneme)) {
        t = undefined;
      } else if (!['T', 'D'].includes(phs[pi - 1].phoneme)) {
        t = 1;
      } else if (pi === 1) {
        t = undefined;
      } else if (phs[pi - 2].isVowel) {
        t = phs[pi - 1].phoneme === 'T' ? 2 : 3;
      } else {
        t = phs[pi - 1].phoneme === 'T' ? 1 : 2;
      }
      if (!t) {
        // The whole word is reported; there is no narrower cluster in scope.
        console.error('Warning: cannot determine ER in', phs.map((n) => n.phoneme).join(' '));
        t = 1;
      }
      res.push(t >= 2 ? p : toNuclearR(p));
      res.push(t <= 2 ? next : toNuclearR(next));
      pi++;
    }
    // Case 2: 3r + @r
    else if (p.stress && !next.stress && next.phoneme === 'ER') {
      res.push(p);
      res.push(toNuclearR(next));
      pi++;
    }
    // Case 3: 3r + V, @r + V
    else {
      res.push(p);
      // Linking /r/ inserted as the onset of the following vowel, which is
      // itself emitted by the next loop iteration.
      res.push({ isVowel: false, pho: 'r', phono: 'onset', stress: next.stress, short: true });
    }
  }
  return res;
}

/**
 * Run the syllable pipeline on a phoneme list.
 *
 * @param {Array<Object>} phs - Phonemes of one word.
 * @param {string} [syllableHint] - Forwarded to syllabify().
 * @param {boolean} [phonemic] - When truthy, stop after syllabify() and skip
 *   the syllabic-consonant and ER rewriting passes.
 * @returns {Array<Object>} The processed phoneme list.
 */
export default (phs, syllableHint, phonemic) => {
  const phs1 = syllabify(phs, syllableHint);
  if (phonemic) return phs1;
  const phs2 = syllablicize(phs1);
  const phs3 = rPhoneme(phs2);
  return phs3;
};