UNPKG

native-lyrics-tools

Version:

A JavaScript library for parsing and generating various lyric formats.

126 lines (116 loc) 4.01 kB
// Minimal TTML lyric parser/stringifier (word-by-word, no extra fields).
// Word and line structures mirror the ones used for the other lyric formats.

/** A single timed word. Times are expressed in milliseconds. */
class LyricWord {
  /**
   * @param {string} word - Text of the word.
   * @param {number} [start_time=0] - Start time in milliseconds.
   * @param {number} [end_time=0] - End time in milliseconds.
   */
  constructor(word, start_time = 0, end_time = 0) {
    this.word = word;
    this.start_time = start_time;
    this.end_time = end_time;
  }
}

/** One lyric line: an ordered list of LyricWord objects. */
class LyricLine {
  /** @param {LyricWord[]} [words=[]] - Words making up the line. */
  constructor(words = []) {
    this.words = words;
  }
}

// Milliseconds per TTML offset-time metric that needs no frame/tick rate.
const MS_PER_UNIT = { h: 3600000, m: 60000, s: 1000, ms: 1 };

// The five entities predefined by XML.
const XML_NAMED_ENTITIES = { lt: '<', gt: '>', quot: '"', apos: "'", amp: '&' };

/**
 * Convert the digits after the decimal point of a seconds value to milliseconds.
 * Digits beyond millisecond precision are truncated.
 * @param {string|undefined} digits - Fraction digits without the leading dot.
 * @returns {number} Milliseconds in the range 0-999.
 */
function fractionToMs(digits) {
  return Number((digits || '0').slice(0, 3).padEnd(3, '0'));
}

/**
 * Parse a TTML time expression into milliseconds.
 *
 * Accepted forms:
 *   - clock time: `[[H+:]M+:]S+[.fraction]` (components after a colon are two digits)
 *   - offset time: `<number>h`, `<number>m`, `<number>s`, `<number>ms`
 *
 * @param {string|number|null|undefined} str - Raw attribute value.
 * @returns {number} Milliseconds, or 0 when the value is missing or not understood
 *   (frame- and tick-based expressions are not supported).
 */
function parseTimestamp(str) {
  if (str === undefined || str === null) return 0;
  const text = String(str).trim();

  const clock = text.match(/^(\d+(?::\d{2}){0,2})(?:\.(\d+))?$/);
  if (clock) {
    // Fold "H:M:S", "M:S" or "S" into whole seconds, most significant part first.
    const seconds = clock[1]
      .split(':')
      .reduce((total, part) => total * 60 + Number(part), 0);
    return seconds * 1000 + fractionToMs(clock[2]);
  }

  const offset = text.match(/^(\d+(?:\.\d+)?)(h|ms|m|s)$/);
  if (offset) {
    // Round to hide floating-point noise such as 1.001 * 1000.
    return Math.round(Number(offset[1]) * MS_PER_UNIT[offset[2]]);
  }

  return 0;
}

/**
 * Decode the predefined XML entities and numeric character references.
 * Decoding happens in a single pass, so `&amp;lt;` yields the text `&lt;`.
 * @param {*} str - Value to decode (coerced to string).
 * @returns {string} Decoded text; unknown or out-of-range references are kept verbatim.
 */
function xmlUnescape(str) {
  return String(str).replace(
    /&(#x[0-9a-fA-F]+|#\d+|lt|gt|quot|apos|amp);/g,
    (whole, ref) => {
      if (ref[0] !== '#') return XML_NAMED_ENTITIES[ref];
      const isHex = ref[1] === 'x';
      const code = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws above U+10FFFF; keep such references untouched.
      return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    }
  );
}

/**
 * Escape text for use as XML character data or inside a double-quoted attribute.
 * @param {*} str - Value to escape (coerced to string).
 * @returns {string} Escaped text.
 */
function xmlEscape(str) {
  return String(str)
    .replace(/&/g, '&amp;') // must run first so later entities are not re-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}

/**
 * Parse a TTML document into lyric lines.
 *
 * Every `<p>` element becomes one LyricLine and every innermost `<span>` inside it
 * becomes one LyricWord timed by the span's `begin`/`end` attributes. Wrapper spans
 * (spans that contain other spans) are skipped in favour of their children.
 * Paragraphs without any non-empty span are dropped.
 *
 * @param {string} src - TTML source text.
 * @returns {LyricLine[]} Parsed lines in document order.
 */
function parseTTML(src) {
  // Strip comments and collapse whitespace so the tag regexes can stay simple.
  const text = String(src === undefined || src === null ? '' : src)
    .replace(/<!--[\s\S]*?-->/g, '')
    .replace(/\s+/g, ' ');

  // `(\s[^>]*)?` keeps tags whose name merely starts with "p" from matching.
  const lineRe = /<p(\s[^>]*)?>([\s\S]*?)<\/p>/g;
  const lines = [];
  let lineMatch;
  while ((lineMatch = lineRe.exec(text))) {
    // Span content may not contain another opening <span>, so only leaf spans match.
    const wordRe = /<span(\s[^>]*)?>((?:(?!<span[\s>])[\s\S])*?)<\/span>/g;
    const words = [];
    let wordMatch;
    while ((wordMatch = wordRe.exec(lineMatch[2]))) {
      const spanAttrs = parseAttrs(wordMatch[1]);
      // Drop any remaining inline markup before decoding entities.
      const wordText = xmlUnescape(wordMatch[2].replace(/<[^>]*>/g, '').trim());
      if (wordText) {
        words.push(
          new LyricWord(
            wordText,
            parseTimestamp(spanAttrs.begin),
            parseTimestamp(spanAttrs.end)
          )
        );
      }
    }
    if (words.length > 0) {
      lines.push(new LyricLine(words));
    }
  }
  return lines;
}

/**
 * Parse the attribute portion of an XML start tag.
 * Both double- and single-quoted values are accepted and entity-decoded.
 * @param {string|undefined} str - Text between the tag name and the closing `>`.
 * @returns {Object<string, string>} Attribute map (no prototype, so names coming
 *   from the document cannot collide with Object.prototype members).
 */
function parseAttrs(str) {
  const attrs = Object.create(null);
  const re = /([:\w-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  let match;
  while ((match = re.exec(str || ''))) {
    const raw = match[2] !== undefined ? match[2] : match[3];
    attrs[match[1]] = xmlUnescape(raw);
  }
  return attrs;
}

/**
 * Format milliseconds as a TTML clock time `HH:MM:SS.mmm`.
 * @param {number} ms - Time in milliseconds; rounded to the nearest integer,
 *   with negative or non-finite values treated as 0.
 * @returns {string} Formatted timestamp.
 */
function ttmlTimestamp(ms) {
  const value = Number(ms);
  const total = Number.isFinite(value) ? Math.max(0, Math.round(value)) : 0;
  const h = Math.floor(total / 3600000);
  const m = Math.floor(total / 60000) % 60;
  const s = Math.floor(total / 1000) % 60;
  const ms3 = total % 1000;
  const pad = (n, width) => String(n).padStart(width, '0');
  return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms3, 3)}`;
}

/**
 * Build a ` name="HH:MM:SS.mmm"` attribute, or an empty string when the time is
 * 0/unset (parseTimestamp reads a missing attribute back as 0).
 * @param {string} name - Attribute name.
 * @param {number} ms - Time in milliseconds.
 * @returns {string} Attribute text including its leading space, or ''.
 */
function timeAttr(name, ms) {
  return ms ? ` ${name}="${ttmlTimestamp(ms)}"` : '';
}

/**
 * Serialize lyric lines as a TTML document.
 * Each line becomes a `<p>` timed from its first word's start to its last word's
 * end, and each word becomes a `<span>`.
 * @param {LyricLine[]} lines - Lines to serialize.
 * @returns {string} TTML document text.
 */
function stringifyTTML(lines) {
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<tt xmlns="http://www.w3.org/ns/ttml">\n <body><div>\n`;
  for (const line of lines) {
    const words = line.words;
    // Line times: use first/last word if available.
    const start = words.length ? words[0].start_time : 0;
    const end = words.length ? words[words.length - 1].end_time : 0;
    xml += ` <p${timeAttr('begin', start)}${timeAttr('end', end)}>`;
    for (const word of words) {
      xml += `<span${timeAttr('begin', word.start_time)}${timeAttr('end', word.end_time)}>${xmlEscape(word.word)}</span>`;
    }
    xml += `</p>\n`;
  }
  xml += ' </div></body>\n</tt>\n';
  return xml;
}

export { LyricWord, LyricLine, parseTTML, stringifyTTML };