native-lyrics-tools
Version:
A JavaScript library for parsing and generating various lyric formats.
126 lines (116 loc) • 4.01 kB
JavaScript
// Minimal TTML Lyric parser/stringifier (word-by-word, no extra fields)
// Word and line structures, shaped like the ones used for LRC/QRC/ESLRC
/**
 * One timed word of a lyric line.
 *
 * Instances carry exactly three own properties, in this order:
 * `word`, `start_time`, `end_time`.
 */
class LyricWord {
  /**
   * @param {string} word - Text of the word.
   * @param {number} [start_time=0] - Start time (milliseconds when built by parseTTML).
   * @param {number} [end_time=0] - End time (milliseconds when built by parseTTML).
   */
  constructor(word, start_time = 0, end_time = 0) {
    Object.assign(this, { word, start_time, end_time });
  }
}
/**
 * One lyric line: an ordered list of words.
 */
class LyricLine {
  /**
   * @param {Array} [words=[]] - Words of the line; the array is stored as-is, not copied.
   */
  constructor(words = []) {
    Object.assign(this, { words });
  }
}
/**
 * Convert a TTML time expression into milliseconds.
 *
 * Accepted forms (surrounding whitespace is ignored):
 *   - clock time:  `HH:MM:SS[.fff]`, `MM:SS[.fff]` or `SS[.fff]`
 *   - offset time: `<number>h`, `<number>m`, `<number>s` or `<number>ms`
 *
 * The fraction may have any number of digits; the result is rounded to the
 * nearest millisecond. Frame- and tick-based expressions (`f`, `t`) are not
 * supported because they need a frame/tick rate that is not available here.
 *
 * @param {string} str - Time expression, e.g. `"01:02:03.456"` or `"12.5s"`.
 * @returns {number} Milliseconds, or 0 when `str` is missing, not a string,
 *   or not in a recognised form.
 */
function parseTimestamp(str) {
  if (typeof str !== 'string') return 0;
  const text = str.trim();
  if (text === '') return 0;

  // Whole seconds and the optional fraction are recombined and rounded once,
  // so ".5", ".500" and ".5000" all yield 500 ms.
  const secondsToMs = (whole, fraction) =>
    Math.round(Number(`${whole}.${fraction || '0'}`) * 1000);

  // HH:MM:SS[.fff]
  let m = text.match(/^(\d+):(\d{2}):(\d{2})(?:\.(\d+))?$/);
  if (m) {
    return Number(m[1]) * 3600000 + Number(m[2]) * 60000 + secondsToMs(m[3], m[4]);
  }

  // MM:SS[.fff]
  m = text.match(/^(\d+):(\d{2})(?:\.(\d+))?$/);
  if (m) {
    return Number(m[1]) * 60000 + secondsToMs(m[2], m[3]);
  }

  // SS[.fff]
  m = text.match(/^(\d+)(?:\.(\d+))?$/);
  if (m) {
    return secondsToMs(m[1], m[2]);
  }

  // Offset time: a count followed by a metric.
  m = text.match(/^(\d+(?:\.\d+)?)(h|ms|m|s)$/);
  if (m) {
    const msPerUnit = { h: 3600000, m: 60000, s: 1000, ms: 1 };
    return Math.round(Number(m[1]) * msPerUnit[m[2]]);
  }

  return 0;
}
/**
 * Decode XML references in a piece of text.
 *
 * Handles the five predefined entities (`&lt;`, `&gt;`, `&quot;`, `&apos;`,
 * `&amp;`) and numeric character references (`&#39;`, `&#x27;`). Decoding is
 * done in a single pass, so already-decoded output is never decoded again
 * (`&amp;lt;` becomes `&lt;`, not `<`). Unknown entities and out-of-range
 * code points are left untouched.
 *
 * @param {*} str - Value to decode; coerced with `String()`.
 * @returns {string} The decoded text.
 */
function xmlUnescape(str) {
  const named = { lt: '<', gt: '>', quot: '"', apos: "'", amp: '&' };
  const reference = /&(?:#(\d+)|#x([0-9a-fA-F]+)|(lt|gt|quot|apos|amp));/g;
  return String(str).replace(reference, (whole, dec, hex, name) => {
    if (name !== undefined) return named[name];
    const code = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
    // String.fromCodePoint throws a RangeError above U+10FFFF; keep such input verbatim.
    return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
  });
}
/**
 * Escape text for use as XML character data or inside a double-quoted
 * attribute value.
 *
 * `&`, `<`, `>` and `"` are replaced by `&amp;`, `&lt;`, `&gt;` and `&quot;`.
 * Replacement happens in one pass, so the ampersands of the generated
 * entities are never escaped a second time.
 *
 * @param {*} str - Value to escape; coerced with `String()`.
 * @returns {string} The escaped text.
 */
function xmlEscape(str) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' };
  return String(str).replace(/[&<>"]/g, (ch) => entities[ch]);
}
/**
 * Parse a TTML document into word-timed lyric lines.
 *
 * Every `<p>` element becomes one `LyricLine`; every innermost `<span>` inside
 * it becomes one `LyricWord` whose times come from the span's `begin` / `end`
 * attributes (via `parseTimestamp`). Spans that only wrap other spans are
 * skipped in favour of the spans they contain. Any other markup inside a
 * word span is stripped from its text. Lines without a non-empty word span
 * are dropped.
 *
 * This is a regex-based scan, not a full XML parser.
 *
 * @param {string} src - TTML source text.
 * @returns {LyricLine[]} Parsed lines in document order; empty for null/undefined input.
 */
function parseTTML(src) {
  if (src == null) return [];

  // Drop comments and collapse every whitespace run (newlines included) into a
  // single space so the tag patterns below can stay simple.
  const xml = String(src).replace(/<!--[\s\S]*?-->/g, '').replace(/\s+/g, ' ');

  // The lookahead stops `<p` from matching longer tag names that start with "p".
  const lineRe = /<p(?=[\s>])[^>]*>([\s\S]*?)<\/p>/g;
  const lines = [];
  let lineMatch;
  while ((lineMatch = lineRe.exec(xml)) !== null) {
    // Match innermost spans only: the body may not contain another `<span`
    // opening tag. Without this, a wrapper span would swallow the opening tag
    // of its first child and leak markup into the word text.
    const wordRe = /<span(?=[\s>])([^>]*)>((?:(?!<span[\s>])[\s\S])*?)<\/span>/g;
    const words = [];
    let wordMatch;
    while ((wordMatch = wordRe.exec(lineMatch[1])) !== null) {
      const spanAttrs = parseAttrs(wordMatch[1]);
      // Remove leftover inline tags before unescaping so escaped "<" survives.
      const wordText = xmlUnescape(wordMatch[2].replace(/<[^>]*>/g, '').trim());
      if (!wordText) continue;
      const start = parseTimestamp(spanAttrs.begin);
      const end = parseTimestamp(spanAttrs.end);
      words.push(new LyricWord(wordText, start, end));
    }
    if (words.length > 0) {
      lines.push(new LyricLine(words));
    }
  }
  return lines;
}
/**
 * Extract `name="value"` pairs from the attribute portion of an XML start tag.
 *
 * Both double- and single-quoted values are recognised, whitespace around `=`
 * is allowed, and names may contain letters, digits, `_`, `:`, `.` and `-`.
 * Values are returned verbatim (entity references are not decoded). When a
 * name occurs more than once, the last occurrence wins.
 *
 * @param {string} str - Text between the tag name and the closing `>`.
 * @returns {Object<string, string>} Attribute names mapped to their raw values.
 */
function parseAttrs(str) {
  const attrPattern = /([:\w.-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  const attrs = {};
  let match;
  while ((match = attrPattern.exec(str)) !== null) {
    // Exactly one of the two value groups participates in a match.
    attrs[match[1]] = match[2] !== undefined ? match[2] : match[3];
  }
  return attrs;
}
/**
 * Format a millisecond count as a TTML clock time, `HH:MM:SS.mmm`.
 *
 * The input is rounded to a whole millisecond first, so fractional values
 * cannot leak a second decimal point into the output. Negative and non-finite
 * inputs are formatted as zero. The hour field grows beyond two digits when
 * needed.
 *
 * @param {number} ms - Time in milliseconds.
 * @returns {string} The formatted timestamp.
 */
function ttmlTimestamp(ms) {
  const value = Number(ms);
  const total = Number.isFinite(value) ? Math.max(0, Math.round(value)) : 0;
  const pad = (n, width) => String(n).padStart(width, '0');

  const hours = Math.floor(total / 3600000);
  const minutes = Math.floor(total / 60000) % 60;
  const seconds = Math.floor(total / 1000) % 60;
  const millis = total % 1000;
  return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)}.${pad(millis, 3)}`;
}
/**
 * Serialise lyric lines as a TTML document.
 *
 * Each line becomes a `<p>` whose `begin` is the first word's `start_time`
 * and whose `end` is the last word's `end_time`; each word becomes a `<span>`
 * with its own `begin` / `end` and XML-escaped text. A time that is falsy
 * (0 or missing) produces no attribute at all.
 *
 * @param {Iterable<{words: Array<{word: string, start_time: number, end_time: number}>}>} lines
 *   Lines to serialise.
 * @returns {string} The TTML document text.
 */
function stringifyTTML(lines) {
  // ` name="<timestamp>"` for a set time, empty string for an unset one.
  const timeAttr = (name, ms) => (ms ? ` ${name}="${ttmlTimestamp(ms)}"` : '');

  const chunks = [
    '<?xml version="1.0" encoding="UTF-8"?>\n<tt xmlns="http://www.w3.org/ns/ttml">\n <body><div>\n',
  ];

  for (const { words } of lines) {
    const hasWords = words.length > 0;
    const lineStart = hasWords ? words[0].start_time : 0;
    const lineEnd = hasWords ? words[words.length - 1].end_time : 0;

    chunks.push(` <p${timeAttr('begin', lineStart)}${timeAttr('end', lineEnd)}>`);
    for (const entry of words) {
      const attrs = timeAttr('begin', entry.start_time) + timeAttr('end', entry.end_time);
      chunks.push(`<span${attrs}>${xmlEscape(entry.word)}</span>`);
    }
    chunks.push('</p>\n');
  }

  chunks.push(' </div></body>\n</tt>\n');
  return chunks.join('');
}
// Public API: the word/line data classes plus the TTML parse and stringify
// entry points. The other functions in this file are not part of this export list.
export {
LyricWord,
LyricLine,
parseTTML,
stringifyTTML
};