UNPKG

@ioris/parser-ttml

Version:

[![npm version](https://badge.fury.io/js/@ioris%2Fparser-ttml.svg)](https://badge.fury.io/js/@ioris%2Fparser-ttml) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

8 lines (7 loc) 7.59 kB
{ "version": 3, "sources": ["../src/Parser.TTMLParser.ts"], "sourcesContent": ["import {\n type CreateLineArgs,\n type CreateLyricArgs,\n type CreateParagraphArgs,\n createLyric,\n} from \"@ioris/core\";\n\nconst isElementNode = (n: Node) => Node.ELEMENT_NODE === n.nodeType;\n\nconst confirmTag = (t: Element[\"tagName\"]) => {\n const tagName = t.toLowerCase();\n return (n: Element) => {\n return n.nodeName.toLowerCase() === tagName;\n };\n};\n\nfunction parseTime(t: string): number {\n if (t === \"\") {\n return 0;\n }\n if (t.endsWith(\"s\")) {\n return Number(t.slice(0, -1));\n }\n return Number(t.split(\":\").reduce((acc, time) => 60 * acc + Number(time), 0));\n}\n\nconst getTime = (e: Element): { begin: number; end: number } => {\n return {\n begin: parseTime(e.getAttribute(\"begin\") || \"\"),\n end: parseTime(e.getAttribute(\"end\") || \"\"),\n };\n};\n\nconst TIMING_TYPE = {\n Line: \"Line\",\n Word: \"Word\",\n} as const;\n\nexport class TTMLParser {\n lineTokenizer?: CreateLyricArgs[\"lineTokenizer\"];\n paragraphTokenizer?: CreateLyricArgs[\"paragraphTokenizer\"];\n offsetSec?: number;\n\n constructor(props?: {\n lineTokenizer?: CreateLyricArgs[\"lineTokenizer\"];\n paragraphTokenizer?: CreateLyricArgs[\"paragraphTokenizer\"];\n offsetSec?: number;\n }) {\n this.lineTokenizer = props ? props.lineTokenizer : undefined;\n this.paragraphTokenizer = props ? props.paragraphTokenizer : undefined;\n this.offsetSec = props ? props.offsetSec : undefined;\n }\n\n public async parse(ttml: XMLDocument, resourceID: string) {\n const duration = parseTime(\n ttml.querySelector(\"body\")?.getAttribute(\"dur\") || \"\",\n );\n const timelines: CreateLyricArgs[\"timelines\"] = [];\n const paragraphs = ttml.querySelectorAll(\"div\");\n\n for (const paragraphElm of Array.from(paragraphs)) {\n const { lineTimelines } = this.parseParagraphTimelines(paragraphElm);\n timelines.push(lineTimelines);\n }\n\n const lyric = await createLyric({\n resourceID,\n duration,\n timelines,\n lineTokenizer: this.lineTokenizer,\n paragraphTokenizer: this.paragraphTokenizer,\n offsetSec: this.offsetSec,\n });\n\n return lyric;\n }\n\n private parseParagraphTimelines(paragraphElm: HTMLDivElement): {\n lineTimelines: CreateParagraphArgs[\"timelines\"];\n } {\n if (!isElementNode(paragraphElm) || !confirmTag(\"div\")(paragraphElm)) {\n throw new Error(\"Invalid TTML format\");\n }\n const lines = paragraphElm.querySelectorAll(\"p\");\n const lineTimelines: CreateParagraphArgs[\"timelines\"] = [];\n\n for (const lineElm of Array.from(lines)) {\n const { wordTimelines } = this.parseLineTimelines(lineElm);\n lineTimelines.push(wordTimelines);\n }\n\n return {\n lineTimelines,\n };\n }\n\n private parseLineTimelines(lineElm: HTMLParagraphElement): {\n wordTimelines: CreateLineArgs[\"timelines\"];\n } {\n if (!isElementNode(lineElm) || !confirmTag(\"p\")(lineElm)) {\n throw new Error(\"Invalid TTML format\");\n }\n\n const wordTimelines: CreateLineArgs[\"timelines\"] = [];\n const { begin, end } = getTime(lineElm);\n\n const timingType =\n Array.from(lineElm.children)\n .filter(isElementNode)\n .filter(confirmTag(\"span\")).length > 0\n ? TIMING_TYPE.Word\n : TIMING_TYPE.Line;\n\n if (timingType === TIMING_TYPE.Line) {\n if (lineElm.textContent === null) {\n return {\n wordTimelines,\n };\n }\n\n wordTimelines.push({\n wordID: crypto.randomUUID(),\n begin,\n end,\n text: lineElm.textContent,\n });\n\n return {\n wordTimelines,\n };\n }\n\n const spans = Array.from(lineElm.querySelectorAll(\"span\"));\n\n return spans.reduce<{\n wordTimelines: CreateLineArgs[\"timelines\"];\n }>(\n (acc, spanElm, wordIndex) => {\n const last = acc.wordTimelines[acc.wordTimelines.length - 1];\n const { begin, end } = getTime(spanElm);\n const beforeElm = spans[wordIndex - 1];\n const { end: beforeEnd } = beforeElm\n ? getTime(beforeElm)\n : { end: begin };\n const hasWhitespace =\n beforeEnd - begin > 0.1 ||\n (spanElm.nextSibling !== null && spanElm.nextSibling.nodeType === 3);\n acc.wordTimelines[0] = {\n ...acc.wordTimelines[0],\n begin: last?.begin || begin,\n end,\n text: `${last?.text || \"\"}${spanElm.textContent}${\n hasWhitespace ? \" \" : \"\"\n }`,\n };\n return acc;\n },\n { wordTimelines },\n );\n }\n}\n\nexport default TTMLParser;\n"], "mappings": "AAAA,OAIE,eAAAA,MACK,cAEP,IAAMC,EAAiBC,GAAY,KAAK,eAAiBA,EAAE,SAErDC,EAAcC,GAA0B,CAC5C,IAAMC,EAAUD,EAAE,YAAY,EAC9B,OAAQ,GACC,EAAE,SAAS,YAAY,IAAMC,CAExC,EAEA,SAASC,EAAUF,EAAmB,CACpC,OAAIA,IAAM,GACD,EAELA,EAAE,SAAS,GAAG,EACT,OAAOA,EAAE,MAAM,EAAG,EAAE,CAAC,EAEvB,OAAOA,EAAE,MAAM,GAAG,EAAE,OAAO,CAACG,EAAKC,IAAS,GAAKD,EAAM,OAAOC,CAAI,EAAG,CAAC,CAAC,CAC9E,CAEA,IAAMC,EAAWC,IACR,CACL,MAAOJ,EAAUI,EAAE,aAAa,OAAO,GAAK,EAAE,EAC9C,IAAKJ,EAAUI,EAAE,aAAa,KAAK,GAAK,EAAE,CAC5C,GAGIC,EAAc,CAClB,KAAM,OACN,KAAM,MACR,EAEaC,EAAN,KAAiB,CACtB,cACA,mBACA,UAEA,YAAYC,EAIT,CACD,KAAK,cAAgBA,EAAQA,EAAM,cAAgB,OACnD,KAAK,mBAAqBA,EAAQA,EAAM,mBAAqB,OAC7D,KAAK,UAAYA,EAAQA,EAAM,UAAY,MAC7C,CAEA,MAAa,MAAMC,EAAmBC,EAAoB,CACxD,IAAMC,EAAWV,EACfQ,EAAK,cAAc,MAAM,GAAG,aAAa,KAAK,GAAK,EACrD,EACMG,EAA0C,CAAC,EAC3CC,EAAaJ,EAAK,iBAAiB,KAAK,EAE9C,QAAWK,KAAgB,MAAM,KAAKD,CAAU,EAAG,CACjD,GAAM,CAAE,cAAAE,CAAc,EAAI,KAAK,wBAAwBD,CAAY,EACnEF,EAAU,KAAKG,CAAa,CAC9B,CAWA,OATc,MAAMpB,EAAY,CAC9B,WAAAe,EACA,SAAAC,EACA,UAAAC,EACA,cAAe,KAAK,cACpB,mBAAoB,KAAK,mBACzB,UAAW,KAAK,SAClB,CAAC,CAGH,CAEQ,wBAAwBE,EAE9B,CACA,GAAI,CAAClB,EAAckB,CAAY,GAAK,CAAChB,EAAW,KAAK,EAAEgB,CAAY,EACjE,MAAM,IAAI,MAAM,qBAAqB,EAEvC,IAAME,EAAQF,EAAa,iBAAiB,GAAG,EACzCC,EAAkD,CAAC,EAEzD,QAAWE,KAAW,MAAM,KAAKD,CAAK,EAAG,CACvC,GAAM,CAAE,cAAAE,CAAc,EAAI,KAAK,mBAAmBD,CAAO,EACzDF,EAAc,KAAKG,CAAa,CAClC,CAEA,MAAO,CACL,cAAAH,CACF,CACF,CAEQ,mBAAmBE,EAEzB,CACA,GAAI,CAACrB,EAAcqB,CAAO,GAAK,CAACnB,EAAW,GAAG,EAAEmB,CAAO,EACrD,MAAM,IAAI,MAAM,qBAAqB,EAGvC,IAAMC,EAA6C,CAAC,EAC9C,CAAE,MAAAC,EAAO,IAAAC,CAAI,EAAIhB,EAAQa,CAAO,EAStC,IANE,MAAM,KAAKA,EAAQ,QAAQ,EACxB,OAAOrB,CAAa,EACpB,OAAOE,EAAW,MAAM,CAAC,EAAE,OAAS,EACnCQ,EAAY,KACZA,EAAY,QAECA,EAAY,KAC7B,OAAIW,EAAQ,cAAgB,KACnB,CACL,cAAAC,CACF,GAGFA,EAAc,KAAK,CACjB,OAAQ,OAAO,WAAW,EAC1B,MAAAC,EACA,IAAAC,EACA,KAAMH,EAAQ,WAChB,CAAC,EAEM,CACL,cAAAC,CACF,GAGF,IAAMG,EAAQ,MAAM,KAAKJ,EAAQ,iBAAiB,MAAM,CAAC,EAEzD,OAAOI,EAAM,OAGX,CAACnB,EAAKoB,EAASC,IAAc,CAC3B,IAAMC,EAAOtB,EAAI,cAAcA,EAAI,cAAc,OAAS,CAAC,EACrD,CAAE,MAAAiB,EAAO,IAAAC,CAAI,EAAIhB,EAAQkB,CAAO,EAChCG,EAAYJ,EAAME,EAAY,CAAC,EAC/B,CAAE,IAAKG,CAAU,EAAID,EACvBrB,EAAQqB,CAAS,EACjB,CAAE,IAAKN,CAAM,EACXQ,EACJD,EAAYP,EAAQ,IACnBG,EAAQ,cAAgB,MAAQA,EAAQ,YAAY,WAAa,EACpE,OAAApB,EAAI,cAAc,CAAC,EAAI,CACrB,GAAGA,EAAI,cAAc,CAAC,EACtB,MAAOsB,GAAM,OAASL,EACtB,IAAAC,EACA,KAAM,GAAGI,GAAM,MAAQ,EAAE,GAAGF,EAAQ,WAAW,GAC7CK,EAAgB,IAAM,EACxB,EACF,EACOzB,CACT,EACA,CAAE,cAAAgB,CAAc,CAClB,CACF,CACF", "names": ["createLyric", "isElementNode", "n", "confirmTag", "t", "tagName", "parseTime", "acc", "time", "getTime", "e", "TIMING_TYPE", "TTMLParser", "props", "ttml", "resourceID", "duration", "timelines", "paragraphs", "paragraphElm", "lineTimelines", "lines", "lineElm", "wordTimelines", "begin", "end", "spans", "spanElm", "wordIndex", "last", "beforeElm", "beforeEnd", "hasWhitespace"] }