UNPKG

@tricoteuses/senat

Version:

Handle French Sénat's open data

113 lines (112 loc) 4.17 kB
import { XMLParser } from "fast-xml-parser";
import { dice, normalize } from "./scoring";
import { decodeHtmlEntities } from "./string_cleaning";

/**
 * Minimum score a chapter label must reach to be accepted as the match for an
 * agenda title; below it, the first chapter is used with a score of 0.
 */
const CHAPTER_MATCH_THRESHOLD = 0.5;

// Attributes are kept and exposed with an "@_" prefix (e.g. `@_id`, `@_timecode`).
const xmlParser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: "@_",
});

/**
 * Looks up the timecode of a chapter in the "final player" NVS document.
 *
 * @param {string} finalPlayerNvs - XML text containing `<player><synchro .../></player>` entries.
 * @param {string|number} chapterId - Chapter id, compared as a string against each `id` attribute.
 * @returns {number|null} The `timecode` attribute divided by 1000 and floored, or `null`
 *   when no entry matches, the attribute is missing, or it is not numeric.
 */
function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
  const xml = xmlParser.parse(finalPlayerNvs);
  // A single <synchro> is parsed as an object, several as an array: normalise both.
  const synchros = toArray(xml?.player?.synchro);
  const wantedId = String(chapterId);
  const match = synchros.find((s) => String(s["@_id"]) === wantedId);
  if (!match) return null;

  const rawTimecode = match["@_timecode"];
  if (rawTimecode == null) return null;

  const ms = Number(rawTimecode);
  if (Number.isNaN(ms)) return null;
  // Presumably the raw value is in milliseconds and callers want seconds — confirm against the feed.
  return Math.floor(ms / 1000);
}

/**
 * Normalises a value to an array.
 *
 * @param {*} v - Any value.
 * @returns {Array} `[]` for any falsy value, `v` itself if it is already an array, `[v]` otherwise.
 */
function toArray(v) {
  if (!v) return [];
  return Array.isArray(v) ? v : [v];
}

/**
 * Extracts the chapters found under `data.chapters.chapter` (or `chapters.chapter`)
 * of a data NVS document.
 *
 * Chapters without an id or without a non-empty label are dropped.
 *
 * @param {string} dataNvs - XML text of the data NVS document.
 * @returns {Array<{id: string, label: string, index: number}>} Kept chapters, in document
 *   order. `index` is the chapter's position in the parsed list BEFORE filtering, so it
 *   may differ from its position in the returned array.
 */
export function getLevel1Chapters(dataNvs) {
  const xml = xmlParser.parse(dataNvs);
  const root = xml?.data?.chapters?.chapter ?? xml?.chapters?.chapter;

  return toArray(root)
    .map((ch, i) => {
      // Accept the value either as a child element or as an attribute.
      const id = ch?.id ?? ch?.["@_id"];
      const labelRaw = ch?.label ?? ch?.["@_label"] ?? "";
      return {
        // String(undefined) would yield the truthy "undefined" and slip through the
        // filter below, so a missing id is mapped to "" instead.
        id: id == null ? "" : String(id),
        label: decodeHtmlEntities(String(labelRaw)).trim(),
        index: i,
      };
    })
    .filter((c) => c.id && c.label);
}

/**
 * Picks the chapter whose label scores highest against an agenda title.
 *
 * The title is passed through `normalize` and each label is scored with `dice`
 * (both imported from "./scoring"; their exact semantics are not visible here).
 *
 * @param {Array<{id: string, label: string, index: number}>} chapters - Candidate chapters.
 * @param {string} agendaTitle - Agenda title to match.
 * @returns {{chapter: object, score: number}} The best-scoring chapter, or
 *   `{ chapter: chapters[0], score: 0 }` when nothing reaches CHAPTER_MATCH_THRESHOLD
 *   (so `chapter` is `undefined` if `chapters` is empty).
 */
export function pickBestLevel1ChapterForAgenda(chapters, agendaTitle) {
  const q = normalize(agendaTitle);
  let best = null;
  for (const ch of chapters) {
    const s = dice(q, ch.label);
    if (!best || s > best.score) best = { chapter: ch, score: s };
  }
  if (!best || best.score < CHAPTER_MATCH_THRESHOLD) {
    return { chapter: chapters[0], score: 0 };
  }
  return best;
}

/**
 * Computes the start/end timecodes of the video segment matching an agenda item.
 *
 * The start is the timecode of the best-matching chapter; the end is the timecode
 * of the chapter that follows it in the kept chapter list, if any.
 *
 * @param {string} dataNvs - XML text of the data NVS document (chapters).
 * @param {string} finalPlayerNvs - XML text of the final player NVS document (timecodes).
 * @param {string} agendaTitleOrObjet - Agenda title (or "objet") to match against chapter labels.
 * @returns {{start: number, end: number|null, chapterId: string, nextChapterId: string|null, score: number}|null}
 *   `null` when there is no chapter or the matched chapter has no timecode.
 */
export function getAgendaSegmentTimecodes(dataNvs, finalPlayerNvs, agendaTitleOrObjet) {
  const l1 = getLevel1Chapters(dataNvs);
  if (!l1.length) return null;

  const best = pickBestLevel1ChapterForAgenda(l1, agendaTitleOrObjet);
  if (!best) return null;

  const chapter = best.chapter;
  // `chapter.index` is the pre-filter position and can be off once chapters were
  // dropped, so locate the chapter in the filtered list to find its successor.
  const position = l1.indexOf(chapter);
  const next = position >= 0 ? (l1[position + 1] ?? null) : null;

  const start = getTimecodeForChapterId(finalPlayerNvs, chapter.id);
  if (start == null) return null;
  const end = next ? getTimecodeForChapterId(finalPlayerNvs, next.id) : null;

  return {
    start,
    end,
    chapterId: chapter.id,
    nextChapterId: next?.id ?? null,
    score: best.score,
  };
}

/**
 * Extracts a few metadata fields from a data NVS document using regular expressions.
 *
 * @param {string} nvs - Raw text of the data NVS document.
 * @returns {{epoch: number|undefined, organes: string[], firstChapterLabel: string|undefined, salle: string}}
 *   `epoch` is the numeric value of the "date" metadata (undefined if absent);
 *   `organes` lists the decoded labels of every "organes" metadata tag, defaulting to
 *   ["Séance publique"] when none is found; `firstChapterLabel` is the decoded label of
 *   the first `<chapter>` tag; `salle` is the decoded "salle" metadata value.
 */
export function parseDataNvs(nvs) {
  const epochStr = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
  const epoch = epochStr ? Number(epochStr) : undefined;

  // NOTE(review): when the "salle" metadata is absent, decodeHtmlEntities receives
  // undefined; this relies on that helper returning a string in that case — confirm.
  const salle = decodeHtmlEntities(nvs.match(/<metadata\s+name="salle"\s+value="([^"]+)"/i)?.[1]).trim();

  // There can be multiple organes for one video in meta
  const organes = [];
  const organesRegex = /<metadata\b[^>]*\bname="organes"[^>]*>/gi;
  let m;
  while ((m = organesRegex.exec(nvs)) !== null) {
    const tag = m[0];
    const label = tag.match(/\blabel="([^"]+)"/i)?.[1];
    if (label) {
      const decoded = decodeHtmlEntities(label).trim();
      if (decoded) organes.push(decoded);
    }
  }
  if (organes.length === 0) {
    organes.push("Séance publique");
  }

  const firstChapterLabelMatch = nvs.match(/<chapter\b[^>]*\blabel="([^"]+)"/i);
  const firstChapterLabel = firstChapterLabelMatch
    ? decodeHtmlEntities(firstChapterLabelMatch[1]).trim()
    : undefined;

  return { epoch, organes, firstChapterLabel, salle };
}

/**
 * Builds the VOD master playlist URL from the `serverfiles://` reference found in an NVS document.
 *
 * @param {string} nvsText - Raw text of the NVS document.
 * @returns {string|null} A `https://vodsenat.akamaized.net/...smil/master.m3u8` URL, or
 *   `null` when no matching `serverfiles://senat/...mp4` reference is present.
 */
export function buildSenatVodMasterM3u8FromNvs(nvsText) {
  // serverfiles://senat/2025/10/encoder10_20251022084451_2.mp4
  const m = nvsText.match(
    /serverfiles:\/\/senat\/(\d{4})\/(\d{2})\/(encoder\d+)_([0-9]{13,14})(?:_[0-9]+)?\.mp4/i,
  );
  if (!m) return null;

  // The optional trailing "_<n>" suffix is matched but deliberately left out of the URL.
  const [, yyyy, mm, encoder, stamp] = m;
  const base = `https://vodsenat.akamaized.net/senat/${yyyy}/${mm}/${encoder}_${stamp}`;
  return `${base}.smil/master.m3u8`;
}