/*
 * @tricoteuses/senat
 * Version: (not specified)
 * Handle French Sénat's open data
 * 113 lines (112 loc) • 4.17 kB
 * JavaScript
 */
import { XMLParser } from "fast-xml-parser";
import { dice, normalize } from "./scoring";
import { decodeHtmlEntities } from "./string_cleaning";
// Minimum dice score a chapter must reach to be accepted as the match for an agenda title;
// below it, pickBestLevel1ChapterForAgenda falls back to the first chapter with a score of 0.
const CHAPTER_MATCH_THRESHOLD = 0.5;
// Shared XML parser for the NVS documents handled in this module.
// Attributes are kept and exposed under an "@_" prefix, which is why the code below
// reads keys such as "@_id", "@_label" and "@_timecode".
const xmlParser = new XMLParser({
ignoreAttributes: false,
attributeNamePrefix: "@_",
});
/**
 * Look up the timecode of a chapter in a player synchro document, in whole seconds.
 *
 * The document is parsed with the module-level `xmlParser`; the `synchro` entries found
 * under `player` are searched for the first one whose `id` attribute equals `chapterId`
 * (both sides are compared as strings).
 *
 * @param {string} finalPlayerNvs - XML text of the player document.
 * @param {string|number} chapterId - Identifier of the chapter to look up.
 * @returns {number|null} The timecode divided by 1000 and rounded down (the raw value is
 *   treated as milliseconds), or `null` when there is no synchro entry, no entry for this
 *   chapter, or the timecode is missing, blank or not a finite number.
 */
function getTimecodeForChapterId(finalPlayerNvs, chapterId) {
  const xml = xmlParser.parse(finalPlayerNvs);
  const synchros = xml?.player?.synchro;
  if (!synchros) {
    return null;
  }
  // A single <synchro> element is parsed as an object, several as an array.
  const entries = Array.isArray(synchros) ? synchros : [synchros];
  const wantedId = String(chapterId);
  const match = entries.find((entry) => String(entry?.["@_id"]) === wantedId);
  if (!match) {
    return null;
  }
  const rawTimecode = match["@_timecode"];
  // Number("") and Number("   ") are 0, so a blank attribute must be rejected explicitly,
  // otherwise it would be reported as a chapter starting at second 0.
  if (rawTimecode == null || String(rawTimecode).trim() === "") {
    return null;
  }
  const ms = Number(rawTimecode);
  // isFinite rejects NaN as well as +/-Infinity.
  if (!Number.isFinite(ms)) {
    return null;
  }
  return Math.floor(ms / 1000);
}
/**
 * Coerce a value into an array.
 *
 * @param {*} v - Any value.
 * @returns {Array} `v` itself when it is already an array, an empty array when `v` is
 *   falsy (null, undefined, "", 0, false, NaN), otherwise a one-element array holding `v`.
 */
function toArray(v) {
  if (Array.isArray(v)) {
    return v;
  }
  return v ? [v] : [];
}
/**
 * Extract the top-level chapters from a data NVS document.
 *
 * Chapters are read from `data.chapters.chapter`, falling back to `chapters.chapter`.
 * For each chapter node, the id and label are taken from a child value (`id` / `label`)
 * or, failing that, from the matching attribute (`@_id` / `@_label`).
 *
 * @param {string} dataNvs - XML text of the data document.
 * @returns {Array<{id: string, label: string, index: number}>} Chapters that have both a
 *   non-empty id and a non-empty label. `index` is the position of the node among the raw
 *   chapter nodes, i.e. before nodes are filtered out, so it can differ from the position
 *   in the returned array.
 */
export function getLevel1Chapters(dataNvs) {
  const xml = xmlParser.parse(dataNvs);
  const nodes = toArray(xml?.data?.chapters?.chapter ?? xml?.chapters?.chapter);
  const chapters = [];
  nodes.forEach((node, index) => {
    const rawId = node?.id ?? node?.["@_id"];
    // String(undefined) is the truthy string "undefined": without this check a chapter
    // lacking an id would survive the emptiness filter below with a bogus identifier.
    if (rawId == null) {
      return;
    }
    const rawLabel = node?.label ?? node?.["@_label"] ?? "";
    const id = String(rawId);
    const label = decodeHtmlEntities(String(rawLabel)).trim();
    if (id && label) {
      chapters.push({ id, label, index });
    }
  });
  return chapters;
}
/**
 * Choose the chapter whose label scores highest against an agenda title.
 *
 * The title is passed through `normalize`, then compared to every chapter label with
 * `dice`. The first chapter reaching the highest score wins (ties keep the earlier one).
 * NOTE(review): only the title is normalized here; labels are handed to `dice` as-is —
 * confirm that `dice` normalizes its arguments itself.
 *
 * @param {Array<{id: string, label: string, index: number}>} chapters - Candidate chapters.
 * @param {string} agendaTitle - Agenda title to match.
 * @returns {{chapter: object, score: number}} The best chapter with its score; when there
 *   are no chapters or the best score is below `CHAPTER_MATCH_THRESHOLD`, the first
 *   chapter (possibly `undefined`) with a score of 0.
 */
export function pickBestLevel1ChapterForAgenda(chapters, agendaTitle) {
  const query = normalize(agendaTitle);
  let hasLeader = false;
  let leadingChapter;
  let leadingScore;
  for (const candidate of chapters) {
    const candidateScore = dice(query, candidate.label);
    if (!hasLeader || candidateScore > leadingScore) {
      hasLeader = true;
      leadingChapter = candidate;
      leadingScore = candidateScore;
    }
  }
  const isRejected = !hasLeader || leadingScore < CHAPTER_MATCH_THRESHOLD;
  return isRejected
    ? { chapter: chapters[0], score: 0 }
    : { chapter: leadingChapter, score: leadingScore };
}
/**
 * Compute the start and end timecodes of the video segment matching an agenda item.
 *
 * The top-level chapters of `dataNvs` are matched against the agenda title with
 * `pickBestLevel1ChapterForAgenda`; the segment starts at the selected chapter and ends
 * where the chapter following it in the chapter list starts.
 *
 * @param {string} dataNvs - XML text of the data document (chapter list).
 * @param {string} finalPlayerNvs - XML text of the player document (chapter timecodes).
 * @param {string} agendaTitleOrObjet - Agenda title used to select the chapter.
 * @returns {{start: number, end: (number|null), chapterId: string,
 *   nextChapterId: (string|null), score: number}|null} The segment description, or `null`
 *   when there are no chapters, no chapter could be selected, or the selected chapter has
 *   no timecode. `end` and `nextChapterId` are `null` when there is no following chapter;
 *   `end` is also `null` when the following chapter has no timecode.
 */
export function getAgendaSegmentTimecodes(dataNvs, finalPlayerNvs, agendaTitleOrObjet) {
  const l1 = getLevel1Chapters(dataNvs);
  if (l1.length === 0) {
    return null;
  }
  const best = pickBestLevel1ChapterForAgenda(l1, agendaTitleOrObjet);
  if (!best?.chapter) {
    return null;
  }
  const { chapter, score } = best;
  // `chapter.index` is the chapter's position before getLevel1Chapters filtered the list,
  // so it cannot be used to index `l1` once any chapter has been dropped. Use the actual
  // position in `l1`, falling back to `index` only if the object is not found there.
  const position = l1.indexOf(chapter);
  const next = l1[(position === -1 ? chapter.index : position) + 1] ?? null;
  const start = getTimecodeForChapterId(finalPlayerNvs, chapter.id);
  if (start == null) {
    return null;
  }
  const end = next ? getTimecodeForChapterId(finalPlayerNvs, next.id) : null;
  return {
    start,
    end,
    chapterId: chapter.id,
    nextChapterId: next?.id ?? null,
    score,
  };
}
/**
 * Extract a few metadata fields from the raw text of a data NVS document using regular
 * expressions (the document is not parsed as XML here).
 *
 * @param {string} nvs - Raw text of the data document.
 * @returns {{epoch: (number|undefined), organes: string[],
 *   firstChapterLabel: (string|undefined), salle: string}}
 *   - `epoch`: numeric value of the `date` metadata, or `undefined` when absent.
 *   - `organes`: decoded, trimmed labels of every `organes` metadata tag; defaults to
 *     `["Séance publique"]` when none is found.
 *   - `firstChapterLabel`: decoded, trimmed label of the first `<chapter>` tag carrying a
 *     label, or `undefined` when there is none.
 *   - `salle`: decoded, trimmed value of the `salle` metadata, or `""` when absent.
 */
export function parseDataNvs(nvs) {
  const epochStr = nvs.match(/<metadata\s+name="date"\s+value="(\d+)"/i)?.[1];
  const epoch = epochStr ? Number(epochStr) : undefined;

  // Guard the missing case like every other field: decodeHtmlEntities is only ever
  // handed actual strings elsewhere in this function.
  const salleRaw = nvs.match(/<metadata\s+name="salle"\s+value="([^"]+)"/i)?.[1];
  const salle = salleRaw == null ? "" : decodeHtmlEntities(salleRaw).trim();

  // There can be multiple organes for one video in the metadata.
  const organes = [];
  for (const [tag] of nvs.matchAll(/<metadata\b[^>]*\bname="organes"[^>]*>/gi)) {
    const label = tag.match(/\blabel="([^"]+)"/i)?.[1];
    if (!label) {
      continue;
    }
    const decoded = decodeHtmlEntities(label).trim();
    if (decoded) {
      organes.push(decoded);
    }
  }
  if (organes.length === 0) {
    organes.push("Séance publique");
  }

  const firstChapterLabelMatch = nvs.match(/<chapter\b[^>]*\blabel="([^"]+)"/i);
  const firstChapterLabel = firstChapterLabelMatch
    ? decodeHtmlEntities(firstChapterLabelMatch[1]).trim()
    : undefined;

  return { epoch, organes, firstChapterLabel, salle };
}
/**
 * Derive the VOD master playlist URL from the `serverfiles://` reference found in NVS text.
 *
 * Expected reference shape (matched case-insensitively):
 *   serverfiles://senat/2025/10/encoder10_20251022084451_2.mp4
 * The optional trailing `_<digits>` suffix before `.mp4` is not part of the resulting URL.
 *
 * @param {string} nvsText - NVS text to search.
 * @returns {string|null} `https://vodsenat.akamaized.net/senat/<yyyy>/<mm>/<encoder>_<stamp>.smil/master.m3u8`
 *   built from the first matching reference, or `null` when no reference is found.
 */
export function buildSenatVodMasterM3u8FromNvs(nvsText) {
  const reference = nvsText.match(/serverfiles:\/\/senat\/(\d{4})\/(\d{2})\/(encoder\d+)_([0-9]{13,14})(?:_[0-9]+)?\.mp4/i);
  if (reference === null) {
    return null;
  }
  const year = reference[1];
  const month = reference[2];
  const encoderName = reference[3];
  const timestamp = reference[4];
  return `https://vodsenat.akamaized.net/senat/${year}/${month}/${encoderName}_${timestamp}.smil/master.m3u8`;
}