UNPKG

@tricoteuses/senat

Version:

Handle French Sénat's open data

135 lines (134 loc) 5.85 kB
import { isAmbiguousTimeOriginal } from "../utils/date";
import { buildSenatVodMasterM3u8FromNvs, getLevel1Chapters, parseDataNvs } from "../utils/nvs-parsing";
import { dice, getOrgKey, normalize, scoreVideo } from "../utils/scoring";
import { SENAT_DATAS_ROOT, weights } from "./config";
import { fetchBuffer } from "./search";

/**
 * Pick the candidate video that best matches one agenda entry ("reunion").
 *
 * Each candidate's `data.nvs` payload is fetched, filtered by an organe gate,
 * scored with `scoreVideo`, optionally filtered by a title-dominance gate and
 * then ranked. The top candidate is returned only if it passes the acceptance
 * threshold and is separated from the runner-up by at least `weights.margin`.
 *
 * @param {object} args
 * @param {object} args.agenda - Agenda entry; `uid`, `type`, `organe`, `titre`
 *   and `startTime` are read here, and the object is forwarded to `scoreVideo`.
 * @param {*} args.agendaTs - Forwarded untouched to `scoreVideo`.
 * @param {boolean} args.timeAmbigious - Forwarded to `scoreVideo`; also enables
 *   the "séance publique" double-result special case.
 * @param {Array<object>} args.candidates - Candidate videos (`id`, `hash`,
 *   `pageUrl`, `title`, `isSeancePublique` are read).
 * @param {object} [args.weights] - Scoring weights and thresholds. Required as
 *   soon as one candidate reaches the scoring step.
 * @param {Function} args.fetchDataNvs - Async `(candidate) => data`; a falsy
 *   result skips the candidate. The result is decoded with
 *   `.toString("utf-8")` (presumably a Buffer — confirm against callers).
 * @param {object} [args.options] - `options.silent` truthy disables logging.
 *   A missing `options` object is treated as "not silent".
 * @returns {Promise<null | {best: object, secondBest: (object|null), reason?: string}>}
 *   `null` when nothing is acceptable; otherwise the best ranked entry, plus
 *   the runner-up and a `reason` in the ambiguous-time séance publique case.
 * @throws {Error} When `weights` is missing and a candidate reaches scoring.
 */
export async function matchOneReunion(args) {
  const { agenda, agendaTs, timeAmbigious, candidates, weights, fetchDataNvs, options } = args;
  // Tolerate a missing `options` object instead of throwing a TypeError.
  const silent = Boolean(options?.silent);
  if (!silent) console.log(`Matching video for reunion: ${agenda?.uid}`);

  const isSP = (agenda.type ?? "").toLowerCase().includes("séance publique");
  const minAcceptBase = weights?.minAccept;
  const margin = weights?.margin;
  const titleDominance = weights?.titleDominance;
  const orgUncertainPenalty = weights?.orgUncertainPenalty; // 1 disables (score is multiplied by it)
  const orgSkipDice = weights?.orgSkipDice;

  // The acceptance threshold is raised when there are many candidates.
  const minAccept =
    minAcceptBase + (candidates.length >= 20 ? 0.08 : candidates.length >= 10 ? 0.05 : 0);

  const vw = weights
    ? {
        wTitle: weights.wTitle,
        wOrg: weights.wOrg,
        wSalle: weights.wSalle,
        wTime: weights.wTime,
        sameOrgBonus: weights.sameOrgBonus,
        // Séance publique may use its own title threshold when configured.
        titleMin: isSP ? (weights.spTitleMin ?? weights.titleMin) : weights.titleMin,
      }
    : undefined;

  // Keep ranked to compute margin (best vs second best)
  const ranked = [];
  for (const c of candidates) {
    const dataBuf = await fetchDataNvs(c);
    if (!dataBuf) continue;
    const dataStr = dataBuf.toString("utf-8");
    const m3u8 = buildSenatVodMasterM3u8FromNvs(dataStr);
    if (!m3u8) continue;
    const meta = parseDataNvs(dataStr);

    let sameOrg = false;
    // Organe gate (same key OR strong dice)
    if (agenda.organe && meta.organes?.length) {
      const agendaOrgNorm = normalize(agenda.organe);
      const agendaKey = getOrgKey(agendaOrgNorm);
      let bestDice = 0;
      let hasSameKey = false;
      for (const vo of meta.organes) {
        const videoOrgNorm = normalize(vo);
        const videoKey = getOrgKey(videoOrgNorm);
        const d = dice(agendaOrgNorm, videoOrgNorm);
        // The "autre" key never counts as a same-organe match.
        if (videoKey === agendaKey && videoKey !== "autre") hasSameKey = true;
        if (d > bestDice) bestDice = d;
      }
      if (hasSameKey) {
        sameOrg = true;
      } else if (bestDice < orgSkipDice) {
        continue;
      }
    }

    let videoTitle = c.title;
    const chapterVideoTitles = getLevel1Chapters(dataStr);
    if (c.isSeancePublique && meta.firstChapterLabel) {
      videoTitle = meta.firstChapterLabel;
    }

    // Checked lazily (only once a candidate reaches scoring) so that callers
    // with no usable candidate still get `null`, as they always did.
    if (!vw) throw new Error("matchOneReunion: missing weights (vw)");

    const { score: rawScore, signals } = scoreVideo(
      agenda,
      agendaTs,
      sameOrg,
      vw,
      videoTitle,
      meta.epoch,
      meta.organes,
      timeAmbigious,
      meta.salle,
      chapterVideoTitles,
    );
    const s = sameOrg ? rawScore : rawScore * orgUncertainPenalty;

    // Optional: title dominance gate (reject if score isn't "driven" enough by title)
    if (titleDominance > 0 && signals.titleScore < titleDominance * s) {
      continue;
    }

    ranked.push({
      id: c.id,
      hash: c.hash,
      pageUrl: c.pageUrl,
      epoch: meta.epoch,
      vtitle: videoTitle,
      score: s,
      vorgane: meta.organes?.[0],
      vsalle: meta.salle,
      m3u8,
      signals,
    });
  }

  if (!ranked.length) return null;
  ranked.sort((a, b) => b.score - a.score);
  const best = ranked[0];
  const second = ranked[1] ?? null;

  // Accept threshold (best must pass)
  if (best.score < minAccept) {
    if (!silent) {
      console.log(`[threshold] ${agenda.uid} score=${best.score.toFixed(2)} agenda title="${agenda.titre ?? ""}" best title="${best.vtitle ?? ""}"`);
    }
    return null;
  }

  // Margin (confidence) gate
  if (second && best.score - second.score < margin) {
    if (!silent) {
      console.log(`[Margin] ${agenda.uid} score=${best.score.toFixed(2)} agenda title="${agenda.titre ?? ""}" agenda date="${agenda.startTime ?? ""}" best title="${best.vtitle ?? ""}" date="${best.epoch ? new Date(best.epoch * 1000).toISOString() : "null"}"`);
      console.log("second title=", second.vtitle ?? "", " score=", second.score.toFixed(2), "date=", second.epoch ? new Date(second.epoch * 1000).toISOString() : "null");
    }
    if (isSP && timeAmbigious) {
      // Optional safety: second must also be "acceptable"
      if (second.score >= minAccept) {
        // Guarded like every other log so that silent mode stays silent.
        if (!silent) {
          console.log(" - special case: séance publique + time ambigious => accepting best + secondBest (bis)");
        }
        return { best, secondBest: second, reason: "margin_ambiguous_time_sp" };
      }
    }
    return null;
  }

  return { best, secondBest: null };
}

/**
 * Match an agenda entry against candidate videos using the module-level
 * `weights` from `./config` and the default `data.nvs` fetcher.
 *
 * The time ambiguity flag is computed from the first agenda event's
 * `timeOriginal` and handed to `matchOneReunion`.
 *
 * @param {object} args
 * @param {object} args.agenda - Agenda entry; `agenda.events[0].timeOriginal`
 *   must be reachable.
 * @param {*} args.agendaTs - Forwarded to `matchOneReunion`.
 * @param {Array<object>} args.candidates - Candidate videos; `id` and `hash`
 *   are used to build the `data.nvs` URL.
 * @param {object} [args.options] - `options.silent` truthy disables logging.
 * @returns {Promise<null | {best: object, secondBest: (object|null), reason?: string}>}
 *   Whatever `matchOneReunion` resolves to.
 */
export async function matchAgendaToVideo(args) {
  const { agenda, agendaTs, candidates, options } = args;
  const timeAmbigious = isAmbiguousTimeOriginal(agenda.events[0].timeOriginal);
  if (timeAmbigious && !options?.silent) {
    console.log(`[match] ${agenda.uid} timeOriginal ambiguous => ignoring time scoring: "${agenda.events[0].timeOriginal}"`);
  }
  return matchOneReunion({
    agenda,
    agendaTs,
    timeAmbigious,
    candidates,
    weights,
    options,
    fetchDataNvs: (c) => fetchBuffer(`${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`),
  });
}