@tricoteuses/senat
Version:
Handle French Sénat's open data
135 lines (134 loc) • 5.85 kB
JavaScript
import { isAmbiguousTimeOriginal } from "../utils/date";
import { buildSenatVodMasterM3u8FromNvs, getLevel1Chapters, parseDataNvs } from "../utils/nvs-parsing";
import { dice, getOrgKey, normalize, scoreVideo } from "../utils/scoring";
import { SENAT_DATAS_ROOT, weights } from "./config";
import { fetchBuffer } from "./search";
/**
 * Organe gate: decide whether a candidate video may be scored against the agenda.
 *
 * The gate only applies when both the agenda organe and at least one video organe
 * are present; otherwise the candidate is let through with `sameOrg: false`.
 *
 * @param {string | null | undefined} agendaOrgane - Organe label read from the agenda.
 * @param {string[] | null | undefined} videoOrganes - Organe labels parsed from the video metadata.
 * @param {number | undefined} orgSkipDice - Candidates whose best dice similarity is below this are skipped.
 * @returns {{ sameOrg: boolean, skip: boolean }} `sameOrg` is true when an organe key matches;
 *   `skip` is true when the candidate must be discarded.
 */
function evaluateOrganeGate(agendaOrgane, videoOrganes, orgSkipDice) {
  if (!agendaOrgane || !videoOrganes?.length) {
    return { sameOrg: false, skip: false };
  }
  const agendaOrgNorm = normalize(agendaOrgane);
  const agendaKey = getOrgKey(agendaOrgNorm);
  let bestDice = 0;
  let hasSameKey = false;
  for (const videoOrgane of videoOrganes) {
    const videoOrgNorm = normalize(videoOrgane);
    const videoKey = getOrgKey(videoOrgNorm);
    const similarity = dice(agendaOrgNorm, videoOrgNorm);
    // The "autre" key is never treated as a match, even when both sides map to it.
    if (videoKey === agendaKey && videoKey !== "autre") {
      hasSameKey = true;
    }
    if (similarity > bestDice) {
      bestDice = similarity;
    }
  }
  if (hasSameKey) {
    return { sameOrg: true, skip: false };
  }
  // No key match: keep the candidate only if some organe label is textually close enough.
  // If orgSkipDice is undefined the comparison is false, so nothing is skipped.
  return { sameOrg: false, skip: bestDice < orgSkipDice };
}

/**
 * Pick the candidate video that best matches one agenda reunion.
 *
 * Each candidate's data.nvs payload is fetched (sequentially), filtered by the organe
 * gate, scored with `scoreVideo`, optionally filtered by the title-dominance gate, then
 * ranked by score. The best candidate is returned only if it reaches the acceptance
 * threshold and is separated from the runner-up by at least `weights.margin`.
 *
 * @param {object} args
 * @param {object} args.agenda - Reunion to match; `uid`, `type`, `organe`, `titre` and `startTime` are read here.
 * @param {*} args.agendaTs - Forwarded unchanged to `scoreVideo`.
 * @param {boolean} args.timeAmbigious - Forwarded to `scoreVideo`; also enables the
 *   "séance publique" special case of the margin gate.
 * @param {Array<object>} args.candidates - Candidate videos; `id`, `hash`, `pageUrl`, `title`
 *   and `isSeancePublique` are read here.
 * @param {object} [args.weights] - Scoring weights and gate thresholds. Required as soon as a
 *   candidate reaches scoring.
 * @param {(candidate: object) => Promise<{ toString(encoding: string): string } | null | undefined>} args.fetchDataNvs
 *   Loads the data.nvs payload of a candidate; a falsy result skips the candidate.
 * @param {object} [args.options] - `options.silent` truthy suppresses all console output.
 * @returns {Promise<null | { best: object, secondBest: object | null, reason?: string }>}
 *   `null` when nothing is acceptable or the match is not confident enough.
 * @throws {Error} When `weights` is missing and a candidate reaches the scoring step.
 */
export async function matchOneReunion(args) {
  const { agenda, agendaTs, timeAmbigious, candidates, weights, fetchDataNvs, options } = args;
  // Tolerate a missing options object instead of crashing on the property read.
  const silent = Boolean(options?.["silent"]);
  if (!silent) {
    console.log(`Matching video for reunion: ${agenda?.uid}`);
  }
  const isSP = (agenda.type ?? "").toLowerCase().includes("séance publique");
  const minAcceptBase = weights?.minAccept;
  const margin = weights?.margin;
  const titleDominance = weights?.titleDominance;
  // 1 disables the penalty; fall back to it so a missing weight cannot turn every score into NaN.
  const orgUncertainPenalty = weights?.orgUncertainPenalty ?? 1;
  const orgSkipDice = weights?.orgSkipDice;
  // Raise the acceptance bar when there are many candidates to choose from.
  const minAccept = minAcceptBase + (candidates.length >= 20 ? 0.08 : candidates.length >= 10 ? 0.05 : 0);
  const vw = weights
    ? {
        wTitle: weights.wTitle,
        wOrg: weights.wOrg,
        wSalle: weights.wSalle,
        wTime: weights.wTime,
        sameOrgBonus: weights.sameOrgBonus,
        // "Séance publique" reunions may use a dedicated minimum title score.
        titleMin: isSP ? (weights.spTitleMin ?? weights.titleMin) : weights.titleMin,
      }
    : undefined;

  // Keep ranked to compute margin (best vs second best)
  const ranked = [];
  for (const c of candidates) {
    const dataBuf = await fetchDataNvs(c);
    if (!dataBuf) {
      continue;
    }
    const dataStr = dataBuf.toString("utf-8");
    const m3u8 = buildSenatVodMasterM3u8FromNvs(dataStr);
    if (!m3u8) {
      continue;
    }
    const meta = parseDataNvs(dataStr);

    // Organe gate (same key OR strong dice)
    const { sameOrg, skip } = evaluateOrganeGate(agenda.organe, meta.organes, orgSkipDice);
    if (skip) {
      continue;
    }

    const chapterVideoTitles = getLevel1Chapters(dataStr);
    // For "séance publique" candidates, score against the first chapter label when there is one.
    const videoTitle = c.isSeancePublique && meta.firstChapterLabel ? meta.firstChapterLabel : c.title;

    // Checked lazily, once a candidate is about to be scored, so calls that never
    // reach scoring still return null instead of throwing.
    if (!vw) {
      throw new Error("matchOneReunion: missing weights (vw)");
    }
    const { score: rawScore, signals } = scoreVideo(
      agenda,
      agendaTs,
      sameOrg,
      vw,
      videoTitle,
      meta.epoch,
      meta.organes,
      timeAmbigious,
      meta.salle,
      chapterVideoTitles,
    );
    const s = sameOrg ? rawScore : rawScore * orgUncertainPenalty;

    // Optional: title dominance gate (reject if score isn't "driven" enough by title)
    if (titleDominance > 0 && signals.titleScore < titleDominance * s) {
      continue;
    }
    ranked.push({
      id: c.id,
      hash: c.hash,
      pageUrl: c.pageUrl,
      epoch: meta.epoch,
      vtitle: videoTitle,
      score: s,
      vorgane: meta.organes?.[0],
      vsalle: meta.salle,
      m3u8,
      signals,
    });
  }

  if (!ranked.length) {
    return null;
  }
  ranked.sort((a, b) => b.score - a.score);
  const best = ranked[0];
  const second = ranked[1] ?? null;

  // Accept threshold (best must pass)
  if (best.score < minAccept) {
    if (!silent) {
      console.log(
        [
          `[threshold] ${agenda.uid} score=${best.score.toFixed(2)}`,
          `agenda title="${agenda.titre ?? ""}"`,
          `best title="${best.vtitle ?? ""}"`,
        ].join("\n"),
      );
    }
    return null;
  }

  // Margin (confidence) gate
  if (second && best.score - second.score < margin) {
    if (!silent) {
      console.log(
        [
          `[Margin] ${agenda.uid} score=${best.score.toFixed(2)}`,
          `agenda title="${agenda.titre ?? ""}" agenda date="${agenda.startTime ?? ""}"`,
          `best title="${best.vtitle ?? ""}" date="${best.epoch ? new Date(best.epoch * 1000).toISOString() : "null"}"`,
        ].join("\n"),
      );
      console.log(
        "second title=",
        second.vtitle ?? "",
        " score=",
        second.score.toFixed(2),
        "date=",
        second.epoch ? new Date(second.epoch * 1000).toISOString() : "null",
      );
    }
    // Optional safety: second must also be "acceptable"
    if (isSP && timeAmbigious && second.score >= minAccept) {
      // This message used to be printed even in silent mode; it now follows the flag like every other log.
      if (!silent) {
        console.log(" - special case: séance publique + time ambigious => accepting best + secondBest (bis)");
      }
      return { best, secondBest: second, reason: "margin_ambiguous_time_sp" };
    }
    return null;
  }
  return { best, secondBest: null };
}
/**
 * Match an agenda reunion to a video using the module-level `weights` and the
 * Sénat data.nvs location for each candidate.
 *
 * The original time string of the first agenda event decides whether the time is
 * treated as ambiguous; that flag, together with the caller's arguments, is handed
 * to `matchOneReunion`.
 *
 * @param {object} args
 * @param {object} args.agenda - Reunion to match; `uid` and `events[0].timeOriginal` are read here.
 * @param {*} args.agendaTs - Forwarded unchanged to `matchOneReunion`.
 * @param {Array<object>} args.candidates - Candidate videos; `id` and `hash` are used to build the data.nvs URL.
 * @param {object} args.options - `options.silent` truthy suppresses the ambiguity log.
 * @returns {Promise<*>} Whatever `matchOneReunion` resolves to.
 */
export async function matchAgendaToVideo(args) {
  const { agenda, agendaTs, candidates, options } = args;
  const originalTime = agenda.events[0].timeOriginal;
  const timeAmbigious = isAmbiguousTimeOriginal(originalTime);

  // options is only consulted when there is something to report.
  const shouldReport = timeAmbigious && !options["silent"];
  if (shouldReport) {
    console.log(`[match] ${agenda.uid} timeOriginal ambiguous => ignoring time scoring: "${originalTime}"`);
  }

  // Each candidate's payload lives under "<root>/<id>_<hash>/content/data.nvs".
  const fetchDataNvs = (c) => fetchBuffer(`${SENAT_DATAS_ROOT}/${c.id}_${c.hash}/content/data.nvs`);

  return matchOneReunion({
    agenda,
    agendaTs,
    timeAmbigious,
    candidates,
    weights,
    options,
    fetchDataNvs,
  });
}