UNPKG

vex-match-parser

Version:

A TypeScript library for parsing VEX Robotics match schedules from PDF files. Works in both Node.js and browser environments.

github.com/junho/vex-match-parser

junho/vex-match-parser

211 lines (210 loc) • 6.48 kB

JavaScript

import fs from "node:fs";
import path from "node:path";

/**
 * Error type thrown for every failure surfaced by this module.
 */
class ParseError extends Error {
  /**
   * @param {string} message - Human-readable description of the failure.
   * @param {{ cause?: unknown }} [options] - Optional standard Error options;
   *   used by the wrappers below to keep the underlying exception reachable.
   */
  constructor(message, options) {
    super(message, options);
    this.name = "ParseError";
  }
}

/**
 * Returns a printable message for any thrown value.
 *
 * @param {unknown} err - Value caught by a `catch` clause.
 * @returns {string} `err.message` for Error instances, otherwise `String(err)`.
 */
function describeError(err) {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Decides which competition format ("V5", "VEXIQ" or "VEXGO") a schedule uses.
 */
class CompetitionTypeDetector {
  /**
   * Checks, in order: substrings of the lower-cased filename, substrings of
   * the header text (case-sensitive), then the number of teams per match.
   *
   * NOTE(review): these are plain substring checks, so a filename such as
   * "chicago.pdf" matches "go" — confirm whether that is acceptable.
   *
   * @param {string} filename - Name of the source file.
   * @param {string} headerText - Text taken from the top of the document.
   * @param {number} teamCount - Number of team tokens found in the first row.
   * @returns {"V5" | "VEXIQ" | "VEXGO"} The detected competition type.
   * @throws {ParseError} When none of the heuristics match.
   */
  static detect(filename, headerText, teamCount) {
    const lowerName = filename.toLowerCase();
    if (lowerName.includes("v5")) return "V5";
    if (lowerName.includes("iq")) return "VEXIQ";
    if (lowerName.includes("go")) return "VEXGO";

    if (headerText.includes("V5")) return "V5";
    if (headerText.includes("IQ")) return "VEXIQ";
    if (headerText.includes("GO")) return "VEXGO";

    if (teamCount === 4) return "V5";
    if (teamCount === 2) return "VEXIQ";

    throw new ParseError("Unable to determine competition type");
  }
}

/**
 * Tests whether a token looks like a team number: digits, then upper-case
 * letters, then an optional trailing asterisk (e.g. "1234A" or "99B*").
 *
 * @param {string} token - Candidate token; surrounding whitespace is ignored.
 * @returns {boolean} True when the token has the team-number shape.
 */
function isTeamNumber(token) {
  return /^[0-9]+[A-Z]+\*?$/.test(token.trim());
}

/**
 * Maps a match number to its match type using its first letter.
 *
 * @param {string} matchNumber - Match identifier such as "Q12" or "P3".
 * @returns {"Qualification" | "Practice"} The match type.
 * @throws {ParseError} When the identifier starts with neither "Q" nor "P".
 */
function getMatchType(matchNumber) {
  const trimmed = matchNumber.trim();
  if (trimmed.startsWith("Q")) return "Qualification";
  if (trimmed.startsWith("P")) return "Practice";
  throw new ParseError(`Invalid match number: ${matchNumber}`);
}

/**
 * Trims a title and collapses every run of whitespace to a single space.
 *
 * @param {string} title - Raw title text.
 * @returns {string} The normalized title.
 */
function normalizeTitle(title) {
  return title.trim().replace(/\s+/g, " ");
}

// Cached pdfjs module so the dynamic import happens at most once.
let pdfjsModule = null;

/**
 * Lazily imports pdfjs-dist. When `process.versions.node` is present the
 * legacy build is tried first, falling back to the default entry point.
 *
 * @returns {Promise<object>} The imported pdfjs module namespace.
 */
const loadPdfjs = async () => {
  if (pdfjsModule) return pdfjsModule;

  const isNode =
    typeof process !== "undefined" &&
    process.versions != null &&
    process.versions.node != null;

  if (isNode) {
    try {
      pdfjsModule = await import("pdfjs-dist/legacy/build/pdf.min.mjs");
    } catch {
      pdfjsModule = await import("pdfjs-dist");
    }
  } else {
    pdfjsModule = await import("pdfjs-dist");
  }
  return pdfjsModule;
};

/**
 * Turns the bytes of a match-schedule PDF into a structured schedule object.
 */
class MatchScheduleParser {
  /**
   * Parses a whole schedule from in-memory PDF data.
   *
   * @param {ArrayBuffer | Uint8Array} data - Raw PDF bytes.
   * @param {string} [filename="schedule.pdf"] - Name used for competition-type
   *   detection and echoed back in the result.
   * @returns {Promise<object>} Object with `title`, `matchType`, `event`,
   *   `division`, `type`, `matches` and `filename`.
   * @throws {ParseError} On empty input, missing text, a missing "Match List"
   *   header, no table rows, or when more than 30% of the rows fail to parse.
   */
  static async parseFromBuffer(data, filename = "schedule.pdf") {
    try {
      const isEmpty =
        !data ||
        (data instanceof ArrayBuffer && data.byteLength === 0) ||
        (data instanceof Uint8Array && data.length === 0);
      if (isEmpty) {
        throw new ParseError("Invalid input: PDF data is empty");
      }

      const text = await this.extractTextFromPDF(data);
      if (!text || text.trim().length === 0) {
        throw new ParseError("Invalid PDF: No text content found");
      }

      const { title, event, division, headerText } = this.parseHeader(text);
      if (!title.includes("Match List")) {
        throw new ParseError(
          'Invalid PDF: Missing "Match List" header. This may not be a VEX match schedule.'
        );
      }

      const rows = this.extractTableRows(text);
      if (rows.length === 0) {
        throw new ParseError(
          "No match data found in PDF. Please ensure this is a valid VEX match schedule."
        );
      }

      // The number of teams in the first row is the last-resort hint for the type.
      const firstRowTeams = this.extractTeamsFromRow(rows[0]);
      const type = CompetitionTypeDetector.detect(
        filename,
        headerText,
        firstRowTeams.length
      );

      // Rows are parsed best-effort; failures are collected, not thrown at once.
      const matches = [];
      const rowErrors = [];
      rows.forEach((row, index) => {
        try {
          matches.push(this.parseRow(row, type));
        } catch (err) {
          rowErrors.push(`Row ${index + 1}: ${describeError(err)}`);
        }
      });

      if (matches.length === 0) {
        throw new ParseError(
          `Failed to parse any matches. Errors: ${rowErrors.join("\n")}`
        );
      }
      if (rowErrors.length > 0 && rowErrors.length / rows.length > 0.3) {
        throw new ParseError(
          `Failed to parse ${rowErrors.length}/${rows.length} matches. Errors: ${rowErrors
            .slice(0, 5)
            .join("\n")}`
        );
      }

      const matchType = getMatchType(matches[0].matchNumber);
      return { title, matchType, event, division, type, matches, filename };
    } catch (err) {
      if (err instanceof ParseError) throw err;
      throw new ParseError(`Failed to parse PDF: ${describeError(err)}`, {
        cause: err,
      });
    }
  }

  /**
   * Extracts the text of every page as newline-separated lines.
   *
   * Text items are concatenated into one line until `transform[5]` changes by
   * more than 5 units (presumably the item's vertical position — confirm
   * against the pdfjs text-content documentation).
   *
   * @param {ArrayBuffer | Uint8Array} data - Raw PDF bytes.
   * @returns {Promise<string>} One trimmed line per detected row, each
   *   terminated by "\n".
   * @throws {ParseError} When the document cannot be loaded, has no pages, or
   *   a page's text cannot be read.
   */
  static async extractTextFromPDF(data) {
    try {
      const pdfjs = await loadPdfjs();
      if (pdfjs.GlobalWorkerOptions && !pdfjs.GlobalWorkerOptions.workerSrc) {
        pdfjs.GlobalWorkerOptions.workerSrc =
          "pdfjs-dist/legacy/build/pdf.worker.mjs";
      }

      const pdf = await pdfjs.getDocument({
        data,
        useWorkerFetch: false,
        isEvalSupported: false,
        useSystemFonts: true,
      }).promise;
      if (!pdf || pdf.numPages === 0) {
        throw new ParseError("Invalid PDF: No pages found");
      }

      let text = "";
      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        try {
          const page = await pdf.getPage(pageNumber);
          const { items } = await page.getTextContent();

          // null means "no item seen yet", so any real coordinate is valid.
          let lastY = null;
          let line = "";
          for (const item of items) {
            if (!item.str || !item.transform) continue;
            const y = item.transform[5];
            if (lastY !== null && Math.abs(y - lastY) > 5) {
              if (line.trim()) text += `${line.trim()}\n`;
              line = "";
            }
            line += item.str;
            lastY = y;
          }
          if (line.trim()) text += `${line.trim()}\n`;
        } catch (err) {
          throw new ParseError(
            `Failed to extract text from page ${pageNumber}: ${describeError(err)}`,
            { cause: err }
          );
        }
      }
      return text;
    } catch (err) {
      if (err instanceof ParseError) throw err;
      throw new ParseError(`Failed to load PDF: ${describeError(err)}`, {
        cause: err,
      });
    }
  }

  /**
   * Reads the title, event and division from the first lines of the text.
   *
   * The title is the first line containing "Match List". The second non-empty
   * line is split at its last " - " into event and division; without that
   * separator the whole line is the event and the division defaults to
   * "Default Division".
   *
   * @param {string} text - Newline-separated document text.
   * @returns {{ title: string, event: string, division: string, headerText: string }}
   *   Header fields; `headerText` is the first five non-empty lines joined by spaces.
   */
  static parseHeader(text) {
    const lines = text
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean);

    const titleLine = lines.find((line) => line.includes("Match List")) || "";
    const title = normalizeTitle(titleLine);

    const eventLine = lines[1] || "";
    const separatorIndex = eventLine.lastIndexOf(" - ");
    let event = "";
    let division = "Default Division";
    if (separatorIndex !== -1) {
      event = eventLine.substring(0, separatorIndex).trim();
      division = eventLine.substring(separatorIndex + 3).trim();
    } else {
      event = eventLine.trim();
    }

    const headerText = lines.slice(0, 5).join(" ");
    return { title, event, division, headerText };
  }

  /**
   * Selects the lines that start with "Q" or "P" followed by a digit.
   *
   * @param {string} text - Newline-separated document text.
   * @returns {string[]} Trimmed match rows in document order.
   */
  static extractTableRows(text) {
    return text
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean)
      .filter((line) => /^[QP]\d+/.test(line));
  }

  /**
   * Returns the whitespace-separated tokens of a row that look like team numbers.
   *
   * @param {string} row - A single match row.
   * @returns {string[]} Team-number tokens in row order.
   */
  static extractTeamsFromRow(row) {
    return row
      .split(/\s+/)
      .filter(Boolean)
      .filter((token) => isTeamNumber(token));
  }

  /**
   * Parses one row laid out as: match number, field, time tokens, team numbers.
   *
   * @param {string} row - A single match row.
   * @param {"V5" | "VEXIQ" | "VEXGO"} competitionType - Target match shape.
   * @returns {object} For "V5": `red1`, `red2`, `blue1`, `blue2`; otherwise
   *   `team1`, `team2`. Always includes `type`, `matchNumber`, `matchType`,
   *   `field` and `time`.
   * @throws {ParseError} When the row has fewer than 5 tokens, an invalid
   *   match number, or too few team numbers.
   */
  static parseRow(row, competitionType) {
    const tokens = row.split(/\s+/).filter(Boolean);
    if (tokens.length < 5) {
      throw new ParseError(`Invalid row format: ${row}`);
    }

    const matchNumber = tokens[0];
    const matchType = getMatchType(matchNumber);
    const field = tokens[1];

    // Everything between the field and the first team number is the time.
    let firstTeamIndex = 2;
    while (firstTeamIndex < tokens.length && !isTeamNumber(tokens[firstTeamIndex])) {
      firstTeamIndex++;
    }
    const time = tokens.slice(2, firstTeamIndex).join(" ");
    const teams = tokens.slice(firstTeamIndex);

    if (competitionType === "V5") {
      if (teams.length < 4) {
        throw new ParseError(
          `V5 match requires 4 teams, got ${teams.length} in ${matchNumber}`
        );
      }
      return {
        type: "V5",
        matchNumber,
        matchType,
        field,
        time,
        red1: teams[0],
        red2: teams[1],
        blue1: teams[2],
        blue2: teams[3],
      };
    }

    if (teams.length < 2) {
      throw new ParseError(
        `Match requires 2 teams, got ${teams.length} in ${matchNumber}`
      );
    }
    return {
      type: competitionType === "VEXGO" ? "VEXGO" : "VEXIQ",
      matchNumber,
      matchType,
      field,
      time,
      team1: teams[0],
      team2: teams[1],
    };
  }
}

/**
 * Parses a match schedule from a file path or from in-memory PDF data.
 *
 * @param {string | ArrayBuffer | Uint8Array} s - Path read with
 *   `fs.readFileSync`, or raw PDF bytes.
 * @param {string} [e] - Filename override; defaults to the basename of the
 *   path, or "schedule.pdf" for in-memory data.
 * @returns {Promise<object>} The parsed schedule.
 * @throws {ParseError} When the data cannot be parsed as a match schedule.
 */
async function parseVEXMatchSchedule(s, e) {
  if (typeof s === "string") {
    const bytes = new Uint8Array(fs.readFileSync(s));
    return MatchScheduleParser.parseFromBuffer(bytes, e || path.basename(s));
  }
  const bytes = s instanceof ArrayBuffer ? new Uint8Array(s) : s;
  return MatchScheduleParser.parseFromBuffer(bytes, e || "schedule.pdf");
}

/**
 * Parses a match schedule from an object exposing `arrayBuffer()` and `name`
 * (presumably a browser File — confirm against callers).
 *
 * @param {{ arrayBuffer(): Promise<ArrayBuffer>, name: string }} s - Source file object.
 * @returns {Promise<object>} The parsed schedule.
 * @throws {ParseError} When the data cannot be parsed as a match schedule.
 */
async function parseVEXMatchScheduleFromFile(s) {
  const buffer = await s.arrayBuffer();
  return MatchScheduleParser.parseFromBuffer(buffer, s.name);
}

export {
  ParseError,
  parseVEXMatchSchedule,
  parseVEXMatchScheduleFromFile,
};