/*
 * vex-match-parser
 * Version: (not recorded)
 * A TypeScript library for parsing VEX Robotics match schedules from PDF files.
 * Works in both Node.js and browser environments.
 * Bundled JavaScript build: 211 lines (210 loc), 6.48 kB.
 */
import b from "node:fs";
import x from "node:path";
/**
 * Error type thrown by this module for every parsing failure
 * (exported as `ParseError`).
 *
 * Instances have `name === "ParseError"` so callers can tell them apart from
 * generic errors either via `instanceof` or by inspecting `name`.
 */
class o extends Error {
  /**
   * @param {string} e - Human-readable description of the failure.
   * @param {{ cause?: unknown }} [options] - Optional standard `Error`
   *   options; lets a caller attach the underlying error as `cause`.
   *   Omitting it behaves exactly like the single-argument form.
   */
  constructor(e, options) {
    super(e, options);
    this.name = "ParseError";
  }
}
/**
 * Heuristic detector for the competition type of a schedule.
 */
class P {
  /**
   * Decide which competition a schedule belongs to.
   *
   * Signals are consulted in this order; the first hit wins:
   *   1. the filename, lower-cased, containing "v5", "iq" or "go";
   *   2. the header text containing "V5", "IQ" or "GO" (case-sensitive);
   *   3. the number of teams per match: 4 means V5, 2 means VEXIQ.
   *
   * The checks are plain substring tests, so any filename or header that
   * merely contains one of those letter pairs (e.g. "Gold") also matches.
   *
   * @param {string} e - Filename of the schedule.
   * @param {string} r - Header text of the schedule.
   * @param {number} t - Number of teams found in a match row.
   * @returns {"V5"|"VEXIQ"|"VEXGO"} The detected competition type.
   * @throws {ParseError} When none of the signals matches.
   */
  static detect(e, r, t) {
    const filenameHints = [
      ["v5", "V5"],
      ["iq", "VEXIQ"],
      ["go", "VEXGO"],
    ];
    const lowerCasedName = e.toLowerCase();
    for (const [needle, competition] of filenameHints) {
      if (lowerCasedName.includes(needle)) {
        return competition;
      }
    }

    const headerHints = [
      ["V5", "V5"],
      ["IQ", "VEXIQ"],
      ["GO", "VEXGO"],
    ];
    for (const [needle, competition] of headerHints) {
      if (r.includes(needle)) {
        return competition;
      }
    }

    switch (t) {
      case 4:
        return "V5";
      case 2:
        return "VEXIQ";
      default:
        throw new o("Unable to determine competition type");
    }
  }
}
/**
 * Tell whether a token looks like a team number: one or more digits,
 * followed by one or more upper-case ASCII letters, optionally followed by a
 * single trailing "*". Surrounding whitespace is ignored.
 *
 * @param {string} s - Token to test.
 * @returns {boolean} `true` when the trimmed token has the team-number shape.
 */
function F(s) {
  const teamNumberShape = /^[0-9]+[A-Z]+\*?$/;
  const candidate = s.trim();
  return teamNumberShape.test(candidate);
}
/**
 * Map a match number to its match type based on its first character
 * (after trimming): "Q" means "Qualification", "P" means "Practice".
 *
 * @param {string} s - Match number, e.g. "Q12" or "P3".
 * @returns {"Qualification"|"Practice"} The match type.
 * @throws {ParseError} When the trimmed value starts with neither "Q" nor "P";
 *   the message echoes the untrimmed input.
 */
function E(s) {
  const label = s.trim();
  switch (label.charAt(0)) {
    case "Q":
      return "Qualification";
    case "P":
      return "Practice";
    default:
      throw new o(`Invalid match number: ${s}`);
  }
}
/**
 * Normalise whitespace in a string: strip it from both ends and collapse
 * every internal run of whitespace into a single space.
 *
 * @param {string} s - Text to normalise.
 * @returns {string} The normalised text.
 */
function V(s) {
  const words = s.trim().split(/\s+/);
  return words.join(" ");
}
// Cached pdfjs module; stays null until the first successful load.
let p = null;

/**
 * Lazily import the pdfjs library and cache it for subsequent calls.
 *
 * When `process.versions.node` is present the legacy minified build is tried
 * first, falling back to the package's default entry point if that import
 * fails. In every other environment the default entry point is imported
 * directly.
 *
 * @returns {Promise<object>} The imported pdfjs module namespace.
 */
const v = async () => {
  if (p) {
    return p;
  }

  const hasNodeVersion =
    typeof process !== "undefined" && process.versions?.node != null;

  if (!hasNodeVersion) {
    p = await import("pdfjs-dist");
    return p;
  }

  try {
    p = await import("pdfjs-dist/legacy/build/pdf.min.mjs");
  } catch {
    // Legacy build not resolvable: use the default entry point instead.
    p = await import("pdfjs-dist");
  }
  return p;
};
/**
 * Parser that turns the text of a match-schedule PDF into structured match
 * records. All members are static; the public entry point is
 * `parseFromBuffer`.
 */
class g {
  /**
   * Parse a match schedule from raw PDF bytes.
   *
   * Individual rows that fail to parse are tolerated as long as at least one
   * row parses and no more than 30% of the rows fail.
   *
   * @param {ArrayBuffer|Uint8Array} e - PDF data.
   * @param {string} [r="schedule.pdf"] - Filename; used as a hint for the
   *   competition type and echoed back in the result.
   * @returns {Promise<{title: string, matchType: string, event: string,
   *   division: string, type: string, matches: object[], filename: string}>}
   * @throws {ParseError} On empty input, unreadable PDF, missing
   *   "Match List" header, no match rows, or too many unparseable rows.
   *   Unexpected errors are wrapped in a ParseError whose `cause` is the
   *   original error.
   */
  static async parseFromBuffer(e, r = "schedule.pdf") {
    try {
      const isEmptyInput =
        !e ||
        (e instanceof ArrayBuffer && e.byteLength === 0) ||
        (e instanceof Uint8Array && e.length === 0);
      if (isEmptyInput) {
        throw new o("Invalid input: PDF data is empty");
      }

      const text = await this.extractTextFromPDF(e);
      if (!text || text.trim().length === 0) {
        throw new o("Invalid PDF: No text content found");
      }

      const { title, event, division, headerText } = this.parseHeader(text);
      if (!title.includes("Match List")) {
        throw new o('Invalid PDF: Missing "Match List" header. This may not be a VEX match schedule.');
      }

      const rows = this.extractTableRows(text);
      if (rows.length === 0) {
        throw new o("No match data found in PDF. Please ensure this is a valid VEX match schedule.");
      }

      // The team count of the first row is the last-resort signal for the
      // competition type.
      const firstRowTeams = this.extractTeamsFromRow(rows[0]);
      const type = P.detect(r, headerText, firstRowTeams.length);

      const matches = [];
      const rowErrors = [];
      for (const [index, row] of rows.entries()) {
        try {
          matches.push(this.parseRow(row, type));
        } catch (err) {
          rowErrors.push(`Row ${index + 1}: ${err instanceof Error ? err.message : String(err)}`);
        }
      }

      if (matches.length === 0) {
        throw new o(`Failed to parse any matches. Errors:\n${rowErrors.join("\n")}`);
      }
      if (rowErrors.length > 0 && rowErrors.length / rows.length > 0.3) {
        throw new o(
          `Failed to parse ${rowErrors.length}/${rows.length} matches. Errors:\n${rowErrors.slice(0, 5).join("\n")}`
        );
      }

      return {
        title,
        // The schedule-level match type is taken from the first parsed match.
        matchType: matches[0].matchType,
        event,
        division,
        type,
        matches,
        filename: r
      };
    } catch (err) {
      throw err instanceof o ? err : g.toParseError("Failed to parse PDF", err);
    }
  }

  /**
   * Extract the text of every page of a PDF, one visual line per output line.
   *
   * The pdfjs loading task is destroyed once extraction finishes (whether it
   * succeeded or failed) so the document and its worker are released.
   *
   * @param {ArrayBuffer|Uint8Array} e - PDF data handed to pdfjs.
   * @returns {Promise<string>} Newline-terminated lines of text.
   * @throws {ParseError} When the document cannot be loaded, has no pages,
   *   or a page's text cannot be read.
   */
  static async extractTextFromPDF(e) {
    let loadingTask = null;
    try {
      const pdfjs = await v();
      if (pdfjs.GlobalWorkerOptions && !pdfjs.GlobalWorkerOptions.workerSrc) {
        pdfjs.GlobalWorkerOptions.workerSrc = "pdfjs-dist/legacy/build/pdf.worker.mjs";
      }

      loadingTask = pdfjs.getDocument({
        data: e,
        useWorkerFetch: false,
        isEvalSupported: false,
        useSystemFonts: true
      });
      const doc = await loadingTask.promise;
      if (!doc || doc.numPages === 0) {
        throw new o("Invalid PDF: No pages found");
      }

      let text = "";
      for (let pageNumber = 1; pageNumber <= doc.numPages; pageNumber++) {
        try {
          const page = await doc.getPage(pageNumber);
          const { items } = await page.getTextContent();
          text += g.joinItemsIntoLines(items);
        } catch (err) {
          throw g.toParseError(`Failed to extract text from page ${pageNumber}`, err);
        }
      }
      return text;
    } catch (err) {
      throw err instanceof o ? err : g.toParseError("Failed to load PDF", err);
    } finally {
      if (loadingTask && typeof loadingTask.destroy === "function") {
        try {
          await loadingTask.destroy();
        } catch {
          // Cleanup is best-effort: a failure here must not mask the
          // extraction result or the original error.
        }
      }
    }
  }

  /**
   * Concatenate text items into lines. A new line starts whenever an item's
   * `transform[5]` differs from the previous item's by more than 5
   * (presumably the vertical position in PDF units — confirm against pdfjs).
   * Items without `str` or `transform` are skipped; blank lines are dropped.
   *
   * @param {Array<{str?: string, transform?: number[]}>} items - Text items.
   * @returns {string} Trimmed lines, each terminated by "\n".
   */
  static joinItemsIntoLines(items) {
    let output = "";
    let currentLine = "";
    // null (not a numeric sentinel) marks "no previous item", so every real
    // coordinate value, including -1, takes part in the line-break test.
    let previousY = null;

    const flushLine = () => {
      const trimmed = currentLine.trim();
      if (trimmed) {
        output += `${trimmed}\n`;
      }
      currentLine = "";
    };

    for (const item of items) {
      if (!item.str || !item.transform) {
        continue;
      }
      const y = item.transform[5];
      if (previousY !== null && Math.abs(y - previousY) > 5) {
        flushLine();
      }
      currentLine += item.str;
      previousY = y;
    }
    flushLine();
    return output;
  }

  /**
   * Build a ParseError of the form "<prefix>: <message>" that keeps the
   * original error reachable through `cause`.
   *
   * @param {string} prefix - Context describing what failed.
   * @param {unknown} err - The underlying error or thrown value.
   * @returns {ParseError} The wrapping error.
   */
  static toParseError(prefix, err) {
    const detail = err instanceof Error ? err.message : String(err);
    const wrapped = new o(`${prefix}: ${detail}`);
    wrapped.cause = err;
    return wrapped;
  }

  /**
   * Read the schedule header from the extracted text.
   *
   * The title is the first non-empty line containing "Match List" (with
   * whitespace normalised). The second non-empty line is treated as
   * "<event> - <division>", split at the last " - "; without a separator the
   * whole line is the event and the division is "Default Division".
   *
   * @param {string} e - Extracted PDF text.
   * @returns {{title: string, event: string, division: string,
   *   headerText: string}} `headerText` is the first five non-empty lines
   *   joined with spaces.
   */
  static parseHeader(e) {
    const lines = e
      .split("\n")
      .map((line) => line.trim())
      .filter(Boolean);

    const titleLine = lines.find((line) => line.includes("Match List")) || "";
    const title = V(titleLine);

    const eventLine = lines[1] || "";
    const separatorAt = eventLine.lastIndexOf(" - ");
    let event = eventLine.trim();
    let division = "Default Division";
    if (separatorAt !== -1) {
      event = eventLine.substring(0, separatorAt).trim();
      division = eventLine.substring(separatorAt + 3).trim();
    }

    const headerText = lines.slice(0, 5).join(" ");
    return { title, event, division, headerText };
  }

  /**
   * Select the lines that look like match rows: trimmed, non-empty lines
   * starting with "Q" or "P" immediately followed by a digit.
   *
   * @param {string} e - Extracted PDF text.
   * @returns {string[]} Matching lines, in document order.
   */
  static extractTableRows(e) {
    return e
      .split("\n")
      .map((line) => line.trim())
      .filter((line) => /^[QP]\d+/.test(line));
  }

  /**
   * List the whitespace-separated tokens of a row that look like team
   * numbers.
   *
   * @param {string} e - A match row.
   * @returns {string[]} Team-number tokens, in row order.
   */
  static extractTeamsFromRow(e) {
    return e
      .split(/\s+/)
      .filter(Boolean)
      .filter((token) => F(token));
  }

  /**
   * Parse one match row.
   *
   * Token layout: match number, field, then the time (every token up to the
   * first team-number-shaped token, joined with spaces), then the teams.
   * V5 rows yield red1/red2/blue1/blue2; all other types yield team1/team2.
   *
   * @param {string} e - A match row.
   * @param {"V5"|"VEXIQ"|"VEXGO"} r - Competition type.
   * @returns {object} The parsed match record.
   * @throws {ParseError} When the row has fewer than 5 tokens, the match
   *   number is invalid, or too few teams are present.
   */
  static parseRow(e, r) {
    const tokens = e.split(/\s+/).filter(Boolean);
    if (tokens.length < 5) {
      throw new o(`Invalid row format: ${e}`);
    }

    const [matchNumber, field] = tokens;
    const matchType = E(matchNumber);

    // Everything between the field and the first team token is the time.
    let firstTeamAt = 2;
    while (firstTeamAt < tokens.length && !F(tokens[firstTeamAt])) {
      firstTeamAt++;
    }
    const time = tokens.slice(2, firstTeamAt).join(" ");
    const teams = tokens.slice(firstTeamAt);

    if (r === "V5") {
      if (teams.length < 4) {
        throw new o(`V5 match requires 4 teams, got ${teams.length} in ${matchNumber}`);
      }
      return {
        type: "V5",
        matchNumber,
        matchType,
        field,
        time,
        red1: teams[0],
        red2: teams[1],
        blue1: teams[2],
        blue2: teams[3]
      };
    }

    if (teams.length < 2) {
      throw new o(`Match requires 2 teams, got ${teams.length} in ${matchNumber}`);
    }
    return {
      // Any type other than "V5" or "VEXGO" is reported as "VEXIQ".
      type: r === "VEXGO" ? "VEXGO" : "VEXIQ",
      matchNumber,
      matchType,
      field,
      time,
      team1: teams[0],
      team2: teams[1]
    };
  }
}
/**
 * Parse a match schedule from a file path or from in-memory PDF data
 * (exported as `parseVEXMatchSchedule`).
 *
 * @param {string|ArrayBuffer|Uint8Array} s - A filesystem path to read, or
 *   the PDF bytes themselves. An ArrayBuffer is wrapped in a Uint8Array;
 *   any other non-string value is passed through unchanged.
 * @param {string} [e] - Filename to report. When omitted (or empty) the
 *   path's basename is used for string input, "schedule.pdf" otherwise.
 * @returns {Promise<object>} The result of `parseFromBuffer`.
 * @throws Rejects with the filesystem error when the path cannot be read,
 *   or with whatever `parseFromBuffer` rejects with.
 */
async function I(s, e) {
  if (typeof s === "string") {
    // Read asynchronously so a large file does not block the event loop
    // inside this already-async API.
    const fileContents = await b.promises.readFile(s);
    // Copy into a plain Uint8Array rather than handing over the Node Buffer.
    return g.parseFromBuffer(new Uint8Array(fileContents), e || x.basename(s));
  }

  const filename = e || "schedule.pdf";
  const data = s instanceof ArrayBuffer ? new Uint8Array(s) : s;
  return g.parseFromBuffer(data, filename);
}
/**
 * Parse a match schedule from a File-like object
 * (exported as `parseVEXMatchScheduleFromFile`).
 *
 * @param {{ arrayBuffer: () => Promise<ArrayBuffer>, name?: string }} s -
 *   Object exposing `arrayBuffer()` and a `name`; the name is forwarded as
 *   the filename.
 * @returns {Promise<object>} The result of `parseFromBuffer`.
 */
async function B(s) {
  const pdfBytes = await s.arrayBuffer();
  const { name: filename } = s;
  return g.parseFromBuffer(pdfBytes, filename);
}
export {
o as ParseError,
I as parseVEXMatchSchedule,
B as parseVEXMatchScheduleFromFile
};