@earvinpiamonte/pagasa-tcb-parser
Version:
A TypeScript library for parsing PAGASA weather bulletin PDF files
177 lines • 7.22 kB
JavaScript
;
/**
 * Module interop helper: reuses an already-registered `__importDefault` from
 * the enclosing `this` when there is one, otherwise falls back to the local
 * implementation below.
 *
 * The local implementation returns `mod` untouched when it is flagged with
 * `__esModule`; any other value is wrapped as `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    const isEsModule = mod && mod.__esModule;
    if (isEsModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.parsePdfFromBuffer = void 0;
const pdf_parse_1 = __importDefault(require("pdf-parse"));
const signal_parser_1 = require("./signal-parser");
const patterns_1 = require("../constants/patterns");
/**
 * Joins a date, time and meridian into one display string.
 *
 * The three parts are separated by single spaces, every run of whitespace is
 * collapsed to one space, and the result is trimmed. No date validation or
 * parsing happens here; the parts are only stringified and concatenated.
 *
 * @param {string} date - Date portion.
 * @param {string} time - Time portion.
 * @param {string} meridian - Meridian portion.
 * @returns {string} The whitespace-normalized combination of the three parts.
 */
const parseDate = (date, time, meridian) => {
    const combined = `${date} ${time} ${meridian}`;
    const singleSpaced = combined.split(/\s+/).join(" ");
    return singleSpaced.trim();
};
/**
 * Converts a date/time/meridian triple into an ISO 8601 UTC timestamp.
 *
 * The parts are joined with single spaces, suffixed with " GMT+0800" and
 * handed to the `Date` constructor, so the wall-clock time is read at a
 * UTC+08:00 offset. Parsing relies on the runtime's handling of non-ISO date
 * strings.
 *
 * @param {string} date - Date portion.
 * @param {string} time - Time portion.
 * @param {string} meridian - Meridian portion.
 * @returns {string|undefined} The `toISOString()` form, or `undefined` when
 *   any part is falsy, the string does not parse, or an exception is thrown.
 */
const toISO = (date, time, meridian) => {
    const hasEveryPart = Boolean(date && time && meridian);
    if (!hasEveryPart) {
        return undefined;
    }
    try {
        const parsed = new Date(`${date} ${time} ${meridian} GMT+0800`);
        // An unparseable string yields an "Invalid Date" whose timestamp is NaN.
        if (Number.isNaN(parsed.getTime())) {
            return undefined;
        }
        return parsed.toISOString();
    }
    catch {
        return undefined;
    }
};
/** Words whose presence marks an all-caps line as the start of the description. */
const DESCRIPTION_TRIGGER_WORDS = [
    "WEAKENS",
    "INTENSIFIES",
    "MAINTAINS",
    "ENTERS",
    "PASSES",
    "APPROACHES",
    "EXIT",
    "RE-?ENTERS",
    "RE-?EMERGES",
    "REGENERATES",
    "REMAINS",
    "RE-?FORMS",
    "RE-?INTENSIFIES",
    "DEVELOPS",
    "DISSIPATES",
    "REMNANT\\s+LOW",
];
/** Heading that ends the search for a description. */
const SIGNALS_HEADING_PATTERN = /^TROPICAL CYCLONE WIND SIGNALS/i;
/** Line prefixes that are never part of the description. */
const BOILERPLATE_LINE_PATTERN = /^(Issued at|Valid for broadcast|Prepared by:|Checked by:|Page \d+ of \d+|Republic of the Philippines|DEPARTMENT OF SCIENCE|Philippine Atmospheric|Services Administration|Weather Division|MMSS-\d+)/i;
/** Characters permitted on a line that counts as "all caps". */
const HEADLINE_CHARSET_PATTERN = /^[A-Z0-9 “”"'(),.-]+$/;
/** "Month Day, Year" fragment inside an already-formatted issued date. */
const MONTH_DAY_YEAR_PATTERN = /([A-Za-z]+\s+\d{1,2},\s*\d{4})/;

/** Collapses whitespace runs to single spaces and trims the ends. */
const collapseWhitespace = (line) => line.replace(/\s+/g, " ").trim();

/**
 * Resolves the issued date: `PATTERNS.issued` (groups: date, time, meridian)
 * wins; otherwise `PATTERNS.issuedAlt` (groups: time, meridian, day, month,
 * year) is tried. Returns `{ text, iso }`, or `{}` when neither matches.
 */
const resolveIssuedDate = (text) => {
    const issued = text.match(patterns_1.PATTERNS.issued);
    if (issued) {
        return {
            text: parseDate(issued[1], issued[2], issued[3]),
            iso: toISO(issued[1], issued[2], issued[3]),
        };
    }
    const alternate = text.match(patterns_1.PATTERNS.issuedAlt);
    if (!alternate) {
        return {};
    }
    const [, time, meridian, day, month, year] = alternate;
    return {
        text: `${month} ${day}, ${year} ${time} ${meridian}`,
        iso: toISO(`${month} ${day}, ${year}`, time, meridian),
    };
};

/**
 * Resolves the valid-until date: `PATTERNS.validUntil` (groups: date, time,
 * meridian) wins; otherwise `PATTERNS.validTodayTime` (groups: time, meridian)
 * is combined with the "Month Day, Year" part of the issued date.
 * Returns `{ text, iso }`, or `{}` when nothing usable is found.
 */
const resolveValidUntilDate = (text, dateIssued) => {
    const valid = text.match(patterns_1.PATTERNS.validUntil);
    if (valid) {
        return {
            text: parseDate(valid[1], valid[2], valid[3]),
            iso: toISO(valid[1], valid[2], valid[3]),
        };
    }
    const todayTime = text.match(patterns_1.PATTERNS.validTodayTime);
    if (!todayTime || !dateIssued) {
        return {};
    }
    // Only a time was given, so borrow the calendar date from the issued date.
    const datePart = dateIssued.match(MONTH_DAY_YEAR_PATTERN)?.[1];
    if (!datePart) {
        return {};
    }
    const [, time, meridian] = todayTime;
    return {
        text: `${datePart} ${time} ${meridian}`,
        iso: toISO(datePart, time, meridian),
    };
};

/**
 * Extracts the all-caps description that follows the subtitle.
 *
 * Scanning starts right after the first occurrence of `subtitle`. Capture
 * begins at the first all-caps line containing a trigger word (or the first
 * run of three or more capital letters found in the subtitle) and continues
 * over consecutive all-caps lines. It stops at the signals heading, at a
 * boilerplate line once capture has begun, or at the first line that is not
 * all caps. Returns the captured lines joined with spaces, or `undefined`.
 */
const extractDescription = (text, subtitle) => {
    if (!subtitle) {
        return undefined;
    }
    const subtitleIndex = text.indexOf(subtitle);
    if (subtitleIndex === -1) {
        return undefined;
    }
    // The trigger pattern depends only on the subtitle, so build it once
    // instead of once per scanned line.
    const cycloneNameFromSubtitle = subtitle.match(/[“"']?([A-Z]{3,})/)?.[1];
    const triggerParts = cycloneNameFromSubtitle
        ? [cycloneNameFromSubtitle, ...DESCRIPTION_TRIGGER_WORDS]
        : DESCRIPTION_TRIGGER_WORDS;
    const triggerRegex = new RegExp(`(${triggerParts.join("|")})`, "i");

    const descriptionLines = [];
    const linesAfterSubtitle = text
        .slice(subtitleIndex + subtitle.length)
        .split(/\n+/);
    for (const rawLine of linesAfterSubtitle) {
        const line = rawLine.trim();
        if (!line) {
            continue;
        }
        if (SIGNALS_HEADING_PATTERN.test(line)) {
            break;
        }
        // Capture is under way as soon as one line has been collected.
        const capturing = descriptionLines.length > 0;
        if (BOILERPLATE_LINE_PATTERN.test(line)) {
            if (capturing) {
                break;
            }
            continue;
        }
        const isUpper = HEADLINE_CHARSET_PATTERN.test(line) && /[A-Z]/.test(line);
        if (capturing) {
            if (!isUpper) {
                break;
            }
            descriptionLines.push(collapseWhitespace(line));
        }
        else if (isUpper && triggerRegex.test(line)) {
            descriptionLines.push(collapseWhitespace(line));
        }
        // NOTE(review): the previous implementation also had a
        // `descriptionLines.length >= 2` break that could never be reached, so
        // the number of captured lines has never been capped. Confirm whether a
        // two-line cap was intended before adding one.
    }
    return descriptionLines.length ? descriptionLines.join(" ") : undefined;
};

/**
 * Extracts bulletin metadata from the text of a parsed PDF.
 *
 * Every field is looked up with the regular expressions in
 * `patterns_1.PATTERNS`; a field that cannot be found is reported as `null`.
 * `cyclone.signals` is always returned as an empty array here.
 *
 * @param {string} text - Full text content of the bulletin.
 * @returns {{
 *   title: (string|null),
 *   subtitle: (string|null),
 *   description: (string|null),
 *   dateIssued: (string|null),
 *   dateIssuedISO: (string|null),
 *   dateValidUntil: (string|null),
 *   dateValidUntilISO: (string|null),
 *   cyclone: { name: (string|null), internationalName: (string|null), signals: Array }
 * }} The extracted metadata.
 */
const extractMeta = (text) => {
    const { PATTERNS } = patterns_1;
    const title = text.match(PATTERNS.bulletinTitle)?.[1] ||
        text.match(PATTERNS.advisoryTitle)?.[1];
    const subtitle = text.match(PATTERNS.bulletinSubtitle)?.[1] ||
        text.match(PATTERNS.plainCycloneClassification)?.[0];
    const names = text.match(PATTERNS.cycloneNames);
    const issued = resolveIssuedDate(text);
    const validUntil = resolveValidUntilDate(text, issued.text);
    const description = extractDescription(text, subtitle);
    return {
        title: title || null,
        subtitle: subtitle || null,
        description: description || null,
        dateIssued: issued.text || null,
        dateIssuedISO: issued.iso || null,
        dateValidUntil: validUntil.text || null,
        dateValidUntilISO: validUntil.iso || null,
        cyclone: {
            name: names?.[2] ||
                subtitle?.match(/Tropical\s+Depression\s+([A-Z]{3,})/i)?.[1] ||
                null,
            internationalName: names?.[3] || null,
            signals: [],
        },
    };
};
/**
 * Parses a bulletin PDF held in memory into structured metadata and signals.
 *
 * The buffer is converted to text with `pdf-parse`; metadata comes from
 * `extractMeta` and wind signals from `extractSignals`. The signal map is
 * flattened into an array of `{ level, regions }` sorted by ascending level.
 *
 * @param {Buffer} buffer - PDF contents, passed straight to `pdf-parse`.
 * @returns {Promise<object>} The `extractMeta` result with `cyclone.signals`
 *   replaced by the sorted signal array.
 * @throws {Error} "Failed to parse PDF buffer: <reason>" when any step fails.
 *   The original failure is attached as `cause` so its stack is not lost.
 */
const parsePdfFromBuffer = async (buffer) => {
    try {
        const data = await (0, pdf_parse_1.default)(buffer);
        const meta = extractMeta(data.text);
        const signalsMap = (0, signal_parser_1.extractSignals)(data.text);
        const signalsArray = Object.keys(signalsMap)
            .map((level) => ({ level: Number(level), regions: signalsMap[level].regions }))
            .sort((a, b) => a.level - b.level);
        return {
            ...meta,
            cyclone: {
                ...meta.cyclone,
                signals: signalsArray,
            },
        };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        // Keep the message format callers may match on; carry the original
        // error along as `cause` for debugging.
        throw new Error(`Failed to parse PDF buffer: ${reason}`, { cause: error });
    }
};
exports.parsePdfFromBuffer = parsePdfFromBuffer;
//# sourceMappingURL=pdf-parser.js.map