@earvinpiamonte/pagasa-tcb-parser
Version:
A TypeScript library for parsing PAGASA weather bulletin PDF files
66 lines • 2.19 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.extractMunicipalities = exports.splitPreservingParentheses = void 0;
const patterns_1 = require("../constants/patterns");
/**
 * Splits a delimited list into trimmed segments, treating `,`, `;` and the
 * word " and " (case-insensitive, surrounded by single spaces) as separators.
 * Separators that appear inside parentheses are kept as part of the segment.
 *
 * Unbalanced closing parentheses are tolerated: a stray `)` is kept in the
 * output but does not push the nesting depth below zero, so the separators
 * that follow it still split the text.
 *
 * @param {string} text - The text to split.
 * @returns {string[]} The non-empty, trimmed segments in order of appearance.
 */
const splitPreservingParentheses = (text) => {
    const AND_SEPARATOR = " and ";
    const result = [];
    let current = "";
    let parenthesesDepth = 0;
    // Emits the segment collected so far (if it is not blank) and starts a new one.
    const flush = () => {
        const segment = current.trim();
        if (segment) {
            result.push(segment);
        }
        current = "";
    };
    let i = 0;
    while (i < text.length) {
        const char = text[i];
        if (char === "(") {
            parenthesesDepth++;
            current += char;
        }
        else if (char === ")") {
            // Clamp at zero: a stray ")" must not make the depth negative,
            // otherwise no later separator would ever be honoured.
            if (parenthesesDepth > 0) {
                parenthesesDepth--;
            }
            current += char;
        }
        else if (parenthesesDepth === 0 && (char === "," || char === ";")) {
            flush();
        }
        else if (parenthesesDepth === 0 &&
            char === " " &&
            text.slice(i, i + AND_SEPARATOR.length).toLowerCase() === AND_SEPARATOR) {
            flush();
            // Skip the rest of " and "; the shared i++ below consumes the last character.
            i += AND_SEPARATOR.length - 1;
        }
        else {
            current += char;
        }
        i++;
    }
    flush();
    return result;
};
exports.splitPreservingParentheses = splitPreservingParentheses;
/**
 * Separates an area description into its name and the entries listed inside
 * its parenthesised groups.
 *
 * Each group matched by PATTERNS.parentheses has its first capture split on
 * commas; blank entries and entries consisting only of digits are dropped.
 * The name is areaText with every "(...)" group removed, then whatever
 * PATTERNS.cleanExtra and PATTERNS.restPattern match removed (both are defined
 * in ../constants/patterns, not visible here), then a leading "the " removed,
 * and finally trimmed.
 *
 * @param {string} areaText - The raw area description.
 * @returns {{ name: string, municipalities: string[] }} The cleaned name and
 *   the collected entries, in order of appearance.
 */
const extractMunicipalities = (areaText) => {
    const { parentheses, cleanExtra, restPattern } = patterns_1.PATTERNS;
    const isNamedEntry = (entry) => entry.length > 0 && !/^\d+$/.test(entry);
    const municipalities = [];
    // Rewind the shared pattern so matching starts at the beginning of areaText.
    parentheses.lastIndex = 0;
    for (let found = parentheses.exec(areaText); found !== null; found = parentheses.exec(areaText)) {
        for (const rawEntry of found[1].split(",")) {
            const entry = rawEntry.trim();
            if (isNamedEntry(entry)) {
                municipalities.push(entry);
            }
        }
    }
    let name = areaText.replace(/\s*\([^)]*\)/g, "");
    name = name.replace(cleanExtra, "");
    name = name.replace(restPattern, "");
    name = name.replace(/^the\s+/i, "").trim();
    return { name, municipalities };
};
exports.extractMunicipalities = extractMunicipalities;
//# sourceMappingURL=text-utils.js.map