/*
 * gcash-pdf-parser
 * Version: (not given)
 * Extract GCash transaction data from PDF statements
 * 95 lines (94 loc) • 4.22 kB
 * JavaScript
 */
;
/**
 * Re-export helper: exposes property `k` of module `m` on object `o`
 * under the name `k2` (which defaults to `k`).
 *
 * An implementation already present on `this` is reused. Otherwise, when
 * `Object.create` exists, the source property's own descriptor is copied
 * unless it is missing, is an accessor on an object without `__esModule`,
 * or is a writable/configurable data property; in those cases an
 * enumerable getter returning `m[k]` is installed so the binding tracks
 * later changes to `m`. Without `Object.create` the value is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // No descriptor support: plain value copy.
        return function (o, m, k, k2) {
            var alias = k2 === undefined ? k : k2;
            o[alias] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor properties are forwarded as-is only for ES modules.
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, descriptor);
    };
})();
/**
 * Attaches `v` to `o` as its `default` export.
 *
 * An implementation already present on `this` is reused. Otherwise, when
 * `Object.create` exists, `default` is defined as an enumerable property
 * via `Object.defineProperty`; if not, it is assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
/**
 * Namespace-import helper (`import * as x`).
 *
 * An implementation already present on `this` is reused. The returned
 * function hands back `mod` untouched when it is flagged `__esModule`.
 * Otherwise it builds a fresh object, binds every own key of `mod` except
 * "default" through `__createBinding`, and sets `mod` itself as the
 * `default` export through `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Picks the key-listing strategy on first use, then replaces itself
    // with the chosen function so later calls skip the check.
    var listOwnKeys = function (target) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var key in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, key)) {
                    names[names.length] = key;
                }
            }
            return names;
        };
        return listOwnKeys(target);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
/**
 * Drives a generator function as if it were an async function.
 *
 * An implementation already present on `this` is reused. Otherwise:
 * `generator` is invoked with `thisArg` and `_arguments` (or an empty
 * list), and each yielded value is wrapped in a `P` promise (`Promise`
 * when `P` is falsy). Fulfilment resumes the generator with the value,
 * rejection throws the reason into it. The returned promise resolves with
 * the generator's return value and rejects with anything it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Pass existing P instances through; wrap anything else.
    var toPromise = function (value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (resolve) { resolve(value); });
    };
    return new (P || (P = Promise))(function (resolve, reject) {
        var advance = function (result) {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        var onFulfilled = function (value) {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        var onRejected = function (reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // From here on `generator` holds the iterator, not the function.
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const GCashPDFParser_1 = require("./parsers/GCashPDFParser");
/**
 * Command-line entry point for the GCash PDF parser.
 *
 * Usage: node dist/cli.js <pdfFilePath> <password> [outputDir]
 *
 * Reads the file at `pdfFilePath`, passes its bytes together with
 * `password` and `{ outputDir }` to `GCashPDFParser`, waits for `parse()`,
 * then logs the path returned by `saveCSV()` and a preview of the first
 * entry returned by `getPageTexts()`. `outputDir` falls back to "output"
 * when the third argument is missing or empty.
 *
 * The process exits with status 1 when fewer than two arguments are given
 * or when anything inside the processing block throws.
 *
 * @example
 * ```
 * node dist/cli.js statement.pdf mypassword123 ./output
 * ```
 *
 * @returns {Promise<void>} Settles once processing has finished.
 */
function main() {
    return __awaiter(this, void 0, void 0, function* () {
        const cliArgs = process.argv.slice(2);
        if (cliArgs.length < 2) {
            console.error("Usage: node dist/cli.js <pdfFilePath> <password> [outputDir]");
            process.exit(1);
        }
        const pdfFilePath = cliArgs[0];
        const password = cliArgs[1];
        const outputDir = cliArgs[2] ? cliArgs[2] : "output";
        try {
            // The Buffer may be a view into a larger backing store, so copy
            // out exactly the byte range that belongs to this file.
            const pdfBytes = fs.readFileSync(pdfFilePath);
            const firstByte = pdfBytes.byteOffset;
            const lastByte = firstByte + pdfBytes.byteLength;
            const arrayBuffer = pdfBytes.buffer.slice(firstByte, lastByte);
            console.log("Processing PDF file:", pdfFilePath);
            console.log("Output directory:", outputDir);
            // The third constructor argument is an options object, not a bare string.
            const parserOptions = { outputDir: outputDir };
            const parser = new GCashPDFParser_1.GCashPDFParser(arrayBuffer, password, parserOptions);
            yield parser.parse();
            // Report what was produced.
            const csvPath = parser.saveCSV();
            console.log(`CSV file has been generated: ${csvPath}`);
            const pageTexts = parser.getPageTexts();
            console.log(`Extracted ${pageTexts.length} pages of text`);
            const debugDir = path.join(outputDir, "debug");
            console.log(`Check the ${debugDir} directory for extracted raw text`);
            // Show a short preview of the first page.
            if (pageTexts.length > 0) {
                const preview = pageTexts[0].substring(0, 200);
                console.log("\nSample of first page text (first 200 characters):");
                console.log(preview + "...");
            }
        }
        catch (error) {
            console.error("Error processing PDF:", error);
            process.exit(1);
        }
    });
}
// Start the CLI. The returned promise is deliberately not awaited: main()
// catches processing errors itself, logs them and exits with status 1.
main();