UNPKG

mindee

Version:

Mindee Client Library for Node.js

77 lines (75 loc) 3.38 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Document = void 0;
const extras_1 = require("./extras");
const extras_2 = require("./extras/extras");
const ocr_1 = require("./ocr");
const ragExtra_1 = require("./extras/ragExtra");
/**
 * Document prediction wrapper class. Holds the results of a parsed document.
 *
 * Instances expose:
 *  - `id`        value of the payload's `id`, or `""` when absent.
 *  - `filename`  value of the payload's `name`, or `""` when absent.
 *  - `ocr`       an `Ocr` wrapper when the payload carries a non-empty `ocr` object.
 *  - `inference` instance of the supplied inference class, built from `inference`.
 *  - `extras`    an `Extras` container, set only when at least one extra is available.
 *  - `nPages`    value of the payload's `n_pages`.
 *
 * @typeParam T an extension of an `Inference`. Mandatory in order to properly create an inference.
 */
class Document {
    /**
     * Builds the document from a raw payload.
     *
     * @param inferenceClass constructor signature for an inference.
     * @param httpResponse raw http response.
     */
    constructor(inferenceClass, httpResponse) {
        this.id = httpResponse?.id ?? "";
        this.filename = httpResponse?.name ?? "";
        this.ocr = httpResponse.ocr && Object.keys(httpResponse.ocr).length > 0
            ? new ocr_1.Ocr(httpResponse.ocr)
            : undefined;
        this.inference = new inferenceClass(httpResponse["inference"]);
        const rawExtras = httpResponse["extras"];
        // The emptiness test must be applied to the key list itself:
        // `Object.keys(x.length > 0)` would inspect a boolean and always yield `[]`.
        if (rawExtras && Object.keys(rawExtras).length > 0) {
            const extras = {};
            for (const [extraKey, extraValue] of Object.entries(rawExtras)) {
                // Unknown extra kinds are ignored.
                switch (extraKey) {
                    case "cropper":
                        extras["cropper"] = new extras_1.CropperExtra(extraValue);
                        break;
                    case "full_text_ocr":
                        extras["fullTextOcr"] = new extras_1.FullTextOcrExtra(extraValue);
                        break;
                    case "rag":
                        extras["rag"] = new ragExtra_1.RAGExtra(extraValue);
                        break;
                }
            }
            this.extras = new extras_2.Extras(extras);
        }
        // The extra is stored under the camelCase key "fullTextOcr"; reading the
        // snake_case key here would dereference `undefined`.
        const documentFullText = this.extras?.["fullTextOcr"];
        if (!documentFullText || documentFullText.toString().length === 0) {
            // No usable document-level full text: fall back to the page-level extras.
            this.injectFullTextOcr(httpResponse);
        }
        this.nPages = httpResponse["n_pages"];
    }
    /**
     * Default string representation.
     */
    toString() {
        // NOTE(review): the separators between the header, ID, filename and inference
        // are single spaces in this copy of the file; the published layout may use
        // line breaks that were lost in extraction — confirm against upstream.
        return `########\nDocument\n######## :Mindee ID: ${this.id} :Filename: ${this.filename} ${this.inference?.toString()}`;
    }
    /**
     * Builds a document-level full-text OCR extra out of the page-level
     * `extras.full_text_ocr.content` values, joined with newlines, and stores it
     * in `this.extras["fullTextOcr"]` (creating the `Extras` container if needed).
     *
     * Does nothing when there are no pages or when the first page carries no
     * non-empty full-text content.
     *
     * @param rawPrediction raw payload holding `inference.pages`.
     */
    injectFullTextOcr(rawPrediction) {
        const pages = rawPrediction?.["inference"]?.["pages"];
        if (!Array.isArray(pages) || pages.length < 1) {
            return;
        }
        // The first page decides whether page-level full text is available at all.
        if (!pages[0]?.["extras"]?.["full_text_ocr"]?.["content"]) {
            return;
        }
        // Pages without an "extras" key are skipped; pages whose extras lack the
        // full-text content contribute an empty line instead of raising.
        const fullTextOcr = pages
            .filter((page) => page !== null && typeof page === "object" && "extras" in page)
            .map((page) => page["extras"]?.["full_text_ocr"]?.["content"] ?? "")
            .join("\n");
        const artificialTextObj = { "content": fullTextOcr };
        const fullTextExtra = new extras_1.FullTextOcrExtra(artificialTextObj);
        if (!this.extras) {
            this.extras = new extras_2.Extras({ "fullTextOcr": fullTextExtra });
        }
        else {
            this.extras["fullTextOcr"] = fullTextExtra;
        }
    }
}
exports.Document = Document;