/*
 * mindee
 * Version:
 * Mindee Client Library for Node.js
 * 77 lines (75 loc) • 3.38 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.Document = void 0;
const extras_1 = require("./extras");
const extras_2 = require("./extras/extras");
const ocr_1 = require("./ocr");
const ragExtra_1 = require("./extras/ragExtra");
/**
 * Document prediction wrapper class. Holds the results of a parsed document.
 * @typeParam T an extension of an `Inference`. Mandatory in order to properly create an inference.
 */
class Document {
  /**
   * Builds the document wrapper from a raw API response.
   *
   * Sets `id`, `filename`, `ocr`, `inference`, `extras` (only when the response
   * carries a non-empty `extras` object) and `nPages`.
   *
   * @param inferenceClass constructor signature for an inference.
   * @param httpResponse raw http response.
   */
  constructor(inferenceClass, httpResponse) {
    this.id = httpResponse?.id ?? "";
    this.filename = httpResponse?.name ?? "";
    this.ocr = httpResponse.ocr && Object.keys(httpResponse.ocr).length > 0
      ? new ocr_1.Ocr(httpResponse.ocr)
      : undefined;
    this.inference = new inferenceClass(httpResponse["inference"]);

    const rawExtras = httpResponse["extras"];
    // Only build an `Extras` container when the response actually has extras.
    if (rawExtras && Object.keys(rawExtras).length > 0) {
      const extras = {};
      for (const [extraKey, extraValue] of Object.entries(rawExtras)) {
        // Keys other than the ones listed here are ignored.
        switch (extraKey) {
          case "cropper":
            extras["cropper"] = new extras_1.CropperExtra(extraValue);
            break;
          case "full_text_ocr":
            extras["fullTextOcr"] = new extras_1.FullTextOcrExtra(extraValue);
            break;
          case "rag":
            extras["rag"] = new ragExtra_1.RAGExtra(extraValue);
            break;
        }
      }
      this.extras = new extras_2.Extras(extras);
    }

    // Fall back to the page-level OCR text when no document-level full-text
    // OCR extra exists or when it renders as an empty string. The extra is
    // stored under the camelCase key "fullTextOcr" (see the switch above).
    const documentLevelOcr = this.extras?.["fullTextOcr"];
    if (!documentLevelOcr || documentLevelOcr.toString().length === 0) {
      this.injectFullTextOcr(httpResponse);
    }
    this.nPages = httpResponse["n_pages"];
  }

  /**
   * Default string representation.
   */
  toString() {
    return `########\nDocument\n########
:Mindee ID: ${this.id}
:Filename: ${this.filename}
${this.inference?.toString()}`;
  }

  /**
   * Builds a `fullTextOcr` extra from the page-level `full_text_ocr` extras and
   * stores it on `this.extras`, creating the container if it does not exist.
   *
   * Does nothing when there are no pages or when the first page has no
   * non-empty `full_text_ocr.content`. Pages without an `extras` entry are
   * skipped; pages whose `extras` lack full-text content contribute an empty
   * line, so one incomplete page no longer aborts the whole document.
   *
   * @param rawPrediction raw http response containing `inference.pages`.
   */
  injectFullTextOcr(rawPrediction) {
    const pages = rawPrediction?.["inference"]?.["pages"];
    if (!Array.isArray(pages) || pages.length < 1) {
      return;
    }
    // The first page decides whether page-level full-text OCR is available.
    if (!pages[0]?.["extras"]?.["full_text_ocr"]?.["content"]) {
      return;
    }
    const fullTextOcr = pages
      .filter((page) => page?.["extras"])
      .map((page) => page["extras"]["full_text_ocr"]?.["content"] ?? "")
      .join("\n");
    const artificialTextObj = { "content": fullTextOcr };
    const fullTextOcrExtra = new extras_1.FullTextOcrExtra(artificialTextObj);
    if (!this.extras) {
      this.extras = new extras_2.Extras({ "fullTextOcr": fullTextOcrExtra });
    }
    else {
      this.extras["fullTextOcr"] = fullTextOcrExtra;
    }
  }
}
exports.Document = Document;