mindee
Version:
Mindee Client Library for Node.js
70 lines (69 loc) • 3.09 kB
JavaScript
/**
 * Runtime helper that reads a compiled private class member.
 *
 * @param receiver Object the member is read from.
 * @param state Either a function (the receiver must then be that very function
 *   and `f` must be provided) or a collection exposing `has`/`get` that tracks
 *   the objects allowed to read the member.
 * @param kind "m" returns `f` itself, "a" invokes `f` with `receiver` as `this`;
 *   any other value reads `f.value` when `f` is given, else `state.get(receiver)`.
 * @param f Method, getter, or `{ value }` holder, depending on `kind`.
 * @returns The private member's value.
 * @throws {TypeError} When an accessor has no getter, or when `receiver` is not
 *   allowed to read the member.
 */
var __classPrivateFieldGet = (this && this.__classPrivateFieldGet) || function (receiver, state, kind, f) {
    const isAccessor = kind === "a";
    if (isAccessor && !f) {
        throw new TypeError("Private accessor was defined without a getter");
    }

    // A function `state` demands an identity match plus a member descriptor;
    // any other `state` is asked whether it contains the receiver.
    let mayRead;
    if (typeof state === "function") {
        mayRead = receiver === state && Boolean(f);
    }
    else {
        mayRead = state.has(receiver);
    }
    if (!mayRead) {
        throw new TypeError("Cannot read private member from an object whose class did not declare it");
    }

    if (kind === "m") {
        return f;
    }
    if (isAccessor) {
        return f.call(receiver);
    }
    if (f) {
        return f.value;
    }
    return state.get(receiver);
};
// Holders for OcrPage's compiled private members: a WeakSet of instances and
// two private methods. They are declared here and assigned after the class body.
var _OcrPage_instances, _OcrPage_areWordsOnSameLine, _OcrPage_toLines;
// Flag the exports object with `__esModule`.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the export; the class is assigned to it once it is defined.
exports.OcrPage = void 0;
// Geometry helpers used in this file: compareOnX, compareOnY, getCentroid,
// isPointInPolygonY.
const geometry_1 = require("../../geometry");
/**
 * Holds the words of one page prediction and groups them into lines on demand.
 */
class OcrPage {
    /**
     * @param rawPrediction Raw page data; the entries of its optional
     *   `all_words` list are copied and ordered with `compareOnY`.
     */
    constructor(rawPrediction) {
        _OcrPage_instances.add(this);
        /** Every word read, as one flat list. */
        this.allWords = [];
        /** Words grouped by the line they sit on; filled by getAllLines(). */
        this.allLines = [];

        // Copy the words so the sort below does not reorder the caller's list.
        const collected = [];
        const rawWords = rawPrediction["all_words"];
        if (rawWords) {
            rawWords.forEach((word) => {
                collected.push(word);
            });
        }
        const byVerticalPosition = (first, second) => (0, geometry_1.compareOnY)(first.polygon, second.polygon);
        collected.sort(byVerticalPosition);
        this.allWords = collected;
    }

    /**
     * Get all words on the page as ordered lines.
     * The grouping is computed on the first call and reused while it is non-empty.
     * @returns Sorted lines on the page
     */
    getAllLines() {
        const hasCachedLines = this.allLines && this.allLines.length !== 0;
        if (hasCachedLines) {
            return this.allLines;
        }
        const buildLines = __classPrivateFieldGet(this, _OcrPage_instances, "m", _OcrPage_toLines);
        this.allLines = buildLines.call(this);
        return this.allLines;
    }

    /**
     * Default string representation: one text line per word line, with the
     * words of a line separated by single spaces.
     */
    toString() {
        const renderedLines = [];
        this.getAllLines().forEach((line) => {
            const texts = line.map((word) => word.text);
            renderedLines.push(texts.join(" "));
        });
        return renderedLines.join("\n");
    }
}
exports.OcrPage = OcrPage;
// Set of OcrPage instances; the constructor adds each instance to it.
_OcrPage_instances = new WeakSet();
/**
 * Private method: decide whether two words belong to the same line.
 *
 * The words match when the centroid of either polygon passes
 * `isPointInPolygonY` against the other word's polygon.
 *
 * @param currentWord Word with a `polygon` property.
 * @param nextWord Word with a `polygon` property.
 * @returns Truthy when the two words are considered to be on the same line.
 */
_OcrPage_areWordsOnSameLine = function _OcrPage_areWordsOnSameLine(currentWord, nextWord) {
    const currentInNext = (0, geometry_1.isPointInPolygonY)((0, geometry_1.getCentroid)(currentWord.polygon), nextWord.polygon);
    const nextInCurrent = (0, geometry_1.isPointInPolygonY)((0, geometry_1.getCentroid)(nextWord.polygon), currentWord.polygon);
    return nextInCurrent || currentInNext;
};
/**
 * Private method: group `this.allWords` into lines.
 *
 * Words are visited in `allWords` order. A word that has no line yet starts a
 * new one; every still-unplaced word that is on the same line as the visited
 * word then joins the visited word's line. Each line is finally sorted with
 * `compareOnX`.
 *
 * @returns Array of lines, each an array of words.
 */
_OcrPage_toLines = function _OcrPage_toLines() {
    const lines = [];
    // Index of every placed word -> the line holding it. This gives constant-time
    // "already placed?" checks and lets a matching word join the line of the
    // word it matched, rather than whichever line was created last.
    const lineByWordIdx = new Map();
    this.allWords.forEach((current, idxCurrent) => {
        let currentLine = lineByWordIdx.get(idxCurrent);
        if (currentLine === undefined) {
            currentLine = [current];
            lines.push(currentLine);
            lineByWordIdx.set(idxCurrent, currentLine);
        }
        this.allWords.forEach((next, idxNext) => {
            if (idxCurrent === idxNext || lineByWordIdx.has(idxNext)) {
                return;
            }
            if (__classPrivateFieldGet(this, _OcrPage_instances, "m", _OcrPage_areWordsOnSameLine).call(this, current, next)) {
                lineByWordIdx.set(idxNext, currentLine);
                currentLine.push(next);
            }
        });
    });
    lines.forEach((line) => line.sort((word1, word2) => (0, geometry_1.compareOnX)(word1.polygon, word2.polygon)));
    return lines;
};
;