// react-pdf-ner-annotator
// Version:
// A React component to annotate named entities directly onto a PDF.
// 127 lines • 8.05 kB
// JavaScript
"use strict";
/**
 * Module-interop helper: exposes `m[k]` on `o` under the name `k2`
 * (`k2` falls back to `k` when it is undefined).
 *
 * An already-installed `this.__createBinding` is reused if present.
 * Otherwise, when `Object.create` exists, the property is installed with
 * `Object.defineProperty`: the source's own descriptor is reused unless it is
 * missing, is an accessor on a non-`__esModule` object, or is a writable or
 * configurable data property; in those cases an enumerable getter that reads
 * `m[k]` on every access is used instead. Without `Object.create` the value
 * is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy fallback: one-off value copy.
        return function (o, m, k, k2) {
            var alias = k2 === undefined ? k : k2;
            o[alias] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var alias = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            // Read through to the source on each access so later changes to m[k] are visible.
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, descriptor);
    };
})();
/**
 * Module-interop helper: stores `v` as the `default` property of `o`.
 *
 * An already-installed `this.__setModuleDefault` is reused if present. When
 * `Object.create` exists the property is defined as an enumerable data
 * property via `Object.defineProperty`; otherwise it is assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
/**
 * Module-interop helper for `import * as ns from "..."`.
 *
 * A module flagged with `__esModule` is returned unchanged. Anything else is
 * wrapped in a fresh object: every own enumerable key except "default" is
 * bound through `__createBinding`, and the module itself is attached as
 * `default` through `__setModuleDefault`. `null`/`undefined` yield a wrapper
 * that only carries `default`.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            // for...in also walks inherited keys; only own ones are re-exported.
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
/**
 * Module-interop helper for `import x from "..."`.
 *
 * A module flagged with `__esModule` is returned unchanged; any other value
 * (including falsy ones) is wrapped as `{ default: value }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
exports.__esModule = true;
var react_1 = __importStar(require("react"));
var react_intersection_observer_1 = require("react-intersection-observer");
var pdfHelpers_1 = require("../helpers/pdfHelpers");
var textLayer_1 = require("../interfaces/textLayer");
var useTesseract_1 = __importDefault(require("../hooks/useTesseract"));
var useTextLayer_1 = __importDefault(require("../hooks/useTextLayer"));
var Selection_1 = __importDefault(require("./Selection"));
var OcrInfo_1 = __importDefault(require("./OcrInfo"));
var Loader_1 = __importDefault(require("./Loader"));
var TextLayer_1 = __importDefault(require("./textLayer/TextLayer"));
var AreaLayer_1 = __importDefault(require("./areaLayer/AreaLayer"));
var configContext_1 = __importDefault(require("../context/configContext"));
var annotationContext_1 = __importDefault(require("../context/annotationContext"));
/**
 * Renders a single PDF page onto a canvas and stacks the annotation layers
 * (selection, text layer, area layer, OCR info) on top of it.
 *
 * Props (read from the single props argument):
 * - pageNumber: forwarded to the text map callback and to the child layers.
 * - shouldRender: forwarded to the text layer component.
 * - page: thenable resolving to an object exposing `getViewport`, `render`
 *   and `getTextContent` (presumably a pdf.js page proxy; confirm with caller).
 * - scale: scale passed to `getViewport` and to the text-layer / OCR hooks.
 * - annotations: only its `length` is read here, to decide whether the page's
 *   words need to be registered through `addPageToTextMap`.
 * - addAnnotation, updateLastAnnotationForEntity: forwarded to `Selection`.
 * - addPageToTextMap: called with (pageNumber, words, layerType, confidence[, tokenizer]).
 * - initialTextLayer: handed to `useTextLayer`; when provided the text layer
 *   component is told that no tokenization is needed.
 *
 * Rendering and text extraction only start once the page is in view.
 * Asynchronous work started by an effect is abandoned when the effect is
 * re-run or the component unmounts, so stale results never update state and
 * two pdf.js render tasks never draw on the same canvas at the same time.
 */
var Page = function (_a) {
    var pageNumber = _a.pageNumber, shouldRender = _a.shouldRender, page = _a.page, scale = _a.scale, annotations = _a.annotations, addAnnotation = _a.addAnnotation, updateLastAnnotationForEntity = _a.updateLastAnnotationForEntity, addPageToTextMap = _a.addPageToTextMap, initialTextLayer = _a.initialTextLayer;
    var disableOCR = (0, react_1.useContext)(configContext_1["default"]).config.disableOCR;
    var tokenizer = (0, react_1.useContext)(annotationContext_1["default"]).tokenizer;
    var _b = (0, react_intersection_observer_1.useInView)({ threshold: 0 }), inViewRef = _b[0], inView = _b[1];
    var canvasRef = (0, react_1.useRef)(null);
    var _c = (0, react_1.useState)(true), loading = _c[0], setLoading = _c[1];
    var _d = (0, react_1.useState)(null), pdfPage = _d[0], setPdfPage = _d[1];
    var _e = (0, react_1.useState)(null), context = _e[0], setContext = _e[1];
    var _f = (0, react_1.useState)(false), startOcr = _f[0], setStartOcr = _f[1];
    // Placeholder dimensions used until the real viewport has been computed.
    var _g = (0, react_1.useState)({ width: (916 / 1.5) * scale, height: (1174 / 1.5) * scale }), pageViewport = _g[0], setPageViewport = _g[1];
    var _h = (0, useTextLayer_1["default"])(scale, context, initialTextLayer), textLayer = _h.textLayer, buildTextLayer = _h.buildTextLayer;
    var _j = (0, useTesseract_1["default"])(scale, context), ocrResult = _j.ocrResult, ocrError = _j.ocrError, ocrLoading = _j.ocrLoading, doOCR = _j.doOCR;
    var message = ocrResult ? "OCR confidence ".concat(ocrResult.confidence, "%") : undefined;
    // Register this page's words in the text map once annotations exist.
    // The embedded text layer wins over the OCR result when both are present.
    (0, react_1.useEffect)(function () {
        if (!annotations.length) {
            return;
        }
        if (textLayer) {
            addPageToTextMap(pageNumber, textLayer, textLayer_1.TextLayerType.TEXT_LAYER, 1, tokenizer);
            return;
        }
        if (ocrResult) {
            // NOTE(review): `ORC` is the member name referenced by the original code
            // (presumably the OCR layer type); confirm against ../interfaces/textLayer.
            addPageToTextMap(pageNumber, ocrResult.ocrWords, textLayer_1.TextLayerType.ORC, ocrResult.confidence);
        }
    }, [annotations, textLayer, ocrResult, pageNumber, addPageToTextMap, tokenizer]);
    // Kick off OCR once it has been requested, the page is visible and no result exists yet.
    (0, react_1.useEffect)(function () {
        if (!disableOCR && startOcr && inView && !ocrResult) {
            doOCR();
        }
    }, [disableOCR, startOcr, inView, doOCR, ocrResult]);
    // Grab the 2D context after mount. The ref object itself is always truthy,
    // so the element in `current` is what has to be checked.
    (0, react_1.useEffect)(function () {
        var canvas = canvasRef.current;
        if (canvas) {
            setContext(canvas.getContext('2d'));
        }
    }, [canvasRef]);
    // Draw the page on the canvas whenever it is visible and the inputs change.
    (0, react_1.useEffect)(function () {
        if (!context || !page || !inView) {
            return undefined;
        }
        var cancelled = false;
        var renderTask = null;
        page.then(function (pdfPageResult) {
            var canvas = canvasRef.current;
            if (cancelled || !canvas) {
                return undefined;
            }
            var viewport = pdfPageResult.getViewport({ scale: scale });
            setPageViewport(viewport);
            canvas.width = viewport.width;
            canvas.height = viewport.height;
            renderTask = pdfPageResult.render({
                canvasContext: context,
                viewport: viewport
            });
            return renderTask.promise.then(function () {
                // Finished tasks must not be cancelled by the cleanup below.
                renderTask = null;
                if (!cancelled) {
                    setPdfPage(pdfPageResult);
                }
            });
        })["catch"](function (error) {
            // A rejection after cleanup is the expected result of cancelling the task.
            if (!cancelled) {
                console.error("Failed to render page ".concat(pageNumber), error);
            }
        });
        return function () {
            cancelled = true;
            if (renderTask) {
                // Stop the in-flight render so the next one can reuse the canvas.
                renderTask.cancel();
                renderTask = null;
            }
        };
    }, [page, scale, canvasRef, context, inView]);
    // Build the text layer from the PDF's embedded text, or request OCR when there is none.
    (0, react_1.useEffect)(function () {
        if (textLayer && textLayer.length) {
            setLoading(false);
            return undefined;
        }
        if (!inView || !pdfPage || textLayer) {
            return undefined;
        }
        var cancelled = false;
        pdfPage.getTextContent().then(function (content) {
            if (cancelled) {
                return;
            }
            if (content.items.length) {
                var contentMerged = (0, pdfHelpers_1.mergeSplitWords)(content);
                buildTextLayer(contentMerged, pageViewport);
            }
            else {
                setStartOcr(true);
            }
            setLoading(false);
        })["catch"](function (error) {
            if (!cancelled) {
                console.error("Failed to read text content of page ".concat(pageNumber), error);
            }
        });
        return function () {
            cancelled = true;
        };
    }, [inView, pdfPage, pageViewport, context, page, textLayer, buildTextLayer]);
    var widthPx = "".concat(pageViewport.width, "px");
    var heightPx = "".concat(pageViewport.height, "px");
    var canvasLayer = react_1["default"].createElement("div", { className: "page__canvas-container", style: { width: widthPx, height: heightPx } },
        loading ? react_1["default"].createElement(Loader_1["default"], null) : null,
        react_1["default"].createElement("canvas", { ref: canvasRef, style: { width: widthPx, height: heightPx } }));
    var selectionLayer = react_1["default"].createElement(Selection_1["default"], { pageNumber: pageNumber, className: "page__text-layer-container", style: { width: widthPx, height: heightPx }, addAnnotation: addAnnotation, updateLastAnnotationForEntity: updateLastAnnotationForEntity, pdfInformation: { width: pageViewport.width, height: pageViewport.height, scale: scale }, pdfContext: context },
        // NOTE(review): `!!canvasRef` tests the ref object and is therefore always true;
        // kept unchanged because TextLayer's handling of `false` is not visible here.
        react_1["default"].createElement(TextLayer_1["default"], { inView: inView, shouldRender: shouldRender, canvasInitialized: !!canvasRef, textLayer: textLayer || (ocrResult === null || ocrResult === void 0 ? void 0 : ocrResult.ocrWords), pageNumber: pageNumber, needsTokenization: !initialTextLayer }),
        react_1["default"].createElement(AreaLayer_1["default"], { pdfScale: scale, pageNumber: pageNumber }),
        react_1["default"].createElement("div", { className: "ocr-info-container" },
            react_1["default"].createElement(OcrInfo_1["default"], { loading: ocrLoading, message: message, error: ocrError })));
    return (react_1["default"].createElement("div", { className: "page", ref: inViewRef },
        react_1["default"].createElement("div", { className: "page__container", style: { width: widthPx, height: heightPx } },
            canvasLayer,
            selectionLayer)));
};
exports["default"] = (0, react_1.memo)(Page);
//# sourceMappingURL=Page.js.map