react-pdf-ner-annotator
Version:
A React component to annotate named entities directly onto a PDF.
152 lines • 6.67 kB
JavaScript
;
/**
 * Compiler-emitted helper that copies own enumerable properties from every
 * argument after the first onto the first argument and returns that argument.
 *
 * An `__assign` already present on `this` is reused. Otherwise the helper
 * rebinds itself on first call to `Object.assign`, or to a manual copy loop
 * when `Object.assign` is unavailable.
 */
var __assign = (this && this.__assign) || function () {
    __assign = Object.assign || function (target) {
        var total = arguments.length;
        for (var index = 1; index < total; index++) {
            var source = arguments[index];
            for (var key in source) {
                // Skip properties inherited through the prototype chain.
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
    return __assign.apply(this, arguments);
};
// Compiler-emitted helper: exposes property `k` of module object `m` on `o`
// under the name `k2` (which defaults to `k`). A `__createBinding` already
// present on `this` is reused. The implementation is chosen once, at load
// time, based on whether `Object.create` exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Replace the descriptor with a getter that reads `m[k]` at access time when:
//   - `m` has no own descriptor for `k`, or
//   - the descriptor is an accessor ("get" in desc) and `m` is not flagged `__esModule`, or
//   - the descriptor is a data property that is writable or configurable.
// In every other case the descriptor found on `m` is reused unchanged.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback without Object.create: copies the current value of `m[k]` by plain assignment.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
/**
 * Compiler-emitted helper that stores `value` as the "default" property of
 * `target`. A `__setModuleDefault` already present on `this` is reused.
 *
 * The implementation is picked once, at load time: with `Object.create`
 * available the property is defined through `Object.defineProperty`
 * (enumerable, with the remaining attributes left at their defaults),
 * otherwise it is set by plain assignment.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (target, value) {
            Object.defineProperty(target, "default", { enumerable: true, value: value });
        };
    }
    return function (target, value) {
        target["default"] = value;
    };
})();
/**
 * Compiler-emitted helper used for namespace-style imports.
 *
 * A module flagged `__esModule` is returned untouched. Anything else is
 * wrapped in a new object: every own enumerable key of `mod` except "default"
 * is bound onto it with `__createBinding`, and `mod` itself is installed as
 * the wrapper's "default" through `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
/**
 * Compiler-emitted helper that returns `to` concatenated with the elements
 * of `from`.
 *
 * When `pack` is truthy, or when only two arguments are passed, holes in
 * `from` are filled with `undefined`: from the first hole onwards every index
 * is copied explicitly into a fresh array. Otherwise `from` is sliced as is,
 * so holes are preserved.
 */
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
    var dense;
    if (pack || arguments.length === 2) {
        var count = from.length;
        for (var index = 0; index < count; index++) {
            // Nothing to do until the first hole is encountered.
            if (!dense && index in from) {
                continue;
            }
            if (!dense) {
                dense = Array.prototype.slice.call(from, 0, index);
            }
            dense[index] = from[index];
        }
    }
    return to.concat(dense || Array.prototype.slice.call(from));
};
/**
 * Compiler-emitted helper for default imports: a module flagged `__esModule`
 * is returned as is, anything else is wrapped as `{ "default": mod }`.
 * An `__importDefault` already present on `this` is reused.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
exports.__esModule = true;
exports.mergeSplitWords = exports.calculateRectangleProperties = exports.recalculateBoundingBox = exports.calculateFontSize = exports.calculateTransform = exports.calculateTextProperties = void 0;
// @ts-ignore
var lodash_1 = __importDefault(require("lodash"));
var PdfJs = __importStar(require("pdfjs-dist/build/pdf"));
// Tolerance used by filterByDistance: the gap between
// `current.transform[4] + current.width + addedWidth` and `candidate.transform[4]`,
// rounded to one decimal, must lie within [-MAX_ALLOWED_DISTANCE, MAX_ALLOWED_DISTANCE]
// for the candidate to be accepted.
var MAX_ALLOWED_DISTANCE = 0.5;
/**
 * Computes the position, font size and horizontal scale factor for one text item.
 *
 * @param textItem Object providing `transform`, `width`, `height` and `str`.
 * @param style Object providing `vertical`, `ascent`, `descent` and `fontFamily`.
 * @param viewPort Object providing `transform` and `scale`.
 * @param context Object forwarded to `calculateTransform` for text measurement.
 * @returns `{ left, top, fontSize, transform }`.
 */
var calculateTextProperties = function (textItem, style, viewPort, context) {
    var matrix = PdfJs.Util.transform(viewPort.transform, textItem.transform);

    // Rotation encoded in the matrix; vertical styles get an extra quarter turn.
    var rotation = Math.atan2(matrix[1], matrix[0]) + (style.vertical ? Math.PI / 2 : 0);

    var fontSize = Math.sqrt(matrix[2] * matrix[2] + matrix[3] * matrix[3]);

    // `ascent` takes precedence; `descent` is only consulted when `ascent` is falsy.
    var ascentFactor = 1;
    if (style.ascent) {
        ascentFactor = style.ascent;
    }
    else if (style.descent) {
        ascentFactor = 1 + style.descent;
    }
    var fontAscent = fontSize * ascentFactor;

    // Unrotated placement by default; rotated text offsets the ascent along the rotation.
    var left = matrix[4];
    var top = matrix[5] - fontAscent;
    if (rotation !== 0) {
        left = matrix[4] + fontAscent * Math.sin(rotation);
        top = matrix[5] - fontAscent * Math.cos(rotation);
    }

    // Vertical styles use the item's height as its extent, horizontal ones its width.
    var extent = style.vertical ? textItem.height : textItem.width;
    var canvasWidth = extent * viewPort.scale;

    var transform = (0, exports.calculateTransform)(canvasWidth, fontSize, style.fontFamily, textItem.str, context);
    return { left: left, top: top, fontSize: fontSize, transform: transform };
};
exports.calculateTextProperties = calculateTextProperties;
/**
 * Computes the horizontal scale factor that stretches `text`, rendered in the
 * given font, to `canvasWidth`.
 *
 * @param canvasWidth Target width; a falsy value (e.g. 0) skips measurement.
 * @param fontSize Font size, written into the font string followed by "px".
 * @param fontFamily Font family appended to the font string.
 * @param text String to measure.
 * @param context Object with a writable `font` property and a `measureText(text)`
 *     method returning `{ width }`. Its `font` is overwritten whenever
 *     measurement takes place.
 * @returns `canvasWidth / measuredWidth`, or 1 when `canvasWidth` is falsy or
 *     the measured width is not a positive number.
 */
var calculateTransform = function (canvasWidth, fontSize, fontFamily, text, context) {
    if (!canvasWidth) {
        return 1;
    }
    context.font = "".concat(fontSize, "px ").concat(fontFamily);
    var measuredWidth = context.measureText(text).width;
    // An empty or unmeasurable string reports width 0 (or NaN); dividing by it
    // would yield Infinity/NaN, so fall back to the neutral scale instead.
    if (!(measuredWidth > 0)) {
        return 1;
    }
    return canvasWidth / measuredWidth;
};
exports.calculateTransform = calculateTransform;
/**
 * Estimates a font size from the area available per character.
 *
 * @param width Box width.
 * @param height Box height.
 * @param text Value whose `length` is used as the character count.
 * @returns `sqrt((width * height) / text.length) * 1.3333`.
 */
var calculateFontSize = function (width, height, text) {
    var areaPerCharacter = (width * height) / text.length;
    // NOTE(review): 1.3333 is presumably the 4/3 pt-to-px ratio — confirm.
    return 1.3333 * Math.sqrt(areaPerCharacter);
};
exports.calculateFontSize = calculateFontSize;
/**
 * Converts a bounding box from one scale to another.
 *
 * @param coordinates Object with numeric `left`, `top`, `width` and `height`.
 * @param oldScale Scale the coordinates are currently expressed in.
 * @param newScale Scale to convert to.
 * @returns A new object with the same four fields, each computed as
 *     `(value / oldScale) * newScale`.
 */
var recalculateBoundingBox = function (coordinates, oldScale, newScale) {
    var fields = ["left", "top", "width", "height"];
    var rescaled = {};
    for (var index = 0; index < fields.length; index++) {
        var field = fields[index];
        rescaled[field] = (coordinates[field] / oldScale) * newScale;
    }
    return rescaled;
};
exports.recalculateBoundingBox = recalculateBoundingBox;
/**
 * Converts a two-corner bounding box into left/top/width/height form.
 *
 * The earlier implementation computed each side as a Euclidean distance whose
 * second term (`y0 - y0`, `x1 - x1`) was always zero. That reduces to an
 * absolute difference, which is used here directly. It also keeps `width`
 * independent of `y0` and avoids overflow/underflow from squaring.
 *
 * @param boundingBox Object with numeric `x0`, `y0`, `x1` and `y1`.
 * @returns `{ left: x0, top: y0, width: |x1 - x0|, height: |y1 - y0| }`.
 */
var calculateRectangleProperties = function (boundingBox) {
    var x0 = boundingBox.x0, x1 = boundingBox.x1, y0 = boundingBox.y0, y1 = boundingBox.y1;
    // NOTE(review): left/top are always taken from (x0, y0), even when x1 < x0 or
    // y1 < y0 — presumably callers pass the top-left corner first; confirm.
    return {
        left: x0,
        top: y0,
        width: Math.abs(x1 - x0),
        height: Math.abs(y1 - y0)
    };
};
exports.calculateRectangleProperties = calculateRectangleProperties;
/**
 * Merges text items that filterByDistance accepts as direct continuations of
 * one another into single items.
 *
 * @param textContent Object with an `items` array; each item provides
 *     `transform`, `width` and `str`.
 * @returns A shallow copy of `textContent` whose `items` array holds the
 *     merged items.
 *
 * Side effect: `textContent.items` itself is rewritten in place (see the
 * splice below), so the caller's array loses every item that was merged
 * into another one.
 */
var mergeSplitWords = function (textContent) {
var items = textContent.items;
// Copy every property of textContent, but start with an empty items list.
var mergedTextContent = __assign(__assign({}, textContent), { items: [] });
items.forEach(function (item) {
// Running total of the widths already accepted for this item; it is passed to
// filterByDistance so each further candidate is compared against the extended end.
var prevWidth = 0;
// The filter callback is stateful: accepting a candidate grows prevWidth, so
// the result depends on the order of `items`.
var sameLevel = items.filter(function (candidate) {
if (filterByDistance(item, candidate, prevWidth)) {
prevWidth += candidate.width;
return true;
}
return false;
});
if (sameLevel.length) {
// Emit a copy of the item with the accepted candidates' widths added and their strings appended.
mergedTextContent.items.push(__assign(__assign({}, item), { width: item.width + sameLevel.map(function (val) { return val.width; }).reduce(function (a, b) { return a + b; }, 0), str: item.str + sameLevel.map(function (val) { return val.str; }).join('') }));
// Replace the contents of `items` in place with everything that was not merged,
// so merged candidates are not emitted again later in this loop.
// NOTE(review): this mutates the array forEach is iterating; if a removed entry
// sits before the current index, later entries shift left and one may be skipped — confirm.
items.splice.apply(items, __spreadArray([0, items.length], items.filter(function (candidate) { return !lodash_1["default"].includes(sameLevel, candidate); }), false));
}
else {
// Nothing to merge: the original item object is passed through uncopied.
mergedTextContent.items.push(item);
}
});
return mergedTextContent;
};
exports.mergeSplitWords = mergeSplitWords;
/**
 * Decides whether `candidate` directly continues `current`.
 *
 * All of the following must hold:
 *   - both items have the same `transform[5]`;
 *   - `candidate.transform[4]` is strictly greater than `current.transform[4]`;
 *   - the gap between `current.transform[4] + current.width + addedWidth` and
 *     `candidate.transform[4]`, rounded to one decimal, is within
 *     MAX_ALLOWED_DISTANCE in either direction.
 *
 * @param current Item providing `transform` and `width`.
 * @param candidate Item providing `transform`.
 * @param addedWidth Extra width added to the end of `current` before measuring the gap.
 * @returns `true` when every condition holds, `false` otherwise.
 */
var filterByDistance = function (current, candidate, addedWidth) {
    var candidateStart = candidate.transform[4];
    var currentStart = current.transform[4];
    if (current.transform[5] !== candidate.transform[5]) {
        return false;
    }
    if (!(currentStart < candidateStart)) {
        return false;
    }
    var currentEnd = currentStart + current.width + addedWidth;
    var gap = lodash_1["default"].round(candidateStart - currentEnd, 1);
    return Math.abs(gap) <= MAX_ALLOWED_DISTANCE;
};
//# sourceMappingURL=pdfHelpers.js.map