/*
 * react-pdf-ner-annotator
 * Version: (not recorded)
 * A React component to annotate named entities directly onto a PDF.
 * 79 lines • 3.33 kB
 * JavaScript
 */
;
/**
 * Shallow-merge helper (has the shape of the `__assign` helper that compilers
 * emit for object spread). Reuses an `__assign` already present on `this`
 * when there is one; otherwise, on first call, it rebinds itself to
 * `Object.assign` (or to a manual own-property copy when `Object.assign`
 * is unavailable) and forwards the call.
 *
 * @param {Object} t - Target object; mutated and returned.
 * @param {...Object} sources - Objects whose own enumerable properties are copied onto `t`.
 * @returns {Object} The target `t`.
 */
var __assign = (this && this.__assign) || function () {
    var copyOwnProperties = function (target) {
        var sourceCount = arguments.length;
        for (var position = 1; position < sourceCount; position++) {
            var source = arguments[position];
            for (var key in source) {
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
    // Rebind once so later calls skip this wrapper entirely.
    __assign = Object.assign || copyOwnProperties;
    return __assign.apply(this, arguments);
};
/**
 * Normalises a required module so its default export can always be read
 * from the `default` property.
 *
 * @param {*} mod - Value returned by `require`.
 * @returns {*} `mod` itself when it is truthy and flagged `__esModule`;
 *   otherwise a wrapper object `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
exports.__esModule = true;
exports.tokenizeText = exports.getTextMetrics = exports.buildTextMapFromTextLayer = void 0;
var deburr_1 = __importDefault(require("lodash/deburr"));
var textLayer_1 = require("../interfaces/textLayer");
/**
 * Builds a flat "text map" from the items of a PDF text layer.
 *
 * - When `type` is `TextLayerType.TEXT_LAYER`, each item's `text` is split
 *   with `tokenizer` and one entry is produced per token. An entry is a copy
 *   of the item with `text` replaced by the token, `coords` narrowed to the
 *   token (left edge shifted by the measured width of the preceding tokens,
 *   width set to the token's measured width) and `dataI` set to a running
 *   counter across all items. The counter is incremented before use, so the
 *   first entry has `dataI === 1`.
 * - For any other `type`, each item is copied as is with `dataI` set to its
 *   array index (starting at 0).
 *
 * Items with empty/falsy `text`, and items in which the tokenizer matches
 * nothing, contribute no entries.
 *
 * @param {Array<Object>} pdfTextLayer - Text layer items. In the TEXT_LAYER
 *   branch each item is read for `text`, `fontSize`, `fontFamily`,
 *   `transform` and `coords` (`left`, `top`, `height`).
 * @param {*} type - Compared against `TextLayerType.TEXT_LAYER`.
 * @param {RegExp} tokenizer - Pattern passed to `String.prototype.match`.
 * @returns {Array<Object>} The text map entries, in document order.
 */
var buildTextMapFromTextLayer = function (pdfTextLayer, type, tokenizer) {
    var textMap = [];
    if (type !== textLayer_1.TextLayerType.TEXT_LAYER) {
        pdfTextLayer.forEach(function (textLayerItem, index) {
            textMap.push(__assign(__assign({}, textLayerItem), { dataI: index }));
        });
        return textMap;
    }
    var tokenIndex = 0;
    pdfTextLayer.forEach(function (textLayerItem) {
        var text = textLayerItem.text;
        if (!text) {
            return;
        }
        // String.prototype.match returns null (not an empty array) when the
        // pattern matches nothing, e.g. for an item holding only characters
        // the tokenizer does not recognise. Treat that as "no tokens".
        var tokens = text.match(tokenizer) || [];
        var coords = textLayerItem.coords;
        var offset = 0;
        tokens.forEach(function (token) {
            var textWidth = calculateTextWidth(token, textLayerItem.fontSize, textLayerItem.fontFamily, textLayerItem.transform);
            // Single-space tokens produce no entry, but their width still
            // advances the offset so following tokens are positioned after them.
            if (token !== ' ') {
                tokenIndex += 1;
                textMap.push(__assign(__assign({}, textLayerItem), {
                    dataI: tokenIndex,
                    text: token,
                    coords: {
                        left: offset + coords.left,
                        top: coords.top,
                        width: textWidth,
                        height: coords.height
                    }
                }));
            }
            offset += textWidth;
        });
    });
    return textMap;
};
exports.buildTextMapFromTextLayer = buildTextMapFromTextLayer;
/**
 * Measures the rendered width of `text` in the given font using a canvas 2D
 * context, then multiplies the result by `transform`.
 *
 * The measuring context is created lazily on first use and reused for later
 * calls, so measuring many tokens does not allocate a canvas per token.
 *
 * @param {string} text - Text to measure.
 * @param {number|string} fontSize - Font size, interpolated as `<fontSize>px`.
 * @param {string} fontFamily - Font family, appended after the size.
 * @param {number} transform - Multiplier applied to the measured width
 *   (presumably a horizontal scale factor — confirm against the caller).
 * @returns {number} Measured width multiplied by `transform`.
 */
var calculateTextWidth = (function () {
    var measureContext = null;
    return function (text, fontSize, fontFamily, transform) {
        if (measureContext === null) {
            // Created on first call rather than at module load, so merely
            // requiring this module does not touch `document`.
            measureContext = document.createElement('canvas').getContext('2d');
        }
        // A canvas ignores font strings it cannot parse and keeps the previous
        // value. Reset to the canvas default first so such a call measures with
        // the default font, exactly as a freshly created context would.
        measureContext.font = '10px sans-serif';
        measureContext.font = "".concat(fontSize, "px ").concat(fontFamily);
        return measureContext.measureText(text).width * transform;
    };
})();
/**
 * Measures `text` on a fresh canvas 2D context using the fixed font
 * `12px sans-serif`.
 *
 * @param {string} text - Text to measure.
 * @returns {{width: number, height: number}} `width` is the measured width of
 *   `text`; `height` is 1.2 times the measured width of the letter "M", used
 *   here as a line-height estimate.
 */
var getTextMetrics = function (text) {
    var ctx = document.createElement('canvas').getContext('2d');
    ctx.font = '12px sans-serif';
    var measured = ctx.measureText(text);
    var emWidth = ctx.measureText('M').width;
    return {
        width: measured.width,
        height: 1.2 * emWidth
    };
};
exports.getTextMetrics = getTextMetrics;
/**
 * Splits `input` into tokens after running it through lodash `deburr`.
 *
 * @param {string} input - Text to tokenize.
 * @param {RegExp} tokenizer - Pattern used when `needsTokenization` is truthy.
 * @param {boolean} needsTokenization - When truthy, match with `tokenizer`;
 *   otherwise split into runs of non-whitespace characters.
 * @returns {?Array<string>} Result of `String.prototype.match`, which is
 *   `null` when the pattern matches nothing.
 */
var tokenizeText = function (input, tokenizer, needsTokenization) {
    var deburr = deburr_1["default"];
    var normalized = deburr(input);
    var pattern = needsTokenization ? tokenizer : /[^\s]+/g;
    return normalized.match(pattern);
};
exports.tokenizeText = tokenizeText;
//# sourceMappingURL=textMapHelpers.js.map