esearch-ocr
Version:
paddleocr models run on onnx
1,587 lines (1,381 loc) • 59.4 kB
text/typescript
import { Cls } from "./cls";
import {
newCanvas,
setCanvas,
toPaddleInput,
type SessionType,
type AsyncType,
data2canvas,
resizeImg,
int,
tLog,
clip,
} from "./untils";
import { type Contour, findContours, minAreaRect, type Point } from "./cv";
export {
setOCREnv,
init,
/** @deprecated use return obj from init */
x as ocr,
loadImg,
/** @deprecated use return obj from init */
Det as det,
/** @deprecated use return obj from init */
Rec as rec,
afAfRec as analyzeLayout,
initDet,
initRec,
initDocDirCls,
rotateImg,
};
export type initType = AsyncType<ReturnType<typeof init>>;
export type { OrtOption, InitOcrBase, InitOcrGlobal, detResultType, resultType, loadImgType };
/** Caller-supplied layout hints: a region (`box`) together with how that region should be treated. */
type ColumnsTip = { box: BoxType; type: "auto" | "ignore" | "table" | "raw" | "raw-blank" }[];
/** The onnxruntime module to use plus optional session options handed to `InferenceSession.create`. */
type OrtOption = {
ort: typeof import("onnxruntime-common");
ortOption?: import("onnxruntime-common").InferenceSession.SessionOptions;
};
/** Options for the text-detection model. */
type InitDetBase = {
/** Model source passed to `InferenceSession.create` (a string or the raw model bytes). */
input: string | ArrayBufferLike | Uint8Array;
/** Scale factor applied to the image size before it is rounded to a multiple of 32; defaults to 1. */
ratio?: number;
/** Called with the detected boxes after every detection run. */
on?: (r: detResultType) => void;
};
/** Options for the text-recognition model. */
type InitRecBase = {
/** Model source passed to `InferenceSession.create` (a string or the raw model bytes). */
input: string | ArrayBufferLike | Uint8Array;
/** Character dictionary as text; it is split on line breaks into one entry per line. */
decodeDic: string;
/** Height the cropped line images are resized to before recognition; defaults to 48. */
imgh?: number;
/** Called after each line is recognized with its index, its result and the total number of lines. */
on?: (index: number, result: { text: string; mean: number }, total: number) => void;
optimize?: {
/** Enables the space-recovery step of the decoder; treated as `true` when omitted. */
space?: boolean;
};
};
/** Options for the optional document-orientation classifier. */
type InitDocClsBase = {
input: string | ArrayBufferLike | Uint8Array;
};
/** Full option object accepted by `init` / `initOCR`. */
type InitOcrBase = {
det: InitDetBase;
rec: InitRecBase;
docCls?: InitDocClsBase;
/** Forwarded to the layout analysis step after recognition. */
analyzeLayout?: {
docDirs?: ReadingDir[];
columnsTip?: ColumnsTip;
};
/** Read by `setOCREnv`: turns on debug drawing and logging. */
dev?: boolean;
/** Read by `setOCREnv`: turns on console logging. */
log?: boolean;
// NOTE(review): `initOCR` does not read this field in the code visible here; `det.ratio` is what `initDet` uses.
detRatio?: number;
} & OrtOption;
type InitOcrGlobal = {
/** @deprecated use setOCREnv instead */
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
canvas?: (w: number, h: number) => any;
/** @deprecated use setOCREnv instead */
imageData?;
};
/** Image sources accepted by `loadImg`. */
type loadImgType = string | HTMLImageElement | HTMLCanvasElement | ImageData;
/** Detection output: one entry per text region with its box, cropped pixels and estimated colors. */
type detResultType = { box: BoxType; img: ImageData; style: { bg: color; text: color } }[];
/** Raw detection model output together with the dimensions used to index it. */
type detDataType = {
data: AsyncType<ReturnType<typeof runDet>>["data"];
width: number;
height: number;
};
/** A 2D point as `[x, y]`. */
type pointType = [number, number];
/** Four corner points of a quadrilateral. */
type BoxType = [pointType, pointType, pointType, pointType];
type pointsType = pointType[];
/** Recognition output: text, confidence (`mean`), box and colors of each line. */
type resultType = { text: string; mean: number; box: BoxType; style: { bg: color; text: color } }[];
/** A direction along one axis: left-to-right, right-to-left, top-to-bottom or bottom-to-top. */
type ReadingDirPart = "lr" | "rl" | "tb" | "bt";
/** Reading direction of a document, split into its block and inline components. */
type ReadingDir = {
block: ReadingDirPart;
inline: ReadingDirPart;
};
// Loggers used for checkpoints across the pipeline; setOCREnv replaces their `l` method with a no-op when dev mode is off.
const task = new tLog("t");
const task2 = new tLog("af_det");
// Debug flags, both written by setOCREnv: `dev` enables debug drawing, `canlog` enables console logging.
let dev = false;
let canlog = false;
// Instance produced by the most recent init() call; backs the deprecated module-level ocr/det/rec wrappers.
let globalOCR: AsyncType<ReturnType<typeof initOCR>> | null = null;
/**
 * Debug helper: copies `img` onto a fresh `<canvas>` and appends it to the page body.
 *
 * This is best-effort output. When no DOM is available (for example under Node.js or
 * in a worker) the call is a no-op instead of throwing a ReferenceError.
 *
 * @param img Canvas whose pixels should be shown.
 * @param id Optional element id assigned to the created canvas.
 */
function putImgDom(img: OffscreenCanvas, id?: string) {
    if (typeof document === "undefined") return;
    const canvas = document.createElement("canvas");
    canvas.width = img.width;
    canvas.height = img.height;
    canvas.getContext("2d")?.drawImage(img, 0, 0);
    if (id) canvas.id = id;
    try {
        document.body?.append(canvas);
    } catch {
        // Debug output only: failing to attach the canvas must never break OCR.
    }
}
/**
 * Builds an ImageData from raw RGBA bytes. Declared with `let` because
 * setOCREnv may swap in an environment-specific factory.
 */
let createImageData = (data: Uint8ClampedArray, w: number, h: number) => new ImageData(data, w, h);
/** Forwards its arguments to `console.log`, but only while logging is enabled. */
function log(...args: any[]) {
    if (!canlog) return;
    console.log(...args);
}
/**
 * Like `log`, but prints a structured clone of every argument so that later
 * mutations of the originals do not change what the console shows.
 */
function logSrc(...args: any[]) {
    if (!canlog) return;
    const snapshots: any[] = [];
    for (const value of args) {
        snapshots.push(structuredClone(value));
    }
    console.log(...snapshots);
}
/**
 * Prints each given CSS color string rendered in its own color, using the
 * console's `%c` styling directive. Active only while logging is enabled.
 */
function logColor(...args: string[]) {
    if (!canlog) return;
    let format = "";
    const styles: string[] = [];
    for (const cssColor of args) {
        format += `%c${cssColor}`;
        styles.push(`color: ${cssColor}`);
    }
    console.log(format, ...styles);
}
/**
 * Creates an OCR instance and remembers it for the deprecated module-level
 * `ocr` / `det` / `rec` wrappers.
 *
 * Accepts either the current option shape (`InitOcrBase`) or the legacy flat
 * shape; legacy fields are mapped onto the current structure.
 *
 * @param op Initialization options in either shape.
 * @returns The object returned by `initOCR`.
 */
async function init(
    op:
        | InitOcrBase
        | ({
              /** @deprecated use det.input */
              detPath: string;
              /** @deprecated use rec.input */
              recPath: string;
              /** @deprecated */
              layoutPath?: string;
              /** @deprecated use docCls.input */
              docClsPath?: string;
              /** @deprecated use rec.decodeDic */
              dic: string;
              /** @deprecated */
              layoutDic?: string;
              /** @deprecated use analyzeLayout.docDirs */
              docDirs?: ReadingDir[];
              /** @deprecated use analyzeLayout.columnsTip */
              columnsTip?: ColumnsTip;
              dev?: boolean;
              log?: boolean;
              /** @deprecated use rec.imgh */
              imgh?: number;
              /** @deprecated use det.ratio */
              detRatio?: number;
              /** @deprecated use det.on and rec.on */
              onProgress?: (type: "det" | "rec", total: number, count: number) => void;
              /** @deprecated use det.on */
              onDet?: (r: detResultType) => void;
              /** @deprecated use rec.on */
              onRec?: (index: number, result: { text: string; mean: number }) => void;
          } & InitOcrGlobal &
              OrtOption),
) {
    // Backwards compatibility: env-related options are applied through setOCREnv.
    setOCREnv(op);

    // Detection options: taken as-is from the current shape, otherwise built from legacy fields.
    const det: InitDetBase =
        "det" in op
            ? op.det
            : {
                  input: op.detPath,
                  ratio: op.detRatio,
                  on: async (r) => {
                      if (op.onDet) op.onDet(r);
                      if (op.onProgress) op.onProgress("det", 1, 1);
                  },
              };

    // Recognition options, same approach.
    const rec: InitRecBase =
        "rec" in op
            ? op.rec
            : {
                  input: op.recPath,
                  decodeDic: op.dic,
                  imgh: op.imgh,
                  on: async (index, result, t) => {
                      if (op.onRec) op.onRec(index, result);
                      if (op.onProgress) op.onProgress("rec", t, index + 1);
                  },
              };

    // Orientation classifier: only present when configured.
    let docCls: InitDocClsBase | undefined;
    if ("rec" in op) {
        docCls = op.docCls;
    } else if (op.docClsPath) {
        docCls = { input: op.docClsPath };
    }

    const analyzeLayout: InitOcrBase["analyzeLayout"] =
        "rec" in op
            ? op.analyzeLayout
            : {
                  columnsTip: op.columnsTip,
                  docDirs: op.docDirs,
              };

    // `op` is spread last, so any field it carries under the same key wins.
    const xop: InitOcrBase = {
        det,
        rec,
        docCls,
        analyzeLayout,
        ...op,
    };

    const instance = await initOCR(xop);
    globalOCR = instance;
    return instance;
}
/**
 * Configures module-wide environment settings.
 *
 * @param op.dev Enables debug drawing; also implies logging.
 * @param op.log Enables console logging.
 * @param op.canvas Canvas factory, forwarded to `setCanvas` when given.
 * @param op.imageData Replacement for the ImageData factory when given.
 */
function setOCREnv(op: {
    dev?: boolean;
    log?: boolean;
    // biome-ignore lint/suspicious/noExplicitAny: <explanation>
    canvas?: (w: number, h: number) => any;
    imageData?;
}) {
    dev = !!op.dev;
    canlog = dev || !!op.log;
    if (!dev) {
        // Outside dev mode the checkpoint loggers are silenced.
        // NOTE(review): nothing here restores them if dev is enabled again later.
        for (const logger of [task, task2]) {
            logger.l = () => {};
        }
    }
    if (op.canvas) {
        setCanvas(op.canvas);
    }
    if (op.imageData) {
        createImageData = op.imageData;
    }
}
/**
 * Normalizes any supported image source to an `ImageData`.
 *
 * Without a `window` global the input must already be ImageData-like
 * (`data`, `width`, `height`); it is validated and returned unchanged.
 * With a `window`, string sources are loaded through an `Image` element and
 * images/canvases are read back through a 2D context.
 *
 * @param src URL string, image element, canvas element or ImageData.
 * @returns The pixels of `src`.
 * @throws Error when the input is not valid image data, when an image URL
 *   fails to load, or when no 2D context can be obtained.
 */
async function loadImg(src: loadImgType): Promise<ImageData> {
    if (typeof window === "undefined") {
        const x = src as ImageData;
        if (!x.data || !x.width || !x.height) throw new Error("invalid image data");
        return x;
    }

    let img: HTMLImageElement | HTMLCanvasElement | ImageData;
    if (typeof src === "string") {
        const el = new Image();
        await new Promise<void>((resolve, reject) => {
            // Handlers are attached before `src` is set, and a failed load rejects
            // instead of leaving the promise pending forever.
            el.onload = () => resolve();
            el.onerror = () => reject(new Error("failed to load image"));
            el.src = src;
        });
        img = el;
    } else {
        img = src;
    }

    if (img instanceof HTMLImageElement) {
        const canvas = newCanvas(img.naturalWidth, img.naturalHeight);
        const ctx = canvas.getContext("2d");
        if (!ctx) throw new Error("canvas context is null");
        ctx.drawImage(img, 0, 0);
        img = ctx.getImageData(0, 0, img.naturalWidth, img.naturalHeight);
    }
    if (img instanceof HTMLCanvasElement) {
        const ctx = img.getContext("2d");
        if (!ctx) throw new Error("canvas context is null");
        img = ctx.getImageData(0, 0, img.width, img.height);
    }
    return img;
}
/**
 * Verifies that a canvas and an ImageData can be created in the current
 * environment. On failure a hint about `setOCREnv` is printed and the
 * original error is rethrown.
 */
function checkNode() {
    const probePixels = new Uint8ClampedArray(4);
    try {
        newCanvas(1, 1);
        createImageData(probePixels, 1, 1);
    } catch (err) {
        console.log("nodejs need set canvas, please use setOCREnv to set canvas and imageData");
        throw err;
    }
}
/**
 * Deprecated module-level OCR entry point (exported as `ocr`); delegates to
 * the instance created by the last `init` call.
 *
 * @throws Error("need init") when `init` has not completed yet.
 */
async function x(i: loadImgType) {
    const instance = globalOCR;
    if (instance === null) {
        throw new Error("need init");
    }
    return instance.ocr(i);
}
/**
 * Deprecated module-level detection entry point (exported as `det`);
 * delegates to the instance created by the last `init` call.
 *
 * @throws Error("need init") when `init` has not completed yet.
 */
async function Det(s: ImageData) {
    const instance = globalOCR;
    if (instance === null) {
        throw new Error("need init");
    }
    return instance.det(s);
}
/**
 * Deprecated module-level recognition entry point (exported as `rec`);
 * delegates to the instance created by the last `init` call.
 *
 * @throws Error("need init") when `init` has not completed yet.
 */
async function Rec(box: detResultType) {
    const instance = globalOCR;
    if (instance === null) {
        throw new Error("need init");
    }
    return instance.rec(box);
}
/**
 * Main entry: loads the models and returns the OCR operations.
 *
 * The returned `ocr` function runs the whole pipeline: load image, optional
 * orientation correction, detection, recognition and layout analysis.
 * `det` and `rec` expose the individual stages.
 */
async function initOCR(op: InitOcrBase) {
    checkNode();
    const ortO: OrtOption = {
        ort: op.ort,
        ortOption: op.ortOption,
    };
    // Models are created one after another: orientation classifier (optional), detector, recognizer.
    const docCls = op.docCls ? await initDocDirCls({ ...op.docCls, ...ortO }) : undefined;
    const det = await initDet({ ...op.det, ...ortO });
    const rec = await initRec({ ...op.rec, ...ortO });

    const ocr = async (srcimg: loadImgType) => {
        let img = await loadImg(srcimg);
        let dir = 0;
        if (docCls) {
            // Undo the detected document rotation before detecting text.
            dir = await docCls.docCls(img);
            log("dir", dir);
            img = rotateImg(img, 360 - dir);
        }
        const box = await det.det(img);
        const mainLine = await rec.rec(box);
        const newMainLine = afAfRec(mainLine, op.analyzeLayout);
        log(mainLine, newMainLine);
        task.l("end");
        return { src: mainLine, ...newMainLine, docDir: dir };
    };

    return {
        ocr,
        det: det.det,
        rec: rec.rec,
    };
}
/**
 * Creates an onnxruntime inference session from a model source.
 *
 * @param ort The onnxruntime module to use.
 * @param input Model source: a string or the model bytes. The byte form is
 *   typed as `Uint8Array`, matching what the det/rec/docCls option types supply.
 * @param ortOptions Optional session options.
 * @returns Whatever `InferenceSession.create` returns for the given input.
 */
function initOrtModel(
    ort: OrtOption["ort"],
    input: string | ArrayBufferLike | Uint8Array,
    ortOptions?: OrtOption["ortOption"],
) {
    // The branches look identical at runtime; they exist so that each call sees a
    // single, narrowed argument type and resolves against one `create` overload.
    if (typeof input === "string") {
        return ort.InferenceSession.create(input, ortOptions);
    }
    if (input instanceof Uint8Array) {
        return ort.InferenceSession.create(input, ortOptions);
    }
    return ort.InferenceSession.create(input, ortOptions);
}
/**
 * Loads the document-orientation classifier.
 *
 * @returns An object whose `docCls(img)` runs `Cls` on the image with the
 *   candidate angles 0/90/180/270 and a 224x224 input size.
 */
async function initDocDirCls(op: InitDocClsBase & OrtOption) {
    const session = await initOrtModel(op.ort, op.input, op.ortOption);
    return {
        docCls: async (img: ImageData) => Cls(img, op.ort, session, [0, 90, 180, 270], 224, 224),
    };
}
/**
 * Loads the text-detection model.
 *
 * @returns An object whose `det(image)` preprocesses the image, runs the
 *   model, converts the output into text boxes and reports them to `op.on`.
 */
async function initDet(op: InitDetBase & OrtOption) {
    checkNode();
    const session = await initOrtModel(op.ort, op.input, op.ortOption);
    const detRatio = op.ratio !== undefined ? op.ratio : 1;

    async function detect(srcimg: ImageData) {
        if (dev) {
            putImgDom(data2canvas(srcimg));
        }
        task.l("pre_det");
        const prepared = beforeDet(srcimg, detRatio);
        const { transposedData, image } = prepared.data;
        task.l("det");
        const detResults = await runDet(transposedData, image, session, op.ort);
        task.l("aft_det");
        // dims[3] / dims[2] are used as the width / height of the model output.
        const box = afterDet(
            { data: detResults.data, width: detResults.dims[3], height: detResults.dims[2] },
            prepared.width,
            prepared.height,
            srcimg,
        );
        op.on?.(box);
        return box;
    }

    return { det: detect };
}
/**
 * Loads the text-recognition model.
 *
 * The dictionary text is split into lines and always ends with a space entry:
 * a trailing empty line is turned into the space, otherwise one is appended.
 *
 * @returns An object whose `rec(boxes)` recognizes every detected region in
 *   order, reports each result to `op.on`, and returns the lines whose mean
 *   confidence is at least 0.5.
 */
async function initRec(op: InitRecBase & OrtOption) {
    checkNode();
    const rec = await initOrtModel(op.ort, op.input, op.ortOption);

    const dic = op.decodeDic.split(/\r\n|\r|\n/);
    if (dic.at(-1) === "") {
        // A trailing line break produced an empty entry: reuse it as the space.
        dic[dic.length - 1] = " ";
    } else {
        dic.push(" ");
    }

    const imgh = op.imgh || 48;
    const opmSpace = op.optimize?.space === undefined ? true : op.optimize.space;

    async function Rec(box: detResultType) {
        const mainLine: resultType = [];
        task.l("bf_rec");
        const recL = beforeRec(box, imgh);
        for (const [index, item] of recL.entries()) {
            const { b, imgH, imgW } = item;
            const recResults = await runRec(b, imgH, imgW, rec, op.ort);
            const result = afterRec(recResults, dic, { opm: { space: opmSpace } })[0];
            mainLine.push({
                text: result.text,
                mean: result.mean,
                box: box[index].box,
                style: box[index].style,
            });
            op.on?.(index, result, box.length);
        }
        task.l("rec_end");
        return mainLine.filter((x) => x.mean >= 0.5);
    }

    return { rec: Rec };
}
/**
 * Runs the detection session on preprocessed image data.
 *
 * @param transposedData Nested numeric data that is flattened into the input tensor.
 * @param image Image whose height and width give the tensor shape `[1, 3, height, width]`.
 * @param det Session to run; its first input and first output names are used.
 * @param ort The onnxruntime module providing `Tensor`.
 * @returns The tensor stored under the session's first output name.
 */
async function runDet(transposedData: number[][][], image: ImageData, det: SessionType, ort: OrtOption["ort"]) {
    const detData = Float32Array.from(transposedData.flat(3));
    const detTensor = new ort.Tensor("float32", detData, [1, 3, image.height, image.width]);
    // Built as a keyed literal so the feed object has a proper type instead of `{}`.
    const detResults = await det.run({ [det.inputNames[0]]: detTensor });
    return detResults[det.outputNames[0]];
}
/**
 * Runs the recognition session on one preprocessed line image.
 *
 * @param b Nested numeric data that is flattened into the input tensor.
 * @param imgH Tensor height.
 * @param imgW Tensor width.
 * @param rec Session to run; its first input and first output names are used.
 * @param ort The onnxruntime module providing `Tensor`.
 * @returns The tensor stored under the session's first output name.
 */
async function runRec(b: number[][][], imgH: number, imgW: number, rec: SessionType, ort: OrtOption["ort"]) {
    const recData = Float32Array.from(b.flat(3));
    const recTensor = new ort.Tensor("float32", recData, [1, 3, imgH, imgW]);
    // Built as a keyed literal so the feed object has a proper type instead of `{}`.
    const recResults = await rec.run({ [rec.inputNames[0]]: recTensor });
    return recResults[rec.outputNames[0]];
}
/**
 * Prepares an image for the detection model.
 *
 * The scaled width and height are each rounded to the nearest multiple of 32
 * (minimum 32), the image is resized to that size, and the pixels are
 * normalized with the given per-channel mean/std values.
 *
 * @returns The model input together with the resized image and its dimensions.
 */
function beforeDet(srcImg: ImageData, detRatio: number) {
    const snapTo32 = (len: number) => Math.max(Math.round((len * detRatio) / 32) * 32, 32);
    const resizeH = snapTo32(srcImg.height);
    const resizeW = snapTo32(srcImg.width);

    if (dev) {
        putImgDom(data2canvas(srcImg));
    }

    const image = resizeImg(srcImg, resizeW, resizeH, "fill");
    const transposedData = toPaddleInput(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]);
    log(image);

    if (dev) {
        putImgDom(data2canvas(image));
    }

    return {
        data: { transposedData, image },
        width: resizeW,
        height: resizeH,
    };
}
/**
 * Turns the raw detection output into a list of text regions.
 *
 * Steps: threshold the output at 0.3 into a bitmap, find contours, fit a
 * box to each contour, expand it, scale it to source-image coordinates,
 * discard boxes that are too small, crop the region from the source image,
 * estimate its colors and tighten the box around the text.
 *
 * @param dataSet Model output values with the width/height used to index them.
 * @param _resizeW Width the source image was resized to for detection.
 * @param _resizeH Height the source image was resized to for detection.
 * @param srcData The original image; crops and final coordinates refer to it.
 * @returns One entry per accepted region: box, cropped pixels and colors.
 */
function afterDet(dataSet: detDataType, _resizeW: number, _resizeH: number, srcData: ImageData) {
task2.l("");
// Because of the "fill" resize mode, a dimension that is smaller than the resize target is left unscaled.
const w = Math.min(srcData.width, _resizeW);
const h = Math.min(srcData.height, _resizeH);
const { data, width, height } = dataSet;
// Binarize: values above 0.3 become 255, everything else 0.
const bitData = new Uint8Array(width * height);
for (let i = 0; i < data.length; i++) {
const v = (data[i] as number) > 0.3 ? 255 : 0;
bitData[i] = v;
}
if (dev) {
// Debug only: render the binarized map to a canvas with id "det_ru".
const clipData = new Uint8ClampedArray(width * height * 4);
for (let i = 0; i < data.length; i++) {
const n = i * 4;
const v = (data[i] as number) > 0.3 ? 255 : 0;
clipData[n] = clipData[n + 1] = clipData[n + 2] = v;
clipData[n + 3] = 255;
bitData[i] = v;
}
const myImageData = createImageData(clipData, width, height);
const srcCanvas = data2canvas(myImageData);
putImgDom(srcCanvas, "det_ru");
}
task2.l("edge");
const edgeRect: detResultType = [];
// findContours takes the bitmap as an array of rows.
const src2: number[][] = [];
for (let y = 0; y < height; y++) {
src2.push(Array.from(bitData.slice(y * width, y * width + width)));
}
const contours2: Point[][] = [];
findContours(src2, contours2);
if (dev) {
// Debug only: outline every contour in red on the "det_ru" canvas.
const xctx = (document.querySelector("#det_ru") as HTMLCanvasElement).getContext("2d")!;
for (const item of contours2) {
xctx.moveTo(item[0].x, item[0].y);
for (const p of item) {
xctx.lineTo(p.x, p.y);
}
xctx.strokeStyle = "red";
xctx.closePath();
xctx.stroke();
}
}
for (let i = 0; i < contours2.length; i++) {
task2.l("get_box");
const minSize = 3;
const l: Contour = contours2[i];
const { points, sside } = getMiniBoxes(l);
// Skip contours whose fitted box has a shorter side below minSize.
if (sside < minSize) continue;
// TODO sort fast
const resultObj = unclip2(points);
const box = resultObj.points;
// Skip boxes whose size measure after expansion is still too small.
if (resultObj.sside < minSize + 2) {
continue;
}
// Scale the box from detection-map coordinates to source-image coordinates (in place).
const rx = srcData.width / w;
const ry = srcData.height / h;
for (let i = 0; i < box.length; i++) {
box[i][0] *= rx;
box[i][1] *= ry;
}
task2.l("order");
const box1 = orderPointsClockwise(box);
// box1 holds the same point arrays as `box`, so rounding/clipping here also updates `box`.
for (const item of box1) {
item[0] = clip(Math.round(item[0]), 0, srcData.width);
item[1] = clip(Math.round(item[1]), 0, srcData.height);
}
const rect_width = int(linalgNorm(box1[0], box1[1]));
const rect_height = int(linalgNorm(box1[0], box1[3]));
// Drop boxes that are 3 pixels or less in either dimension.
if (rect_width <= 3 || rect_height <= 3) continue;
drawBox(box, "", "red", "det_ru");
task2.l("crop");
const c = getRotateCropImage(srcData, box);
task2.l("match best");
// Estimate background/text colors of the crop, then shrink the box towards the text.
const { bg, text } = getImgColor(c);
const bb = matchBestBox(box, c, text);
edgeRect.push({ box: bb, img: c, style: { bg, text } });
}
task2.l("e");
log(edgeRect);
return edgeRect;
}
/**
 * Signed area of a polygon (shoelace formula). The sign depends on the
 * winding order of the vertices; an empty polygon yields 0.
 */
function polygonPolygonArea(polygon: pointsType) {
    let doubled = 0;
    let prev = polygon[polygon.length - 1];
    for (const cur of polygon) {
        doubled += prev[1] * cur[0] - prev[0] * cur[1];
        prev = cur;
    }
    return doubled / 2;
}
/**
 * Perimeter of a closed polygon: the sum of the distances between
 * consecutive vertices, including the edge from the last back to the first.
 */
function polygonPolygonLength(polygon: pointsType) {
    let [prevX, prevY] = polygon[polygon.length - 1];
    let total = 0;
    for (const [x, y] of polygon) {
        total += Math.hypot(prevX - x, prevY - y);
        prevX = x;
        prevY = y;
    }
    return total;
}
/**
 * Expands a four-point box outwards.
 *
 * The offset is `area * 1.5 / perimeter`. Every corner is moved by that
 * offset along both directions pointing away from its two neighbours.
 *
 * @returns The expanded corners and `sside`, the absolute cross product of
 *   the two edges meeting at the second expanded corner.
 */
function unclip2(box: pointsType) {
    const unclip_ratio = 1.5;
    const area = Math.abs(polygonPolygonArea(box));
    const length = polygonPolygonLength(box);
    const distance = (area * unclip_ratio) / length;

    /** Vector pointing from `from` to `to`, rescaled to length `distance`. */
    const awayFrom = (to: pointType, from: pointType): pointType => {
        const x = to[0] - from[0];
        const y = to[1] - from[1];
        const d = Math.sqrt(x ** 2 + y ** 2);
        return [(x / d) * distance, (y / d) * distance];
    };

    const expandedArr: pointType[] = box.map((p, i) => {
        // `at` with -1 wraps to the last corner for the first point.
        const [dx1, dy1] = awayFrom(p, box.at((i - 1) % 4)!);
        const [dx2, dy2] = awayFrom(p, box.at((i + 1) % 4)!);
        return [p[0] + dx1 + dx2, p[1] + dy1 + dy2];
    });

    const [e0, e1, e2] = expandedArr;
    const v1 = [e0[0] - e1[0], e0[1] - e1[1]];
    const v2 = [e2[0] - e1[0], e2[1] - e1[1]];
    const cross = v1[0] * v2[1] - v1[1] * v2[0];
    return { points: expandedArr as BoxType, sside: Math.abs(cross) };
}
/**
 * Corner points of a rotated rectangle.
 *
 * @param center Rectangle center.
 * @param size Rectangle width and height.
 * @param angle Rotation in degrees.
 * @returns The corners in the order top-left, top-right, bottom-right,
 *   bottom-left (as they lie for an angle of 0).
 */
function boxPoints(center: { x: number; y: number }, size: { width: number; height: number }, angle: number) {
    const theta = (angle * Math.PI) / 180.0;
    const cosTheta = Math.cos(theta);
    const sinTheta = Math.sin(theta);
    const halfW = size.width * 0.5;
    const halfH = size.height * 0.5;

    /** Rotates the offset (ox, oy) by theta and translates it to the center. */
    const corner = (ox: number, oy: number): [number, number] => [
        center.x + ox * cosTheta - oy * sinTheta,
        center.y + ox * sinTheta + oy * cosTheta,
    ];

    const rotatedPoints: [number, number][] = [
        corner(-halfW, -halfH), // Top-Left
        corner(halfW, -halfH), // Top-Right
        corner(halfW, halfH), // Bottom-Right
        corner(-halfW, halfH), // Bottom-Left
    ];
    return rotatedPoints;
}
/**
 * Fits a minimum-area rectangle to a contour and returns its corners in the
 * order top-left, top-right, bottom-right, bottom-left.
 *
 * The corners are sorted by x; the two left-most and the two right-most are
 * then each ordered by y.
 *
 * @returns The ordered corners and `sside`, the shorter side of the rectangle.
 */
function getMiniBoxes(contour: Point[]) {
    const rect = minAreaRect(contour);
    const byX = [...boxPoints(rect.center, rect.size, rect.angle)].sort((p, q) => p[0] - q[0]) as pointsType;
    const [leftA, leftB, rightA, rightB] = byX;

    const [topLeft, bottomLeft] = leftB[1] > leftA[1] ? [leftA, leftB] : [leftB, leftA];
    const [topRight, bottomRight] = rightB[1] > rightA[1] ? [rightA, rightB] : [rightB, rightA];

    return {
        points: [topLeft, topRight, bottomRight, bottomLeft] as BoxType,
        sside: Math.min(rect.size.height, rect.size.width),
    };
}
/** Flattens the input by one level; an already flat array comes back as a shallow copy. */
function flatten(arr: number[] | number[][]) {
return arr.flat();
}
/** Euclidean distance between two points. */
function linalgNorm(p0: pointType, p1: pointType) {
    const dx = p0[0] - p1[0];
    const dy = p0[1] - p1[1];
    return Math.sqrt(dx ** 2 + dy ** 2);
}
/**
 * Orders four corner points clockwise starting at the top-left.
 *
 * The point with the smallest x+y comes first and the one with the largest
 * x+y third; the remaining two are placed by comparing the components of
 * their difference vector. The returned array reuses the input point arrays.
 */
function orderPointsClockwise(pts: BoxType) {
    const sums = pts.map((pt) => pt[0] + pt[1]);
    const first = pts[sums.indexOf(Math.min(...sums))];
    const third = pts[sums.indexOf(Math.max(...sums))];

    const rest = pts.filter((pt) => pt !== first && pt !== third);
    const delta = rest[1].map((value, axis) => value - rest[0][axis]);
    const second = rest[delta.indexOf(Math.min(...delta))];
    const fourth = rest[delta.indexOf(Math.max(...delta))];

    const rect: BoxType = [first, second, third, fourth];
    return rect;
}
/**
 * Crops the region described by `points` out of `img` and straightens it.
 *
 * An affine transform is built that maps the first corner to the origin, the
 * edge to the second corner onto the x axis and the edge to the fourth corner
 * onto the y axis. The image is drawn through that transform onto a canvas
 * sized to the edge lengths (rounded up).
 *
 * @throws Error when the first, second and fourth corners are collinear.
 */
function getRotateCropImage(img: ImageData, points: BoxType) {
    // todo: crop along a curve
    const [[x0, y0], [x1, y1], , [x3, y3]] = points;

    // Edge vectors from the first corner.
    const dx1 = x1 - x0;
    const dy1 = y1 - y0;
    const dx3 = x3 - x0;
    const dy3 = y3 - y0;

    // Size of the straightened output.
    const width = Math.sqrt(dx1 ** 2 + dy1 ** 2);
    const height = Math.sqrt(dx3 ** 2 + dy3 ** 2);

    const determinant = dx1 * dy3 - dx3 * dy1;
    if (determinant === 0) throw new Error("点共线,无法形成矩形");

    // Transform matrix parameters.
    const a = (width * dy3) / determinant;
    const c = (-dx3 * width) / determinant;
    const b = (-height * dy1) / determinant;
    const d = (dx1 * height) / determinant;
    const e = -a * x0 - c * y0;
    const f = -b * x0 - d * y0;

    const inputCanvas = data2canvas(img);
    const outputCanvas = newCanvas(Math.ceil(width), Math.ceil(height));
    const ctx = outputCanvas.getContext("2d")!;

    // Draw through the transform, then reset it before reading pixels back.
    ctx.setTransform(a, b, c, d, e, f);
    ctx.drawImage(inputCanvas, 0, 0);
    ctx.resetTransform();

    return ctx.getImageData(0, 0, outputCanvas.width, outputCanvas.height);
}
/** An RGB color. */
type color = [number, number, number];
/**
 * Estimates the background and text colors of a cropped text image.
 *
 * A histogram of exact RGB values is built from the pixels whose x position
 * is at most four times the image height. The most frequent color is the
 * background, the second most frequent the "text edge" color. When the text
 * edge color is close to the background, the text color is instead the
 * count-weighted average of the frequent colors that differ from the
 * background, falling back to the inverted background.
 *
 * @returns `bg`, `text` and `textEdge` colors.
 */
function getImgColor(img: ImageData) {
    const histogram = new Map<string, number>();
    const data = img.data;
    const maxX = img.height * 4;
    for (let i = 0, px = 0; i < data.length; i += 4, px++) {
        if (px % img.width > maxX) continue;
        const colorKey = [data[i], data[i + 1], data[i + 2]].join(",");
        histogram.set(colorKey, (histogram.get(colorKey) || 0) + 1);
    }

    const colorList = getHighestFrequency(histogram, 20).map((entry) => ({
        el: entry.el.split(",").map(Number) as color,
        count: entry.count,
    }));

    const bg = colorList.at(0)?.el || [255, 255, 255];
    const textEdge = colorList.at(1)?.el || [0, 0, 0];
    const colorD = 100;
    let text = textEdge;

    if (areColorsSimilar(textEdge, bg) < colorD) {
        const invertedBg = () => bg.map((x) => 255 - x) as color;
        const colorSplit = colorList.slice(1).filter((entry) => areColorsSimilar(entry.el, bg) > 50);
        if (colorSplit.length === 0) {
            text = invertedBg();
        } else {
            // Per-channel average of the candidate colors.
            text = [0, 1, 2].map((channel) =>
                Math.round(average2(colorSplit.map((entry) => [entry.el[channel], entry.count] as [number, number]))),
            ) as color;
            if (areColorsSimilar(text, bg) < colorD) {
                text = invertedBg();
            }
        }
        logColor(`rgb(${text.join(",")})`);
    }

    return { bg, text, textEdge };
}
/**
 * Euclidean distance between two colors in RGB space. Despite the name the
 * result is a number, not a boolean: smaller means more similar.
 */
function areColorsSimilar(color1: color, color2: color) {
    const [r1, g1, b1] = color1;
    const [r2, g2, b2] = color2;
    return Math.sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2);
}
/**
 * Returns the `c` entries of `map` with the highest counts, highest first.
 * Entries with equal counts keep the map's insertion order.
 *
 * @param map Element to count.
 * @param c Maximum number of entries to return (default 1); values of 0 or
 *   less yield an empty array.
 * @returns Up to `c` `{ el, count }` records.
 */
function getHighestFrequency<T>(map: Map<T, number>, c = 1): { el: T; count: number }[] {
    if (c <= 0) return [];
    const ranked: { el: T; count: number }[] = [];
    for (const [el, count] of map) {
        ranked.push({ el, count });
    }
    // Array.prototype.sort is stable, so ties stay in insertion order.
    ranked.sort((a, b) => b.count - a.count);
    return ranked.slice(0, c);
}
/**
 * Tightens a box around the text inside its cropped image.
 *
 * The crop is scanned from each of its four sides for the first row/column
 * containing a pixel whose color is within a distance of 200 of
 * `textEdgeColor`. Each side of the box is then moved inwards by the
 * resulting margin, limited to between 0 and 4 pixels.
 *
 * @param box Corners of the region in the source image.
 * @param img Cropped pixels of that region.
 * @param textEdgeColor Color treated as text.
 * @returns A new, possibly smaller box.
 */
function matchBestBox(box: BoxType, img: ImageData, textEdgeColor: color) {
    const isText = (x: number, y: number) => areColorsSimilar(getImgPix(img, x, y), textEdgeColor) < 200;

    const rowHasText = (y: number) => {
        for (let x = 0; x < img.width; x++) {
            if (isText(x, y)) return true;
        }
        return false;
    };

    // First and last rows that contain text.
    let yFromTop = 0;
    for (let y = 0; y < img.height; y++) {
        if (rowHasText(y)) {
            yFromTop = y;
            break;
        }
    }
    let yFromBottom = img.height;
    for (let y = img.height - 1; y >= 0; y--) {
        if (rowHasText(y)) {
            yFromBottom = y;
            break;
        }
    }

    // Columns are only inspected between the rows found above (inclusive).
    const colHasText = (x: number) => {
        for (let y = yFromTop; y <= yFromBottom; y++) {
            if (isText(x, y)) return true;
        }
        return false;
    };

    let xFromLeft = 0;
    for (let x = 0; x < img.width; x++) {
        if (colHasText(x)) {
            xFromLeft = x;
            break;
        }
    }
    let xFromRight = img.width;
    for (let x = img.width - 1; x >= 0; x--) {
        if (colHasText(x)) {
            xFromRight = x;
            break;
        }
    }

    const dyT = clip(yFromTop - 1, 0, 4);
    const dyB = clip(img.height - yFromBottom - 1, 0, 4);
    const dxL = clip(xFromLeft - 1, 0, 4);
    const dxR = clip(img.width - xFromRight - 1, 0, 4);

    const [p0, p1, p2, p3] = box;
    return [
        [p0[0] + dxL, p0[1] + dyT],
        [p1[0] - dxR, p1[1] + dyT],
        [p2[0] - dxR, p2[1] - dyB],
        [p3[0] + dxL, p3[1] - dyB],
    ] as BoxType;
}
/**
 * Reads the RGB color of one pixel.
 *
 * Returns exactly three channels, matching the declared `color` tuple (the
 * alpha channel is not included). Coordinates outside the pixel buffer do
 * not throw; the channels are then `undefined`.
 *
 * @param img Image to read from.
 * @param x Pixel column.
 * @param y Pixel row.
 */
function getImgPix(img: ImageData, x: number, y: number): color {
    const index = (y * img.width + x) * 4;
    const data = img.data;
    return [data[index], data[index + 1], data[index + 2]];
}
/**
 * Prepares detected regions for the recognition model.
 *
 * Regions that are taller than wide are rotated by -90 degrees because the
 * model only supports horizontal images. Each image is resized to height
 * `imgH` with the width scaled by the aspect ratio (rounded down), then
 * normalized with a mean and std of 0.5 per channel.
 *
 * @returns One model input per region together with its height and width.
 */
function beforeRec(box: { box: BoxType; img: ImageData }[], imgH: number) {
    const l: { b: number[][][]; imgH: number; imgW: number }[] = box.map((region) => {
        // The model only supports horizontal input.
        const upright = region.img.width < region.img.height ? rotateImg(region.img, -90) : region.img;

        const w = Math.floor(imgH * (upright.width / upright.height));
        const resized = resizeImg(upright, w, imgH, undefined, false);
        if (dev) putImgDom(data2canvas(resized, w, imgH));

        return {
            b: toPaddleInput(resized, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
            imgH,
            imgW: w,
        };
    });
    log(l);
    return l;
}
/**
 * Decodes the recognition model output into text.
 *
 * `data.dims` is read as `[samples, steps, classes]`. For every step the
 * highest-scoring class is taken. Class 0 is skipped when decoding and
 * consecutive repeats of the same class are collapsed. With `op.opm.space`
 * enabled, a step won by class 0 is replaced by the runner-up when that
 * runner-up is the space character and scores above 0.001.
 *
 * @param data Model output tensor.
 * @param character Dictionary; class `i` maps to `character[i - 1]`.
 * @param op Decoder options.
 * @returns One `{ text, mean }` per sample, where `mean` is the average
 *   score of the kept characters. Samples are stored from the last slot
 *   backwards.
 */
function afterRec(
    data: AsyncType<ReturnType<typeof runRec>>,
    character: string[],
    op: {
        opm: {
            space: boolean;
        };
    },
) {
    const predLen = data.dims[2];
    const sampleSize = predLen * data.dims[1];
    const getChar = (i: number) => character.at(i - 1) ?? "";

    /** Collapses the per-step class indices into text plus mean confidence. */
    const decode = (textIndex: number[], textProb: number[]) => {
        const charList: string[] = [];
        const confList: number[] = [];
        for (const [pos, classIdx] of textIndex.entries()) {
            if (classIdx === 0) continue;
            if (pos > 0 && textIndex[pos - 1] === classIdx) continue;
            charList.push(getChar(classIdx));
            confList.push(textProb[pos]);
        }
        if (charList.length === 0) {
            return { text: "", mean: 0 };
        }
        const total = confList.reduce((acc, conf) => acc + conf, 0);
        return { text: charList.join("").trim(), mean: total / confList.length };
    };

    const line: { text: string; mean: number }[] = [];
    let slot = data.dims[0] - 1;
    for (let start = 0; start < data.data.length; start += sampleSize, slot--) {
        const predsIdx: number[] = [];
        const predsProb: number[] = [];
        for (let offset = start; offset < start + sampleSize; offset += predLen) {
            const scores = data.data.slice(offset, offset + predLen) as Float32Array;
            let best = Number.NEGATIVE_INFINITY;
            let bestIdx = -1;
            let second = Number.NEGATIVE_INFINITY;
            let secondIdx = -1;
            for (let j = 0; j < scores.length; j++) {
                const score = scores[j];
                if (score > best) {
                    second = best;
                    best = score;
                    bestIdx = j;
                } else if (score > second && score < best) {
                    second = score;
                    secondIdx = j;
                }
            }
            // Space recovery: prefer a plausible space over class 0.
            if (op.opm.space && bestIdx === 0 && getChar(secondIdx) === " " && second > 0.001) {
                best = second;
                bestIdx = secondIdx;
            }
            predsProb.push(best);
            predsIdx.push(bestIdx);
        }
        line[slot] = decode(predsIdx, predsProb);
    }
    return line;
}
/** 排版分析 */
function afAfRec(
l: resultType,
op?: { docDirs?: ReadingDir[]; columnsTip?: ColumnsTip },
): {
columns: {
src: resultType;
outerBox: BoxType;
parragraphs: {
src: resultType;
parse: resultType[0];
}[];
}[];
parragraphs: resultType;
readingDir: ReadingDir;
angle: { reading: { inline: number; block: number }; angle: number };
} {
log(l);
type columnType = "none" | ColumnsTip[0]["type"];
// 假定阅读方向都是统一的
const dirs: ReadingDir[] = op?.docDirs ?? [
{ block: "tb", inline: "lr" },
{ block: "rl", inline: "tb" },
];
const dir: ReadingDir = { block: "tb", inline: "lr" };
const dirVector = {
inline: [1, 0] as VectorType,
block: [0, 1] as VectorType,
};
const baseVector = {
inline: [1, 0] as VectorType,
block: [0, 1] as VectorType,
};
if (l.length === 0) {
return {
columns: [],
parragraphs: [],
readingDir: dir,
angle: { reading: { inline: 0, block: 90 }, angle: 0 },
};
}
const colTip: { box: BoxType; type: columnType }[] = [
{
box: [
[Number.NEGATIVE_INFINITY, Number.NEGATIVE_INFINITY],
[Number.POSITIVE_INFINITY, Number.NEGATIVE_INFINITY],
[Number.POSITIVE_INFINITY, Number.POSITIVE_INFINITY],
[Number.NEGATIVE_INFINITY, Number.POSITIVE_INFINITY],
],
type: "none",
},
];
const defaultColId = 0;
function findColId(b: BoxType) {
const c = Box.center(b);
for (let id = colTip.length - 1; id >= 0; id--) {
const item = colTip[id];
const box = item.box;
if (c[0] >= box[0][0] && c[0] <= box[1][0] && c[1] >= box[0][1] && c[1] <= box[3][1]) {
return id;
}
}
return defaultColId;
}
const Point = {
center: (p1: pointType, p2: pointType): pointType => [(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2],
disByV: (p1: pointType, p2: pointType, type: "block" | "inline") => {
if (type === "block") {
return Math.abs(Vector.dotMup(p1, baseVector.block) - Vector.dotMup(p2, baseVector.block));
}
return Math.abs(Vector.dotMup(p1, baseVector.inline) - Vector.dotMup(p2, baseVector.inline));
},
compare: (a: pointType, b: pointType, type: "block" | "inline") => {
if (type === "block") {
return Vector.dotMup(a, baseVector.block) - Vector.dotMup(b, baseVector.block);
}
return Vector.dotMup(a, baseVector.inline) - Vector.dotMup(b, baseVector.inline);
},
toInline: (p: pointType) => {
return Vector.dotMup(p, baseVector.inline);
},
toBlock: (p: pointType) => {
return Vector.dotMup(p, baseVector.block);
},
};
const Box = {
inlineStart: (b: BoxType) => Point.center(b[0], b[3]),
inlineEnd: (b: BoxType) => Point.center(b[1], b[2]),
blockStart: (b: BoxType) => Point.center(b[0], b[1]),
blockEnd: (b: BoxType) => Point.center(b[2], b[3]),
inlineSize: (b: BoxType) => b[1][0] - b[0][0],
blockSize: (b: BoxType) => b[3][1] - b[0][1],
inlineStartDis: (a: BoxType, b: BoxType) => Point.disByV(a[0], b[0], "inline"),
inlineEndDis: (a: BoxType, b: BoxType) => Point.disByV(a[1], b[1], "inline"),
blockGap: (newB: BoxType, oldB: BoxType) => Point.disByV(newB[0], oldB[3], "block"),
inlineCenter: (b: BoxType) => (b[2][0] + b[0][0]) / 2,
blockCenter: (b: BoxType) => (b[2][1] + b[0][1]) / 2,
inlineStartCenter: (b: BoxType) => Box.inlineStart(b),
center: (b: BoxType) => Point.center(b[0], b[2]),
};
type VectorType = [number, number];
const Vector = {
fromPonts: (p1: pointType, p2: pointType): pointType => [p1[0] - p2[0], p1[1] - p2[1]],
dotMup: (a: VectorType, b: VectorType) => a[0] * b[0] + a[1] * b[1],
numMup: (a: VectorType, b: number) => [a[0] * b, a[1] * b] as VectorType,
add: (a: VectorType, b: VectorType) => [a[0] + b[0], a[1] + b[1]] as VectorType,
};
function averLineAngles(a: number[]) {
let iav = 0;
let n = 0;
const l: number[] = [];
for (const [index, i] of a.entries()) {
const a1 = i > 180 ? i - 180 : i;
const a2 = a1 - 180;
const a = index === 0 ? a1 : Math.abs(a2 - iav) < Math.abs(a1 - iav) ? a2 : a1;
l.push(a);
iav = (iav * n + a) / (n + 1);
n++;
}
return { av: iav, l };
}
function lineAngleNear(a1: number, a2: number) {
if (Math.abs(a1 - a2) < 45) return true;
if (Math.abs(a1 - (a2 - 180)) < 45) return true;
if (Math.abs(a1 - 180 - a2) < 45) return true;
return false;
}
function median(l: number[]) {
l.sort((a, b) => a - b);
const mid = Math.floor(l.length / 2);
return l.length % 2 === 0 ? (l[mid - 1] + l[mid]) / 2 : l[mid];
}
function dir2xy(d: ReadingDirPart) {
if (d === "lr" || d === "rl") return "x";
return "y";
}
function smallest<I>(l: I[], f: (a: I) => number) {
let min = Number.POSITIVE_INFINITY;
let minIndex = -1;
for (let i = 0; i < l.length; i++) {
const v = f(l[i]);
if (v < min) {
min = v;
minIndex = i;
}
}
return l[minIndex];
}
const tipV: Record<ReadingDirPart, VectorType> = {
lr: [1, 0],
rl: [-1, 0],
tb: [0, 1],
bt: [0, -1],
};
/** 坐标系变换 */
function transXY(old: ReadingDir, target: ReadingDir) {
const oX = tipV[old.inline];
const oY = tipV[old.block];
const tX = tipV[target.inline];
const tY = tipV[target.block];
const tInOX = [Vector.dotMup(tX, oX), Vector.dotMup(tX, oY)] as VectorType;
const tInOY = [Vector.dotMup(tY, oX), Vector.dotMup(tY, oY)] as VectorType;
return (p: pointType) => {
return [Vector.dotMup(p, tInOX), Vector.dotMup(p, tInOY)] as pointType;
};
}
function transBox(old: ReadingDir, target: ReadingDir) {
const t = transXY(old, target);
return {
b: (b: BoxType) => {
for (const p of b) {
const [a, b] = t(p);
p[0] = a;
p[1] = b;
}
},
p: t,
};
}
function reOrderBox(map: number[]) {
return (b: BoxType) => {
const newB: BoxType = [
[0, 0],
[0, 0],
[0, 0],
[0, 0],
];
for (let i = 0; i < map.length; i++) {
newB[i] = b[map[i]];
}
return newB;
};
}
function r(point: pointType, point2: pointType) {
return Math.sqrt((point[0] - point2[0]) ** 2 + (point[1] - point2[1]) ** 2);
}
function outerRect(boxes: BoxType[]) {
const points = boxes.flatMap((i) => i.map((i) => i));
const x1 = Math.min(...points.map((p) => Vector.dotMup(p, baseVector.inline)));
const x2 = Math.max(...points.map((p) => Vector.dotMup(p, baseVector.inline)));
const y1 = Math.min(...points.map((p) => Vector.dotMup(p, baseVector.block)));
const y2 = Math.max(...points.map((p) => Vector.dotMup(p, baseVector.block)));
const o = Vector.add(Vector.numMup(baseVector.inline, x1), Vector.numMup(baseVector.block, y1));
const w = Vector.numMup(baseVector.inline, x2 - x1);
const h = Vector.numMup(baseVector.block, y2 - y1);
return [o, Vector.add(o, w), Vector.add(Vector.add(o, w), h), Vector.add(o, h)] as BoxType;
}
function pushColumn(b: resultType[0]) {
let nearest: number | null = null;
let _jl = Number.POSITIVE_INFINITY;
for (const i in columns) {
const last = columns[i].src.at(-1);
if (!last) continue;
const jl = r(b.box[0], last.box[0]);
if (jl < _jl) {
nearest = Number(i);
_jl = jl;
}
}
if (nearest === null) {
columns.push({ src: [b] });
return;
}
const last = columns[nearest].src.at(-1) as resultType[0]; // 前面已经遍历过了,有-1的才能赋值到nearest
const thisW = Box.inlineSize(b.box);
const lastW = Box.inlineSize(last.box);
const minW = Math.min(thisW, lastW);
const em = Box.blockSize(b.box);
if (
// 左右至少有一边是相近的,中心距离要相近
// 行之间也不要离太远
(Box.inlineStartDis(b.box, last.box) < 3 * em ||
Box.inlineEndDis(b.box, last.box) < 3 * em ||
Point.disByV(Box.center(b.box), Box.center(last.box), "inline") < minW * 0.4) &&
Box.blockGap(b.box, last.box) < em * 1.1
) {
} else {
columns.push({ src: [b] });
return;
}
columns[nearest].src.push(b);
}
/**
 * Merges the lines of one paragraph into a single result.
 *
 * The merged box is the outer rectangle of all line boxes, the mean is the
 * per-line mean weighted by text length, and the style is taken from the first
 * line. Consecutive lines are concatenated directly when both characters at the
 * seam are ideographs or CJK punctuation; otherwise one space is inserted.
 *
 * @param p lines of the paragraph, in reading order; must not be empty
 *          (the style is read from `p[0]`)
 * @returns the merged line
 */
function joinResult(p: resultType) {
    // Both patterns carry the `u` flag and are anchored, so they test the whole
    // boundary *code point*. Indexing with `String.prototype.at` would yield a lone
    // surrogate for supplementary-plane ideographs (e.g. CJK Extension B), which
    // never matches `\p{Ideographic}` and would wrongly force a space.
    const cjkTail = /[\p{Ideographic}。,!?;:“”‘’《》、【】()…—]$/u;
    const cjkHead = /^[\p{Ideographic}。,!?;:“”‘’《》、【】()…—]/u;
    const res: resultType[0] = {
        box: outerRect(p.map((i) => i.box)),
        text: "",
        mean: average2(p.map((i) => [i.mean, i.text.length])),
        style: p[0].style,
    };
    for (const i of p) {
        // Two code units are enough to contain the last / first code point.
        const tail = res.text.slice(-2);
        const head = i.text.slice(0, 2);
        if (tail !== "" && (!cjkTail.test(tail) || !cjkHead.test(head))) res.text += " ";
        res.text += i.text;
    }
    return res satisfies resultType[0];
}
/**
 * Sorts columns in place into reading order: primarily by block-start position;
 * columns whose block starts are closer than one `em` are ordered by inline start.
 * `em` is the block size of the first line of the left-hand operand, or 2 when
 * that column has no lines.
 */
function sortCol(cs: { src: resultType; outerBox: BoxType }[]) {
    cs.sort((a, b) => {
        const firstLine = a.src.at(0);
        const em = firstLine ? Box.blockSize(firstLine.box) : 2;
        const sameBand = Point.disByV(Box.blockStart(a.outerBox), Box.blockStart(b.outerBox), "block") < em;
        return sameBand
            ? Point.compare(Box.inlineStart(a.outerBox), Box.inlineStart(b.outerBox), "inline")
            : Point.compare(Box.blockStart(a.outerBox), Box.blockStart(b.outerBox), "block");
    });
}
// Work on deep copies of the caller-supplied column hints so the caller's objects are never modified.
for (const tip of op?.columnsTip ?? []) {
    colTip.push(structuredClone(tip));
}
// Measure the text angle to tell vertical from horizontal layout.
/** Axis angles in degrees, with the x axis as the positive direction (graphics coordinates). */
const rAngle = {
    inline: 0,
    block: 90,
};
const inlineAngles = l.map(({ box }) => {
    const width = box[1][0] - box[0][0];
    const height = box[3][1] - box[0][1];
    // Taller than wide: use the axis through the midpoints of edges 2-3 and 0-1;
    // otherwise the axis through the midpoints of edges 1-2 and 0-3.
    const axis =
        width < height
            ? Vector.fromPonts(Point.center(box[2], box[3]), Point.center(box[0], box[1]))
            : Vector.fromPonts(Point.center(box[1], box[2]), Point.center(box[0], box[3]));
    return normalAngle(Math.atan2(axis[1], axis[0]) * (180 / Math.PI));
});
const firstAngleAnalysis = averLineAngles(inlineAngles);
// Exclude the orthogonal ones: keep only angles that `lineAngleNear` accepts against the first-pass average.
const filterAngles = inlineAngles.filter((i) => lineAngleNear(i, firstAngleAnalysis.av));
// Reject outliers with a median / MAD test: a sample is kept when its distance
// from the median is below 2 scaled MADs (1.4826 is the usual MAD scale factor).
const md = median(filterAngles);
const MAD = median(filterAngles.map((i) => Math.abs(i - md)));
// MAD is 0 when at least half of the samples equal the median (e.g. many boxes sharing
// one exact angle). Dividing by it would give NaN or Infinity for every sample
// and reject them all, so in that case keep exactly the samples equal to the median.
const filterAngles1 =
    MAD === 0
        ? filterAngles.filter((i) => i === md)
        : filterAngles.filter((i) => Math.abs((i - md) / (MAD * 1.4826)) < 2);
const inlineangle = normalAngle(averLineAngles(filterAngles1).av);
log("dir0", inlineAngles, firstAngleAnalysis, filterAngles, filterAngles1, inlineangle);
const blockangle = normalAngle(inlineangle + 90);
// Classify each measured axis as horizontal ("x") or vertical ("y").
const inlineDir = lineAngleNear(inlineangle, 0) ? "x" : "y";
const blockDir = lineAngleNear(blockangle, 90) ? "y" : "x";
// Use the first candidate reading direction whose axes match the measurement,
// falling back to the first candidate when none matches.
const fdir = dirs.find((d) => dir2xy(d.inline) === inlineDir && dir2xy(d.block) === blockDir) ?? dirs.at(0);
if (fdir) {
    dir.block = fdir.block;
    dir.inline = fdir.inline;
}
/** Nominal angle, in degrees, associated with each reading-direction part. */
const tipAngle: Record<ReadingDirPart, number> = {
    lr: 0,
    rl: 180,
    tb: 90,
    bt: 270,
};
// For each axis, choose among the measured angle and its -360 / -180 / +180 shifts
// using the distance to the nominal angle of the selected direction as the key
// (presumably `smallest` returns the candidate with the smallest key).
const inlineTarget = tipAngle[dir.inline];
const blockTarget = tipAngle[dir.block];
rAngle.inline = smallest([inlineangle, inlineangle - 360, inlineangle - 180, inlineangle + 180], (a) =>
    Math.abs(a - inlineTarget),
);
rAngle.block = smallest([blockangle, blockangle - 360, blockangle - 180, blockangle + 180], (a) =>
    Math.abs(a - blockTarget),
);
// Unit vectors along the resolved axes.
const inlineRad = rAngle.inline * (Math.PI / 180);
const blockRad = rAngle.block * (Math.PI / 180);
dirVector.inline = [Math.cos(inlineRad), Math.sin(inlineRad)];
dirVector.block = [Math.cos(blockRad), Math.sin(blockRad)];
log("dir", dir, rAngle, dirVector, inlineangle, blockangle);
// Reorder the corner points inside each box according to the reading direction.
// Each pair is [inline side, block side] for the four logical corners:
// start/start, end/start, end/end, start/end.
const reOrderMapX = [
    [dir.inline[0], dir.block[0]],
    [dir.inline[1], dir.block[0]],
    [dir.inline[1], dir.block[1]],
    [dir.inline[0], dir.block[1]],
];
const reOrderMap = reOrderMapX.map(([inlineSide, blockSide]) => {
    // Corner keys are written horizontal side first ("l" / "r"), then vertical side ("t" / "b").
    const key = inlineSide === "l" || inlineSide === "r" ? inlineSide + blockSide : blockSide + inlineSide;
    return {
        lt: 0,
        rt: 1,
        rb: 2,
        lb: 3,
    }[key];
}) as number[];
const xyT = transBox({ inline: "lr", block: "tb" }, dir);
const reOrderBoxT = reOrderBox(reOrderMap);
const logicL = l.map((item) => {
    const box = reOrderBoxT(item.box);
    xyT.b(box);
    return { ...item, box };
});
// The column hints get the same reordering and transform as the text boxes.
for (const tip of colTip) {
    tip.box = reOrderBoxT(tip.box);
    xyT.b(tip.box);
}
// Overall rotation is ignored here; only the skew is taken into account.
baseVector.inline = xyT.p(dirVector.inline);
baseVector.block = xyT.p(dirVector.block);
log("相对坐标系", baseVector);
// Work out which boxes sit on the same level along the block axis.
// Note: `sort` reorders `logicL` in place; `newL_` is the same array.
const newL_ = logicL.sort((a, b) => Point.compare(Box.blockStart(a.box), Box.blockStart(b.box), "block"));
const newLZ: { line: { src: resultType[0]; colId: number }[] }[] = [];
for (const j of newL_) {
    const colId = findColId(j.box);
    const currentLine = newLZ.at(-1);
    const last = currentLine?.line.at(-1);
    // A box joins the current line when its centre is within half of its own
    // block size of the previously added box's centre, measured along the block axis.
    const onSameLine =
        last !== undefined &&
        Point.disByV(Box.center(j.box), Box.center(last.src.box), "block") < 0.5 * Box.blockSize(j.box);
    if (currentLine && onSameLine) {
        currentLine.line.push({ src: j, colId });
    } else {
        newLZ.push({ line: [{ src: j, colId }] });
    }
}
// Depending on distance, merge fragments or keep them split.
// Fragments that are close belong to the same line; fragments that are far apart
// but on the same level belong to another column.
const newL: { src: resultType[0]; colId: number }[] = [];
for (const row of newLZ) {
    if (row.line.length === 1) {
        newL.push({ src: row.line[0].src, colId: row.line[0].colId });
        continue;
    }
    const em = average(row.line.map((i) => Box.blockSize(i.src.box)));
    row.line.sort((a, b) => Point.compare(Box.inlineStart(a.src.box), Box.inlineStart(b.src.box), "inline"));
    let last = row.line.at(0)!;
    for (const this_ of row.line.slice(1)) {
        const lastBoxInlineEnd = Box.inlineEnd(last.src.box);
        const thisInlineStart = Box.inlineStart(this_.src.box);
        // Keep fragments apart when the hint for this fragment is a table, when the two
        // fragments belong to different hinted columns, or when the inline gap exceeds one em.
        const keepSplit =
            colTip[this_.colId].type === "table" ||
            this_.colId !== last.colId ||
            Point.toInline(thisInlineStart) - Point.toInline(lastBoxInlineEnd) > em;
        if (keepSplit) {
            newL.push({ ...last });
            last = this_;
        } else {
            // Length-weighted running mean: repeatedly halving `(a + b)` would weight
            // later fragments more heavily once three or more fragments are merged.
            // Weighting by text length matches how `joinResult` averages.
            const lastLen = last.src.text.length;
            const thisLen = this_.src.text.length;
            const totalLen = lastLen + thisLen;
            last.src.mean =
                totalLen > 0
                    ? (last.src.mean * lastLen + this_.src.mean * thisLen) / totalLen
                    : (last.src.mean + this_.src.mean) / 2;
            last.src.text += this_.src.text;
            last.src.box = outerRect([last.src.box, this_.src.box]);
        }
    }
    newL.push({ ...last });
}
// todo: use separator lines as boundaries
// Column splitting.
// First pass: split into columns at a very fine granularity.
const columns: { src: resultType }[] = [];
const defaultNewL: typeof newL = [];
const noDefaultColumns: { src: resultType; type: columnType; colId: number }[] = [];
for (const entry of newL) {
    if (entry.colId === defaultColId) {
        defaultNewL.push(entry);
        continue;
    }
    // Lines with a non-default column id are grouped per id, keeping the hint's type.
    const existing = noDefaultColumns.find((c) => c.colId === entry.colId);
    if (existing) {
        existing.src.push(entry.src);
    } else {
        noDefaultColumns.push({ src: [entry.src], type: colTip[entry.colId].type, colId: entry.colId });
    }
}
defaultNewL.sort((a, b) => Point.compare(Box.blockStart(a.src.box), Box.blockStart(b.src.box), "block"));
for (const { src } of defaultNewL) {
    pushColumn(src);
}
// Merge columns: combine the fine-grained columns produced above.
const columnsInYaxis: {
    smallCol: { src: resultType; outerBox: BoxType; x: number; w: number }[];
}[] = [];
for (const col of columns) {
    const c = col.src;
    const outer = outerRect(c.map((b) => b.box));
    const x = Box.blockCenter(outer);
    const w = Box.inlineSize(outer);
    // For the first column the list is still empty, so `find` yields nothing and a new group is opened.
    const group = columnsInYaxis.find((candidate) => {
        const prev = candidate.smallCol.at(-1)!;
        const em = Box.blockSize(c.at(0)!.box);
        // This test is still strict, hence the title merge, tail merge and staggered merge further below.
        return (
            Box.inlineStartDis(prev.outerBox, outer) < 3 * em &&
            Box.inlineEndDis(prev.outerBox, outer) < 3 * em &&
            Box.blockGap(outer, prev.outerBox) < em * 2.1
        );
    });
    if (group) {
        group.smallCol.push({ src: c, outerBox: outer, x, w });
    } else {
        columnsInYaxis.push({ smallCol: [{ src: c, outerBox: outer, x, w }] });
    }
}
// Order the pieces of every merged column, and the lines of every hinted column, along the block axis.
for (const group of columnsInYaxis) {
    group.smallCol.sort((a, b) => Point.compare(Box.blockStart(a.outerBox), Box.blockStart(b.outerBox), "block"));
}
for (const hinted of noDefaultColumns) {
    hinted.src.sort((a, b) => Point.compare(Box.blockStart(a.box), Box.blockStart(b.box), "block"));
}
// A new representation of columnsInYaxis; the structure is unchanged.
const newColumns: { src: resultType; outerBox: BoxType; type: columnType }[] = [];
for (const { smallCol } of columnsInYaxis) {
    newColumns.push({
        src: smallCol.flatMap((piece) => piece.src),
        outerBox: outerRect(smallCol.map((piece) => piece.outerBox)),
        type: "none",
    });
}
sortCol(newColumns);
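// Lines of similar width have been merged by now, but two patterns are still left unmerged. Taking 20-character lines as an example: (1) 20,20,2,20,20 (2) 20,20,10,10,10,10,20
// These are a paragraph ending and a multi-column section, respectively.
// Merge case: the short lines in the middle are numerous.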
const mergedColumns: typeof newColumns = [];
for (const c of newColumns) {
const last = mergedColumns.at(-1);
if (!last) {
mergedColumns.push(c);
continue;
}
if (last.type !== "none") {
mergedColumns.push(c);
continue;
}
const lastOuter = last.outerBox;
const em = Box.blockSize(c.src[0].box);
if (
(last.src.length === 1 && Box.inlineStartDis(lastOuter, c.out