// UNPKG
//
// esearch-ocr
//
// Version:
//
// paddleocr models run on onnx
//
// 1,093 lines (1,092 loc) 37.4 kB
/**
 * Canvas factory used by every image helper in this module. Defaults to
 * OffscreenCanvas; setOCREnv ({ canvas }) replaces it via Zt.
 */
let Rt = (width, height) => new OffscreenCanvas(width, height);

/** Creates a canvas of the given size through the current factory. */
function st(width, height) {
  return Rt(width, height);
}

/** Replaces the canvas factory used by st. */
function Zt(factory) {
  Rt = factory;
}

/** Truncates a number toward zero. */
function zt(value) {
  return Math.trunc(value);
}

/** Clamps `value` into the closed range [min, max]. */
function J(value, min, max) {
  return Math.max(min, Math.min(value, max));
}

/**
 * Draws `image` scaled onto a `width` x `height` canvas and returns the
 * resulting pixels.
 * @param image ImageData-like source.
 * @param {number} width target canvas width.
 * @param {number} height target canvas height.
 * @param {string} [mode] "fill" never upscales (scale factors are capped at 1).
 * @param {string|boolean} [quality] smoothing quality, or false to disable smoothing.
 */
function Mt(image, width, height, mode, quality = "high") {
  return Jt(image, width, height, mode, quality).getImageData(0, 0, width, height);
}

/** Same as Mt but returns the 2D context the image was drawn on. */
function Jt(image, width, height, mode, quality = "high") {
  const source = H(image);
  const ctx = st(width, height).getContext("2d");
  ctx.imageSmoothingEnabled = quality !== false;
  if (quality) ctx.imageSmoothingQuality = quality;
  if (mode === "fill") {
    ctx.scale(Math.min(width / image.width, 1), Math.min(height / image.height, 1));
  } else {
    ctx.scale(width / image.width, height / image.height);
  }
  ctx.drawImage(source, 0, 0);
  return ctx;
}

/**
 * Puts ImageData onto a new canvas and returns the canvas. The canvas is
 * sized to the image unless an explicit width/height is given.
 */
function H(image, width, height) {
  const canvas = st(width || image.width, height || image.height);
  canvas.getContext("2d").putImageData(image, 0, 0);
  return canvas;
}

/**
 * Splits RGBA pixels into three normalised planes, each a row-major array
 * of rows: value = (channel / 255 - mean[c]) / std[c].
 * @returns {[number[][], number[][], number[][]]} planes in [blue, green, red] order.
 */
function kt(image, mean, std) {
  const pixels = image.data;
  const red = [];
  const green = [];
  const blue = [];
  let col = 0;
  let row = 0;
  for (let i = 0; i < pixels.length; i += 4) {
    if (!blue[row]) blue[row] = [];
    if (!green[row]) green[row] = [];
    if (!red[row]) red[row] = [];
    red[row][col] = (pixels[i] / 255 - mean[0]) / std[0];
    green[row][col] = (pixels[i + 1] / 255 - mean[1]) / std[1];
    blue[row][col] = (pixels[i + 2] / 255 - mean[2]) / std[2];
    col++;
    if (col === image.width) {
      col = 0;
      row++;
    }
  }
  return [blue, green, red];
}

/**
 * Tiny timing logger. Every call to `l(tag)` records a timestamp and prints
 * the time spent between consecutive tags, merged by tag name.
 */
class Ft {
  /** Recorded marks: { t: tag, n: timestamp in ms }. */
  tl = [];
  name;

  constructor(name) {
    this.name = name;
  }

  /** Records `tag` and logs the accumulated per-tag durations. */
  l(tag) {
    this.tl.push({ t: tag, n: performance.now() });
    const spans = [];
    for (let i = 1; i < this.tl.length; i++) {
      const elapsed = this.tl[i].n - this.tl[i - 1].n;
      const label = this.tl[i - 1].t;
      const known = spans.find((span) => span.n === label);
      if (known) {
        known.c++;
        known.d += elapsed;
      } else {
        spans.push({ d: elapsed, n: label, c: 1 });
      }
    }
    const parts = [];
    for (const span of spans) {
      const label = span.c > 1 ? `${span.n}x${span.c}` : span.n;
      parts.push(`${label} ${span.d}`);
    }
    parts.push(this.tl.at(-1).t);
    const total = spans.map((span) => span.d).reduce((sum, d) => sum + d, 0);
    console.log(`${this.name} ${total}ms: `, parts.join(" "));
  }
}

/**
 * Runs a classifier session on `image` resized to width x height and
 * returns the entry of `labels` with the highest score.
 */
async function tn(image, ort, session, labels, width, height) {
  const { transposedData, image: resized } = nn(image, width, height);
  const scores = (await en(transposedData, resized, ort, session))[0].data;
  const best = scores.reduce((max, score) => Math.max(max, score));
  const bestIndex = scores.findIndex((score) => score === best);
  return labels[bestIndex];
}

/** Resizes the image and normalises it with the mean/std constants below. */
function nn(image, width, height) {
  const resized = Mt(image, width, height);
  return {
    transposedData: kt(resized, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    image: resized
  };
}

/** Feeds the planes to the session as a [1, 3, H, W] float32 tensor; returns all outputs. */
async function en(planes, image, ort, session) {
  const flat = planes.flat(Number.POSITIVE_INFINITY);
  const tensor = new ort.Tensor("float32", Float32Array.from(flat), [1, 3, image.height, image.width]);
  const feeds = {};
  feeds[session.inputNames[0]] = tensor;
  const outputs = await session.run(feeds);
  return Object.values(outputs);
}

/**
 * Minimum-area bounding rectangle of a contour, found by trying every edge
 * of the convex hull as the rectangle's direction.
 * @param {{x:number,y:number}[]} t contour points.
 * @returns {{center:{x:number,y:number}, size:{width:number,height:number}, angle:number}}
 *   width is the longer side; angle is in degrees, normalised to [0, 180).
 * @throws {Error} when the contour is empty.
 */
function on(t) {
  if (t.length === 0) throw new Error("Empty contour");
  const hull = sn([...t]); // sn sorts in place, so hand it a copy
  let bestArea = Number.POSITIVE_INFINITY;
  const rect = { center: { x: 0, y: 0 }, size: { width: 0, height: 0 }, angle: 0 };
  for (let a = 0; a < hull.length; a++) {
    const origin = hull[a];
    const next = hull[(a + 1) % hull.length];
    const edge = { x: next.x - origin.x, y: next.y - origin.y };
    const len = Math.hypot(edge.x, edge.y);
    const [ux, uy] = [edge.x / len, edge.y / len];
    let minAlong = Number.POSITIVE_INFINITY;
    let maxAlong = Number.NEGATIVE_INFINITY;
    let minAcross = Number.POSITIVE_INFINITY;
    let maxAcross = Number.NEGATIVE_INFINITY;
    for (const point of hull) {
      const along = (point.x - origin.x) * ux + (point.y - origin.y) * uy;
      minAlong = Math.min(minAlong, along);
      maxAlong = Math.max(maxAlong, along);
      const across = -(point.x - origin.x) * uy + (point.y - origin.y) * ux;
      minAcross = Math.min(minAcross, across);
      maxAcross = Math.max(maxAcross, across);
    }
    const area = (maxAlong - minAlong) * (maxAcross - minAcross);
    if (area < bestArea) {
      bestArea = area;
      const midAlong = (minAlong + maxAlong) / 2;
      const midAcross = (minAcross + maxAcross) / 2;
      rect.center = {
        x: origin.x + ux * midAlong - uy * midAcross,
        y: origin.y + uy * midAlong + ux * midAcross
      };
      rect.size = { width: maxAlong - minAlong, height: maxAcross - minAcross };
      rect.angle = Math.atan2(uy, ux) * (180 / Math.PI);
    }
  }
  if (rect.size.width < rect.size.height) {
    [rect.size.width, rect.size.height] = [rect.size.height, rect.size.width];
    rect.angle += 90;
  }
  rect.angle = (rect.angle % 180 + 180) % 180;
  return rect;
}

/**
 * Convex hull of a point set: two passes over the points sorted by x then y,
 * popping while the turn is not strictly positive. Sorts `t` in place.
 */
function sn(t) {
  t.sort((p, q) => p.x - q.x || p.y - q.y);
  const forward = [];
  for (const point of t) {
    while (forward.length >= 2 && Vt(forward[forward.length - 2], forward[forward.length - 1], point) <= 0) forward.pop();
    forward.push(point);
  }
  const backward = [];
  for (let i = t.length - 1; i >= 0; i--) {
    const point = t[i];
    while (backward.length >= 2 && Vt(backward[backward.length - 2], backward[backward.length - 1], point) <= 0) backward.pop();
    backward.push(point);
  }
  // The last point of each chain is the first point of the other one.
  return forward.slice(0, -1).concat(backward.slice(0, -1));
}

/** Cross product of (e - t) and (o - t). */
function Vt(t, e, o) {
  return (e.x - t.x) * (o.y - t.y) - (e.y - t.y) * (o.x - t.x);
}

/**
 * Traces contours in a binary image and appends them to `out`.
 * @param {number[][]} t rows of pixel values; non-zero means foreground.
 * @param {Array} out receives one array of {x, y} points per contour.
 * @param {string} [mode] "CHAIN_APPROX_SIMPLE" drops collinear points.
 */
function cn(t, out, mode = "CHAIN_APPROX_SIMPLE") {
  const rows = t.length;
  const cols = rows > 0 ? t[0].length : 0;
  const visited = Array.from({ length: rows }, () => new Array(cols).fill(false));
  for (let y = 0; y < rows; y++) {
    for (let x = 0; x < cols; x++) {
      if (t[y][x] !== 0 && !visited[y][x] && Yt(t, x, y)) {
        out.push(rn(t, visited, x, y, mode === "CHAIN_APPROX_SIMPLE"));
      }
    }
  }
}

/** True when pixel (x, y) is foreground and has a background 4-neighbour inside the image. */
function Yt(t, x, y) {
  return t[y][x] !== 0 && (y > 0 && t[y - 1][x] === 0 || y < t.length - 1 && t[y + 1][x] === 0 || x > 0 && t[y][x - 1] === 0 || x < t[0].length - 1 && t[y][x + 1] === 0);
}

/**
 * Follows border pixels starting at (o, s), marking them in `e`, and returns
 * the visited points (simplified with an() when `a` is true).
 *
 * `used` maps a pixel index to the directions already walked from/into it;
 * `pending` maps a pixel index to directions that were available there but
 * not taken yet, so the walk can resume from them once it reaches a dead end.
 */
function rn(t, e, o, s, a) {
  const path = [];
  let current = { x: o, y: s };
  let previous = { x: o - 1, y: s };
  const used = /* @__PURE__ */ new Map();
  const pending = /* @__PURE__ */ new Map();
  function toIndex(p) {
    return p.x + p.y * t[0].length;
  }
  function fromIndex(index) {
    const y = Math.floor(index / t[0].length);
    return { x: index % t[0].length, y };
  }
  // Records the move from -> to on both endpoints (forward and reverse direction).
  function link(from, to) {
    const fromKey = toIndex(from);
    const toKey = toIndex(to);
    const forward = It(to.x - from.x, to.y - from.y);
    const reverse = It(from.x - to.x, from.y - to.y);
    const fromDirs = used.get(fromKey) ?? [];
    const toDirs = used.get(toKey) ?? [];
    used.set(fromKey, [...fromDirs, forward]);
    used.set(toKey, [...toDirs, reverse]);
  }
  // Moves one pixel in direction `dir` and removes it from the pending set of the pixel left behind.
  function step(dir) {
    const key = toIndex(current);
    previous = current;
    current = { x: current.x + ht[dir].dx, y: current.y + ht[dir].dy };
    link(previous, current);
    const rest = (pending.get(key) ?? []).filter((candidate) => candidate !== dir);
    if (rest.length > 0) pending.set(key, rest);
    else pending.delete(key);
  }
  // The start pixel is treated as if it had been entered from its left neighbour.
  used.set(toIndex(current), [It(-1, 0)]);
  let guard = 0;
  do {
    path.push(current);
    e[current.y][current.x] = true;
    const options = ln(t, used, current);
    if (options.length === 0) {
      if (pending.size === 0) break;
      // Dead end: jump back to the oldest pixel that still has an untaken direction.
      const [key, dirs] = Array.from(pending.entries()).at(0);
      const dir = dirs[0];
      current = fromIndex(key);
      step(dir);
    }
    if (options.length >= 1) {
      pending.set(toIndex(current), options);
      step(options[0]);
    }
    guard++;
  } while (guard < 1e9); // hard cap on iterations
  return a ? an(path) : path;
}

/** The eight neighbour offsets; the index is the direction code used by rn/ln/It. */
const ht = [
  { dx: 1, dy: 0 }, // Right
  { dx: 1, dy: -1 }, // Top-Right
  { dx: 0, dy: -1 }, // Top
  { dx: -1, dy: -1 }, // Top-Left
  { dx: -1, dy: 0 }, // Left
  { dx: -1, dy: 1 }, // Bottom-Left
  { dx: 0, dy: 1 }, // Bottom
  { dx: 1, dy: 1 } // Bottom-Right
];

/** Directions from `o` that lead to an in-bounds border pixel and are not recorded in `e` for that pixel. */
function ln(t, e, o) {
  const taken = e.get(o.x + o.y * t[0].length) ?? [];
  const options = [];
  for (const [dir, { dx, dy }] of ht.entries()) {
    if (taken.includes(dir)) continue;
    const x = o.x + dx;
    const y = o.y + dy;
    if (x >= 0 && x < t[0].length && y >= 0 && y < t.length && Yt(t, x, y)) options.push(dir);
  }
  return options;
}

/** Direction code for an offset; unknown offsets map to 0. */
function It(dx, dy) {
  const index = ht.findIndex((dir) => dx === dir.dx && dy === dir.dy);
  return index === -1 ? 0 : index;
}

/** Drops points that are collinear with the last kept point and the following point. */
function an(t) {
  if (t.length < 3) return [...t];
  const kept = [t[0]];
  for (let i = 1; i < t.length - 1; i++) {
    if (!un(kept[kept.length - 1], t[i], t[i + 1])) kept.push(t[i]);
  }
  kept.push(t[t.length - 1]);
  return kept;
}

/** True when the three points are collinear. */
function un(t, e, o) {
  return (e.x - t.x) * (o.y - e.y) === (e.y - t.y) * (o.x - e.x);
}

/** Timers: G for the overall pipeline, F for detection post-processing. */
const G = new Ft("t");
const F = new Ft("af_det");

/** L: dev mode (debug canvases + timers). Ct: logging enabled. q: pipeline created by Ln. */
let L = false;
let Ct = false;
let q = null;

/** Dev helper: copies `t` onto a new <canvas> (optionally with id `e`) and appends it to the page. */
function ot(t, e) {
  const canvas = document.createElement("canvas");
  canvas.width = t.width;
  canvas.height = t.height;
  canvas.getContext("2d").drawImage(t, 0, 0);
  if (e) canvas.id = e;
  try {
    document?.body?.append(canvas);
  } catch {
    // Best effort: the debug preview must never break the pipeline.
  }
}

/** ImageData factory; setOCREnv ({ imageData }) replaces it. */
let ft = (data, width, height) => new ImageData(data, width, height);

/** console.log that is silent unless logging is enabled. */
function z(...t) {
  if (Ct) console.log(...t);
}

/** Logs every argument (a CSS colour) painted in that colour, when logging is enabled. */
function hn(...t) {
  if (Ct) console.log(t.map((e) => `%c${e}`).join(""), ...t.map((e) => `color: ${e}`));
}

/**
 * Initialises the module-level pipeline used by Rn/Fn/Yn and returns it.
 * Accepts either nested options (det / rec / docCls / analyzeLayout) or flat
 * ones (detPath, recPath, dic, ...); the flat form is mapped onto the nested
 * one. Everything in `t` is spread last, so explicit keys win.
 */
async function Ln(t) {
  fn(t);
  const options = {
    det: "det" in t ? t.det : {
      input: t.detPath,
      ratio: t.detRatio,
      on: async (result) => {
        t.onDet && t.onDet(result), t.onProgress && t.onProgress("det", 1, 1);
      }
    },
    rec: "rec" in t ? t.rec : {
      input: t.recPath,
      decodeDic: t.dic,
      imgh: t.imgh,
      on: async (index, result, total) => {
        t.onRec && t.onRec(index, result), t.onProgress && t.onProgress("rec", total, index + 1);
      }
    },
    docCls: "rec" in t ? t.docCls : t.docClsPath ? { input: t.docClsPath } : void 0,
    analyzeLayout: "rec" in t ? t.analyzeLayout : { columnsTip: t.columnsTip, docDirs: t.docDirs },
    ...t
  };
  const pipeline = await xn(options);
  q = pipeline;
  return pipeline;
}

/**
 * Applies environment options: dev / log flags and the canvas / imageData
 * factories. Outside dev mode the timers are turned into no-ops.
 */
function fn(t) {
  L = !!t.dev;
  Ct = L || !!t.log;
  if (!L) {
    G.l = () => {
    };
    F.l = () => {
    };
  }
  if (t.canvas) Zt(t.canvas);
  if (t.imageData) ft = t.imageData;
}

/**
 * Normalises an image source to ImageData.
 * Without a `window` global the input must already be ImageData-like
 * ({ data, width, height }). With one, a URL string is loaded through
 * `Image`, and image / canvas elements are rasterised; anything else is
 * returned as is.
 * @throws {Error} "invalid image data", "canvas context is null", or
 *   "failed to load image: <url>" when the image cannot be loaded.
 */
async function dn(t) {
  if (typeof window === "undefined") {
    if (!t.data || !t.width || !t.height) throw new Error("invalid image data");
    return t;
  }
  let source = t;
  if (typeof t === "string") {
    const img = new Image();
    await new Promise((resolve, reject) => {
      img.onload = resolve;
      // Without this a broken URL would leave the promise pending forever.
      img.onerror = () => reject(new Error(`failed to load image: ${t}`));
      // Handlers are attached before src so no event can be missed.
      img.src = t;
    });
    source = img;
  }
  if (source instanceof HTMLImageElement) {
    const width = source.naturalWidth;
    const height = source.naturalHeight;
    const ctx = st(width, height).getContext("2d");
    if (!ctx) throw new Error("canvas context is null");
    ctx.drawImage(source, 0, 0);
    source = ctx.getImageData(0, 0, width, height);
  }
  if (source instanceof HTMLCanvasElement) {
    const ctx = source.getContext("2d");
    if (!ctx) throw new Error("canvas context is null");
    source = ctx.getImageData(0, 0, source.width, source.height);
  }
  return source;
}

/** Verifies that the canvas and ImageData factories work; logs a hint and rethrows otherwise. */
function Nt() {
  try {
    st(1, 1);
    ft(new Uint8ClampedArray(4), 1, 1);
  } catch (error) {
    console.log("nodejs need set canvas, please use setOCREnv to set canvas and imageData");
    throw error;
  }
}

/** Full OCR with the pipeline created by Ln. @throws {Error} "need init" before Ln. */
async function Rn(t) {
  if (!q) throw new Error("need init");
  return q.ocr(t);
}

/** Detection only, with the pipeline created by Ln. @throws {Error} "need init" before Ln. */
async function Fn(t) {
  if (!q) throw new Error("need init");
  return q.det(t);
}

/** Recognition only, with the pipeline created by Ln. @throws {Error} "need init" before Ln. */
async function Yn(t) {
  if (!q) throw new Error("need init");
  return q.rec(t);
}

/**
 * Builds the optional direction classifier, the detector and the recogniser
 * and wires them into { ocr, det, rec }.
 */
async function xn(t) {
  Nt();
  const shared = { ort: t.ort, ortOption: t.ortOption };
  const classifier = t.docCls ? await mn({ ...t.docCls, ...shared }) : void 0;
  const detector = await gn({ ...t.det, ...shared });
  const recognizer = await bn({ ...t.rec, ...shared });
  return {
    ocr: async (input) => {
      let image = await dn(input);
      let docDir = 0;
      if (classifier) {
        docDir = await classifier.docCls(image);
        z("dir", docDir);
        image = Gt(image, 360 - docDir); // rotate the page back upright
      }
      const boxes = await detector.det(image);
      const lines = await recognizer.rec(boxes);
      const layout = _n(lines, t.analyzeLayout);
      z(lines, layout);
      G.l("end");
      return { src: lines, ...layout, docDir };
    },
    det: detector.det,
    rec: recognizer.rec
  };
}

/** Creates an inference session from the injected runtime. */
function St(ort, input, options) {
  return ort.InferenceSession.create(input, options);
}

/** Page-direction classifier: resolves to 0, 90, 180 or 270, using a 224x224 input. */
async function mn(t) {
  const session = await St(t.ort, t.input, t.ortOption);
  return { docCls: async (image) => tn(image, t.ort, session, [0, 90, 180, 270], 224, 224) };
}

/**
 * Text detector. `det(image)` resolves to the array built by wn
 * ({ box, img, style } per text region); `t.on` is called with it (not awaited).
 */
async function gn(t) {
  Nt();
  let ratio = 1;
  const session = await St(t.ort, t.input, t.ortOption);
  if (t.ratio !== void 0) ratio = t.ratio;
  async function det(image) {
    if (L) ot(H(image));
    G.l("pre_det");
    const { data, width, height } = In(image, ratio);
    const { transposedData, image: resized } = data;
    G.l("det");
    const output = await pn(transposedData, resized, session, t.ort);
    G.l("aft_det");
    const boxes = wn({ data: output.data, width: output.dims[3], height: output.dims[2] }, width, height, image);
    t?.on?.(boxes);
    return boxes;
  }
  return { det };
}

/**
 * Text recogniser. `rec(boxes)` runs the model on every crop, decodes it
 * with the dictionary and keeps results whose mean score is at least 0.5.
 * The dictionary is split on line breaks and always ends with a space entry.
 */
async function bn(t) {
  Nt();
  let imgH = 48;
  const session = await St(t.ort, t.input, t.ortOption);
  const dictionary = t.decodeDic.split(/\r\n|\r|\n/);
  if (dictionary.at(-1) === "") dictionary[dictionary.length - 1] = " ";
  else dictionary.push(" ");
  if (t.imgh) imgH = t.imgh;
  const optimizeSpace = t.optimize?.space === void 0 ? true : t.optimize.space;
  async function rec(boxes) {
    const results = [];
    G.l("bf_rec");
    const inputs = Pn(boxes, imgH);
    for (const [index, item] of inputs.entries()) {
      const { b: planes, imgH: h, imgW: w } = item;
      const output = await yn(planes, h, w, session, t.ort);
      const decoded = An(output, dictionary, { opm: { space: optimizeSpace } })[0];
      results.push({ text: decoded.text, mean: decoded.mean, box: boxes[index].box, style: boxes[index].style });
      t?.on?.(index, decoded, boxes.length);
    }
    G.l("rec_end");
    return results.filter((item) => item.mean >= 0.5);
  }
  return { rec };
}

/** Runs the detection session on a [1, 3, H, W] tensor; returns its first output. */
async function pn(planes, image, session, ort) {
  const data = Float32Array.from(planes.flat(3));
  const tensor = new ort.Tensor("float32", data, [1, 3, image.height, image.width]);
  const feeds = {};
  feeds[session.inputNames[0]] = tensor;
  return (await session.run(feeds))[session.outputNames[0]];
}

/** Runs the recognition session on a [1, 3, height, width] tensor; returns its first output. */
async function yn(planes, height, width, session, ort) {
  const data = Float32Array.from(planes.flat(3));
  const tensor = new ort.Tensor("float32", data, [1, 3, height, width]);
  const feeds = {};
  feeds[session.inputNames[0]] = tensor;
  return (await session.run(feeds))[session.outputNames[0]];
}

/**
 * Detection pre-processing: scales the image by `ratio`, rounds both sides
 * to a multiple of 32 (minimum 32) and normalises the pixels.
 */
function In(image, ratio) {
  const height = Math.max(Math.round(image.height * ratio / 32) * 32, 32);
  const width = Math.max(Math.round(image.width * ratio / 32) * 32, 32);
  if (L) ot(H(image));
  const resized = Mt(image, width, height, "fill");
  const transposedData = kt(resized, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]);
  z(resized);
  if (L) ot(H(resized));
  return { data: { transposedData, image: resized }, width, height };
}

/**
 * Detection post-processing: thresholds the probability map at 0.3, traces
 * contours, and for every contour builds an expanded box, crops it from the
 * source image and estimates background/text colours.
 * @param t { data, width, height } probability map.
 * @param {number} e width of the model input.
 * @param {number} o height of the model input.
 * @param s source ImageData.
 * @returns {{box:number[][], img:ImageData, style:{bg:number[], text:number[]}}[]}
 */
function wn(t, e, o, s) {
  F.l("");
  const usedWidth = Math.min(s.width, e);
  const usedHeight = Math.min(s.height, o);
  const { data, width, height } = t;
  const mask = new Uint8Array(width * height);
  for (let i = 0; i < data.length; i++) mask[i] = data[i] > 0.3 ? 255 : 0;
  if (L) {
    // Dev preview of the thresholded map; later debug drawing targets #det_ru.
    const rgba = new Uint8ClampedArray(width * height * 4);
    for (let i = 0; i < data.length; i++) {
      const offset = i * 4;
      const value = data[i] > 0.3 ? 255 : 0;
      rgba[offset] = rgba[offset + 1] = rgba[offset + 2] = value;
      rgba[offset + 3] = 255;
      mask[i] = value;
    }
    ot(H(ft(rgba, width, height)), "det_ru");
  }
  F.l("edge");
  const results = [];
  const rows = [];
  for (let y = 0; y < height; y++) rows.push(Array.from(mask.slice(y * width, y * width + width)));
  const contours = [];
  cn(rows, contours);
  if (L) {
    const ctx = document.querySelector("#det_ru").getContext("2d");
    for (const contour of contours) {
      ctx.moveTo(contour[0].x, contour[0].y);
      for (const point of contour) ctx.lineTo(point.x, point.y);
      ctx.strokeStyle = "red";
      ctx.closePath();
      ctx.stroke();
    }
  }
  const minSize = 3;
  for (let i = 0; i < contours.length; i++) {
    F.l("get_box");
    const { points: miniBox, sside } = Sn(contours[i]);
    if (sside < minSize) continue;
    const expanded = Cn(miniBox);
    const box = expanded.points;
    if (expanded.sside < minSize + 2) continue;
    // Map from model-input coordinates back to source-image coordinates.
    const scaleX = s.width / usedWidth;
    const scaleY = s.height / usedHeight;
    for (let j = 0; j < box.length; j++) {
      box[j][0] *= scaleX;
      box[j][1] *= scaleY;
    }
    F.l("order");
    // Tn returns the same point arrays, so rounding here also updates `box`.
    const ordered = Tn(box);
    for (const point of ordered) {
      point[0] = J(Math.round(point[0]), 0, s.width);
      point[1] = J(Math.round(point[1]), 0, s.height);
    }
    const boxWidth = zt(Lt(ordered[0], ordered[1]));
    const boxHeight = zt(Lt(ordered[0], ordered[3]));
    if (boxWidth <= 3 || boxHeight <= 3) continue;
    On(box, "", "red", "det_ru");
    F.l("crop");
    const crop = Bn(s, box);
    F.l("match best");
    const { bg, text } = Dn(crop);
    const tightBox = En(box, crop, text);
    results.push({ box: tightBox, img: crop, style: { bg, text } });
  }
  F.l("e");
  z(results);
  return results;
}

/** Signed polygon area (shoelace formula); callers take the absolute value. */
function Mn(t) {
  let previous = t[t.length - 1];
  let sum = 0;
  for (const current of t) {
    sum += previous[1] * current[0] - previous[0] * current[1];
    previous = current;
  }
  return sum / 2;
}

/** Polygon perimeter. */
function kn(t) {
  let previous = t[t.length - 1];
  let sum = 0;
  for (const current of t) {
    sum += Math.hypot(previous[0] - current[0], previous[1] - current[1]);
    previous = current;
  }
  return sum;
}

/**
 * Expands a 4-point box outward by area * 1.5 / perimeter along both edges
 * meeting at every corner.
 * @returns {{points:number[][], sside:number}} `sside` is the absolute cross
 *   product of two adjacent edges of the expanded box.
 */
function Cn(t) {
  const area = Math.abs(Mn(t));
  const perimeter = kn(t);
  const distance = area * 1.5 / perimeter;
  const points = [];
  for (const [index, corner] of t.entries()) {
    const before = t.at((index - 1) % 4); // index 0 yields -1, i.e. the last point
    const after = t.at((index + 1) % 4);
    const dx1 = corner[0] - before[0];
    const dy1 = corner[1] - before[1];
    const len1 = Math.sqrt(dx1 ** 2 + dy1 ** 2);
    const shiftX1 = dx1 / len1 * distance;
    const shiftY1 = dy1 / len1 * distance;
    const dx2 = corner[0] - after[0];
    const dy2 = corner[1] - after[1];
    const len2 = Math.sqrt(dx2 ** 2 + dy2 ** 2);
    const shiftX2 = dx2 / len2 * distance;
    const shiftY2 = dy2 / len2 * distance;
    points.push([corner[0] + shiftX1 + shiftX2, corner[1] + shiftY1 + shiftY2]);
  }
  const edgeA = [points[0][0] - points[1][0], points[0][1] - points[1][1]];
  const edgeB = [points[2][0] - points[1][0], points[2][1] - points[1][1]];
  const cross = edgeA[0] * edgeB[1] - edgeA[1] * edgeB[0];
  return { points, sside: Math.abs(cross) };
}

/** Corner points of a rectangle given its centre, size and rotation in degrees. */
function Nn(center, size, angle) {
  const radians = angle * Math.PI / 180;
  const cos = Math.cos(radians);
  const sin = Math.sin(radians);
  const cx = center.x;
  const cy = center.y;
  const halfW = size.width * 0.5;
  const halfH = size.height * 0.5;
  return [
    [cx - halfW * cos + halfH * sin, cy - halfW * sin - halfH * cos],
    [cx + halfW * cos + halfH * sin, cy + halfW * sin - halfH * cos],
    [cx + halfW * cos - halfH * sin, cy + halfW * sin + halfH * cos],
    [cx - halfW * cos - halfH * sin, cy - halfW * sin + halfH * cos]
  ];
}

/**
 * Minimum-area box of a contour as four points ordered from the x-sorted
 * corners (upper of the left pair, upper of the right pair, lower of the
 * right pair, lower of the left pair), plus its shorter side length.
 */
function Sn(t) {
  const rect = on(t);
  const sorted = Array.from(Nn(rect.center, rect.size, rect.angle)).sort((p, q) => p[0] - q[0]);
  let first = 0;
  let second = 1;
  let third = 2;
  let fourth = 3;
  if (sorted[1][1] > sorted[0][1]) {
    first = 0;
    fourth = 1;
  } else {
    first = 1;
    fourth = 0;
  }
  if (sorted[3][1] > sorted[2][1]) {
    second = 2;
    third = 3;
  } else {
    second = 3;
    third = 2;
  }
  return {
    points: [sorted[first], sorted[second], sorted[third], sorted[fourth]],
    sside: Math.min(rect.size.height, rect.size.width)
  };
}

/** Euclidean distance between two [x, y] points. */
function Lt(t, e) {
  return Math.sqrt((t[0] - e[0]) ** 2 + (t[1] - e[1]) ** 2);
}

/**
 * Orders four points: smallest x+y first, largest x+y third; the remaining
 * two are placed by comparing the x and y components of their difference.
 * Returns the same point arrays, not copies.
 */
function Tn(t) {
  const ordered = [
    [0, 0],
    [0, 0],
    [0, 0],
    [0, 0]
  ];
  const sums = t.map((point) => point[0] + point[1]);
  ordered[0] = t[sums.indexOf(Math.min(...sums))];
  ordered[2] = t[sums.indexOf(Math.max(...sums))];
  const rest = t.filter((point) => point !== ordered[0] && point !== ordered[2]);
  const delta = rest[1].map((value, i) => value - rest[0][i]);
  ordered[1] = rest[delta.indexOf(Math.min(...delta))];
  ordered[3] = rest[delta.indexOf(Math.max(...delta))];
  return ordered;
}

/**
 * Crops the parallelogram spanned by e[0]->e[1] and e[0]->e[3] out of image
 * `t` into an axis-aligned ImageData, using an affine canvas transform.
 * @throws {Error} when the three defining points are collinear.
 */
function Bn(t, e) {
  const [p0, p1, , p3] = e.map((point) => ({ x: point[0], y: point[1] }));
  const width = Math.sqrt((p1.x - p0.x) ** 2 + (p1.y - p0.y) ** 2);
  const height = Math.sqrt((p3.x - p0.x) ** 2 + (p3.y - p0.y) ** 2);
  const ax = p1.x - p0.x;
  const ay = p1.y - p0.y;
  const bx = p3.x - p0.x;
  const by = p3.y - p0.y;
  const det = ax * by - bx * ay;
  if (det === 0) throw new Error("点共线,无法形成矩形");
  // Inverse of the map (u, v) -> p0 + u * a / width + v * b / height.
  const m11 = width * by / det;
  const m21 = -bx * width / det;
  const m12 = -height * ay / det;
  const m22 = ax * height / det;
  const dx = -m11 * p0.x - m21 * p0.y;
  const dy = -m12 * p0.x - m22 * p0.y;
  const source = H(t);
  const canvas = st(Math.ceil(width), Math.ceil(height));
  const ctx = canvas.getContext("2d");
  ctx.setTransform(m11, m12, m21, m22, dx, dy);
  ctx.drawImage(source, 0, 0);
  ctx.resetTransform();
  return ctx.getImageData(0, 0, canvas.width, canvas.height);
}
function Dn(t) { var m, f; const e = /* @__PURE__ */ new Map(), o = t.data; for (let l = 0; l < o.length; l += 4) { if (l / 4 % t.width > t.height * 4) continue; const y = o[l], x = o[l + 1], b = o[l + 2], p = [y, x, b].join(","); e.set(p, (e.get(p) || 0) + 1); } const s = vn(e, 20).map((l) => ({ el: l.el.split(",").map(Number), count: l.count })), a = ((m = s.at(0)) == null ? void 0 : m.el) || [255, 255, 255], r = ((f = s.at(1)) == null ? void 0 : f.el) || [0, 0, 0]; let i = r; const u = 100; if (at(r, a) < u) { const l = s.slice(1).filter((d) => at(d.el, a) > 50); l.length > 0 && (i = [0, 1, 2].map( (d) => Math.round(jt(l.map((y) => [y.el[d], y.count]))) )), (l.length === 0 || at(i, a) < u) && (i = a.map((d) => 255 - d)), hn(`rgb(${i.join(",")})`); } return { bg: a, text: i, textEdge: r }; } function at(t, e) { const o = t, s = e; return Math.sqrt((o[0] - s[0]) ** 2 + (o[1] - s[1]) ** 2 + (o[2] - s[2]) ** 2); } function vn(t, e = 1) { let o = []; return t.forEach((s, a) => { o.length === 0 ? o.push({ el: a, count: s }) : (o.length < e ? 
o.push({ el: a, count: s }) : o.find((r) => r.count <= s) && o.push({ el: a, count: s }), o.sort((r, i) => i.count - r.count), o.length > e && (o = o.slice(0, e))); }), o; } function En(t, e, o) { let s = 0, a = e.height, r = 0, i = e.width; function u(x) { return at(x, o) < 200; } t: for (let x = s; x < e.height; x++) for (let b = 0; b < e.width; b++) { const p = lt(e, b, x); if (u(p)) { s = x; break t; } } t: for (let x = a - 1; x >= 0; x--) for (let b = 0; b < e.width; b++) { const p = lt(e, b, x); if (u(p)) { a = x; break t; } } t: for (let x = r; x < e.width; x++) for (let b = s; b <= a; b++) { const p = lt(e, x, b); if (u(p)) { r = x; break t; } } t: for (let x = i - 1; x >= 0; x--) for (let b = s; b <= a; b++) { const p = lt(e, x, b); if (u(p)) { i = x; break t; } } const m = J(s - 1, 0, 4), f = J(e.height - a - 1, 0, 4), l = J(r - 1, 0, 4), d = J(e.width - i - 1, 0, 4); return [ [t[0][0] + l, t[0][1] + m], [t[1][0] - d, t[1][1] + m], [t[2][0] - d, t[2][1] - f], [t[3][0] + l, t[3][1] - f] ]; } function lt(t, e, o) { const s = (o * t.width + e) * 4; return Array.from(t.data.slice(s, s + 4)); } function Pn(t, e) { const o = []; function s(a) { const r = Math.floor(e * (a.width / a.height)), i = Mt(a, r, e, void 0, !1); return L && ot(H(i, r, e)), { data: i, w: r, h: e }; } for (const a of t) { let r = a.img; r.width < r.height && (r = Gt(r, -90)); const i = s(r); o.push({ b: kt(i.data, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]), imgH: i.h, imgW: i.w }); } return z(o), o; } function An(t, e, o) { const s = t.dims[2], a = []; let r = t.dims[0] - 1; function i(m) { return e.at(m - 1) ?? ""; } for (let m = 0; m < t.data.length; m += s * t.dims[1]) { const f = [], l = []; for (let d = m; d < m + s * t.dims[1]; d += s) { const y = t.data.slice(d, d + s); let x = Number.NEGATIVE_INFINITY, b = -1, p = Number.NEGATIVE_INFINITY, I = -1; for (let k = 0; k < y.length; k++) { const N = y[k]; N > x ? 
(p = x, x = N, b = k) : N > p && N < x && (p = N, I = k); } o.opm.space && b === 0 && i(I) === " " && p > 1e-3 && (x = p, b = I), l.push(x), f.push(b); } a[r] = u(f, l), r--; } function u(m, f) { const l = [], d = []; for (let b = 0; b < m.length; b++) m[b] !== 0 && (b > 0 && m[b - 1] === m[b] || (l.push(i(m[b])), d.push(f[b]))); let y = "", x = 0; if (l.length) { y = l.join("").trim(); let b = 0; for (const p of d) b += p; x = b / d.length; } return { text: y, mean: x }; } return a; } function _n(t, e) { var _t; z(t); const o = (e == null ? void 0 : e.docDirs) ?? [ { block: "tb", inline: "lr" }, { block: "rl", inline: "tb" } ], s = { block: "tb", inline: "lr" }, a = { inline: [1, 0], block: [0, 1] }, r = { inline: [1, 0], block: [0, 1] }; if (t.length === 0) return { columns: [], parragraphs: [], readingDir: s, angle: { reading: { inline: 0, block: 90 }, angle: 0 } }; const i = [ { box: [ [Number.NEGATIVE_INFINITY, Number.NEGATIVE_INFINITY], [Number.POSITIVE_INFINITY, Number.NEGATIVE_INFINITY], [Number.POSITIVE_INFINITY, Number.POSITIVE_INFINITY], [Number.NEGATIVE_INFINITY, Number.POSITIVE_INFINITY] ], type: "none" } ], u = 0; function m(n) { const c = l.center(n); for (let h = i.length - 1; h >= 0; h--) { const w = i[h].box; if (c[0] >= w[0][0] && c[0] <= w[1][0] && c[1] >= w[0][1] && c[1] <= w[3][1]) return h; } return u; } const f = { center: (n, c) => [(n[0] + c[0]) / 2, (n[1] + c[1]) / 2], disByV: (n, c, h) => Math.abs(h === "block" ? d.dotMup(n, r.block) - d.dotMup(c, r.block) : d.dotMup(n, r.inline) - d.dotMup(c, r.inline)), compare: (n, c, h) => h === "block" ? 
d.dotMup(n, r.block) - d.dotMup(c, r.block) : d.dotMup(n, r.inline) - d.dotMup(c, r.inline), toInline: (n) => d.dotMup(n, r.inline), toBlock: (n) => d.dotMup(n, r.block) }, l = { inlineStart: (n) => f.center(n[0], n[3]), inlineEnd: (n) => f.center(n[1], n[2]), blockStart: (n) => f.center(n[0], n[1]), blockEnd: (n) => f.center(n[2], n[3]), inlineSize: (n) => n[1][0] - n[0][0], blockSize: (n) => n[3][1] - n[0][1], inlineStartDis: (n, c) => f.disByV(n[0], c[0], "inline"), inlineEndDis: (n, c) => f.disByV(n[1], c[1], "inline"), blockGap: (n, c) => f.disByV(n[0], c[3], "block"), inlineCenter: (n) => (n[2][0] + n[0][0]) / 2, blockCenter: (n) => (n[2][1] + n[0][1]) / 2, inlineStartCenter: (n) => l.inlineStart(n), center: (n) => f.center(n[0], n[2]) }, d = { fromPonts: (n, c) => [n[0] - c[0], n[1] - c[1]], dotMup: (n, c) => n[0] * c[0] + n[1] * c[1], numMup: (n, c) => [n[0] * c, n[1] * c], add: (n, c) => [n[0] + c[0], n[1] + c[1]] }; function y(n) { let c = 0, h = 0; const g = []; for (const [w, M] of n.entries()) { const C = M > 180 ? M - 180 : M, T = C - 180, E = w === 0 ? C : Math.abs(T - c) < Math.abs(C - c) ? T : C; g.push(E), c = (c * h + E) / (h + 1), h++; } return { av: c, l: g }; } function x(n, c) { return Math.abs(n - c) < 45 || Math.abs(n - (c - 180)) < 45 || Math.abs(n - 180 - c) < 45; } function b(n) { n.sort((h, g) => h - g); const c = Math.floor(n.length / 2); return n.length % 2 === 0 ? (n[c - 1] + n[c]) / 2 : n[c]; } function p(n) { return n === "lr" || n === "rl" ? 
"x" : "y"; } function I(n, c) { let h = Number.POSITIVE_INFINITY, g = -1; for (let w = 0; w < n.length; w++) { const M = c(n[w]); M < h && (h = M, g = w); } return n[g]; } const k = { lr: [1, 0], rl: [-1, 0], tb: [0, 1], bt: [0, -1] }; function N(n, c) { const h = k[n.inline], g = k[n.block], w = k[c.inline], M = k[c.block], C = [d.dotMup(w, h), d.dotMup(w, g)], T = [d.dotMup(M, h), d.dotMup(M, g)]; return (E) => [d.dotMup(E, C), d.dotMup(E, T)]; } function B(n, c) { const h = N(n, c); return { b: (g) => { for (const w of g) { const [M, C] = h(w); w[0] = M, w[1] = C; } }, p: h }; } function P(n) { return (c) => { const h = [ [0, 0], [0, 0], [0, 0], [0, 0] ]; for (let g = 0; g < n.length; g++) h[g] = c[n[g]]; return h; }; } function O(n, c) { return Math.sqrt((n[0] - c[0]) ** 2 + (n[1] - c[1]) ** 2); } function v(n) { const c = n.flatMap((D) => D.map((S) => S)), h = Math.min(...c.map((D) => d.dotMup(D, r.inline))), g = Math.max(...c.map((D) => d.dotMup(D, r.inline))), w = Math.min(...c.map((D) => d.dotMup(D, r.block))), M = Math.max(...c.map((D) => d.dotMup(D, r.block))), C = d.add(d.numMup(r.inline, h), d.numMup(r.block, w)), T = d.numMup(r.inline, g - h), E = d.numMup(r.block, M - w); return [C, d.add(C, T), d.add(d.add(C, T), E), d.add(C, E)]; } function dt(n) { let c = null, h = Number.POSITIVE_INFINITY; for (const E in Y) { const D = Y[E].src.at(-1); if (!D) continue; const S = O(n.box[0], D.box[0]); S < h && (c = Number(E), h = S); } if (c === null) { Y.push({ src: [n] }); return; } const g = Y[c].src.at(-1), w = l.inlineSize(n.box), M = l.inlineSize(g.box), C = Math.min(w, M), T = l.blockSize(n.box); if ( // 左右至少有一边是相近的,中心距离要相近 // 行之间也不要离太远 !((l.inlineStartDis(n.box, g.box) < 3 * T || l.inlineEndDis(n.box, g.box) < 3 * T || f.disByV(l.center(n.box), l.center(g.box), "inline") < C * 0.4) && l.blockGap(n.box, g.box) < T * 1.1) ) { Y.push({ src: [n] }); return; } Y[c].src.push(n); } function xt(n) { var w, M; const c = new RegExp("\\p{Ideographic}", "u"), h = 
/[。,!?;:“”‘’《》、【】()…—]/, g = { box: v(n.map((C) => C.box)), text: "", mean: jt(n.map((C) => [C.mean, C.text.length])), style: n[0].style }; for (const C of n) { const T = g.text.at(-1); T && (!T.match(c) && !T.match(h) || !((w = C.text.at(0)) != null && w.match(c)) && !((M = C.text.at(0)) != null && M.match(h))) && (g.text += " "), g.text += C.text; } return g; } function W(n) { n.sort((c, h) => { const g = c.src.at(0) ? l.blockSize(c.src.at(0).box) : 2; return f.disByV(l.blockStart(c.outerBox), l.blockStart(h.outerBox), "block") < g ? f.compare(l.inlineStart(c.outerBox), l.inlineStart(h.outerBox), "inline") : f.compare(l.blockStart(c.outerBox), l.blockStart(h.outerBox), "block"); }); } if (e != null && e.columnsTip) for (const n of e.columnsTip) i.push(structuredClone(n)); const A = { inline: 0, block: 90 }, $ = t.map((n) => { const c = n.box, h = c[1][0] - c[0][0], g = c[3][1] - c[0][1]; let w = { x: 0, y: 0 }; if (h < g) { const C = d.fromPonts(f.center(c[2], c[3]), f.center(c[0], c[1])); w = { x: C[0], y: C[1] }; } else { const C = d.fromPonts(f.center(c[1], c[2]), f.center(c[0], c[3])); w = { x: C[0], y: C[1] }; } return ut(Math.atan2(w.y, w.x) * (180 / Math.PI)); }), ct = y($), _ = $.filter((n) => x(n, ct.av)), Tt = b(_), Ht = b(_.map((n) => Math.abs(n - Tt))), Bt = _.filter((n) => Math.abs((n - Tt) / (Ht * 1.4826)) < 2), R = ut(y(Bt).av); z("dir0", $, ct, _, Bt, R); const X = ut(R + 90), qt = x(R, 0) ? "x" : "y", Wt = x(X, 90) ? "y" : "x", mt = o.find((n) => qt === p(n.inline) && Wt === p(n.block)) ?? 
o.at(0); mt && (s.block = mt.block, s.inline = mt.inline); const Dt = { lr: 0, rl: 180, tb: 90, bt: 270 }; A.inline = I( [R, R - 360, R - 180, R + 180], (n) => Math.abs(n - Dt[s.inline]) ), A.block = I( [X, X - 360, X - 180, X + 180], (n) => Math.abs(n - Dt[s.block]) ), a.inline = [Math.cos(A.inline * (Math.PI / 180)), Math.sin(A.inline * (Math.PI / 180))], a.block = [Math.cos(A.block * (Math.PI / 180)), Math.sin(A.block * (Math.PI / 180))], z("dir", s, A, a, R, X); const vt = [ [s.inline[0], s.block[0]], [s.inline[1], s.block[0]], [s.inline[1], s.block[1]], [s.inline[0], s.block[1]] ].map( ([n, c]) => ({ lt: 0, rt: 1, rb: 2, lb: 3 })[n === "l" || n === "r" ? n + c : c + n] ), rt = B({ inline: "lr", block: "tb" }, s), Et = P(vt), $t = t.map((n) => { const c = Et(n.box); return rt.b(c), { ...n, box: c }; }); for (const n of i) n.box = Et(n.box), rt.b(n.box); r.inline = rt.p(a.inline), r.block = rt.p(a.block), z("相对坐标系", r); const Xt = $t.sort((n, c) => f.compare(l.blockStart(n.box), l.blockStart(c.box), "block")), U = []; for (const n of Xt) { const c = m(n.box), h = (_t = U.at(-1)) == null ? void 0 : _t.line.at(-1); if (!h) { U.push({ line: [{ src: n, colId: c }] }); continue; } const g = l.center(n.box), w = l.center(h.src.box); if (f.disByV(g, w, "block") < 0.5 * l.blockSize(n.box)) { const M = U.at(-1); M ? M.line.push({ src: n, colId: c }) : U.push({ line: [{ src: n, colId: c }] }); } else U.push({ line: [{ src: n, colId: c }] }); } const it = []; for (const n of U) { if (n.line.length === 1) { it.push({ src: n.line[0].src, colId: n.line[0].colId }); continue; } const c = wt(n.line.map((g) => l.blockSize(g.src.box))); n.line.sort((g, w) => f.compare(l.inlineStart(g.src.box), l.inlineStart(w.src.box), "inline")); let h = n.line.at(0); for (const g of n.line.slice(1)) { const w = l.inlineEnd(h.src.box), M = l.inlineStart(g.src.box); i[g.colId].type === "table" || g.colId !== h.colId || f.toInline(M) - f.toInline(w) > c ? 
(it.push({ ...h }), h = g) : (h.src.text += g.src.text, h.src.mean = (h.src.mean + g.src.mean) / 2, h.src.box = v([h.src.box, g.src.box])); } it.push({ ...h }); } const Y = [], gt = [], tt = []; for (const n of it) if (n.colId === u) gt.push(n); else { const c = tt.find((h) => h.colId === n.colId); c ? c.src.push(n.src) : tt.push({ src: [n.src], type: i[n.colId].type, colId: n.colId }); } gt.sort((n, c) => f.compare(l.blockStart(n.src.box), l.blockStart(c.src.box), "block")); for (const n of gt) dt(n.src); const nt = []; for (const [n, c] of Y.entries()) { const h = c.src, g = v(h.map((T) => T.box)), w = l.blockCenter(g), M = l.inlineSize(g); if (n === 0) { nt.push({ smallCol: [{ src: h, outerBox: g, x: w, w: M }] }); continue; } const C = nt.find((T) => { const E = T.smallCol.at(-1), D = l.blockSize(h.at(0).box); return l.inlineStartDis(E.outerBox, g) < 3 * D && l.inlineEndDis(E.outerBox, g) < 3 * D && l.blockGap(g, E.outerBox) < D * 2.1; }); C ? C.smallCol.push({ src: h, outerBox: g, x: w, w: M }) : nt.push({ smallCol: [{ src: h, outerBox: g, x: w, w: M }] }); } for (const n of nt) n.smallCol.sort((c, h) => f.compare(l.blockStart(c.outerBox), l.blockStart(h.outerBox), "block")); for (const n of tt) n.src.sort((c, h) => f.compare(l.blockStart(c.box), l.blockStart(h.box), "block")); const bt = []; for (const n of nt) { const c = v(n.smallCol.map((g) => g.outerBox)), h = n.smallCol.flatMap((g) => g.src); bt.push({ src: h, outerBox: c, type: "none" }); } W(bt); const et = []; for (const n of bt) { const c = et.at(-1); if (!c) { et.push(n); continue; } if (c.type !== "none") { et.push(n); continue; } const h = c.outerBox, g = l.blockSize(n.src[0].box); c.src.length === 1 && l.inlineStartDis(h, n.outerBox) < 3 * g || // 标题 n.src.length === 1 && l.inlineStartDis(h, n.outerBox) < 3 * g || // 末尾 l.inlineStartDis(h, n.outerBox) < 3 * g && l.inlineEndDis(h, n.outerBox) < 3 * g ? 
(c.src.push(...n.src), c.outerBox = v(c.src.map((w) => w.box))) : et.push(n); } let pt = !1; const j = []; for (const n of et) { const c = j.at(-1), h = { ...n, reCal: !1 }; if (!c) { j.push(h); continue; } const g = l.blockSize(h.src.at(0).box); f.compare(l.blockEnd(h.outerBox), l.blockEnd(c.outerBox), "block") < 0 && (l.inlineStartDis(c.outerBox, h.outerBox) < 3 * g || l.inlineEndDis(c.outerBox, h.outerBox) < 3 * g) ? (c.src.push(...h.src), c.reCal = !0, pt = !0) : j.push(h); } for (const n of j) n.reCal && (n.src.sort((c, h) => f.compare(l.blockStart(c.box), l.blockStart(h.box), "block")), n.outerBox = v(n.src.map((c) => c.box))); tt.length && (pt = !0); for (const n of tt) { const c = v(n.src.map((g) => g.box)), h = n.src; j.push({ src: h, outerBox: c, type: n.type, reCal: !1 }); } pt && W(j); const Pt = B(s, { inline: "lr", block: "tb" }), At = j.map((n) => { const c = n.src, h = []; if (n.type === "auto" || n.type === "none") { const M = {}; for (let S = 1; S < c.length; S++) { const V = c[S - 1].box, Q = c[S].box, Z = f.disByV(l.center(Q), l.center(V), "block"); M[Z] || (M[Z] = 0), M[Z]++; } const C = wt(c.map((S) => l.blockSize(S.box))), T = [[]]; for (const S of Object.keys(M).map((V) => Number(V)).sort()) { const V = T.at(-1), Q = V.at(-1); Q !== void 0 ? Math.abs(Q - S) < C * 0.5 ? V.push(S) : T.push([]) : V.push(S); } const E = T.map((S) => wt(S)).sort((S, V) => S - V).at(0) || 0; z("d", M, T, E), h.push([c[0]]); let D = c[0]; for (let S = 1; S < c.length; S++) { const V = d.add( d.add(l.inlineStartCenter(D.box), d.numMup(r.block, E)), d.numMup(r.inline, -l.inlineStartDis(D.box, n.outerBox)) ), Q = l.inlineStartCenter(c[S].box), Z = l.blockSize(c[S].box); if (l.inlineEndDis(D.box, n.outerBox) > 2 * Z || O(V, Q) > Z * 0.5) h.push([c[S]]); else { const Ot = h.at(-1); Ot ? 
Ot.push(c[S]) : h.push([c[S]]); } D = c[S]; } } else (n.type === "table" || n.type === "raw" || n.type === "raw-blank") && h.push(c); for (const M of c) Pt.b(M.box); Pt.b(n.outerBox); const g = []; for (const [M, C] of vt.entries()) g[C] = M; const w = P(g); for (const M of c) M.box = w(M.box); return n.outerBox = w(n.outerBox), z(h), { src: c, outerBox: n.outerBox, parragraphs: h.map((M) => ({ src: M, parse: xt(M) })) }; }), Ut = At.flatMap((n) => n.parragraphs.map((c) => c.parse)); let K = 0; return s.inline === "lr" && (K = A.inline), s.inline === "rl" && (K = A.inline - 180), s.block === "lr" && (K = A.block), s.block === "rl" && (K = A.block - 180), z("angle", K), { columns: At, parragraphs: Ut, readingDir: s, angle: { reading: A, angle: K } }; } function wt(t) { return t.reduce((e, o) => e + o, 0) / t.length; } function jt(t) { const e = t.map((s) => s[1]).reduce((s, a) => s + a, 0); let o = 0; for (const s of t) o += s[0] * s[1] / e; return o; } function ut(t) { return (t % 360 + 360) % 360; } function Gt(t, e) { const o = ut(e); if (o === 0) return t; if (![90, 180, 270].includes(o)) throw new Error("只支持90度的旋转"); const s = new Uint8ClampedArray(t.height * t.width * 4); for (let i = 0; i < t.height; i++) for (let u = 0; u < t.width; u++) { const m = i * t.width + u, f = o === 90 ? u * t.height + (t.height - i - 1) : o === 180 ? t.width - u - 1 + (t.height - i - 1) * t.width : (t.width - u - 1) * t.height + i; s.set(t.data.slice(m * 4, m * 4 + 4), f * 4); } const a = o === 90 || o === 270 ? t.height : t.width, r = o === 90 || o === 270 ? 
t.width : t.height; return ft(s, a, r); } function On(t, e = "", o, s, a) { if (!L) return; const i = document.querySelector(`#${s}`).getContext("2d"); i.beginPath(), i.strokeStyle = o, i.moveTo(t[0][0], t[0][1]), i.lineTo(t[1][0], t[1][1]), i.lineTo(t[2][0], t[2][1]), i.lineTo(t[3][0], t[3][1]), i.lineTo(t[0][0], t[0][1]), i.stroke(), i.strokeStyle = "black", i.strokeText(e, t[0][0], t[0][1]); } export { _n as analyzeLayout, Fn as det, Ln as init, gn as initDet, mn as initDocDirCls, bn as initRec, dn as loadImg, Rn as ocr, Yn as rec, Gt as rotateImg, fn as setOCREnv };