ppu-paddle-ocr
Version:
Lightweight, probably the fastest PaddleOCR SDK in TypeScript. Runs anywhere JavaScript runs: Node.js, Bun, Deno, React Native (mobile), web browsers, and browser extensions. Docker and CLI are supported. The official SDK is browser-only. Accurate text detection.
1 lines • 3.67 kB
JavaScript
/**
 * Groups boxes into lines based on their `box.y` coordinate.
 *
 * Boxes are ordered by `box.y`, then `box.x`. A box joins the current line
 * when its `y` differs from the previous box's `y` by at most half of the
 * current line's running average box height; otherwise it starts a new line.
 * Each finished line is ordered by `box.x` ascending.
 *
 * @param {Array<{box: {x: number, y: number, width: number, height: number}}>} boxes
 *   Items to group. The input array itself is not reordered.
 * @returns {Array<Array<object>>} Lines in ascending `y` order; `[]` for empty input.
 */
export function groupBoxesIntoLines(boxes) {
  if (boxes.length === 0) return [];

  const sorted = [...boxes].sort(
    (a, b) => a.box.y - b.box.y || a.box.x - b.box.x,
  );
  const first = sorted[0];
  if (!first) return [];

  const byX = (a, b) => a.box.x - b.box.x;
  const lines = [];
  let currentLine = [first];
  let heightSum = first.box.height;
  let avgHeight = first.box.height;

  for (let i = 1; i < sorted.length; i++) {
    const current = sorted[i];
    const previous = sorted[i - 1];
    if (!current || !previous) continue;

    const verticalGap = Math.abs(current.box.y - previous.box.y);
    if (verticalGap <= avgHeight * 0.5) {
      // Same line: extend it and refresh the running average height.
      currentLine.push(current);
      heightSum += current.box.height;
      avgHeight = heightSum / currentLine.length;
    } else {
      // Gap too large: close the current line and start a new one.
      currentLine.sort(byX);
      lines.push(currentLine);
      currentLine = [current];
      heightSum = current.box.height;
      avgHeight = current.box.height;
    }
  }

  // The line being built when the loop ends always holds at least one box.
  currentLine.sort(byX);
  lines.push(currentLine);
  return lines;
}

/**
 * Crops every box of a line out of `sourceCanvas` and draws the crops side by
 * side, in the given order, onto one new canvas.
 *
 * Every crop is scaled to the height of the line's bounding box; its width is
 * scaled by the same factor and rounded, so each crop keeps its aspect ratio.
 *
 * Note: the input is not validated. An empty `lineBoxes` or a box with
 * `height` 0 produces non-finite canvas dimensions.
 *
 * @param {object} sourceCanvas Canvas passed through to the crop toolkit.
 * @param {Array<{box: {x: number, y: number, width: number, height: number}}>} lineBoxes
 *   Boxes of one line, in the order they should appear in the merged image.
 * @param {(width: number, height: number) => object} createCanvas
 *   Factory whose result must provide `getContext("2d")`.
 * @param {{getToolkit: () => {crop: (options: object) => object}}} canvasOps
 *   Provider of a toolkit whose `crop({ bbox: {x0, y0, x1, y1}, canvas })`
 *   returns an image that the 2D context's `drawImage` accepts.
 * @returns {{mergedCanvas: object, mergedBox: {x: number, y: number, width: number, height: number}}}
 *   The merged canvas and the bounding box of all input boxes.
 */
export function mergeLineCrop(sourceCanvas, lineBoxes, createCanvas, canvasOps) {
  const minX = Math.min(...lineBoxes.map((b) => b.box.x));
  const minY = Math.min(...lineBoxes.map((b) => b.box.y));
  const maxRight = Math.max(...lineBoxes.map((b) => b.box.x + b.box.width));
  const maxBottom = Math.max(...lineBoxes.map((b) => b.box.y + b.box.height));

  const mergedBox = {
    x: minX,
    y: minY,
    width: maxRight - minX,
    height: maxBottom - minY,
  };

  const commonHeight = maxBottom - minY;
  // Computed once and reused for both the canvas width and the draw calls.
  const stretchedWidths = lineBoxes.map((b) =>
    Math.round(b.box.width * (commonHeight / b.box.height)),
  );
  const commonWidth = stretchedWidths.reduce((sum, w) => sum + w, 0);

  const mergedCanvas = createCanvas(commonWidth, commonHeight);
  const ctx = mergedCanvas.getContext("2d");

  let offsetX = 0;
  for (const [index, { box }] of lineBoxes.entries()) {
    const cropped = canvasOps.getToolkit().crop({
      bbox: {
        x0: box.x,
        y0: box.y,
        x1: box.x + box.width,
        y1: box.y + box.height,
      },
      canvas: sourceCanvas,
    });
    const stretchedWidth = stretchedWidths[index];
    ctx.drawImage(
      cropped,
      0,
      0,
      box.width,
      box.height,
      offsetX,
      0,
      stretchedWidth,
      commonHeight,
    );
    offsetX += stretchedWidth;
  }

  return { mergedCanvas, mergedBox };
}

/**
 * Splits `text` into one piece per crop, sized in proportion to crop widths.
 *
 * The text is split by Unicode code point. Every crop except the last gets
 * `round(width / averageCharWidth)` characters, capped at what is left; the
 * last crop gets all remaining characters.
 *
 * When no usable per-character width exists (empty text, or widths that sum
 * to zero or a non-finite value) the leading crops get empty strings and the
 * last crop gets the whole text, so text is never lost to a division by zero.
 *
 * @param {string} text Text recognised for the whole batch.
 * @param {number[]} cropWidths Width of each crop, in batch order.
 * @returns {string[]} One string per entry of `cropWidths`.
 */
export function splitBatchTextByWidths(text, cropWidths) {
  if (cropWidths.length === 1) {
    return [text];
  }

  const totalWidth = cropWidths.reduce((a, b) => a + b, 0);
  const chars = [...text];
  const charWidth = chars.length > 0 ? totalWidth / chars.length : 0;
  const lastIdx = cropWidths.length - 1;

  const result = [];
  let charIdx = 0;
  for (let i = 0; i < cropWidths.length; i++) {
    let proportionalChars;
    if (i === lastIdx) {
      proportionalChars = chars.length - charIdx;
    } else if (charWidth > 0) {
      proportionalChars = Math.round((cropWidths[i] ?? 0) / charWidth);
    } else {
      // No usable scale (also catches NaN): defer everything to the last crop.
      proportionalChars = 0;
    }
    const end = Math.min(charIdx + proportionalChars, chars.length);
    result.push(chars.slice(charIdx, end).join(""));
    charIdx = end;
  }
  return result;
}

/**
 * Packs items into batches, widest items first, placing each item into the
 * first existing batch that still has room.
 *
 * An item fits a batch when
 * `batchItemWidths + separatorGap * batch.length + itemWidth <= targetWidth`.
 * An item that fits nowhere starts a new batch, even if it alone exceeds
 * `targetWidth`.
 *
 * @template T
 * @param {T[]} items Items to pack. The input array is not reordered.
 * @param {(item: T) => number} widthOf Returns an item's width; expected to
 *   return the same value every time it is called for the same item.
 * @param {number} targetWidth Maximum width of a batch including gaps.
 * @param {number} separatorGap Gap reserved per item already in the batch.
 * @returns {T[][]} The batches, in creation order.
 */
export function packIntoBatches(items, widthOf, targetWidth, separatorGap) {
  const sorted = [...items].sort((a, b) => widthOf(b) - widthOf(a));
  const batches = [];
  const widths = [];

  for (const item of sorted) {
    const itemWidth = widthOf(item);
    const slot = batches.findIndex(
      (batch, b) =>
        widths[b] + separatorGap * batch.length + itemWidth <= targetWidth,
    );
    if (slot === -1) {
      batches.push([item]);
      widths.push(itemWidth);
    } else {
      batches[slot].push(item);
      // Only item widths are accumulated; gaps are re-derived from the count.
      widths[slot] += itemWidth;
    }
  }
  return batches;
}

/**
 * Distributes the text recognised for a whole line across the line's boxes.
 *
 * With a single box, that box receives the trimmed line text. Otherwise the
 * text is split on whitespace and each box except the last receives a share
 * of the words proportional to its width (at least one word while any
 * remain). The last box receives every remaining word, so rounding can never
 * drop words from the end of the line. Boxes reached after the words run out
 * get an empty string. If the box widths sum to zero, words are shared evenly.
 *
 * @param {Array<{box: {x: number, y: number, width: number, height: number}}>} boxes
 *   Boxes of one line, in reading order.
 * @param {string} lineText Text recognised for the whole line.
 * @param {number} confidence Value copied unchanged onto every result.
 * @returns {Array<{text: string, box: object, confidence: number}>}
 *   One result per box, in the same order as `boxes`.
 */
export function distributeLineText(boxes, lineText, confidence) {
  if (boxes.length === 1) {
    const first = boxes[0];
    return [
      {
        text: lineText.trim(),
        box: first?.box ?? { x: 0, y: 0, width: 0, height: 0 },
        confidence,
      },
    ];
  }

  const words = lineText
    .trim()
    .split(/\s+/)
    .filter((w) => w.length > 0);
  const totalBoxWidth = boxes.reduce((sum, b) => sum + b.box.width, 0);
  const lastIdx = boxes.length - 1;

  const results = [];
  let wordIdx = 0;
  for (const [i, { box }] of boxes.entries()) {
    if (wordIdx >= words.length) {
      results.push({ text: "", box, confidence });
      continue;
    }

    let end;
    if (i === lastIdx) {
      // The last box absorbs whatever rounding left unassigned.
      end = words.length;
    } else {
      // Fall back to an even split when widths give no usable proportion.
      const proportion =
        totalBoxWidth > 0 ? box.width / totalBoxWidth : 1 / boxes.length;
      const wordsForBox = Math.max(1, Math.round(words.length * proportion));
      end = Math.min(wordIdx + wordsForBox, words.length);
    }

    results.push({
      text: words.slice(wordIdx, end).join(" "),
      box,
      confidence,
    });
    wordIdx = end;
  }
  return results;
}