ppu-paddle-ocr
Version:
Lightweight, probably the fastest PaddleOCR SDK in TypeScript. Runs anywhere JavaScript runs: Node.js, Bun, Deno, mobile react-native, web browsers, and browser extensions. Docker & CLI supported. The official SDK is browser-only. Accurate text detection
2 lines • 1.09 kB
JavaScript
/**
 * Groups a flat list of recognition results into lines of text.
 *
 * Each entry is compared with the entry immediately before it in the input
 * array: when the absolute difference of their `box.y` values is at most half
 * of the running average `box.height` of the line being built, the entry is
 * appended to that line; otherwise a new line is started.
 *
 * NOTE(review): because only adjacent entries are compared, this presumably
 * expects `recognition` to already be in reading order — confirm with callers.
 *
 * @param {Array<{text: string, confidence: number, box: {y: number, height: number}}>} recognition
 *   Recognition results; only `text`, `confidence`, `box.y` and `box.height` are read here.
 * @returns {{text: string, lines: Array<Array<object>>, confidence: number}}
 *   `text` joins entries of one line with a space and separate lines with a
 *   newline; `lines` holds the grouped entries (same object references as the
 *   input); `confidence` is the mean of all `confidence` values. For an empty
 *   input, `text` is "", `lines` is [] and `confidence` is 0.
 */
export function groupRecognitionResultsByLine(recognition) {
  const grouped = { text: "", lines: [], confidence: 0 };
  if (!recognition.length) {
    return grouped;
  }

  // Mean confidence over every entry, independent of how lines are split.
  const confidenceSum = recognition.reduce(
    (running, entry) => running + entry.confidence,
    0,
  );
  grouped.confidence = confidenceSum / recognition.length;

  const head = recognition[0];
  if (!head) {
    return grouped;
  }

  // State of the line currently being assembled.
  let lineItems = [head];
  let lineHeightTotal = head.box.height;
  let meanLineHeight = head.box.height;
  let textSoFar = head.text;

  for (let index = 1; index < recognition.length; index += 1) {
    const entry = recognition[index];
    const prior = recognition[index - 1];
    if (!(entry && prior)) {
      // Missing slot on either side: nothing to compare, skip this step.
      continue;
    }

    const yDistance = Math.abs(entry.box.y - prior.box.y);
    const maxSameLineDistance = meanLineHeight * 0.5;

    if (yDistance > maxSameLineDistance) {
      // Too far apart vertically: close the current line and open a new one.
      grouped.lines.push(lineItems);
      textSoFar += `\n${entry.text}`;
      lineItems = [entry];
      lineHeightTotal = entry.box.height;
      meanLineHeight = entry.box.height;
    } else {
      // Same line: append and refresh the running average height.
      lineItems.push(entry);
      lineHeightTotal += entry.box.height;
      textSoFar += ` ${entry.text}`;
      meanLineHeight = lineHeightTotal / lineItems.length;
    }
  }

  // The line in progress always holds at least one entry at this point.
  grouped.lines.push(lineItems);
  grouped.text = textSoFar;
  return grouped;
}