ppu-paddle-ocr
Version:
Lightweight, probably the fastest PaddleOCR SDK in TypeScript. Runs anywhere JavaScript runs: Node.js, Bun, Deno, React Native (mobile), web browsers, and browser extensions. Docker and CLI are supported. The official SDK is browser-only. Accurate text detection…
3 lines • 4.27 kB
JavaScript
import { DEFAULT_DEBUGGING_OPTIONS, DEFAULT_RECOGNITION_OPTIONS } from "../constants.js";
import { runCrossLineStrategy, runLineStrategy, runPerBoxStrategy } from "./recognition/strategies.js";

/**
 * Text-recognition stage: takes a source image plus detected boxes, crops the
 * boxes, runs the recognition session on the crops and decodes the output
 * into text.
 *
 * All platform-specific work (canvas handling, image processing, the `ort`
 * tensor constructor, debug-image saving) is delegated to the injected
 * `platform` object.
 */
export class BaseRecognitionService {
  options;
  debugging;
  session;
  platform;
  engine;

  /**
   * @param {object} platform - Platform adapter. This class reads
   *   `isCanvas`, `canvas`, `imageProcessor`, `ort` and `saveDebugImage` from it.
   * @param {object} session - Inference session exposing `run(feeds)`.
   * @param {object} [options={}] - Overrides merged over `DEFAULT_RECOGNITION_OPTIONS`.
   * @param {object} [debugging={}] - Overrides merged over `DEFAULT_DEBUGGING_OPTIONS`.
   * @param {string} [engine="opencv"] - Requested image engine. `"opencv"` is
   *   replaced by `"canvas-native"` when the platform has no `imageProcessor`.
   */
  constructor(platform, session, options = {}, debugging = {}, engine = "opencv") {
    this.platform = platform;
    this.session = session;
    this.options = { ...DEFAULT_RECOGNITION_OPTIONS, ...options };
    this.debugging = { ...DEFAULT_DEBUGGING_OPTIONS, ...debugging };

    const opencvUnavailable = engine === "opencv" && !this.platform.imageProcessor;
    this.engine = opencvUnavailable ? "canvas-native" : engine;
  }

  /**
   * Prints a prefixed message to the console when `debugging.verbose` is truthy.
   *
   * @param {string} message - Text to print.
   */
  log(message) {
    if (this.debugging.verbose) {
      console.log(`[RecognitionService] ${message}`);
    }
  }

  /**
   * Recognizes text inside the detected boxes of an image.
   *
   * Any failure is reported through `console.error` and yields an empty
   * array instead of a rejection.
   *
   * @param {*} image - Either a canvas (per `platform.isCanvas`) or a value
   *   accepted by the platform's `prepareCanvas`.
   * @param {Array<object>} detection - Detected boxes; entries whose width or
   *   height is not a positive number are skipped.
   * @param {Array<string>} [charactersDictionary] - Dictionary forwarded to the
   *   selected strategy.
   * @param {string} [strategy="per-line"] - `"cross-line"`, `"per-line"` or
   *   `"per-box"`; any other value is handled as `"per-box"`.
   * @returns {Promise<*>} The selected strategy's result, or `[]` when no
   *   valid box remains or an error occurred.
   */
  async run(image, detection, charactersDictionary, strategy = "per-line") {
    this.log("Starting text recognition process");

    try {
      let sourceCanvasForCrop;
      if (this.platform.isCanvas(image)) {
        sourceCanvasForCrop = image;
      } else if (this.engine === "opencv" && this.platform.imageProcessor) {
        sourceCanvasForCrop = await this.platform.imageProcessor.prepareCanvas(image);
      } else {
        sourceCanvasForCrop = await this.platform.canvas.prepareCanvas(image);
      }

      const validBoxes = this.filterValidBoxes(detection);
      if (validBoxes.length === 0) {
        return [];
      }

      const ctx = this.buildContext();

      // The strategy results are awaited *inside* the try block on purpose:
      // returning a pending promise would let its rejection bypass the catch
      // below and surface to the caller as an unhandled failure.
      switch (strategy) {
        case "cross-line":
          return await runCrossLineStrategy(sourceCanvasForCrop, validBoxes, ctx, charactersDictionary);
        case "per-line":
          return await runLineStrategy(sourceCanvasForCrop, validBoxes, ctx, charactersDictionary);
        case "per-box":
        default:
          return await runPerBoxStrategy(
            sourceCanvasForCrop,
            validBoxes,
            ctx,
            (canvas, box, index, total, debugPath, dict) =>
              this.processBox(canvas, box, index, total, debugPath, dict),
            charactersDictionary,
          );
      }
    } catch (error) {
      console.error(
        "Error during text recognition:",
        error instanceof Error ? error.message : String(error),
      );
      return [];
    }
  }

  /**
   * Bundles the state the strategy functions receive.
   *
   * @returns {{platform: object, options: object, debugging: object, engine: string, runInference: Function}}
   *   `runInference` is wrapped in an arrow function so it stays bound to
   *   this instance.
   */
  buildContext() {
    return {
      platform: this.platform,
      options: this.options,
      debugging: this.debugging,
      engine: this.engine,
      runInference: (t) => this.runInference(t),
    };
  }

  /**
   * Pairs every box with its position in the input array and keeps the pairs
   * that pass `isValidBox`.
   *
   * @param {Array<object>} boxes - Detected boxes.
   * @returns {Array<{box: object, index: number}>} Surviving boxes with their
   *   original indices.
   */
  filterValidBoxes(boxes) {
    return boxes
      .map((box, index) => ({ box, index }))
      .filter(({ box, index }) => this.isValidBox(box, index));
  }

  /**
   * Crops a single box out of the source canvas and recognizes its text.
   *
   * @param {*} sourceCanvas - Canvas to crop from.
   * @param {{x: number, y: number, width: number, height: number}} box - Region to crop.
   * @param {number} index - Zero-based box index (used in file names and messages).
   * @param {number} totalBoxes - Total number of boxes, for the progress message.
   * @param {string} [debugPath] - When set and `debugging.debug` is truthy, the
   *   crop is handed to `platform.saveDebugImage` with this path.
   * @param {Array<string>} [charactersDictionary] - Dictionary forwarded to decoding.
   * @returns {Promise<{text: *, box: object, confidence: *}|null>} The
   *   recognition result, or `null` when anything in the process threw.
   */
  async processBox(sourceCanvas, box, index, totalBoxes, debugPath, charactersDictionary) {
    const start = Date.now();

    try {
      const bbox = {
        x0: box.x,
        y0: box.y,
        x1: box.x + box.width,
        y1: box.y + box.height,
      };
      const cropCanvas = this.platform.canvas.getToolkit().crop({ bbox, canvas: sourceCanvas });

      const ctx = this.buildContext();
      const { text: recognizedText, confidence } = await this.recognizeTextViaContext(
        cropCanvas,
        ctx,
        charactersDictionary,
      );

      // The per-box timing message is emitted only on the debug-image path.
      if (this.debugging.debug && debugPath) {
        await this.platform.saveDebugImage(
          cropCanvas,
          `crop_${String(index).padStart(3, "0")}.png`,
          debugPath,
        );
        const processingTime = Date.now() - start;
        this.log(
          `Box ${index + 1}/${totalBoxes}: [x:${box.x}, y:${box.y}, w:${box.width}, h:${box.height}]` +
            `\n→ "${recognizedText}" (processed in ${processingTime}ms)\n`,
        );
      }

      return { text: recognizedText, box, confidence };
    } catch (e) {
      const err = e instanceof Error ? e : new Error(String(e));
      console.error(`Error processing box ${index + 1}: ${err.message}`, err.stack);
      return null;
    }
  }

  /**
   * Converts a cropped canvas into a float32 tensor, runs inference through
   * the context and decodes the output.
   *
   * @param {*} cropCanvas - Cropped region to recognize.
   * @param {object} ctx - Context as produced by `buildContext`.
   * @param {Array<string>} [charactersDictionary] - Dictionary; falls back to
   *   `ctx.options.charactersDictionary`, then to an empty array.
   * @returns {Promise<*>} Whatever `decodeResults` returns (callers in this
   *   class destructure `text` and `confidence` from it).
   */
  async recognizeTextViaContext(cropCanvas, ctx, charactersDictionary) {
    const { preprocessImage } = await import("./recognition/image-tensor.js");
    const { decodeResults } = await import("./recognition/ctc.js");

    const targetHeight = ctx.options.imageHeight ?? 48;
    const imageProcessor = ctx.engine === "opencv" ? ctx.platform.imageProcessor : undefined;

    const { imageTensor, tensorWidth, tensorHeight } = await preprocessImage(
      cropCanvas,
      targetHeight,
      imageProcessor,
      ctx.platform.canvas.createProcessor.bind(ctx.platform.canvas),
    );

    let inputTensor;
    try {
      inputTensor = new ctx.platform.ort.Tensor("float32", imageTensor, [
        1,
        3,
        tensorHeight,
        tensorWidth,
      ]);
      const result = await ctx.runInference(inputTensor);
      const dict = charactersDictionary ?? ctx.options.charactersDictionary ?? [];
      return decodeResults(result, dict, tensorWidth, this.debugging.verbose);
    } finally {
      // Dispose the input tensor on success and failure alike; it may be
      // unset when the Tensor constructor itself threw.
      inputTensor?.dispose();
    }
  }

  /**
   * Checks that a box has a strictly positive width and height, warning on
   * the console when it does not.
   *
   * @param {{width: number, height: number}} box - Box to check.
   * @param {number} index - Zero-based box index, used in the warning.
   * @returns {boolean} `true` when both dimensions are greater than zero.
   */
  isValidBox(box, index) {
    // Phrased as a positive "> 0" test rather than "<= 0" so that NaN and
    // missing (undefined) dimensions are rejected as well: every comparison
    // against NaN is false, which let such boxes through before.
    const hasPositiveArea = box.width > 0 && box.height > 0;
    if (!hasPositiveArea) {
      console.warn(`Skipping invalid box ${index + 1}: w=${box.width}, h=${box.height}`);
      return false;
    }
    return true;
  }

  /**
   * Runs the session with the tensor bound to the input name `x` and returns
   * the first entry of the result map.
   *
   * @param {*} inputTensor - Tensor to feed.
   * @returns {Promise<*>} The first output of the session.
   * @throws {Error} When the session returns no usable first output.
   */
  async runInference(inputTensor) {
    const feeds = { x: inputTensor };
    const results = await this.session.run(feeds);

    const outputNodeName = Object.keys(results)[0];
    const outputTensor = outputNodeName ? results[outputNodeName] : undefined;
    if (!outputTensor) {
      throw new Error(
        `Recognition output tensor '${outputNodeName}' not found. Available keys: ${Object.keys(results)}`,
      );
    }
    return outputTensor;
  }
}