UNPKG

ppu-paddle-ocr

Version:

Blazing-fast and lightweight PaddleOCR library for Node.js and Bun. Perform accurate text detection, recognition, and image deskew with a simple, modern, and type-safe API. Ideal for document processing, data extraction, and computer vision tasks.

11 lines 10 kB
import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "fs";
import * as ort from "onnxruntime-node";
import * as os from "os";
import * as path from "path";
import { ImageProcessor } from "ppu-ocv";
import merge from "lodash.merge";
import { DEFAULT_PADDLE_OPTIONS } from "../constants.js";
import { DetectionService } from "./detection.service.js";
import { globalImageCache, ImageCache } from "./image-cache.js";
import { RecognitionService } from "./recognition.service.js";

/** Base URL the default model and dictionary files are downloaded from. */
const GITHUB_BASE_URL =
  "https://raw.githubusercontent.com/PT-Perkasa-Pilar-Utama/ppu-paddle-ocr/main/models/";

/** Directory where downloaded resources are stored between runs. */
const CACHE_DIR = path.join(os.homedir(), ".cache", "ppu-paddle-ocr");

/** Default download locations used when no explicit resource is configured. */
const DEFAULT_DETECTION_MODEL_URL = `${GITHUB_BASE_URL}paddleocr-detection.onnx`;
const DEFAULT_RECOGNITION_MODEL_URL = `${GITHUB_BASE_URL}paddleocr-recognition.onnx`;
const DEFAULT_DICTIONARY_URL = `${GITHUB_BASE_URL}ppocrv5_en_dict.txt`;

/** Matches strings that start with an explicit http:// or https:// scheme. */
const HTTP_URL_PATTERN = /^https?:\/\//i;

/**
 * Returns a standalone ArrayBuffer holding exactly the bytes viewed by `view`.
 * A Node Buffer may be a window onto a larger pooled ArrayBuffer, so the
 * backing buffer is sliced to the view's own byte range.
 *
 * @param {Uint8Array} view - Typed array (or Buffer) to extract.
 * @returns {ArrayBuffer} A copy of the viewed bytes.
 */
function sliceBackingBuffer(view) {
  return view.buffer.slice(view.byteOffset, view.byteOffset + view.byteLength);
}

/**
 * Copies the bytes of `bytes` into a freshly allocated ArrayBuffer.
 *
 * @param {ArrayBufferView|ArrayBuffer} bytes - Source bytes.
 * @returns {ArrayBuffer} A new ArrayBuffer with the same byte content.
 */
function copyToArrayBuffer(bytes) {
  const copy = new ArrayBuffer(bytes.byteLength);
  new Uint8Array(copy).set(new Uint8Array(bytes));
  return copy;
}

/**
 * Produces the ArrayBuffer used to derive the result-cache key for an image.
 *
 * An ArrayBuffer is used as-is. Otherwise the image is treated as a
 * canvas-like object and its bytes are taken from, in order of preference,
 * `toBuffer("image/png")`, `toDataURL("image/png")`, or the raw pixel data of
 * its 2D context.
 *
 * @param {ArrayBuffer|object} image - Image buffer or canvas-like object.
 * @returns {ArrayBuffer} Bytes representing the image.
 */
function imageToArrayBuffer(image) {
  if (image instanceof ArrayBuffer) {
    return image;
  }
  if (typeof image.toBuffer === "function") {
    return copyToArrayBuffer(image.toBuffer("image/png"));
  }
  if (typeof image.toDataURL === "function") {
    const base64Data = image
      .toDataURL("image/png")
      .replace(/^data:image\/png;base64,/, "");
    return copyToArrayBuffer(Buffer.from(base64Data, "base64"));
  }
  const ctx = image.getContext("2d");
  const imageData = ctx.getImageData(0, 0, image.width, image.height);
  return copyToArrayBuffer(imageData.data);
}

/**
 * Decodes a dictionary resource as UTF-8 and splits it into one entry per
 * line. Entries are not trimmed or filtered, so every line (including blank
 * ones) keeps its position in the returned array.
 *
 * @param {ArrayBuffer} dictBuffer - Raw dictionary bytes.
 * @param {string} emptyMessage - Error message used when there is no content.
 * @returns {string[]} Dictionary entries in file order.
 * @throws {Error} If the decoded dictionary has no content.
 */
function parseDictionary(dictBuffer, emptyMessage) {
  const content = Buffer.from(dictBuffer).toString("utf-8");
  // split() always yields at least one element, so emptiness has to be
  // checked on the decoded text rather than on the resulting array.
  if (content.length === 0) {
    throw new Error(emptyMessage);
  }
  return content.split("\n");
}

/**
 * OCR service that owns one detection and one recognition ONNX session and
 * runs text detection followed by text recognition on an image.
 */
export class PaddleOcrService {
  options = DEFAULT_PADDLE_OPTIONS;
  detectionSession = null;
  recognitionSession = null;

  /**
   * @param {object} [options] - Overrides deep-merged on top of
   *   DEFAULT_PADDLE_OPTIONS.
   */
  constructor(options) {
    this.options = merge({}, DEFAULT_PADDLE_OPTIONS, options);
    this.options.session =
      this.options.session || DEFAULT_PADDLE_OPTIONS.session;
  }

  /**
   * Writes a prefixed message to the console when `debugging.verbose` is set.
   *
   * @param {string} message - Text to log.
   */
  log(message) {
    if (this.options.debugging?.verbose) {
      console.log(`[PaddleOcrService] ${message}`);
    }
  }

  /**
   * Returns the bytes of a remote resource, downloading it only when it is
   * not already present in CACHE_DIR.
   *
   * The cache file is named after the last path segment of the URL, so two
   * URLs ending in the same file name share one cache entry.
   *
   * @param {string} url - Absolute URL of the resource.
   * @returns {Promise<ArrayBuffer>} Resource bytes.
   * @throws {Error} If the response is not OK or has no body.
   */
  async _fetchAndCache(url) {
    const fileName = path.basename(new URL(url).pathname);
    const cachePath = path.join(CACHE_DIR, fileName);

    if (existsSync(cachePath)) {
      this.log(`Loading cached resource from: ${cachePath}`);
      return sliceBackingBuffer(readFileSync(cachePath));
    }

    console.log(
      `[PaddleOcrService] Downloading resource: ${fileName} ` +
        ` Cached at: ${CACHE_DIR}`,
    );
    this.log(`Fetching resource from URL: ${url}`);

    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch resource from ${url}`);
    }
    if (!response.body) {
      throw new Error("Response body is null or undefined");
    }

    const contentLength = response.headers.get("Content-Length");
    const totalLength = contentLength ? Number.parseInt(contentLength, 10) : 0;
    const chunks = [];
    let receivedLength = 0;

    const reader = response.body.getReader();
    while (true) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      chunks.push(value);
      receivedLength += value.length;
      // A percentage can only be shown when the server announced a length.
      if (totalLength > 0) {
        const percentage = ((receivedLength / totalLength) * 100).toFixed(2);
        process.stdout.write(`\rDownloading... ${percentage}%`);
      }
    }
    // End the carriage-return progress line so later output starts fresh.
    process.stdout.write("\n");

    const buffer = new Uint8Array(receivedLength);
    let position = 0;
    for (const chunk of chunks) {
      buffer.set(chunk, position);
      position += chunk.length;
    }

    this.log(`Caching resource to: ${cachePath}`);
    // With `recursive: true` this is a no-op when the directory exists.
    mkdirSync(CACHE_DIR, { recursive: true });
    writeFileSync(cachePath, Buffer.from(buffer));

    return buffer.buffer;
  }

  /**
   * Resolves a resource given as an ArrayBuffer, a URL, or a file path.
   *
   * @param {ArrayBuffer|string|undefined} source - An ArrayBuffer is returned
   *   unchanged; an http(s) URL is fetched through the download cache; any
   *   other string is read as a path relative to the current working
   *   directory; anything else falls back to `defaultUrl`.
   * @param {string} defaultUrl - URL used when `source` is neither an
   *   ArrayBuffer nor a string.
   * @returns {Promise<ArrayBuffer>} Resource bytes.
   */
  async _loadResource(source, defaultUrl) {
    if (source instanceof ArrayBuffer) {
      this.log("Loading resource from ArrayBuffer");
      return source;
    }

    if (typeof source === "string") {
      // Require a real scheme so that local paths which merely begin with
      // "http" (e.g. "http-models/x.onnx") are still read from disk.
      if (HTTP_URL_PATTERN.test(source)) {
        return this._fetchAndCache(source);
      }
      const resolvedPath = path.resolve(process.cwd(), source);
      this.log(`Loading resource from path: ${resolvedPath}`);
      return sliceBackingBuffer(readFileSync(resolvedPath));
    }

    return this._fetchAndCache(defaultUrl);
  }

  /**
   * Loads the detection model, the recognition model and the character
   * dictionary, creates both inference sessions, and stores the loaded
   * buffers and the parsed dictionary back on `this.options`.
   *
   * @returns {Promise<void>}
   * @throws Logs and rethrows any error raised while loading.
   */
  async initialize() {
    try {
      this.log("Initializing PaddleOcrService...");

      const detModelBuffer = await this._loadResource(
        this.options.model?.detection,
        DEFAULT_DETECTION_MODEL_URL,
      );
      this.detectionSession = await ort.InferenceSession.create(
        new Uint8Array(detModelBuffer),
        this.options.session,
      );
      this.options.model.detection = detModelBuffer;
      this.log(
        `Detection ONNX model loaded successfully input: ${this.detectionSession.inputNames} output: ${this.detectionSession.outputNames}`,
      );

      const recModelBuffer = await this._loadResource(
        this.options.model?.recognition,
        DEFAULT_RECOGNITION_MODEL_URL,
      );
      this.recognitionSession = await ort.InferenceSession.create(
        new Uint8Array(recModelBuffer),
        this.options.session,
      );
      this.options.model.recognition = recModelBuffer;
      this.log(
        `Recognition ONNX model loaded successfully input: ${this.recognitionSession.inputNames} output: ${this.recognitionSession.outputNames}`,
      );

      const dictBuffer = await this._loadResource(
        this.options.model?.charactersDictionary,
        DEFAULT_DICTIONARY_URL,
      );
      const charactersDictionary = parseDictionary(
        dictBuffer,
        "Character dictionary is empty or could not be loaded.",
      );
      this.options.model.charactersDictionary = dictBuffer;
      this.options.recognition.charactersDictionary = charactersDictionary;
      this.log(
        `Character dictionary loaded with ${charactersDictionary.length} entries.`,
      );
    } catch (error) {
      console.error("Failed to initialize PaddleOcrService:", error);
      throw error;
    }
  }

  /**
   * @returns {boolean} True once both inference sessions have been created.
   */
  isInitialized() {
    return this.detectionSession !== null && this.recognitionSession !== null;
  }

  /**
   * Replaces the detection session with one built from `model`.
   *
   * @param {ArrayBuffer|string|undefined} model - Resource accepted by
   *   `_loadResource`; the default detection model is used as fallback.
   * @returns {Promise<void>}
   */
  async changeDetectionModel(model) {
    this.log("Changing detection model...");
    const modelBuffer = await this._loadResource(
      model,
      DEFAULT_DETECTION_MODEL_URL,
    );
    await this.detectionSession?.release();
    this.detectionSession = await ort.InferenceSession.create(
      new Uint8Array(modelBuffer),
      this.options.session,
    );
    this.options.model.detection = modelBuffer;
    this.log("Detection model changed successfully.");
  }

  /**
   * Replaces the recognition session with one built from `model`.
   *
   * @param {ArrayBuffer|string|undefined} model - Resource accepted by
   *   `_loadResource`; the default recognition model is used as fallback.
   * @returns {Promise<void>}
   */
  async changeRecognitionModel(model) {
    this.log("Changing recognition model...");
    const modelBuffer = await this._loadResource(
      model,
      DEFAULT_RECOGNITION_MODEL_URL,
    );
    await this.recognitionSession?.release();
    this.recognitionSession = await ort.InferenceSession.create(
      new Uint8Array(modelBuffer),
      this.options.session,
    );
    this.options.model.recognition = modelBuffer;
    this.log("Recognition model changed successfully.");
  }

  /**
   * Replaces the character dictionary stored on the service options.
   *
   * @param {ArrayBuffer|string|undefined} dictionary - Resource accepted by
   *   `_loadResource`; the default dictionary is used as fallback.
   * @returns {Promise<void>}
   * @throws {Error} If the dictionary has no content.
   */
  async changeTextDictionary(dictionary) {
    this.log("Changing text dictionary...");
    const dictBuffer = await this._loadResource(
      dictionary,
      DEFAULT_DICTIONARY_URL,
    );
    const charactersDictionary = parseDictionary(
      dictBuffer,
      "Character dictionary is empty or could not be loaded.",
    );
    this.options.model.charactersDictionary = dictBuffer;
    this.options.recognition.charactersDictionary = charactersDictionary;
    this.log(
      `Character dictionary changed successfully with ${charactersDictionary.length} entries.`,
    );
  }

  /**
   * Runs detection and recognition on an image.
   *
   * Results are stored in and served from `globalImageCache`, keyed by the
   * image bytes, unless `options.noCache` or `options.dictionary` is set.
   *
   * @param {ArrayBuffer|object} image - Image buffer or canvas-like object.
   * @param {object} [options]
   * @param {boolean} [options.flatten] - Return `{ text, results, confidence }`
   *   with a flat result list instead of `{ text, lines, confidence }`.
   * @param {boolean} [options.noCache] - Bypass the result cache.
   * @param {ArrayBuffer|string} [options.dictionary] - Dictionary used for
   *   this call only; also bypasses the result cache.
   * @returns {Promise<object>} The recognition result in the requested shape.
   * @throws {Error} If the service has not been initialized, or the custom
   *   dictionary has no content.
   */
  async recognize(image, options) {
    if (!this.isInitialized()) {
      throw new Error(
        "PaddleOcrService is not initialized. Call initialize() first.",
      );
    }
    await ImageProcessor.initRuntime();

    const cacheKey = ImageCache.generateKey(imageToArrayBuffer(image));
    const useCache = !options?.noCache && !options?.dictionary;

    const cached = useCache ? globalImageCache.get(cacheKey) : undefined;
    if (cached) {
      this.log("Using cached OCR result");
      if (options?.flatten) {
        return {
          text: cached.text,
          results: this.getFlattenedResults(cached.lines),
          confidence: cached.confidence,
        };
      }
      return cached;
    }

    const detector = new DetectionService(
      this.detectionSession,
      this.options.detection,
      this.options.debugging,
    );
    const recognitor = new RecognitionService(
      this.recognitionSession,
      this.options.recognition,
      this.options.debugging,
    );

    let charactersDictionary;
    if (options?.dictionary) {
      const dictBuffer = await this._loadResource(options.dictionary, "");
      charactersDictionary = parseDictionary(
        dictBuffer,
        "Custom character dictionary is empty or could not be loaded.",
      );
    }

    const detection = await detector.run(image);
    const recognition = await recognitor.run(
      image,
      detection,
      charactersDictionary,
    );
    const processed = this.processRecognition(recognition);

    // Always cache the line-grouped form: the cache-hit path above reads
    // `lines` to build the flattened view, so caching the flattened object
    // would make a later hit fail or return the wrong shape.
    if (useCache) {
      globalImageCache.set(cacheKey, processed);
    }

    if (options?.flatten) {
      return {
        text: processed.text,
        results: recognition,
        confidence: processed.confidence,
      };
    }
    return processed;
  }

  /**
   * @param {Array<Array<object>>} lines - Results grouped by line.
   * @returns {Array<object>} The same results as a single flat list.
   */
  getFlattenedResults(lines) {
    return lines.flat();
  }

  /**
   * Groups recognition results into text lines and computes the mean
   * confidence.
   *
   * A result joins the current line when its `box.y` is within half of the
   * current line's average box height of the previous result's `box.y`;
   * otherwise it starts a new line. Results on one line are joined with a
   * space and lines are joined with a newline.
   *
   * @param {Array<object>} recognition - Results carrying `text`,
   *   `confidence` and `box` (`y`, `height`), in reading order.
   * @returns {{text: string, lines: Array<Array<object>>, confidence: number}}
   */
  processRecognition(recognition) {
    const result = { text: "", lines: [], confidence: 0 };
    if (!recognition.length) {
      return result;
    }

    const totalConfidence = recognition.reduce(
      (sum, item) => sum + item.confidence,
      0,
    );
    result.confidence = totalConfidence / recognition.length;

    let currentLine = [recognition[0]];
    let fullText = recognition[0].text;
    let avgHeight = recognition[0].box.height;

    for (let i = 1; i < recognition.length; i++) {
      const current = recognition[i];
      const previous = recognition[i - 1];
      const verticalGap = Math.abs(current.box.y - previous.box.y);

      if (verticalGap <= avgHeight * 0.5) {
        currentLine.push(current);
        fullText += ` ${current.text}`;
        avgHeight =
          currentLine.reduce((sum, item) => sum + item.box.height, 0) /
          currentLine.length;
      } else {
        result.lines.push([...currentLine]);
        fullText += `\n${current.text}`;
        currentLine = [current];
        avgHeight = current.box.height;
      }
    }

    // currentLine always holds at least one result at this point.
    result.lines.push([...currentLine]);
    result.text = fullText;
    return result;
  }

  /**
   * Passes the image to `DetectionService.deskew` using the detection
   * session and returns whatever that call yields.
   *
   * @param {ArrayBuffer|object} image - Image handed to the detector.
   * @returns {Promise<*>} Result of `DetectionService.deskew`.
   * @throws {Error} If the service has not been initialized.
   */
  async deskewImage(image) {
    if (!this.isInitialized()) {
      throw new Error(
        "PaddleOcrService is not initialized. Call initialize() first.",
      );
    }
    await ImageProcessor.initRuntime();

    const detector = new DetectionService(
      this.detectionSession,
      this.options.detection,
      this.options.debugging,
    );
    return detector.deskew(image);
  }

  /**
   * Deletes CACHE_DIR and everything in it, if the directory exists.
   */
  clearModelCache() {
    if (!existsSync(CACHE_DIR)) {
      this.log("Cache directory does not exist, nothing to clear.");
      return;
    }
    this.log(`Clearing model cache at: ${CACHE_DIR}`);
    rmSync(CACHE_DIR, { recursive: true, force: true });
    console.log(`[PaddleOcrService] Model cache cleared: ${CACHE_DIR}`);
  }

  /**
   * Releases both inference sessions and resets the service to the
   * uninitialized state.
   *
   * @returns {Promise<void>}
   */
  async destroy() {
    await this.detectionSession?.release();
    await this.recognitionSession?.release();
    this.detectionSession = null;
    this.recognitionSession = null;
  }
}

export default PaddleOcrService;