UNPKG

ppu-paddle-ocr

Version:

Lightweight, probably the fastest PaddleOCR SDK in TypeScript. Runs anywhere JavaScript runs: Node.js, Bun, Deno, mobile react-native, web browsers, and browser extensions. Docker & CLI supported. The official SDK is browser-only. Accurate text detection

7 lines (6 loc) 4.6 kB
import os from "node:os";
import path from "node:path";
import { DEFAULT_MODEL_URLS, MODEL_PRESETS } from "../index.js";
import { PaddleOcrService } from "../processor/paddle-ocr.service.js";
import {
  CliError,
  expandPatterns,
  isMissingLocalFile,
  loadImageInput,
  logStderr,
  writeOutput,
} from "./io.js";
import {
  buildBatchOptions,
  buildPaddleOptions,
  buildRecognizeOptions,
} from "./options.js";

/**
 * Serialize `value` as JSON, indented by two spaces when `values.pretty` is truthy
 * and compact otherwise.
 *
 * @param {unknown} value - Data to serialize.
 * @param {object} values - Parsed CLI flags; only `pretty` is read here.
 * @returns {string} The JSON text.
 */
function stringify(value, values) {
  return JSON.stringify(value, null, values.pretty ? 2 : undefined);
}

/**
 * Turn a rejection reason into a printable message.
 *
 * @param {unknown} reason - An `Error` or any other thrown value.
 * @returns {string} `reason.message` for `Error` instances, `String(reason)` otherwise.
 */
function errMessage(reason) {
  return reason instanceof Error ? reason.message : String(reason);
}

/**
 * Fail before any model is loaded when some inputs are flagged by `isMissingLocalFile`.
 *
 * @param {string[]} files - Expanded input list.
 * @throws {CliError} Listing every entry that `isMissingLocalFile` reports.
 */
function assertLocalFilesExist(files) {
  const missing = files.filter(isMissingLocalFile);
  if (missing.length > 0) {
    const plural = missing.length > 1 ? "s" : "";
    throw new CliError(`No such file${plural}: ${missing.join(", ")}`);
  }
}

/**
 * Load inputs one at a time, in order, only as the consumer asks for them.
 *
 * @param {string[]} files - Inputs to hand to `loadImageInput`.
 * @yields The loaded input for each file.
 */
async function* loadInputs(files) {
  for (const file of files) {
    yield loadImageInput(file);
  }
}

/**
 * Convert one settled batch item into the record that the CLI prints.
 *
 * @param {string[]} files - The input list the batch was built from.
 * @param {object} item - Settled item carrying `index`, `status`, and `value` or `reason`.
 * @returns {object} `{file, status, result}` when fulfilled, `{file, status, error}` otherwise.
 */
function toEntry(files, item) {
  // "?" stands in for the name when the index does not map back to an input.
  const file = files[item.index] ?? "?";
  return item.status === "fulfilled"
    ? { file, status: "fulfilled", result: item.value }
    : { file, status: "rejected", error: errMessage(item.reason) };
}

/**
 * `recognize` command: OCR exactly one image and write either the JSON result
 * or its `text` field.
 *
 * @param {string[]} images - Positional arguments; must contain exactly one entry.
 * @param {object} values - Parsed CLI flags (`quiet`, `flatten`, `json`, `pretty`, `output`, ...).
 * @throws {CliError} With code 2 when the argument count is not exactly one
 *   (presumably the usage-error exit status; confirm in io.js).
 */
export async function runRecognize(images, values) {
  const [image] = images;
  if (!image || images.length !== 1) {
    throw new CliError("recognize takes exactly one image; use 'batch' for multiple", 2);
  }

  const service = new PaddleOcrService(buildPaddleOptions(values));
  try {
    logStderr("Loading models...", Boolean(values.quiet));
    await service.initialize();

    const input = await loadImageInput(image);
    const result = await service.recognize(input, {
      ...buildRecognizeOptions(values),
      flatten: Boolean(values.flatten),
    });

    writeOutput(values.json ? stringify(result, values) : result.text, values.output);
  } finally {
    // Always release the service, including when loading or recognition throws.
    await service.destroy();
  }
}

/**
 * `batch` command: OCR every matched image, collect all outcomes, and write
 * them in a single `writeOutput` call.
 *
 * @param {string[]} patterns - Patterns passed to `expandPatterns`.
 * @param {object} values - Parsed CLI flags.
 * @throws {CliError} With code 2 when no input matches; without a code when
 *   files are missing or at least one image failed.
 */
export async function runBatch(patterns, values) {
  const files = expandPatterns(patterns);
  if (files.length === 0) throw new CliError("batch needs at least one image", 2);
  assertLocalFilesExist(files);

  const quiet = Boolean(values.quiet);
  const service = new PaddleOcrService(buildPaddleOptions(values));
  try {
    logStderr(`Loading models, then OCR'ing ${files.length} image(s)...`, quiet);
    await service.initialize();

    const settled = await service.batchRecognize(loadInputs(files), {
      ...buildBatchOptions(values),
      // settle: true is requested so per-item failures come back as rejected items.
      settle: true,
      onProgress: (done) => logStderr(` ${done}/${files.length}`, quiet),
    });
    const entries = settled.map((item) => toEntry(files, item));

    if (values.json) {
      writeOutput(stringify(entries, values), values.output);
    } else {
      // head(1)-style layout: a header line per file, its text below, and a
      // blank line between files.
      const blocks = entries.map((entry) =>
        entry.status === "fulfilled"
          ? `==> ${entry.file} <==\n${entry.result.text}`
          : `==> ${entry.file} <== ERROR: ${entry.error}`,
      );
      writeOutput(blocks.join("\n\n"), values.output);
    }

    const failed = entries.filter((entry) => entry.status === "rejected").length;
    if (failed > 0) {
      throw new CliError(`${failed} image(s) failed`);
    }
  } finally {
    await service.destroy();
  }
}

/**
 * `stream` command: OCR every matched image and emit each outcome as soon as
 * the service yields it. Results are written through `writeOutput` without a
 * destination argument, so `values.output` is not consulted here.
 *
 * @param {string[]} patterns - Patterns passed to `expandPatterns`.
 * @param {object} values - Parsed CLI flags.
 * @throws {CliError} With code 2 when no input matches; without a code when
 *   files are missing or at least one image failed.
 */
export async function runStream(patterns, values) {
  const files = expandPatterns(patterns);
  if (files.length === 0) throw new CliError("stream needs at least one image", 2);
  assertLocalFilesExist(files);

  const quiet = Boolean(values.quiet);
  const service = new PaddleOcrService(buildPaddleOptions(values));
  let failures = 0;
  try {
    logStderr("Loading models...", quiet);
    await service.initialize();

    const stream = service.batchRecognizeStream(loadInputs(files), {
      ...buildBatchOptions(values),
      settle: true,
    });
    for await (const item of stream) {
      const entry = toEntry(files, item);
      if (entry.status === "fulfilled") {
        writeOutput(
          values.json
            ? stringify(entry, values)
            : `==> ${entry.file} <==\n${entry.result.text}`,
        );
        continue;
      }

      failures++;
      if (values.json) {
        // In JSON mode failures are part of the data stream.
        writeOutput(stringify(entry, values));
      } else {
        logStderr(`==> ${entry.file} <== ERROR: ${entry.error}`, quiet);
      }
    }
  } finally {
    await service.destroy();
  }

  // Report failures only after the service has been released.
  if (failures > 0) {
    throw new CliError(`${failures} image(s) failed`);
  }
}

/**
 * `download-models` command: ask the service to download its models, verbosely
 * unless `values.quiet` is set.
 *
 * @param {object} values - Parsed CLI flags; only `quiet` is read.
 */
export async function runDownloadModels(values) {
  await PaddleOcrService.downloadModels({ verbose: !values.quiet });
  logStderr("Models cached.", Boolean(values.quiet));
}

/**
 * `clear-cache` command: clear the model cache through a default-constructed service.
 *
 * @param {object} values - Parsed CLI flags; only `quiet` is read.
 */
export function runClearCache(values) {
  new PaddleOcrService().clearModelCache();
  logStderr("Cache cleared.", Boolean(values.quiet));
}

/**
 * `models` command: print, as JSON, the configuration the given flags resolve to.
 * Any setting that `buildPaddleOptions` leaves unset falls back to the default
 * written here.
 *
 * @param {object} values - Parsed CLI flags (`pretty`, `output`, plus whatever
 *   `buildPaddleOptions` reads).
 */
export function runModels(values) {
  const built = buildPaddleOptions(values);
  const model = built.model;

  const info = {
    cacheDir: path.join(os.homedir(), ".cache", "ppu-paddle-ocr"),
    models: {
      detection: model?.detection ?? DEFAULT_MODEL_URLS.detection,
      recognition: model?.recognition ?? DEFAULT_MODEL_URLS.recognition,
      charactersDictionary:
        model?.charactersDictionary ?? DEFAULT_MODEL_URLS.charactersDictionary,
    },
    strategy: built.recognition?.strategy ?? "per-box",
    engine: built.processing?.engine ?? "opencv",
    executionProviders: built.session?.executionProviders ?? ["cpu"],
    presets: Object.keys(MODEL_PRESETS),
  };

  writeOutput(stringify(info, values), values.output);
}