UNPKG

tesseract.js

Version:
53 lines (52 loc) 1.92 kB
<html> <head> <script src="/dist/tesseract.min.js"></script> </head> <body> <div> <input type="file" id="uploader"> <button id="download-pdf" disabled="true">Download PDF</button> </div> <textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea> <script type="module"> const { createWorker } = Tesseract; const worker = await createWorker("eng", 1, { corePath: '/node_modules/tesseract.js-core', workerPath: "/dist/worker.min.js", logger: m => console.log(m), }); const uploader = document.getElementById('uploader'); const dlBtn = document.getElementById('download-pdf'); let pdf; const recognize = async ({ target: { files } }) => { const res = await worker.recognize(files[0],{pdfTitle: "Example PDF"},{pdf: true}); pdf = res.data.pdf; const text = res.data.text; const board = document.getElementById('board'); board.value = text; dlBtn.disabled = false; }; const downloadPDF = async () => { const filename = 'tesseract-ocr-result.pdf'; const blob = new Blob([new Uint8Array(pdf)], { type: 'application/pdf' }); if (navigator.msSaveBlob) { // IE 10+ navigator.msSaveBlob(blob, filename); } else { const link = document.createElement('a'); if (link.download !== undefined) { const url = URL.createObjectURL(blob); link.setAttribute('href', url); link.setAttribute('download', filename); link.style.visibility = 'hidden'; document.body.appendChild(link); link.click(); document.body.removeChild(link); } } }; uploader.addEventListener('change', recognize); dlBtn.addEventListener('click', downloadPDF); </script> </body> </html>