@kiran.kk.phonpe/node-native-ocr
Version:
Native Node.js bindings for the Tesseract OCR project.
222 lines (186 loc) • 7.27 kB
JavaScript
const fs = require('fs')
const path = require('path')
const shell = require('shelljs')
const process = require('process')
const requiredCMakeVersion = '3.15'
const cmakeBuildType = 'Release'
shell.config.fatal = true // thrown an exception on any error
let commonEnvVariables = {
CMAKE_BUILD_TYPE: cmakeBuildType,
CMAKE_INSTALL_PREFIX: '${PWD}/bin',
BUILD_SHARED_LIBS: 'OFF',
CMAKE_POSITION_INDEPENDENT_CODE: 'ON',
CMAKE_MSVC_RUNTIME_LIBRARY: 'MultiThreaded',
CMAKE_POLICY_DEFAULT_CMP0091: 'NEW',
CMAKE_OSX_DEPLOYMENT_TARGET: '10.9'
}
const buildForArch = process.env['BUILD_FOR_ARCH'] || process.arch
shell.echo('buildForArch', buildForArch)
if (buildForArch === 'arm64') {
shell.echo('arm64 build')
commonEnvVariables = {
...commonEnvVariables,
CMAKE_OSX_ARCHITECTURES: '"arm64"'
}
} else if (buildForArch === 'x64') {
shell.echo('x64 build')
commonEnvVariables = {
...commonEnvVariables,
CMAKE_OSX_ARCHITECTURES: '"x86_64"'
}
}
// ------ startup ------
shell.echo('build-tesseract script start.')
if (!shell.which('git')) {
shell.echo('This script requires Git.')
shell.exit(1)
}
checkCMakeVersion()
const homeDir = path.resolve(__dirname, '..')
shell.cd(homeDir)
shell.echo(`Working directory: ${homeDir}`)
// ------ libraries ------
downloadAndBuildLib('https://github.com/libsdl-org/libtiff.git', 'libtiff', dirName => {
const filePath = path.resolve(__dirname, '..', dirName, 'CMakeLists.txt')
shell.echo(`Patching ${filePath} for Mac.`)
let cmakeConfig = fs.readFileSync(filePath, 'utf8')
// disable codecs: otherwise we will get a linker error during compilation of tesseract
cmakeConfig = cmakeConfig.replace('include(LZMACodec)', '# include(LZMACodec)')
cmakeConfig = cmakeConfig.replace('include(WebPCodec)', '# include(WebPCodec)')
cmakeConfig = cmakeConfig.replace('include(ZSTDCodec)', '# include(ZSTDCodec)')
fs.writeFileSync(filePath, cmakeConfig, 'utf8')
shell.echo(`Disabled LZMA, Webp and ZSTD Codecs. Not needed for tesseract.`)
})
downloadAndBuildLib('https://github.com/madler/zlib.git', 'zlib')
downloadAndBuildLib('https://github.com/glennrp/libpng.git', 'libpng')
downloadAndBuildLib('https://github.com/tamaskenez/libjpeg-cmake.git', 'libjpeg')
buildLeptonica('leptonica')
buildTesseract('tesseract')
shell.echo('build-tesseract script end.')
function checkCMakeVersion() {
let versionOK = false
shell.echo(`This script requires CMake version ${requiredCMakeVersion} or later.`)
if (!shell.which('cmake')) {
shell.echo('CMake not found on this system.')
} else {
const reply = shell.exec('cmake --version', { silent: true })
foundVersion = /\d+.\d+.\d+/gm.exec(reply)[0]
versionOK = checkVersion(foundVersion, requiredCMakeVersion) >= 0
if (versionOK) {
shell.echo(`CMake ${foundVersion} found on this system.`)
} else {
shell.echo(`CMake ${foundVersion} found on this system is too old.`)
}
}
if (!versionOK) shell.exit(1)
}
// https://codereview.stackexchange.com/questions/236647/comparing-version-numbers-with-javascript
function checkVersion(a, b) {
const x = a.split('.').map(e => parseInt(e, 10))
const y = b.split('.').map(e => parseInt(e, 10))
for (const i in x) {
y[i] = y[i] || 0
if (x[i] === y[i]) {
continue
} else if (x[i] > y[i]) {
return 1
} else {
return -1
}
}
return y.length > x.length ? -1 : 0
}
function downloadAndBuildLib(repoUrl, dirName, patchConfig) {
printTitle('Building ' + dirName)
if (shell.test('-e', dirName)) {
shell.echo(`The ${dirName} directory already exists.`)
} else {
shell.exec(`git clone ${repoUrl} ${dirName}`)
}
if (patchConfig) patchConfig(dirName)
runCMakeBuild(dirName, cmakeBuildType)
}
function buildLeptonica(dirName) {
printTitle('\nBuilding Leptonica.')
runCMakeBuild(
dirName,
cmakeBuildType,
{
SW_BUILD: 'OFF',
CMAKE_FIND_USE_CMAKE_SYSTEM_PATH: 'FALSE',
CMAKE_FIND_USE_SYSTEM_ENVIRONMENT_PATH: process.platform === 'darwin' ? 'FALSE' : 'TRUE',
CMAKE_PREFIX_PATH: '"${PWD}/../../libtiff/build;${PWD}/../../libjpeg/build;${PWD}/../../zlib/build;${PWD}/../../libpng/build"',
CMAKE_INCLUDE_PATH:
'"${PWD}/../../libtiff/build/bin/include;${PWD}/../../libjpeg/build/bin/include;${PWD}/../../zlib/build/bin/include;${PWD}/../../libpng/build/bin/include"',
CMAKE_LIBRARY_PATH:
'"${PWD}/../../libtiff/build/bin/lib;${PWD}/../../libjpeg/build/bin/lib;${PWD}/../../zlib/build/bin/lib;${PWD}/../../libpng/build/bin/lib"'
},
(dirName, cmakeConfig, envVars) => {
// patch config_auto.h between config and build
if (process.platform === 'darwin') {
const filePath = path.resolve(__dirname, '..', dirName, 'build', 'src', 'config_auto.h')
shell.echo(`Patching ${filePath} for Mac.`)
let autoConfig = fs.readFileSync(filePath, 'utf8')
const searchText = /^#define\s+HAVE_FMEMOPEN\s+1/gm
const replacementText = '#define HAVE_FMEMOPEN 0'
const foundText = autoConfig.match(searchText)
if (foundText) {
const updatedConfig = autoConfig.replace(searchText, replacementText)
fs.writeFileSync(filePath, updatedConfig, 'utf8')
shell.echo(`The '${foundText}' directive was replaced with '${replacementText}'.`)
} else {
shell.echo(`The '#define HAVE_FMEMOPEN 1' directive was not found.`)
shell.echo('This may lead to a build that does not run on all macOS machines.')
}
}
}
)
}
function buildTesseract(dirName) {
printTitle('\nBuilding Tesseract.')
runCMakeBuild(dirName, cmakeBuildType, {
STATIC: 'ON',
CPPAN_BUILD: 'OFF',
BUILD_TRAINING_TOOLS: 'OFF',
AUTO_OPTIMIZE: 'OFF',
Leptonica_DIR: '../leptonica/build'
})
}
function runCMakeBuild(dirName, cmakeBuildType, envVars, patchConfig) {
createAndEnterBuildDir(dirName)
let cmakeCmd = 'cmake'
cmakeCmd += formatEnvVars(envVars)
cmakeCmd += formatEnvVars(commonEnvVariables)
cmakeCmd += ' ../ '
shell.echo(`Configuring a ${cmakeBuildType} build.`)
shell.echo(cmakeCmd)
shell.exec(cmakeCmd)
if (patchConfig) patchConfig(dirName, cmakeBuildType, envVars)
shell.echo(`Creating a ${cmakeBuildType} build.`)
shell.exec(`cmake --build . --config ${cmakeBuildType}`)
shell.echo(`Installing a ${cmakeBuildType} build.`)
shell.exec('cmake --install .')
leaveBuildDir()
}
function createAndEnterBuildDir(dirName) {
shell.pushd('-q', dirName)
if (!shell.test('-e', 'build')) shell.mkdir('build')
shell.pushd('-q', 'build')
}
function leaveBuildDir() {
shell.popd('-q')
shell.popd('-q')
}
function formatEnvVars(envVars) {
const continuation = process.platform === 'win32' ? '' : ' \\\n'
let args = ''
for (key in envVars) {
args += ` -D${key}=${envVars[key]}${continuation}`
}
return args
}
function printTitle(title) {
console.log('\n' + '='.repeat(title.length))
console.log(title)
console.log('='.repeat(title.length))
}