node-native-ocr
Version:
Native Node.js bindings for the Tesseract OCR project.
351 lines (306 loc) • 12.4 kB
JavaScript
const fs = require('fs')
const path = require('path')
const shell = require('shelljs')
const process = require('process')
// Minimum CMake version accepted by checkCMakeVersion().
const requiredCMakeVersion = '3.15'
// Build configuration used for every CMake configure/build step.
const cmakeBuildType = 'Release'

// Make shelljs throw an exception on any error.
shell.config.fatal = true

// -D definitions that runCMakeBuild() appends to every CMake configure command.
let commonEnvVariables = {
  CMAKE_BUILD_TYPE: cmakeBuildType,
  CMAKE_INSTALL_PREFIX: '${PWD}/bin',
  BUILD_SHARED_LIBS: 'OFF',
  CMAKE_POSITION_INDEPENDENT_CODE: 'ON',
  CMAKE_MSVC_RUNTIME_LIBRARY: 'MultiThreaded',
  CMAKE_POLICY_DEFAULT_CMP0091: 'NEW',
  CMAKE_POLICY_VERSION_MINIMUM: '3.5',
  CMAKE_OSX_DEPLOYMENT_TARGET: '10.9'
}

// Target architecture: BUILD_FOR_ARCH wins over the architecture of the
// running Node.js process.
const buildForArch = process.env.BUILD_FOR_ARCH || process.arch
shell.echo('buildForArch', buildForArch)

// Install directories of the image libraries built by this script. The literal
// "${PWD}" is kept in the strings (presumably for the shell that runs cmake to
// expand - confirm on Windows).
const dependencyNames = ['zlib', 'libpng', 'libjpeg', 'libtiff']
const dependencyInstallPrefix = '${PWD}/../../'
const quoteDependencyPaths = suffix => {
  const paths = dependencyNames.map(name => dependencyInstallPrefix + name + '/build/bin' + suffix)
  return '"' + paths.join(';') + '"'
}
const dependencyPrefixPath = quoteDependencyPaths('')
const dependencyIncludePath = quoteDependencyPaths('/include')
const dependencyLibraryPath = quoteDependencyPaths('/lib')
const tiffIncludePath = '"${PWD}/../../libtiff/build/bin/include"'

// Pin the macOS target architecture for the two supported values; any other
// value leaves the common definitions untouched.
switch (buildForArch) {
  case 'arm64':
    shell.echo('arm64 build')
    commonEnvVariables = Object.assign({}, commonEnvVariables, {
      CMAKE_OSX_ARCHITECTURES: '"arm64"'
    })
    break
  case 'x64':
    shell.echo('x64 build')
    commonEnvVariables = Object.assign({}, commonEnvVariables, {
      CMAKE_OSX_ARCHITECTURES: '"x86_64"'
    })
    break
  default:
    break
}
// ------ startup ------
shell.echo('build-tesseract script start.')

// Git is needed to fetch the library sources; stop right away without it.
const gitAvailable = Boolean(shell.which('git'))
if (!gitAvailable) {
  shell.echo('This script requires Git.')
  shell.exit(1)
}

checkCMakeVersion()

// All remaining steps run from the parent directory of this script's folder.
const homeDir = path.resolve(__dirname, '..')
shell.cd(homeDir)
shell.echo('Working directory: ' + homeDir)
// ------ libraries ------
// The libraries are built in this order; the later ones are configured with
// paths pointing at the install directories of the earlier ones.

downloadAndBuildLib('https://github.com/madler/zlib.git', 'zlib', null, null, 'v1.3.1')

// libpng is pointed at the zlib built above.
const libpngOptions = {
  CMAKE_FIND_USE_CMAKE_SYSTEM_PATH: 'FALSE',
  CMAKE_FIND_USE_SYSTEM_ENVIRONMENT_PATH: 'TRUE',
  CMAKE_PREFIX_PATH: '"${PWD}/../../zlib/build/bin"',
  CMAKE_INCLUDE_PATH: '"${PWD}/../../zlib/build/bin/include"',
  CMAKE_LIBRARY_PATH: '"${PWD}/../../zlib/build/bin/lib"'
}
downloadAndBuildLib('https://github.com/glennrp/libpng.git', 'libpng', null, libpngOptions, 'libpng16')

downloadAndBuildLib('https://github.com/libjpeg-turbo/libjpeg-turbo.git', 'libjpeg', null, null, '3.0.4')

// Comments out the optional codec includes in libtiff's top-level
// CMakeLists.txt before it is configured.
const disableUnusedTiffCodecs = dirName => {
  const filePath = path.resolve(__dirname, '..', dirName, 'CMakeLists.txt')
  shell.echo(`Patching ${filePath} for Mac.`)
  let cmakeConfig = fs.readFileSync(filePath, 'utf8')
  // disable codecs: otherwise we will get a linker error during compilation of tesseract
  for (const codecModule of ['LZMACodec', 'WebPCodec', 'ZSTDCodec']) {
    const includeLine = 'include(' + codecModule + ')'
    cmakeConfig = cmakeConfig.replace(includeLine, '# ' + includeLine)
  }
  fs.writeFileSync(filePath, cmakeConfig, 'utf8')
  shell.echo('Disabled LZMA, Webp and ZSTD Codecs. Not needed for tesseract.')
}
const libtiffOptions = {
  'tiff-tools': 'OFF',
  'tiff-tests': 'OFF',
  'tiff-contrib': 'OFF',
  'tiff-docs': 'OFF',
  CMAKE_PREFIX_PATH: dependencyPrefixPath,
  CMAKE_INCLUDE_PATH: dependencyIncludePath,
  CMAKE_LIBRARY_PATH: dependencyLibraryPath
}
downloadAndBuildLib(
  'https://github.com/libsdl-org/libtiff.git',
  'libtiff',
  disableUnusedTiffCodecs,
  libtiffOptions,
  'v4.6.0'
)

// Leptonica and Tesseract are not cloned here; their directories must already
// exist next to this script's folder.
buildLeptonica('leptonica')
buildTesseract('tesseract')

shell.echo('build-tesseract script end.')
/**
 * Verifies that CMake is installed and is at least `requiredCMakeVersion`.
 *
 * Echoes the outcome and ends the script with exit code 1 when CMake is
 * missing, when its version cannot be read from `cmake --version`, or when the
 * version found is older than required.
 */
function checkCMakeVersion() {
  let versionOK = false
  shell.echo(`This script requires CMake version ${requiredCMakeVersion} or later.`)
  if (!shell.which('cmake')) {
    shell.echo('CMake not found on this system.')
  } else {
    const reply = shell.exec('cmake --version', { silent: true })
    // The dots are escaped so only a real "major.minor.patch" triple matches.
    // No g flag: a single match is read, so no lastIndex state is involved.
    const versionMatch = /\d+\.\d+\.\d+/.exec(String(reply))
    if (!versionMatch) {
      // Previously a missing match crashed with a TypeError on `[0]`.
      shell.echo('Could not determine the CMake version from the output of "cmake --version".')
    } else {
      const foundVersion = versionMatch[0]
      versionOK = checkVersion(foundVersion, requiredCMakeVersion) >= 0
      if (versionOK) {
        shell.echo(`CMake ${foundVersion} found on this system.`)
      } else {
        shell.echo(`CMake ${foundVersion} found on this system is too old.`)
      }
    }
  }
  if (!versionOK) shell.exit(1)
}
// Adapted from
// https://codereview.stackexchange.com/questions/236647/comparing-version-numbers-with-javascript
/**
 * Compares two dot-separated numeric version strings component by component.
 *
 * Missing or non-numeric components count as 0, so '3.15' and '3.15.0' are
 * equal. Neither input is modified.
 *
 * @param {string} a - Version to test, e.g. '3.28.1'.
 * @param {string} b - Version to compare against, e.g. '3.15'.
 * @returns {number} 1 if a is newer than b, -1 if a is older, 0 if equal.
 */
function checkVersion(a, b) {
  const toComponents = version => version.split('.').map(part => parseInt(part, 10) || 0)
  const x = toComponents(a)
  const y = toComponents(b)
  const componentCount = Math.max(x.length, y.length)
  for (let i = 0; i < componentCount; i += 1) {
    const left = x[i] || 0
    const right = y[i] || 0
    if (left !== right) {
      return left > right ? 1 : -1
    }
  }
  return 0
}
/**
 * Obtains the sources of one library with Git, optionally patches them, and
 * runs the CMake configure/build/install sequence for it.
 *
 * An existing Git checkout in `dirName` is reused: its origin is reset to
 * `repoUrl`, `ref` (if given) is fetched and force-checked-out, and untracked
 * files are removed. Anything else found at `dirName` is deleted and replaced
 * by a fresh clone.
 *
 * @param {string} repoUrl - Git URL of the library.
 * @param {string} dirName - Directory (relative to the current directory) for the sources.
 * @param {?function(string): void} patchConfig - Optional hook called with `dirName` before the build.
 * @param {?Object} envVars - Extra -D definitions for the CMake configure step.
 * @param {string} [ref] - Branch or tag to build; a shallow clone/fetch is used when set.
 */
function downloadAndBuildLib(repoUrl, dirName, patchConfig, envVars, ref) {
  printTitle(`Building ${dirName}`)

  const cloneRepository = () => {
    const shallowRefArgs = ref ? `--branch ${ref} --depth 1 ` : ''
    shell.exec(`git clone ${shallowRefArgs}${repoUrl} ${dirName}`)
  }

  const directoryExists = shell.test('-e', dirName)
  if (directoryExists) {
    shell.echo(`The ${dirName} directory already exists.`)
  }

  if (directoryExists && shell.test('-e', `${dirName}/.git`)) {
    const gitInDir = `git -C ${dirName}`
    shell.exec(`${gitInDir} remote set-url origin ${repoUrl}`)
    if (ref) {
      shell.exec(`${gitInDir} fetch --depth 1 --tags origin ${ref}`)
      // -f discards local modifications to tracked files.
      shell.exec(`${gitInDir} checkout -f FETCH_HEAD`)
    }
    shell.exec(`${gitInDir} clean -fdx`)
  } else {
    // Not a Git checkout: drop whatever is there, then clone from scratch.
    if (directoryExists) {
      shell.rm('-rf', dirName)
    }
    cloneRepository()
  }

  if (patchConfig) {
    patchConfig(dirName)
  }
  runCMakeBuild(dirName, cmakeBuildType, envVars)
}
/**
 * Patches and builds Leptonica from the sources already present in `dirName`.
 *
 * Steps: patch the TIFF block of src/CMakeLists.txt, run the CMake
 * configure/build/install sequence (patching config_auto.h between configure
 * and build on macOS), then prepend dependency declarations to the generated
 * LeptonicaConfig.cmake.
 *
 * @param {string} dirName - Leptonica source directory, relative to the parent of this script's folder.
 */
function buildLeptonica(dirName) {
  printTitle('\nBuilding Leptonica.')
  patchLeptonicaTiffLinking(dirName)
  runCMakeBuild(
    dirName,
    cmakeBuildType,
    {
      SW_BUILD: 'OFF',
      CMAKE_DISABLE_FIND_PACKAGE_PNG: 'OFF',
      CMAKE_DISABLE_FIND_PACKAGE_TIFF: 'OFF',
      TIFF_INCLUDE_DIR: tiffIncludePath,
      TIFF_INCLUDE_DIRS: tiffIncludePath,
      CMAKE_FIND_USE_CMAKE_SYSTEM_PATH: 'FALSE',
      CMAKE_FIND_USE_SYSTEM_ENVIRONMENT_PATH: 'TRUE',
      CMAKE_PREFIX_PATH: dependencyPrefixPath,
      CMAKE_INCLUDE_PATH: dependencyIncludePath,
      CMAKE_LIBRARY_PATH: dependencyLibraryPath
    },
    // Called by runCMakeBuild between the configure and the build step.
    patchLeptonicaAutoConfig
  )
  addLeptonicaConfigDependencies(dirName)
}

// Replaces the first `if (TIFF_LIBRARIES) ... endif()` block of Leptonica's
// src/CMakeLists.txt with a variant that accepts TIFF_INCLUDE_DIRS or
// TIFF_INCLUDE_DIR and drops CMath::CMath from the linked TIFF libraries.
function patchLeptonicaTiffLinking(dirName) {
  const cmakeListsPath = path.resolve(__dirname, '..', dirName, 'src', 'CMakeLists.txt')
  if (!fs.existsSync(cmakeListsPath)) return

  const originalContent = fs.readFileSync(cmakeListsPath, 'utf8')
  // The marker only exists in the patched variant: nothing to do on a re-run.
  if (originalContent.includes('if (TIFF_INCLUDE_DIRS)')) return

  const tiffIncludeCompatSnippet = [
    'if (TIFF_LIBRARIES)',
    '  if (TIFF_INCLUDE_DIRS)',
    '    target_include_directories (leptonica PUBLIC ${TIFF_INCLUDE_DIRS})',
    '  elseif (TIFF_INCLUDE_DIR)',
    '    target_include_directories (leptonica PUBLIC ${TIFF_INCLUDE_DIR})',
    '  endif()',
    '  set(_TIFF_LIBRARIES ${TIFF_LIBRARIES})',
    '  list(REMOVE_ITEM _TIFF_LIBRARIES CMath::CMath)',
    '  list(REMOVE_ITEM _TIFF_LIBRARIES $<LINK_ONLY:CMath::CMath>)',
    '  target_link_libraries (leptonica ${_TIFF_LIBRARIES})',
    'endif()'
  ].join('\n')

  // A function replacer inserts the snippet verbatim; a replacement *string*
  // would give sequences such as "$<" or "$&" a special meaning.
  const patchedContent = originalContent.replace(
    /if \(TIFF_LIBRARIES\)[\s\S]*?endif\(\)/,
    () => tiffIncludeCompatSnippet
  )

  if (patchedContent === originalContent) {
    // Do not rewrite the file or claim success when the block was not found.
    shell.echo(`No 'if (TIFF_LIBRARIES)' block found in ${cmakeListsPath}; file left unchanged.`)
    return
  }
  fs.writeFileSync(cmakeListsPath, patchedContent, 'utf8')
  shell.echo(`Patched ${cmakeListsPath} for TIFF include compatibility.`)
}

// On macOS only: rewrites `#define HAVE_FMEMOPEN 1` to `0` in the generated
// build/src/config_auto.h. Extra arguments passed by runCMakeBuild are ignored.
function patchLeptonicaAutoConfig(dirName) {
  if (process.platform !== 'darwin') return

  const filePath = path.resolve(__dirname, '..', dirName, 'build', 'src', 'config_auto.h')
  shell.echo(`Patching ${filePath} for Mac.`)
  const autoConfig = fs.readFileSync(filePath, 'utf8')
  const searchText = /^#define\s+HAVE_FMEMOPEN\s+1/gm
  const replacementText = '#define HAVE_FMEMOPEN 0'
  const foundText = autoConfig.match(searchText)
  if (foundText) {
    const updatedConfig = autoConfig.replace(searchText, replacementText)
    fs.writeFileSync(filePath, updatedConfig, 'utf8')
    shell.echo(`The '${foundText}' directive was replaced with '${replacementText}'.`)
  } else {
    shell.echo(`The '#define HAVE_FMEMOPEN 1' directive was not found.`)
    shell.echo('This may lead to a build that does not run on all macOS machines.')
  }
}

// Prepends find_dependency() calls for ZLIB and JPEG to the generated
// build/LeptonicaConfig.cmake, unless the file already includes
// CMakeFindDependencyMacro.
function addLeptonicaConfigDependencies(dirName) {
  const leptonicaConfigPath = path.resolve(__dirname, '..', dirName, 'build', 'LeptonicaConfig.cmake')
  if (!fs.existsSync(leptonicaConfigPath)) return

  const configContent = fs.readFileSync(leptonicaConfigPath, 'utf8')
  if (configContent.includes('include(CMakeFindDependencyMacro)')) return

  const dependencyHeader =
    'include(CMakeFindDependencyMacro)\n' +
    'find_dependency(ZLIB)\n' +
    'find_dependency(JPEG)\n\n'
  fs.writeFileSync(leptonicaConfigPath, dependencyHeader + configContent, 'utf8')
  shell.echo(`Patched ${leptonicaConfigPath} with find_dependency declarations.`)
}
/**
 * Patches and builds Tesseract from the sources already present in `dirName`.
 *
 * Three source patches are applied before the CMake configure/build/install
 * sequence: the TIFF link line in CMakeLists.txt is switched to
 * `${TIFF_LIBRARIES}`, and `#include <cstdint>` is added to
 * src/ccutil/helpers.h and src/dict/matchdefs.h. A file is written, and
 * reported as patched, only when its content really changed; missing files and
 * already-patched files are skipped silently.
 *
 * @param {string} dirName - Tesseract source directory, relative to the parent of this script's folder.
 */
function buildTesseract(dirName) {
  printTitle('\nBuilding Tesseract.')
  const sourceRoot = path.resolve(__dirname, '..', dirName)

  // Runs `transform` over the file's text and saves the result only if it
  // differs, so "Patched ..." is never logged for a no-op.
  const patchFile = (filePath, transform, purpose) => {
    if (!fs.existsSync(filePath)) return
    const originalContent = fs.readFileSync(filePath, 'utf8')
    const patchedContent = transform(originalContent)
    if (patchedContent === originalContent) return
    fs.writeFileSync(filePath, patchedContent, 'utf8')
    shell.echo(`Patched ${filePath} ${purpose}.`)
  }

  // Builds a transform that inserts `#include <cstdint>` right after the first
  // occurrence of `anchorInclude`, unless <cstdint> is already included.
  const addCstdintAfter = anchorInclude => content =>
    content.includes('#include <cstdint>')
      ? content
      : content.replace(anchorInclude, `${anchorInclude}\n#include <cstdint>`)

  patchFile(
    path.join(sourceRoot, 'CMakeLists.txt'),
    content =>
      content.replace(
        'target_link_libraries(tesseract tiff)',
        'target_link_libraries(tesseract ${TIFF_LIBRARIES})'
      ),
    'to link TIFF via resolved library paths'
  )
  patchFile(
    path.join(sourceRoot, 'src', 'ccutil', 'helpers.h'),
    addCstdintAfter('#include <string>'),
    'to include <cstdint>'
  )
  patchFile(
    path.join(sourceRoot, 'src', 'dict', 'matchdefs.h'),
    addCstdintAfter('#include <cstdio>'),
    'to include <cstdint>'
  )

  runCMakeBuild(dirName, cmakeBuildType, {
    STATIC: 'ON',
    CPPAN_BUILD: 'OFF',
    BUILD_TRAINING_TOOLS: 'OFF',
    AUTO_OPTIMIZE: 'OFF',
    // Points at Leptonica's build directory, relative to tesseract/build.
    Leptonica_DIR: '../leptonica/build',
    CMAKE_FIND_USE_CMAKE_SYSTEM_PATH: 'FALSE',
    CMAKE_FIND_USE_SYSTEM_ENVIRONMENT_PATH: 'TRUE',
    CMAKE_PREFIX_PATH: dependencyPrefixPath,
    CMAKE_INCLUDE_PATH: dependencyIncludePath,
    CMAKE_LIBRARY_PATH: dependencyLibraryPath
  })
}
/**
 * Configures, builds and installs one library with CMake inside
 * `<dirName>/build`, then returns to the directory it started from.
 *
 * @param {string} dirName - Source directory of the library.
 * @param {string} cmakeBuildType - Value passed to `cmake --build --config` and used in log output.
 * @param {?Object} envVars - Library-specific -D definitions; emitted before the common ones.
 * @param {function(string, string, ?Object): void} [patchConfig] - Optional hook run after configure and before build.
 */
function runCMakeBuild(dirName, cmakeBuildType, envVars, patchConfig) {
  createAndEnterBuildDir(dirName)

  // Library-specific definitions first, common ones second, source dir last.
  const configureCommand = [
    'cmake',
    formatEnvVars(envVars),
    formatEnvVars(commonEnvVariables),
    ' ../ '
  ].join('')

  shell.echo('Configuring a ' + cmakeBuildType + ' build.')
  shell.echo(configureCommand)
  shell.exec(configureCommand)

  if (patchConfig) {
    patchConfig(dirName, cmakeBuildType, envVars)
  }

  shell.echo('Creating a ' + cmakeBuildType + ' build.')
  shell.exec('cmake --build . --config ' + cmakeBuildType)

  shell.echo('Installing a ' + cmakeBuildType + ' build.')
  shell.exec('cmake --install .')

  leaveBuildDir()
}
/**
 * Pushes `dirName` and then its `build` subdirectory onto the shelljs
 * directory stack, creating `build` first when it does not exist.
 *
 * @param {string} dirName - Directory that holds (or will hold) the `build` folder.
 */
function createAndEnterBuildDir(dirName) {
  const buildDirName = 'build'
  shell.pushd('-q', dirName)
  const buildDirExists = shell.test('-e', buildDirName)
  if (!buildDirExists) {
    shell.mkdir(buildDirName)
  }
  shell.pushd('-q', buildDirName)
}
/**
 * Pops two entries off the shelljs directory stack, matching the two pushd
 * calls made by createAndEnterBuildDir().
 */
function leaveBuildDir() {
  const pushedLevels = 2
  for (let level = 0; level < pushedLevels; level += 1) {
    shell.popd('-q')
  }
}
/**
 * Formats an object as CMake command-line definitions.
 *
 * Each own enumerable property becomes ` -D<key>=<value>`. On every platform
 * except win32 each definition is followed by a space, a backslash and a
 * newline, so the command is split across lines.
 *
 * @param {?Object} envVars - Definitions to format; null/undefined yields ''.
 * @returns {string} The concatenated definitions, in property order.
 */
function formatEnvVars(envVars) {
  const continuation = process.platform === 'win32' ? '' : ' \\\n'
  // The previous `for (key in envVars)` created an implicit global `key`,
  // which is a ReferenceError in strict mode.
  return Object.entries(envVars || {})
    .map(([key, value]) => ` -D${key}=${value}${continuation}`)
    .join('')
}
/**
 * Prints `title` to the console between two rows of '=' characters, each as
 * long as the title, with an empty line before the first row.
 *
 * @param {string} title - Text to print.
 */
function printTitle(title) {
  const rule = '='.repeat(title.length)
  console.log(`\n${rule}`)
  console.log(title)
  console.log(rule)
}