zimmer
Streaming parser for the ZIM (OpenZIM) file format: http://www.openzim.org/wiki/ZIM_file_format
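// A minimal sketch of using the zim module programmatically instead of via this
// CLI. It only uses calls that appear in this script (fs.open + zim.readHeader);
// the filename 'wikipedia.zim' is a placeholder.
//
//   var zim = require('./')
//   var fs = require('fs')
//   fs.open('wikipedia.zim', 'r', function (err, file) {
//     if (err) throw err
//     zim.readHeader(file, function (err, header) {
//       if (err) throw err
//       console.log(JSON.stringify(header)) // equivalent to: zimmer header wikipedia.zim
//     })
//   })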
var zim = require('./')
var fs = require('fs')
var path = require('path')
var ndjson = require('ndjson')
var through = require('through2')
var pump = require('pump')
var minimist = require('minimist')
var mkdirp = require('mkdirp')
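// Parse command-line arguments: the subcommand is the first positional argument,
// the ZIM filename the last.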
var args = minimist(process.argv.slice(2))
var cmd = args._[0]
var filename = args._[args._.length - 1]
if (!cmd) usage()
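// A single positional argument is treated as a filename and defaults to the 'stream' command.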
if (!filename || args._.length === 1) {
filename = cmd
cmd = 'stream'
}
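// For extract, the filename is the second positional argument (the output dir comes third).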
if (cmd === 'extract') {
filename = args._[1]
}
function usage () {
console.log(`zimmer zim parser - usage
zimmer <filename> - streams all entry metadata + data to stdout
zimmer extract <filename> <out> - extracts and writes files to output dir
zimmer header <filename> - reads zim header
zimmer entries <filename> - streams all entry metadata to stdout
zimmer entry <num> <filename> - reads specific entry data by index
zimmer clusters <filename> - streams all cluster metadata to stdout
zimmer cluster <num> <filename> - reads specific cluster data by index`
)
process.exit(1)
}
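// Open the ZIM file and parse its header; every subcommand works from this
// header and the open file descriptor.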
fs.open(filename, 'r', function (err, file) {
if (err) throw err
zim.readHeader(file, function (err, header) {
if (err) throw err
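// header: print the parsed ZIM header as JSON and exit.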
if (cmd === 'header') {
console.log(JSON.stringify(header))
return
}
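// stream: resolve every entry pointer to a directory entry, then attach the
// blob contents read from that entry's cluster.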
if (cmd === 'stream') {
var pointers = zim.createEntryPointerStream(filename, header)
var reader = through.obj(function (entry, enc, cb) {
zim.readDirectoryEntry(filename, file, header, entry, function (err, entry) {
if (err) return cb(err)
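// Entries without cluster data (e.g. redirects) are emitted as metadata only.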
if (!entry.cluster) return cb(null, entry)
zim.readCluster(filename, file, header, {index: entry.cluster}, function (err, cluster) {
if (err) return cb(err)
entry.contents = cluster.blobs[entry.blob].toString()
cb(null, entry)
})
})
})
pump(pointers, reader, ndjson.serialize(), process.stdout, function (err) {
if (err) throw err
})
return
}
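// entries: emit directory entry metadata only, without reading any cluster data.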
if (cmd === 'entries') {
var pointers = zim.createEntryPointerStream(filename, header)
var reader = through.obj(function (entry, enc, cb) {
zim.readDirectoryEntry(filename, file, header, entry, cb)
})
pump(pointers, reader, ndjson.serialize(), process.stdout, function (err) {
if (err) throw err
})
return
}
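// entry: read a single directory entry by index and attach its blob contents.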
if (cmd === 'entry') {
var idx = args._[1]
zim.readDirectoryEntry(filename, file, header, {index: idx}, function (err, entry) {
if (err) throw err
zim.readCluster(filename, file, header, {index: entry.cluster}, function (err, cluster) {
if (err) throw err
var contents = cluster.blobs[entry.blob].toString()
entry.contents = contents
console.log(JSON.stringify(entry))
})
})
return
}
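// extract: write each entry's blob to disk under its ZIM url, relative to the
// output directory.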
if (cmd === 'extract') {
var out = args._[2] || process.cwd()
mkdirp(out, function (err) {
if (err) throw err
extract()
})
function extract () {
var pointers = zim.createEntryPointerStream(filename, header)
var writer = through.obj(function (entry, enc, cb) {
zim.readDirectoryEntry(filename, file, header, entry, function (err, entry) {
if (err) return cb(err)
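// Skip entries that have no cluster data to write (e.g. redirects).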
if (!entry.cluster) return cb()
zim.readCluster(filename, file, header, {index: entry.cluster}, function (err, cluster) {
if (err) return cb(err)
var contents = cluster.blobs[entry.blob].toString()
var saveAs = path.join(out, entry.url)
var saveDir = path.dirname(saveAs)
mkdirp(saveDir, function (err) {
if (err) return cb(err)
fs.writeFile(saveAs, contents, function (err) {
if (err) return cb(err)
cb(null, {finished: true, path: saveAs})
})
})
})
})
})
pump(pointers, writer, ndjson.serialize(), process.stdout, function (err) {
if (err) throw err
})
}
return
}
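// clusters: stream cluster pointer metadata to stdout as newline-delimited JSON.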
if (cmd === 'clusters') {
var pointers = zim.createClusterPointerStream(filename, header)
pump(pointers, ndjson.serialize(), process.stdout, function (err) {
if (err) throw err
})
return
}
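// cluster: read a single cluster by index and print its blobs as strings.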
if (cmd === 'cluster') {
var idx = args._[1]
zim.readCluster(filename, file, header, {index: idx}, function (err, cluster) {
if (err) throw err
cluster.blobs = cluster.blobs.map(function (b) { return b.toString() })
console.log(JSON.stringify(cluster))
})
return
}
usage()
})
})