doc88-download
Version:
Download doc88.com document as PNG and HTML
225 lines (214 loc) • 10 kB
JavaScript
var path = require('path'),
fs = require('fs'),
app = require('electron').app,
ipc = require('electron').ipcMain,
session = require('electron').session,
BrowserWindow = require('electron').BrowserWindow,
request = require('request'),
mkdirp = require('mkdirp'),
async = require('async'),
jimp = require('jimp'),
debug = require('debug')('doc88-download'),
program = require('commander'),
doc88util = require('./doc88util');
/**
 * Option coercion helper: parse a decimal integer from a command-line value.
 *
 * @param {string} string - raw option value to parse (base 10).
 * @param {number} defaultValue - value returned when `string` does not start
 *   with a parseable integer.
 * @returns {number} the parsed integer, or `defaultValue` if parsing yields NaN.
 */
function myParseInt(string, defaultValue) {
  var parsed = parseInt(string, 10);
  // `typeof NaN` is 'number', so a typeof check can never detect a failed
  // parse; test for NaN explicitly so the fallback is actually reachable.
  return Number.isNaN(parsed) ? defaultValue : parsed;
}
// Command-line definition: one positional <url> plus capture options.
// Numeric options are coerced through myParseInt with their defaults.
program
  .version(require('./package.json').version)
  .usage('[options] <url>')
  .option('-o, --out-dir <dir>', 'output directory, default "./output"', 'output')
  .option('-f, --plugin-flash-path <path>', 'flash Player plugin path')
  .option('-d, --force-download', 'force download if PNG already exists')
  .option('-w, --wait <ms>', 'milliseconds to wait before capture, default 500', myParseInt, 500)
  .option('-c, --concurrent-worker <max>', 'max concurrent worker for capture, default 1', myParseInt, 1)
  .option('-t, --scale-up-factor <t>', 'scale up factor, default 5', myParseInt, 5)
  .option('-s, --skip-check', 'skip captured image checking')
  .parse(process.argv);

// No positional argument means no URL to fetch: show the usage text.
if (!(program.args && program.args.length)) {
  program.help();
}

// Trace the effective arguments, one debug line per option.
[
  ['out-dir', program.outDir],
  ['plugin-flash-path', program.pluginFlashPath],
  ['force-download', Boolean(program.forceDownload)],
  ['wait', program.wait],
  ['concurrent-worker', program.concurrentWorker],
  ['scale-up-factor', program.scaleUpFactor],
  ['url', program.args]
].forEach(function (entry) {
  debug('args: ' + entry[0] + ': %s', entry[1]);
});
// Pick the Pepper Flash plugin: an explicit --plugin-flash-path wins,
// otherwise use the file under ./plugins that matches the current platform.
var ppapiFlashPath;
if (program.pluginFlashPath) {
  ppapiFlashPath = program.pluginFlashPath;
} else {
  switch (process.platform) {
    case 'win32':
      ppapiFlashPath = path.join(__dirname, 'plugins', 'pepflashplayer.dll');
      break;
    case 'linux':
      ppapiFlashPath = path.join(__dirname, 'plugins', 'libpepflashplayer.so');
      break;
    case 'darwin':
      ppapiFlashPath = path.join(__dirname, 'plugins', 'PepperFlashPlayer.plugin');
      break;
    // Any other platform leaves ppapiFlashPath undefined.
  }
}

app.commandLine.appendSwitch('ppapi-flash-path', ppapiFlashPath);
debug('append switch ppapi-flash-path="%s"', ppapiFlashPath);
// Window used to load the document page; it is created in main() and
// destroyed by the 'document:retrieved' handler once the page data arrives.
var mainWin;
// Start the download flow once Electron reports it is ready.
app.on('ready', main);
// An intentionally empty listener is registered for 'window-all-closed';
// the process is ended explicitly through app.exit() elsewhere in this file.
// NOTE(review): presumably this prevents Electron's default quit when the
// last window is destroyed — confirm against the Electron `app` docs.
app.on('window-all-closed', function () { /* intentionally empty */ });
// Create the output directory; the outcome of this call is not checked here.
mkdirp(program.outDir);
/**
 * 'ready' handler: loads the document URL in a hidden window and, once the
 * page has finished loading, sends the page HTML and the viewer globals back
 * to the main process over the 'document:retrieved' IPC channel.
 *
 * While the page loads, every request is cancelled except the main frame
 * itself and scripts whose URL contains one of the allowed markers.
 */
function main() {
  debug('evt: ready');

  // URL fragments identifying the only scripts that are allowed to load.
  var allowedScriptMarkers = ['/jquery', '/view-mini.js'];

  session.defaultSession.webRequest.onBeforeSendHeaders(function (details, callback) {
    var isMainFrame = details.resourceType == 'mainFrame';
    // only allow critical script
    var isCriticalScript = details.resourceType == 'script' &&
      allowedScriptMarkers.some(function (marker) {
        return details.url.indexOf(marker) != -1;
      });
    var cancel = !(isMainFrame || isCriticalScript);

    if (!cancel) {
      debug(`req: ${details.method} - ${details.url} [${details.resourceType}]`);
    }
    callback({ cancel: cancel });
  });

  mainWin = new BrowserWindow({ show: false });

  var targetUrl = program.args[0];
  console.log('Fetching ' + targetUrl);
  mainWin.loadURL(targetUrl);

  // Script evaluated inside the loaded page; it reports the markup and the
  // page's global variables back through a synchronous IPC message.
  var reportScript = [
    '',
    "require('electron').ipcRenderer.sendSync('document:retrieved', {",
    "'html': document.documentElement.innerHTML,",
    "'mpib': window.mpib,",
    "'mtp': window.mtp,",
    "'mhost': window.mhost,",
    "'mhi': window.mhi,",
    "'mpebt': window.mpebt,",
    "'madif': window.madif,",
    "'p_s': window.p_s,",
    "'product_code': window.product_code",
    '});',
    ''
  ].join('\n');

  mainWin.webContents.once('did-finish-load', function () {
    debug('evt: did-finish-load');
    mainWin.webContents.executeJavaScript(reportScript);
  });
}
/**
 * Handles the 'document:retrieved' IPC message carrying the loaded page's
 * HTML and viewer globals (`props`).
 *
 * Steps:
 *  1. Decode the page context from `props.mpib` (exit with code 1 if absent).
 *  2. Create `<out-dir>/<product_code>/{html,png}` and write `raw.html`.
 *  3. Write one small HTML file per page embedding the Flash viewer, sized to
 *     the page dimensions multiplied by the scale-up factor.
 *  4. Load each page file in a hidden window (at most `concurrentWorker` at a
 *     time), capture it and save it as a PNG, then exit with code 0.
 *
 * @param {object} evt - IPC event; `returnValue` is set to unblock the
 *   renderer's synchronous send.
 * @param {object} props - page data: html, mpib, mtp, mhost, mhi, mpebt,
 *   madif, p_s and product_code.
 */
ipc.on('document:retrieved', function (evt, props) {
  debug('evt: document:retrieved');
  console.log('Page loaded');

  // Bail out when the payload or the encoded page context is missing.
  if (!props || !props.mpib) {
    console.log('Unable to get page context, please check network connection and URL');
    return app.exit(1);
  }

  // One comma-separated entry per page; each entry is '-'-separated codes.
  var pageContext = doc88util.decodePageContext(props.mpib).split(',');
  var docDir = path.resolve(path.join(program.outDir, props.product_code));
  var htmlDir = path.join(docDir, 'html');
  var pngDir = path.join(docDir, 'png');
  mkdirp.sync(docDir);
  mkdirp.sync(htmlDir);
  mkdirp.sync(pngDir);

  if (props.product_code && props.html) {
    var fullHtmlFile = path.join(docDir, 'raw.html');
    fs.writeFileSync(fullHtmlFile, props.html);
    console.log(`Wrote raw html file '${fullHtmlFile}'`);
  }

  // Width used to zero-pad page numbers in file names.
  var totalPageStrLength = String(props.mtp).length;
  console.log('Total pages: ' + props.mtp);

  var pageDimension = [];
  var htmlFiles = [];
  for (var i = 0; i < props.mtp; i++) {
    var flashVars = doc88util.constructFlashParams(i + 1, pageContext, props.mtp, props.mhost, props.mhi, props.mpebt, props.madif, props.p_s);
    var pageContextCodes = pageContext[i].split('-');
    // Codes 1 and 2 are used as the page width and height; explicit radix
    // keeps parsing decimal, consistent with myParseInt.
    var width = parseInt(pageContextCodes[1], 10) * program.scaleUpFactor;
    var height = parseInt(pageContextCodes[2], 10) * program.scaleUpFactor;
    var html =
      `<!DOCTYPE html>\n<html><body style="width:${width}px;height:${height}px;overflow:hidden;margin:0">` +
      '<object type="application/x-shockwave-flash" data="http://assets.doc88.com/assets/swf/pv.swf?v=1.7" width="100%" height="100%" style="visibility: visible;">' +
      '<param name="hasPriority" value="true"><param name="wmode" value="transparent"><param name="swliveconnect" value="true">' +
      '<param name="FlashVars" value="' + flashVars + '">' +
      '<param name="allowScriptAccess" value="always"></object></body></html>';
    var htmlFile = path.join(htmlDir, `${padLeft(i + 1, totalPageStrLength)}.html`);
    fs.writeFileSync(htmlFile, html);
    htmlFiles.push(htmlFile);
    pageDimension[i] = {
      width: width,
      height: height
    };
  }
  console.log(`Wrote page html to directory '${htmlDir}'`);

  // Remove the request filter installed in main() so the page files can
  // load their resources.
  session.defaultSession.webRequest.onBeforeSendHeaders(null);

  async.eachOfLimit(htmlFiles, program.concurrentWorker, function (item, key, callback) {
    var file = htmlFiles[key];
    var pngFile = path.join(pngDir, `${padLeft(key + 1, totalPageStrLength)}.png`);
    if (!program.forceDownload && fs.existsSync(pngFile)) {
      console.log(`Image for page ${key + 1} already exists, skip processing '${file}'`);
      return callback();
    }

    // The solid red background is what the capture check below compares
    // against to decide whether the page content has been drawn yet.
    var win = new BrowserWindow({
      useContentSize: true,
      frame: false,
      show: false,
      autoHideMenuBar: true,
      enableLargerThanScreen: true,
      backgroundColor: '#FF0000',
      webPreferences: {
        plugins: true
      }
    });

    win.webContents.once('did-finish-load', function () {
      debug('evt: did-finish-load: `%s`', file);
      win.setSize(pageDimension[key].width, pageDimension[key].height);
      var resultImage;
      var compareImage;
      var redImage;
      var checkCount = 0;

      async.doWhilst(function (callback) {
        // Wait, capture the window, and prepare a downscaled copy plus a
        // same-sized all-red image for the comparison step.
        setTimeout(function () {
          debug("capturing image of '%s'", file);
          win.capturePage(function (nativeImage) {
            resultImage = nativeImage;
            jimp.read(nativeImage.resize({ height: 480, quality: 'good' }).toPNG(), function (err, jimpImage) {
              if (err) return callback(new Error('Unable to convert NativeImage to Jimp image'));
              compareImage = jimpImage;
              new jimp(compareImage.bitmap.width, compareImage.bitmap.height, 0xFF0000FF, function (err, jimgRedImage) {
                if (err) return callback(new Error('Unable to create red image for comparison'));
                redImage = jimgRedImage;
                callback();
              });
            });
          });
        }, program.wait);
      }, function () {
        if (program.skipCheck) return false;
        debug("checking image captured from '%s'", file);
        var diff = jimp.diff(compareImage, redImage);
        debug('image pixel diff: %s', diff.percent);
        // Capture again (up to 10 retries) while the image is still almost
        // entirely red.
        return checkCount++ < 10 && diff.percent < 0.005;
      }, function (err) {
        if (err) {
          // A failed page is reported but does not abort the other pages.
          console.log("Unable to capture image of '%s': %s", file, err);
          win.destroy();
          callback();
        } else {
          fs.writeFile(pngFile, resultImage.toPNG(), function (err) {
            if (err) {
              console.log("Unable to write file '%s': %s", pngFile, err);
            } else {
              console.log(`Processed '${file}', image saved at '${pngFile}'`);
            }
            win.destroy();
            callback();
          });
        }
      });
    });

    debug('load ' + file);
    win.loadURL('file://' + file);
  }, function (err) {
    // console.error is the correct API; `console.err` does not exist and
    // would throw exactly when an error needs reporting.
    if (err) console.error(err);
    console.log('Processed all pages');
    app.exit(0);
  });

  // Unblock the renderer's synchronous send, then drop the source window.
  evt.returnValue = true;
  mainWin.destroy();

  /**
   * Left-pad `nr` to `n` characters using `str` (default '0').
   */
  function padLeft(nr, n, str) {
    return Array(n - String(nr).length + 1).join(str || '0') + nr;
  }
});