cejs
Version:
A JavaScript module framework that is simple to use.
554 lines (491 loc) • 18.3 kB
JavaScript
/**
* @name CeL module for downloading qTcms version 20170501-20190606010315
* comics.
*
* @fileoverview 本檔案包含了解析並處理、批量下載中國大陸常見漫畫管理系統: 晴天漫画CMS (晴天漫画系统 晴天漫画程序, 晴天新漫画系统)
* PC端网站 + 手机端网站(行動版 mobile version) 的工具。
*
* <code>
CeL.qTcms2017(configuration).start(work_id);
</code>
*
* modify from 9mdm.js→dagu.js, mh160.js
*
* @see qTcms 晴天漫画程序 晴天漫画系统 http://manhua.qingtiancms.com/
*
* @since 2019/2/3 模組化。
*/
// More examples:
// @see comic.cmn-Hans-CN/nokiacn.js
;
// --------------------------------------------------------------------------------------------
// 不採用 if 陳述式,可以避免 Eclipse JSDoc 與 format 多縮排一層。
// Hand this module's description to CeL.run(). Because of the short-circuit
// `&&`, nothing is executed when `CeL` is not a function.
typeof CeL === 'function' && CeL.run({
// module name
name : 'application.net.work_crawler.sites.qTcms2017',
// NOTE(review): presumably the module(s) that must be loaded before `code`
// runs -- confirm against the CeL.run() documentation.
require : 'application.net.work_crawler.',
// Configure the sub-functions that are NOT exported.
no_extend : '*',
// The module body is kept in a separate function (module_code) so that the
// code is easier to format.
code : module_code
});
function module_code(library_namespace) {
// requiring
// --------------------------------------------------------------------------------------------
var default_configuration = {
// 所有的子檔案要修訂註解說明時,應該都要順便更改在CeL.application.net.comic中Comic_site.prototype內的母comments,並以其為主體。
// 因為要經過轉址,所以一個圖一個圖來。
// one_by_one : true,
// base_URL : '',
// fs.readdirSync('.').forEach(function(d){if(/^\d+\s/.test(d))fs.renameSync(d,'manhua-'+d);})
// fs.readdirSync('.').forEach(function(d){if(/^manhua-/.test(d))fs.renameSync(d,d.replace(/^manhua-/,''));})
// 所有作品都使用這種作品類別catalog前綴。
// common_catalog : 'manhua',
// 規範 work id 的正規模式;提取出引數中的作品id 以回傳。
extract_work_id : function(work_information) {
if ((this.common_catalog ? /^[a-z\-\d]+$/ : /^[a-z]+_[a-z\-\d]+$/)
.test(work_information))
return work_information;
},
// --------------------------------------
// search comic via web page
// 解析 作品名稱 → 作品id get_work()
search_URL_web : 'statics/search.aspx?key=',
parse_search_result_web : function(html, get_label) {
// console.log(html);
html = html.between('<div class="cy_list">', '</div>');
// console.log(html);
var id_list = [], id_data = [];
html.each_between('<li class="title">', '</li>', function(token) {
// console.log(token);
var matched = token.match(
// [ id, title ]
/<a href="\/([a-z]+\/[a-z\-\d]+)\/"[^<>]*?>([^<>]+)/);
// console.log(matched);
if (this.common_catalog
// 去掉所有不包含作品類別catalog前綴者。
&& !matched[1].startsWith(this.common_catalog + '/'))
return;
id_list.push(this.common_catalog
//
? matched[1].slice((this.common_catalog + '/').length)
// catalog/latin name
: matched[1].replace('/', '_'));
id_data.push(get_label(matched[2]));
}, this);
// console.log([ id_list, id_data ]);
return [ id_list, id_data ];
},
// --------------------------------------
// default: search comic via API
// copy from 360taofu.js
// 解析 作品名稱 → 作品id get_work()
search_URL : function(work_title) {
return [ 'statics/qingtiancms.ashx', {
cb : 'jQuery' + ('1.7.2' + Math.random()).replace(/\D/g, "")
// @see .expando
+ '_' + Date.now(),
key : work_title,
action : 'GetSear1',
_ : Date.now()
} ];
},
parse_search_result : function(html, get_label) {
// console.log(html);
var data = eval(html.between('(', {
tail : ')'
}));
// console.log(data);
return [ data, data ];
},
id_of_search_result : function(data) {
// console.log(data);
// PC version: .u: webdir + classid1pinyin + titlepinyin + "/"
// webdir: "/"
// classid1pinyin: latin + "/"
// titlepinyin: latin
var matched = data.u
// mobile version
|| data.url;
matched = matched.match(/(?:\/|^)([a-z]+)\/([a-z\-\d]+)\/$/);
// assert: !!matched === true
if (!this.common_catalog)
return matched[1] + '_' + matched[2];
// assert: this.common_catalog === matched[1]
return matched[2];
},
title_of_search_result : 't',
// --------------------------------------
// for mobile version
// 解析 作品名稱 → 作品id get_work()
search_URL_mobile : function(work_title) {
return [ 'statics/qingtiancms.ashx', {
action : 'GetWapSear1',
key : work_title
} ];
},
// Parse the response of the mobile search API. Returns [ data, data ], where
// data is the `.data` member of the evaluated response, or an empty array
// when evaluating the response throws.
parse_search_result_mobile : function(html, get_label) {
/**
* The response is a JavaScript object literal, not strict JSON (unquoted
* keys, single-quoted strings), which is why it is evaluated.
*
* @example <code>
{"result": 1000,"msg": "提交成功","data": [{name:'读书成圣',last_update_chapter_name:'014 禁忌十八式',last_updatetime:'',types:'',authors:'',url:'/rexue/dushuchengsheng/'}],"page_data": ""}
</code>
*/
// console.log(JSON.stringify(html));
var data;
try {
// NOTE(review): eval() executes text received from the remote site. The
// evaluated string assigns to the local variable `data`, so that variable
// must not be renamed.
eval('data=' + html);
data = data.data;
} catch (e) {
// The response could not be evaluated, e.g., "{err!}"
data = [];
}
// console.log(data);
return [ data, data ];
},
title_of_search_result_mobile : 'name',
// --------------------------------------
// 取得作品的章節資料。 get_work_data()
work_URL : function(work_id) {
return (this.common_catalog ? this.common_catalog + '/' + work_id
// replace only the first '_' to '/'
: work_id.replace('_', '/')) + '/';
},
// Build the work information object from the HTML of the work page: start from
// the page's `qingtiancms_Details` variable when present, let
// extract_work_data() add fields from several page regions, then set the
// properties that depend on the platform (this.is_mobile).
parse_work_data : function(html, get_label, extract_work_data) {
// console.log(html);
var work_data = html.between('qingtiancms_Details=', ';var');
if (work_data) {
/**
* PC version:
*
* @example <code>
var qingtiancms_Details={G_mubanpage:".html",id:"6638",hits:"9454",webdir:"/",pinglunid:"10",pinglunid1:"",pinglunid2:"cytdbnhsU",pinglunid3:"prod_1368b8102b9177303c660debbbbd257c",title:"读书成圣",classid1pinyin:"rexue/",titlepinyin:"dushuchengsheng"};var uyan_config = {'su':'/6638/'};
</code>
*/
// NOTE(review): eval() executes text taken from the remote page. The
// evaluated string assigns to the local variable `work_data`, so that
// variable must not be renamed.
eval('work_data=' + work_data);
} else {
// dagu.js: has NO `qingtiancms_Details`
work_data = Object.create(null);
}
// PC version: nokiacn.js, iqg365.js, 733dm.js
extract_work_data(work_data, html.between(
// <div class="cy_title">\n <h1>相合之物</h1>
'<h1>', ' id="comic-description">'),
/<span>([^<>:]+):([\s\S]*?)<\/span>/g);
// PC version: 360taofu.js
extract_work_data(work_data, html.between(
// <div class="mh-date-info fl">\n <div class="mh-date-info-name">
'<div class="mh-date-info', '<div class="work-author">'),
// <span class="one"> 作者: <em>... </span>
// <span> 人气: <em... </span>
// Labels seen on the page: 人气: 收藏数: 吐槽: 状态:
/<span[^<>]*>([^<>:]+):([\s\S]*?)<\/span>/g);
// Common to the PC versions
extract_work_data(work_data, html.between(
// <div class="cy_zhangjie">...<div class="cy_zhangjie_top">
'<div class="cy_zhangjie_top">',
// <div class="cy_plist" id="play_0">
' class="cy_plist"'), /<p>([^<>:]+):([\s\S]*?)<\/p>/g);
// Common to the PC version and the mobile version
extract_work_data(work_data, html);
Object.assign(work_data, this.is_mobile ? {
// Required properties: must be adapted to the site platform.
last_update : html.between('<span class="date">', '</span>'),
// Optional properties: must be adapted to the site platform.
// The description shown in the page is more complete than the one in <meta>.
description : get_label(html.between(
// Site: 友绘漫画网
// <p class="txtDesc autoHeight">介绍:...</p>
'<p class="txtDesc autoHeight">', '</p>'))
} : {
// Keep the site's own id under another key to avoid it being overwritten.
qTid : work_data.id,
// Required properties: must be adapted to the site platform.
title : work_data.title
// nokiacn.js, iqg365.js, 733dm.js
|| get_label(html.between('<h1>', '</h1>')),
author : work_data.作者,
status : work_data.状态,
last_update : work_data.更新时间,
latest_chapter : work_data.最新话,
latest_chapter_url : html.between('最新话:<a href="', '"'),
// Optional properties: must be adapted to the site platform.
评分 : work_data.评分 || get_label(html.between(
// 360taofu.js: <p class="fl">评分:<strong class="ui-text-orange"
// id="comicStarDis">...</p>
' id="comicStarDis">', '</p>')),
// The description shown in the page is more complete than the one in <meta>.
description : get_label(html.between(
// nokiacn.js, iqg365.js, 733dm.js
// <p id="comic-description">...</p>
' id="comic-description">', '</')) || get_label(html.between(
// 360taofu.js: <div id="workint" class="work-ov">
' id="workint"', '</div>').between('>'))
});
// console.log(work_data);
return work_data;
},
get_chapter_list : function(work_data, html, get_label) {
html = html.between('<div class="cy_plist', '</div>')
// mobile version: <div id="list">
// <ul class="Drama autoHeight" id="mh-chapter-list-ol-0">
// 88bag.js: <div id="list" >
|| html.between('<div id="list"', '</ul>');
// console.log(html);
/**
* <code>
76.js: <li><a target="_blank" href="/chuanyue/tangyinzaiyijie/351280.html"><p>杀手唐寅</p><i></i></a></li>
</code>
*/
var matched, PATTERN_chapter =
// matched: [ all, url, inner ]
/<li><a [^<>]*?href="([^<>"]+)"[^<>]*>([\s\S]+?)<\/li>/g;
work_data.chapter_list = [];
while (matched = PATTERN_chapter.exec(html)) {
var chapter_data = {
url : matched[1],
title : get_label(matched[2])
};
work_data.chapter_list.push(chapter_data);
}
// PC version, mobile version 共通
work_data.chapter_list.reverse();
// console.log(work_data.chapter_list);
},
parse_chapter_data : function(html, work_data, get_label, chapter_NO) {
// modify from mh160.js
// console.log(html);
var chapter_data = html.between('qTcms_S_m_murl_e="', '"');
// console.log(chapter_data);
if (chapter_data) {
// 對於非utf-8編碼之中文,不能使用 atob()
// e.g., http://www.aikanmh.cn/xuanhuan/zhutianji/499631.html
chapter_data = base64_decode(chapter_data)
.split("$qingtiandy$");
}
if (!Array.isArray(chapter_data)) {
library_namespace.warn({
// gettext_config:{"id":"unable-to-parse-chapter-data-for-«$1»-§$2"}
T : [ '無法解析《%1》§%2 之章節資料!', work_data.title, chapter_NO ]
});
return;
}
// console.log(chapter_data);
// console.log(JSON.stringify(chapter_data));
// console.log(chapter_data.length);
// library_namespace.set_debug(6);
// e.g., http://m.88bag.net/rexue/zuomeigongyu/36279.html
// @see
// http://m.88bag.net/template/wap1/css/d7s/js/show.20170501.js?20190722091626
if (chapter_data.length === 1
&& /^(--|\+)https?:\/\//.test(chapter_data[0])) {
chapter_data = {
limited : chapter_data[0].startsWith('+') ? '对不起,该章节已经下架!!本站仅提供检索服务,请尊重作品版权'
: '请点击下方链接开始观看本期漫画:' + chapter_data[0].slice(2)
};
return chapter_data;
}
// 設定必要的屬性。
chapter_data = {
image_list : chapter_data.map(function(url) {
// 2019/10/20: 採用 base64_decode() 取代 atob() 後,
// aikanmh 不可再 encodeURI()。
// url = encodeURI(url);
// 获取当前图片 function f_qTcms_Pic_curUrl_realpic(v)
// http://www.xatxwh.com/template/skin1/css/d7s/js/show.20170501.js?20190117082944
// f_qTcms_Pic_curUrl() → f_qTcms_Pic_curUrl_realpic(v) @
// http://www.nokiacn.net/template/skin2/css/d7s/js/show.20170501.js?20180805095630
if (this.for_each_image) {
// 733dm.js
// for_each_image:function(url,parameters,base64_encode){return(url);}
url = this.for_each_image(url, {
qTcms_S_m_id : html
.between('qTcms_Pic_m_if="', '"'),
qTcms_S_p_id : html.between('qTcms_S_p_id="', '"')
}, base64_encode);
} else if (url.startsWith('/')) {
// e.g., nokiacn.js
var image_base_url = this.image_base_url;
if (!image_base_url && image_base_url !== '') {
// default: url = qTcms_m_weburl + url;
image_base_url = html.between('qTcms_m_weburl="',
'"');
}
url = image_base_url + url;
} else if (html.between('qTcms_Pic_m_if="', '"') !== "2") {
// e.g.,
// http://www.nokiacn.net/lianai/caozuo100/134257.html
url = url.replace(/\?/gi, "a1a1")
.replace(/&/gi, "b1b1").replace(/%/gi, "c1c1");
url = (this.qTcms_m_indexurl
// this.qTcms_m_indexurl: e.g., 517.js
|| html.between('qTcms_m_indexurl="', '"') || '/')
+ "statics/pic/?p="
+ escape(url)
+ "&picid="
+ html.between('qTcms_S_m_id="', '"')
+ "&m_httpurl="
+ escape(base64_decode(html.between(
'qTcms_S_m_mhttpurl="', '"')));
// Should get Status Code: 302 Found
}
return {
url : url
};
}, this)
};
// console.log(JSON.stringify(chapter_data));
return chapter_data;
}
};
// --------------------------------------------------------------------------------------------
// http://www.aikanmh.cn/template/skin1/css/d7s/js/show.20170501.js?20191014154954
/**
 * Decode a string whose char codes are UTF-8 bytes into a JavaScript string.
 *
 * 1-, 2- and 3-byte sequences are decoded as before. 4-byte sequences (lead
 * byte 0xF0-0xF7) are decoded into a UTF-16 surrogate pair instead of being
 * misread as a 3-byte sequence. Any other lead byte is treated as the start
 * of a 3-byte sequence, as before. Malformed input never throws.
 *
 * @param str_data
 *            the byte string; converted to a string first.
 *
 * @returns {String} the decoded text.
 */
function utf8_decode(str_data) {
	var decoded = [], i = 0, c1, c2, c3, c4, code_point;
	str_data += '';
	while (i < str_data.length) {
		c1 = str_data.charCodeAt(i);
		if (c1 < 128) {
			// single byte (ASCII)
			decoded.push(String.fromCharCode(c1));
			i++;
		} else if (c1 > 191 && c1 < 224) {
			// 2-byte sequence: 110xxxxx 10xxxxxx
			c2 = str_data.charCodeAt(i + 1);
			decoded.push(String.fromCharCode(((c1 & 31) << 6) | (c2 & 63)));
			i += 2;
		} else if (c1 > 239 && c1 < 248) {
			// 4-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			c2 = str_data.charCodeAt(i + 1);
			c3 = str_data.charCodeAt(i + 2);
			c4 = str_data.charCodeAt(i + 3);
			code_point = ((c1 & 7) << 18) | ((c2 & 63) << 12)
					| ((c3 & 63) << 6) | (c4 & 63);
			// Code points above U+FFFF need a surrogate pair in UTF-16.
			code_point -= 0x10000;
			decoded.push(String.fromCharCode(0xD800 + (code_point >> 10),
					0xDC00 + (code_point & 0x3FF)));
			i += 4;
		} else {
			// 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
			c2 = str_data.charCodeAt(i + 1);
			c3 = str_data.charCodeAt(i + 2);
			decoded.push(String.fromCharCode(((c1 & 15) << 12)
					| ((c2 & 63) << 6) | (c3 & 63)));
			i += 3;
		}
	}
	return decoded.join('');
}
// 對於非utf-8編碼之中文,不能使用 atob()
/**
 * Decode base64 text whose bytes are UTF-8 and return the decoded string.
 *
 * Input without its trailing "=" padding is accepted: the padding is restored
 * before decoding. Previously the missing symbols were looked up as '', found
 * at index 0 and decoded as "A", which appended NUL characters to the result.
 *
 * @param data
 *            base64 text; a falsy value is returned unchanged.
 *
 * @returns {String} the decoded text (passed through utf8_decode()).
 */
function base64_decode(data) {
	var b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
	var o1, o2, o3, h1, h2, h3, h4, bits, i = 0, tmp_arr = [];
	if (!data) {
		return data;
	}
	data += '';
	// Restore omitted padding so that every group has 4 symbols.
	while (data.length % 4 !== 0) {
		data += '=';
	}
	do {
		// Unpack four 6-bit symbols into three octets; index 64 is "=".
		h1 = b64.indexOf(data.charAt(i++));
		h2 = b64.indexOf(data.charAt(i++));
		h3 = b64.indexOf(data.charAt(i++));
		h4 = b64.indexOf(data.charAt(i++));
		bits = h1 << 18 | h2 << 12 | h3 << 6 | h4;
		o1 = bits >> 16 & 0xff;
		o2 = bits >> 8 & 0xff;
		o3 = bits & 0xff;
		if (h2 === 64) {
			// A dangling single symbol carries only 6 bits: no complete
			// octet, so nothing is emitted.
		} else if (h3 === 64) {
			tmp_arr.push(String.fromCharCode(o1));
		} else if (h4 === 64) {
			tmp_arr.push(String.fromCharCode(o1, o2));
		} else {
			tmp_arr.push(String.fromCharCode(o1, o2, o3));
		}
	} while (i < data.length);
	return utf8_decode(tmp_arr.join(''));
}
// ------------------------------------------
/**
 * Encode a JavaScript string as UTF-8, returned as a string with one char
 * code per byte.
 *
 * A surrogate pair is combined into its code point and encoded as one 4-byte
 * sequence (previously each half was encoded as a separate 3-byte sequence,
 * which is not valid UTF-8). A lone surrogate is still encoded as 3 bytes, so
 * the function never throws.
 *
 * @param argString
 *            the text; converted to a string first.
 *
 * @returns {String} the UTF-8 byte string.
 */
function utf8_encode(argString) {
	var string = argString + '';
	var utftext = '';
	for (var n = 0; n < string.length; n++) {
		var code_point = string.charCodeAt(n);
		if (code_point >= 0xD800 && code_point <= 0xDBFF
				&& n + 1 < string.length) {
			var low = string.charCodeAt(n + 1);
			if (low >= 0xDC00 && low <= 0xDFFF) {
				// high + low surrogate → supplementary code point
				code_point = 0x10000 + ((code_point - 0xD800) << 10)
						+ (low - 0xDC00);
				n++;
			}
		}
		if (code_point < 128) {
			utftext += String.fromCharCode(code_point);
		} else if (code_point < 2048) {
			utftext += String.fromCharCode((code_point >> 6) | 192,
					(code_point & 63) | 128);
		} else if (code_point < 0x10000) {
			utftext += String.fromCharCode((code_point >> 12) | 224,
					((code_point >> 6) & 63) | 128, (code_point & 63) | 128);
		} else {
			utftext += String.fromCharCode((code_point >> 18) | 240,
					((code_point >> 12) & 63) | 128,
					((code_point >> 6) & 63) | 128, (code_point & 63) | 128);
		}
	}
	return utftext;
}
// btoa()
/**
 * Encode text as base64 (the text goes through utf8_encode() first).
 *
 * @param data
 *            the text; a falsy value is returned unchanged.
 *
 * @returns {String} the base64 text, padded with "=".
 */
function base64_encode(data) {
	var ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
	if (!data) {
		return data;
	}
	var bytes = utf8_encode(data + '');
	var quads = [];
	var offset = 0;
	do {
		// Pack three octets into 24 bits; reading past the end gives NaN,
		// which the bit operators treat as 0.
		var triple = bytes.charCodeAt(offset) << 16
				| bytes.charCodeAt(offset + 1) << 8
				| bytes.charCodeAt(offset + 2);
		offset += 3;
		// ...and split the 24 bits into four 6-bit symbols.
		quads.push(ALPHABET.charAt(triple >> 18 & 0x3f)
				+ ALPHABET.charAt(triple >> 12 & 0x3f)
				+ ALPHABET.charAt(triple >> 6 & 0x3f)
				+ ALPHABET.charAt(triple & 0x3f));
	} while (offset < bytes.length);
	var encoded = quads.join('');
	// Replace the symbols produced from missing octets with padding.
	var remainder = bytes.length % 3;
	if (remainder === 1) {
		return encoded.slice(0, -2) + '==';
	}
	if (remainder === 2) {
		return encoded.slice(0, -1) + '=';
	}
	return encoded;
}
// --------------------------------------------------------------------------------------------
/**
 * Create a work crawler for one qTcms site.
 *
 * The search functions are chosen from `configuration`: the web page search
 * when `.using_web_search` is set; otherwise, when `.is_mobile` is not given,
 * the mobile API if `.base_URL` contains "://m.". Every property of
 * `configuration` finally overrides the defaults.
 *
 * @param {Object}configuration
 *            site specific settings.
 *
 * @returns a new library_namespace.work_crawler instance on every call.
 */
function new_qTcms2017_comics_crawler(configuration) {
	// Work on a copy of the defaults (Object.clone is provided by the library).
	var settings = Object.clone(default_configuration);

	if (configuration.using_web_search) {
		settings.search_URL = settings.search_URL_web;
		settings.parse_search_result = settings.parse_search_result_web;
		settings.id_of_search_result = null;
		settings.title_of_search_result = null;
	} else if (configuration.is_mobile === undefined) {
		var is_mobile = configuration.base_URL.includes('://m.');
		settings.is_mobile = is_mobile;
		if (is_mobile) {
			settings.search_URL = settings.search_URL_mobile;
			settings.parse_search_result = settings.parse_search_result_mobile;
			settings.title_of_search_result = settings.title_of_search_result_mobile;
		}
	}

	// The caller's settings win over everything set above.
	Object.assign(settings, configuration);
	return new library_namespace.work_crawler(settings);
}
return new_qTcms2017_comics_crawler;
}