// ptt-scr
// Version:
// 168 lines (145 loc) • 5.74 kB
// JavaScript
;
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.default = void 0;
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
var _request = _interopRequireDefault(require("./request"));
var _cheerio = _interopRequireDefault(require("cheerio"));
var _getUrlContent = _interopRequireDefault(require("./getUrlContent"));
// --- CSS selectors used when scraping an article page ----------------------

// Root container that every other selector below is scoped to.
const mainContentSelector = 'div#main-content';

// Value cells of the header rows, picked by their child position (1st, 3rd
// and 4th child) inside the root container.
const authorSelector = 'div#main-content .article-metaline:nth-child(1) .article-meta-value';
const titleSelector = 'div#main-content .article-metaline:nth-child(3) .article-meta-value';
const timeSelector = 'div#main-content .article-metaline:nth-child(4) .article-meta-value';

// One element per reader reply ("push") line.
const pushSelector = 'div#main-content div.push';

// Spans carrying the `f2` class; scanned for lines that contain an IP.
const spanF2Selector = 'div#main-content span.f2';

// Every anchor inside the root container.
const contentLinkSelector = 'div#main-content a';

// Dotted-quad pattern: four dot-separated groups of 1-3 digits. Octets are
// not range-checked, so e.g. "999.999.999.999" also matches. No `g` flag, so
// `test`/`exec` carry no `lastIndex` state between calls.
const ipReg = /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/;
/**
 * Reports whether a value is a primitive string.
 *
 * @param {*} str - Value to inspect.
 * @returns {boolean} `true` only when `typeof` yields 'string'; boxed
 *   `String` objects and every other type give `false`.
 */
function isStr(str) {
  return typeof str === 'string';
}
/**
 * Loosely checks whether a string looks like a URL: an optional
 * http/https/ftp/file scheme, a host made of lowercase letters, digits,
 * dots, dashes (and backslashes), a 2-6 character suffix, and an optional
 * path of word characters, slashes, spaces, dots and dashes.
 *
 * The path group is matched with a single `*`. The earlier `(...*)*` form
 * accepted exactly the same strings but backtracked exponentially when a
 * long path was followed by a character outside the class (for example a
 * `?`), which allowed a crafted link to stall the process.
 *
 * @param {string} str - Candidate text (coerced to string by `RegExp#test`).
 * @returns {boolean} `true` when the whole string matches the pattern.
 */
var isUrl = function isUrl(str) {
  return /^((https?|ftp|file):\/\/)?([\da-z\\.-]+)\.([a-z\\.]{2,6})([\\/\w \\.-]*)\/?$/.test(str);
};
/**
 * Normalises the scraper input into something that may carry a `link`.
 *
 * @param {*} item - Either a URL string or an already-built item value.
 * @returns {*} For non-strings, `item` itself (same reference, untouched);
 *   for a string accepted by `isUrl`, a new `{ link: item }` object;
 *   for any other string, `false`.
 */
function initItemLink(item) {
  // Anything that is not a string is passed through as-is.
  if (!isStr(item)) {
    return item;
  }
  // A string that does not look like a URL yields `false`, not an object.
  if (!isUrl(item)) {
    return false;
  }
  return {
    link: item
  };
}
/**
 * Default export: downloads one article page and parses it into plain data.
 *
 * This is Babel/regenerator output for an async function, so the body is a
 * state machine; the `case` labels and `_context.next` targets must stay in
 * step with each other.
 *
 * @param {string|Object} item - A URL string, or an object with a `link`
 *   property. The argument is never mutated; a shallow copy is used.
 * @returns {Promise<Object>} `{}` when no usable `link` is present.
 *   Otherwise the shallow copy of `item` with a `content` property holding
 *   `author`, `title`, `datetime`, `urls`, `quoteFrom`, `quote`, `content`,
 *   `publishIP`, `editedIP`, `reply`, `push`, `boo` and `neutral`.
 *   Errors from the request helpers or from cheerio are not caught here.
 */
var _default =
/*#__PURE__*/
function () {
  var _ref = (0, _asyncToGenerator2.default)(
  /*#__PURE__*/
  _regenerator.default.mark(function _callee(item) {
    var session, html, $, author, title, datetime, publishIP, editedIP, reply, push, neutral, boo, urls, quoteFrom, quote, content;
    return _regenerator.default.wrap(function _callee$(_context) {
      while (1) {
        switch (_context.prev = _context.next) {
          case 0:
            // Work on a shallow copy. `initItemLink` may hand back `false`,
            // `null` or `undefined`, all of which Object.assign ignores.
            item = Object.assign({}, initItemLink(item));

            if (item.link) {
              _context.next = 3;
              break;
            }

            // Nothing to fetch.
            return _context.abrupt("return", {});

          case 3:
            _context.next = 5;
            return (0, _request.default)();

          case 5:
            session = _context.sent;
            _context.next = 8;
            return (0, _getUrlContent.default)(session, item.link);

          case 8:
            html = _context.sent;
            // Parser options are kept exactly as they were.
            $ = _cheerio.default.load(html, {
              withDomLvl1: true,
              normalizeWhitespace: true,
              xmlMode: true
            });

            author = $(authorSelector).text();
            title = $(titleSelector).text();
            datetime = $(timeSelector).text();

            // Scan the child nodes of every `span.f2` for an IP address.
            // Nodes mentioning '發信站' feed publishIP, nodes mentioning
            // '編輯' feed editedIP; when several nodes qualify the last wins.
            publishIP = '';
            editedIP = '';
            $(spanF2Selector).contents().each(function (i, node) {
              var text = node.data;
              // Run the regex once instead of test() followed by exec().
              var ipMatch = text ? ipReg.exec(text) : null;

              if (!ipMatch) {
                return;
              }

              if (text.includes('發信站')) {
                publishIP = ipMatch[0];
              }

              if (text.includes('編輯')) {
                editedIP = ipMatch[0];
              }
            });

            // Reader replies: every entry goes into `reply`, and additionally
            // into `push`, `neutral` or `boo` according to its tag.
            reply = [];
            push = [];
            neutral = [];
            boo = [];
            $(pushSelector).each(function (i, el) {
              var fields = $(el).children();
              var tag = fields.filter('.push-tag').text().trim();
              var userid = fields.filter('.push-userid').text().trim();
              // A string pattern removes only the FIRST ':'. trim() runs
              // before the removal, so whitespace following that colon stays.
              var text = fields.filter('.push-content').text().trim().replace(':', '');
              var ipAndTime = fields.filter('.push-ipdatetime').text().trim();
              var ip = ipAndTime.match(/(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/g);
              var time = ipAndTime.match(/(\d{1,2}\/\d{1,2}\s\d{1,2}:\d{1,2})/g);
              var entry = {
                tag: tag,
                userid: userid,
                content: text
              };

              // `ip` / `time` are only present when something matched;
              // multiple matches are joined with ','.
              if (ip) {
                entry.ip = ip.join();
              }

              if (time) {
                entry.time = time.join();
              }

              reply.push(entry);

              if (tag === '推') {
                push.push(entry);
              }

              if (tag === '→') {
                neutral.push(entry);
              }

              if (tag === '噓') {
                boo.push(entry);
              }
            });

            // Link list: the visible text of each anchor (not its href);
            // anchors with empty text are skipped.
            urls = [];
            $(contentLinkSelector).each(function (i, el) {
              var linkText = $(el).text();

              if (linkText) {
                urls.push(linkText);
              }
            });

            // Text of the direct children carrying class `f2` / `f6`.
            quoteFrom = [];
            $(mainContentSelector).children('.f2').each(function (index, ele) {
              quoteFrom.push($(ele).text());
            });
            quote = [];
            $("".concat(mainContentSelector)).children('.f6').each(function (index, ele) {
              quote.push($(ele).text());
            });

            // Must run last: this strips every child element from the parsed
            // document so that only the container's own text is left.
            content = $(mainContentSelector).children().remove().end().text();

            item.content = {
              author: author,
              title: title,
              datetime: datetime,
              urls: urls,
              quoteFrom: quoteFrom,
              quote: quote,
              content: content,
              publishIP: publishIP,
              editedIP: editedIP,
              reply: reply,
              push: push,
              boo: boo,
              neutral: neutral
            };
            return _context.abrupt("return", item);

          case 30:
          case "end":
            return _context.stop();
        }
      }
    }, _callee, this);
  }));

  return function (_x) {
    return _ref.apply(this, arguments);
  };
}();
exports.default = _default;