UNPKG

ptt-scr

Version:
168 lines (145 loc) 5.74 kB
"use strict";

var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.default = void 0;

var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));

var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));

var _request = _interopRequireDefault(require("./request"));

var _cheerio = _interopRequireDefault(require("cheerio"));

var _getUrlContent = _interopRequireDefault(require("./getUrlContent"));

// CSS selectors used to pick the article parts out of the fetched page.
var mainContentSelector = 'div#main-content';
var authorSelector = 'div#main-content .article-metaline:nth-child(1) .article-meta-value';
var titleSelector = 'div#main-content .article-metaline:nth-child(3) .article-meta-value';
var timeSelector = 'div#main-content .article-metaline:nth-child(4) .article-meta-value';
var pushSelector = 'div#main-content div.push';
var spanF2Selector = 'div#main-content span.f2';
var contentLinkSelector = 'div#main-content a';

// First dotted-quad (IPv4-looking) token in a string. No `g` flag, so
// `exec` is stateless and the regex can be shared safely.
var ipReg = /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/;

// Global variants used only with String#match, which resets `lastIndex`
// itself, so sharing them across calls is safe.
var pushIpReg = /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/g;
var pushTimeReg = /(\d{1,2}\/\d{1,2}\s\d{1,2}:\d{1,2})/g;

/**
 * @param {*} str - Any value.
 * @returns {boolean} True when `str` is a primitive string.
 */
var isStr = function isStr(str) {
  return typeof str === 'string';
};

/**
 * Loose URL check: optional http/https/ftp/file scheme, a host, a 2-6
 * character suffix and an optional path.
 *
 * The trailing path part is a single `[...]*` rather than the nested
 * `([...]*)*`; both accept exactly the same strings, but the nested form
 * backtracks exponentially on long non-matching input.
 *
 * @param {string} str - Candidate URL.
 * @returns {boolean} True when `str` matches the pattern.
 */
var isUrl = function isUrl(str) {
  return /^((https?|ftp|file):\/\/)?([\da-z\\.-]+)\.([a-z\\.]{2,6})[\\/\w \\.-]*\/?$/.test(str);
};

/**
 * Normalises the caller's argument: a string becomes `{ link: str }` when it
 * passes `isUrl` (otherwise `false`); any non-string value is returned as is.
 *
 * @param {*} item - URL string or an item object.
 * @returns {*} `{ link }`, `false`, or the original non-string value.
 */
var initItemLink = function initItemLink(item) {
  return isStr(item) ? isUrl(item) && {
    link: item
  } : item;
};

/**
 * Scans the text nodes inside `span.f2` elements for IP addresses.
 * A node containing '發信站' supplies `publishIP`; a node containing '編輯'
 * supplies `editedIP`. When several nodes match, the last one wins.
 *
 * @param {Function} $ - Loaded cheerio instance.
 * @returns {{publishIP: string, editedIP: string}} Empty strings when absent.
 */
function extractIPs($) {
  var ips = {
    publishIP: '',
    editedIP: ''
  };
  $(spanF2Selector).contents().each(function (i, el) {
    if (!el.data) {
      return;
    }

    var found = ipReg.exec(el.data);

    if (!found) {
      return;
    }

    if (el.data.includes('發信站')) {
      ips.publishIP = found[0];
    }

    if (el.data.includes('編輯')) {
      ips.editedIP = found[0];
    }
  });
  return ips;
}

/**
 * Collects every `div.push` entry and buckets it by tag:
 * '推' -> push, '→' -> neutral, '噓' -> boo. Every entry also goes to `reply`.
 *
 * @param {Function} $ - Loaded cheerio instance.
 * @returns {{reply: Object[], push: Object[], neutral: Object[], boo: Object[]}}
 */
function collectReplies($) {
  var buckets = {
    reply: [],
    push: [],
    neutral: [],
    boo: []
  };
  $(pushSelector).each(function (i, el) {
    var fields = $(el).children();
    var tag = fields.filter('.push-tag').text().trim();
    var userid = fields.filter('.push-userid').text().trim();
    // Strip only a LEADING ':' separator. An unanchored replace would delete
    // the first colon anywhere, e.g. inside a URL or a time.
    var text = fields.filter('.push-content').text().trim().replace(/^:/, '');
    var ipDatetime = fields.filter('.push-ipdatetime').text().trim();
    var ip = ipDatetime.match(pushIpReg);
    var time = ipDatetime.match(pushTimeReg);
    var entry = {
      tag: tag,
      userid: userid,
      content: text
    };

    // `ip` / `time` keys are only present when something matched.
    if (ip) {
      entry.ip = ip.join();
    }

    if (time) {
      entry.time = time.join();
    }

    buckets.reply.push(entry);

    if (tag === '推') {
      buckets.push.push(entry);
    }

    if (tag === '→') {
      buckets.neutral.push(entry);
    }

    if (tag === '噓') {
      buckets.boo.push(entry);
    }
  });
  return buckets;
}

/**
 * Returns the non-empty text of every anchor inside the main content.
 * Note this is the anchor's text, not its `href` attribute.
 *
 * @param {Function} $ - Loaded cheerio instance.
 * @returns {string[]} Anchor texts in document order.
 */
function collectLinkTexts($) {
  var urls = [];
  $(contentLinkSelector).each(function (i, el) {
    var text = $(el).text();

    if (text) {
      urls.push(text);
    }
  });
  return urls;
}

/**
 * Returns the text of each direct child of the main content that matches
 * `selector`.
 *
 * @param {Function} $ - Loaded cheerio instance.
 * @param {string} selector - Filter applied to the direct children.
 * @returns {string[]} Texts in document order.
 */
function collectChildTexts($, selector) {
  var texts = [];
  $(mainContentSelector).children(selector).each(function (index, ele) {
    texts.push($(ele).text());
  });
  return texts;
}

/**
 * Parses a fetched article page into a plain result object.
 *
 * @param {string} html - Markup returned by `getUrlContent`.
 * @returns {Object} `{ author, title, datetime, urls, quoteFrom, quote,
 *   content, publishIP, editedIP, reply, push, boo, neutral }`.
 */
function parseArticle(html) {
  var $ = _cheerio.default.load(html, {
    withDomLvl1: true,
    normalizeWhitespace: true,
    xmlMode: true
  });

  var author = $(authorSelector).text();
  var title = $(titleSelector).text();
  var datetime = $(timeSelector).text();
  var ips = extractIPs($);
  var replies = collectReplies($);
  var urls = collectLinkTexts($);
  var quoteFrom = collectChildTexts($, '.f2');
  var quote = collectChildTexts($, '.f6');
  // Must run last: it removes every child element from the main content so
  // that only the bare text nodes remain.
  var content = $(mainContentSelector).children().remove().end().text();
  return {
    author: author,
    title: title,
    datetime: datetime,
    urls: urls,
    quoteFrom: quoteFrom,
    quote: quote,
    content: content,
    publishIP: ips.publishIP,
    editedIP: ips.editedIP,
    reply: replies.reply,
    push: replies.push,
    boo: replies.boo,
    neutral: replies.neutral
  };
}

/**
 * Fetches and parses one article.
 *
 * @param {string|Object} item - Article URL, or an object carrying a `link`
 *   property. An object argument is shallow-copied, not mutated.
 * @returns {Promise<Object>} Resolves to `{}` when no usable link is given,
 *   otherwise to the copied item with a `content` property holding the
 *   parsed article. Rejects with whatever `request` or `getUrlContent`
 *   reject with.
 */
var _default =
/*#__PURE__*/
function () {
  var _ref = (0, _asyncToGenerator2.default)(
  /*#__PURE__*/
  _regenerator.default.mark(function _callee(item) {
    var session, html;
    return _regenerator.default.wrap(function _callee$(_context) {
      while (1) {
        switch (_context.prev = _context.next) {
          case 0:
            // `false`, `null` and `undefined` all collapse to `{}` here.
            item = Object.assign({}, initItemLink(item));

            if (item.link) {
              _context.next = 3;
              break;
            }

            return _context.abrupt("return", {});

          case 3:
            _context.next = 5;
            return (0, _request.default)();

          case 5:
            session = _context.sent;
            _context.next = 8;
            return (0, _getUrlContent.default)(session, item.link);

          case 8:
            html = _context.sent;
            item.content = parseArticle(html);
            return _context.abrupt("return", item);

          case 11:
          case "end":
            return _context.stop();
        }
      }
    }, _callee, this);
  }));

  return function (_x) {
    return _ref.apply(this, arguments);
  };
}();

exports.default = _default;