UNPKG

ptt-scr

Version:
79 lines (64 loc) 2.19 kB
"use strict";

var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.default = void 0;

// No longer referenced by the code below; the require is retained so the
// module's dependency list stays exactly as it was.
var _slicedToArray2 = _interopRequireDefault(require("@babel/runtime/helpers/slicedToArray"));

var _cheerio = _interopRequireDefault(require("cheerio"));

// CSS selectors used to pick data out of the parsed page.
// NOTE(review): presumably these target a PTT board index page; confirm against callers.
var prePageSelector = '.btn-group-paging a:nth-child(2)';
var listSelector = '.r-ent';
var titleSelector = '.title a';
var titleLinkSelector = '.title a';
var authorSelector = '.meta .author';
var dateSelector = '.meta .date';
var pushContentSelector = '.nrec';

/**
 * Normalizes the text read from the push-count cell.
 *
 * @param {string} [value] Raw text of the element matched by `pushContentSelector`.
 * @returns {string} The trimmed text, or '' when the input is falsy.
 */
var pushContent = function pushContent(value) {
  return value ? value.trim() : '';
};

/**
 * Extracts the category label from a title using capture group 1 of the pattern.
 *
 * @param {*} value Title text.
 * @param {RegExp} categoryPattern Pattern whose first capture group is the category.
 * @returns {string|undefined} The trimmed category; '標題格式錯誤' ("title format error")
 *   when the title yields no usable capture group 1; `undefined` when `value` is
 *   empty or not a string.
 */
var getCategory = function getCategory(value, categoryPattern) {
  if (!value || typeof value !== 'string') {
    return undefined;
  }

  // Match once. Also guard capture group 1: a pattern without a group (or with a
  // `g` flag) can match without providing it, which used to throw on `.trim()`.
  var match = value.match(categoryPattern);
  return match && typeof match[1] === 'string' ? match[1].trim() : '標題格式錯誤';
};

/**
 * Turns an href into an absolute https://www.ptt.cc/ URL.
 *
 * @param {string} [value] The href attribute of a title link.
 * @returns {string} The absolute URL, or '' when the href is missing or empty.
 */
var fullLink = function fullLink(value) {
  if (!value) {
    return '';
  }

  // The base already ends in '/', so drop leading slashes from the href to avoid
  // producing "https://www.ptt.cc//bbs/...".
  return "https://www.ptt.cc/".concat(String(value).replace(/^\/+/, ''));
};

/**
 * Reads the page number out of an "index<N>.html" href.
 *
 * @param {string} [value] The href of the element matched by `prePageSelector`.
 * @returns {number} The number N, or 0 when the href is missing or does not
 *   contain "index<N>.html".
 */
var getPrePageNumber = function getPrePageNumber(value) {
  if (!value) {
    return 0;
  }

  // exec() returns null when nothing matches; indexing that result used to throw.
  var match = /index(\d+)\.html/.exec(value);
  return match ? Number(match[1]) : 0;
};

/**
 * Parses an HTML page into a list of entries plus the page number taken from
 * the paging link.
 *
 * @param {string} html Markup to parse.
 * @param {RegExp} [categoryPattern=/\[(.+)\]/] Pattern whose first capture group
 *   is used as each entry's category.
 * @returns {{prePageNumber: number, items: Array<{title: string,
 *   category: (string|undefined), link: string, author: string, push: string,
 *   date: string}>}} One item per element matching `.r-ent`, in document order.
 */
var _default = function _default(html) {
  var categoryPattern = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : /\[(.+)\]/;

  var $ = _cheerio.default.load(html);

  var prePageNumber = getPrePageNumber($(prePageSelector).attr('href'));
  var items = [];

  $(listSelector).each(function (i, el) {
    // Scope every lookup to the current row instead of re-loading the element
    // into a fresh document and overwriting the page-level `$`.
    var row = $(el);
    var title = row.find(titleSelector).text();

    items.push({
      title: title,
      category: getCategory(title, categoryPattern),
      link: fullLink(row.find(titleLinkSelector).attr('href')),
      author: row.find(authorSelector).text(),
      push: pushContent(row.find(pushContentSelector).text()),
      date: row.find(dateSelector).text()
    });
  });

  return {
    prePageNumber: prePageNumber,
    items: items
  };
};

exports.default = _default;