// ptt-scr
// Version:
// 79 lines (64 loc) • 2.19 kB
// JavaScript
;
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.default = void 0;
var _slicedToArray2 = _interopRequireDefault(require("@babel/runtime/helpers/slicedToArray"));
var _cheerio = _interopRequireDefault(require("cheerio"));
// CSS selectors used when scraping a listing page.
// Paging link whose href is parsed for the previous page's index number.
var prePageSelector = '.btn-group-paging a:nth-child(2)';
// One element per list entry; every selector below is evaluated per entry.
var listSelector = '.r-ent';
// Anchor whose text is used as the entry title.
var titleSelector = '.title a';
// Same anchor as the title; its href is used to build the entry link.
var titleLinkSelector = '.title a';
// Element whose text is used as the entry author.
var authorSelector = '.meta .author';
// Element whose text is used as the entry date.
var dateSelector = '.meta .date';
// Element whose (trimmed) text is used as the entry's push value.
var pushContentSelector = '.nrec';
/**
 * Normalizes the raw push text of a list entry.
 *
 * @param {string|undefined|null} value - Raw text read from the push element.
 * @returns {string} The text with surrounding whitespace removed, or an empty
 *   string when `value` is falsy.
 */
var pushContent = function pushContent(value) {
  if (!value) {
    return '';
  }
  return value.trim();
};
/**
 * Extracts the category from a title using the first capture group of
 * `categoryPattern`.
 *
 * @param {*} value - Title text; anything that is not a non-empty string
 *   yields `undefined`.
 * @param {RegExp} categoryPattern - Pattern whose capture group 1 holds the
 *   category.
 * @returns {string|undefined} The trimmed category, the fallback message
 *   '標題格式錯誤' ("malformed title format") when no category can be
 *   extracted, or `undefined` for empty / non-string input.
 */
var getCategory = function getCategory(value, categoryPattern) {
  if (!value || typeof value !== 'string') {
    return undefined;
  }
  // Run the pattern once and reuse the result.
  var matched = value.match(categoryPattern);
  // Group 1 can be missing (pattern without a group, unmatched optional
  // group); fall back to the error message instead of throwing on `.trim()`.
  if (matched && typeof matched[1] === 'string') {
    return matched[1].trim();
  }
  return '標題格式錯誤';
};
/**
 * Builds an absolute https://www.ptt.cc/ URL from a site-relative href.
 *
 * @param {string|undefined|null} value - Href read from the title anchor.
 * @returns {string} The absolute URL, or an empty string when `value` is
 *   falsy.
 */
var fullLink = function fullLink(value) {
  if (!value) {
    return '';
  }
  // The base already ends with '/', so drop leading slashes from the href to
  // avoid producing "https://www.ptt.cc//...".
  var path = String(value).replace(/^\/+/, '');
  return "https://www.ptt.cc/".concat(path);
};
/**
 * Parses the page number out of a paging href of the form
 * "...index<digits>.html".
 *
 * @param {string|undefined|null} value - Href of the previous-page link.
 * @returns {number} The page number, or 0 when `value` is falsy or does not
 *   contain a numbered index page.
 */
var getPrePageNumber = function getPrePageNumber(value) {
  if (!value) {
    return 0;
  }
  // `exec` returns null when the href has no numbered index (e.g.
  // "index.html"); treat that as "no previous page" instead of throwing.
  var match = /index(\d+)\.html/.exec(value);
  return match ? Number(match[1]) : 0;
};
/**
 * Parses the HTML of a listing page into its entries and the number of the
 * previous page.
 *
 * @param {string} html - Markup of the listing page.
 * @param {RegExp} [categoryPattern=/\[(.+)\]/] - Optional second argument;
 *   pattern whose capture group 1 extracts the category from each title.
 * @returns {{prePageNumber: number, items: Array<{title: string,
 *   category: (string|undefined), link: string, author: string, push: string,
 *   date: string}>}} The parsed entries in document order, plus the previous
 *   page number (0 when it cannot be determined).
 */
var _default = function _default(html) {
  var categoryPattern = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : /\[(.+)\]/;
  var $ = _cheerio.default.load(html);
  var items = [];
  var prePageNumber = getPrePageNumber($(prePageSelector).attr('href'));
  $(listSelector).each(function (i, el) {
    // Scope every lookup to this entry with `find` rather than re-loading the
    // element into a new root and overwriting the document-level `$`.
    var $entry = $(el);
    var $titleLink = $entry.find(titleLinkSelector);
    var title = $entry.find(titleSelector).text();
    items.push({
      title: title,
      category: getCategory(title, categoryPattern),
      link: fullLink($titleLink.attr('href')),
      author: $entry.find(authorSelector).text(),
      push: pushContent($entry.find(pushContentSelector).text()),
      date: $entry.find(dateSelector).text()
    });
  });
  return {
    prePageNumber: prePageNumber,
    items: items
  };
};
exports.default = _default;