// @fcc-cdc/it-events
// Version: (not recorded)
// IT Events Crawler of China
// 194 lines (193 loc) • 8.32 kB
// JavaScript
;
// Marker wrapper: `__await(value)` boxes `value` in an object whose `v` property holds it,
// so that `__asyncGenerator` can tell "await this" yields apart from ordinary yields.
// Works both with and without `new`.
var __await = (this && this.__await) || function (v) {
    if (this instanceof __await) {
        this.v = v;
        return this;
    }
    return new __await(v);
};
/**
 * Down-level helper that exposes a plain generator as an async iterator.
 *
 * Values the generator yields wrapped in `__await` are awaited and their result is fed
 * back into the generator; every other iterator result is handed to the consumer.
 *
 * @param thisArg - `this` value used when invoking `generator`
 * @param _arguments - arguments passed to `generator` (an empty list when falsy)
 * @param generator - generator function to drive
 * @returns object with `next` / `throw` / `return` (when the generator has them) and `Symbol.asyncIterator`
 * @throws TypeError when `Symbol.asyncIterator` is not available
 */
var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _arguments, generator) {
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
// g: the underlying generator; i: the facade returned to callers; q: queue of pending [method, value, resolve, reject] requests.
var g = generator.apply(thisArg, _arguments || []), i, q = [];
return i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i;
// Expose method `n` on the facade only if the generator has it. Each call is queued and returns a promise;
// processing starts immediately only when the queue was empty (`push` returns the new length).
function verb(n) { if (g[n]) i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; }
// Advance the generator; an exception thrown by it rejects the request at the head of the queue.
function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }
// An `__await` value is resolved first and its outcome is sent back in via `next` / `throw`;
// anything else resolves the head request with the iterator result `r`.
function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }
function fulfill(value) { resume("next", value); }
function reject(value) { resume("throw", value); }
// Complete the head request, drop it, then start the next queued request if there is one.
function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.descendDate = exports.mergeStream = exports.delay = exports.diffEvent = exports.eventList = exports.sameParentOf = exports.getCSSSelector = exports.CSSSelectorPrecision = exports.saveFile = exports.stringifyCSV = exports.makeDate = exports.logTime = void 0;
require("array-unique-proposal");
const fs_extra_1 = require("fs-extra");
const jsdom_1 = require("jsdom");
const path_1 = require("path");
const url_1 = require("url");
const util_1 = require("util");
/**
 * Decorator that logs how long each call of the wrapped function takes,
 * using `console.time` / `console.timeEnd` labelled with `context.name`.
 *
 * For a function returning a `Promise` the timer stops when the promise settles;
 * otherwise it stops as soon as the function returns or throws.
 *
 * @param target - function to wrap; it is invoked with the caller's `this` and arguments
 * @param context - decorator context; only `name` is read
 * @returns wrapper that returns (or throws) exactly what `target` does
 */
const logTime = (target, context) => function (...input) {
    const title = context.name.toString();
    const end = () => {
        console.log();
        console.timeEnd(title);
        console.log();
    };
    console.time(title);
    let output;
    try {
        output = target.apply(this, input);
    }
    catch (error) {
        // Stop the timer on a synchronous failure too, so the label is not left running.
        end();
        throw error;
    }
    if (output instanceof Promise)
        // `then(end, end)` rather than `finally(end)`: the derived promise always fulfils,
        // so a rejection of `output` is reported only through the promise the caller receives
        // instead of also surfacing as an unhandled rejection here.
        output.then(end, end);
    else
        end();
    return output;
};
exports.logTime = logTime;
/**
 * Parse a loosely formatted date text.
 *
 * The text is normalised before parsing: whitespace in front of an `HH:` part becomes `T`,
 * a `.mmm` millisecond part (with an optional `Z`) is dropped, every run of characters other
 * than digits, `-`, `T` and `:` becomes a single `-`, and leading / trailing `-` are removed.
 *
 * @param raw - value to parse; falsy values are treated as an empty text
 * @returns the parsed `Date`, or `undefined` when the result is not a valid date
 */
function makeDate(raw) {
    const text = (raw || '') + '';
    const normalized = text
        .replace(/\s+(\d+:)/, 'T$1')
        .replace(/\.\d{3}Z?/, '')
        .replace(/[^\d\-T:]+/g, '-')
        .replace(/^-*|-*$/g, '');
    const date = new Date(normalized);
    return isNaN(+date) ? undefined : date;
}
exports.makeDate = makeDate;
/**
 * Serialise a list of plain objects as CSV text.
 *
 * The header row collects every key in order of first appearance, and every row is padded
 * to the header width so that all lines have the same number of columns.
 *
 * Cell encoding:
 * - `undefined` (including keys missing from an item) gives an empty cell;
 * - strings, and values whose JSON form is a string, are always quoted, with embedded
 *   double quotes doubled (`"` becomes `""`);
 * - any other value is written in its JSON form, quoted only when that form contains
 *   a double quote, a comma or a line break (for example arrays and objects).
 *
 * @param list - objects to serialise
 * @returns CSV text with `\n` between lines; an empty string for an empty list
 */
function stringifyCSV(list) {
    const header = [];
    const columnOf = new Map();
    const body = list.map(item => {
        const row = [];
        for (const [key, value] of Object.entries(item)) {
            let index = columnOf.get(key);
            if (index === undefined) {
                index = header.push(key) - 1;
                columnOf.set(key, index);
            }
            row[index] = value;
        }
        return row;
    });
    const encodeCell = value => {
        const json = JSON.stringify(value);
        // `undefined`, functions and symbols have no JSON form.
        if (json === undefined)
            return '';
        const isText = json.startsWith('"');
        // Recover the raw text, because JSON escapes (`\"`, `\n`) are not valid CSV escapes.
        const text = isText ? JSON.parse(json) : json;
        return isText || /[",\r\n]/.test(text)
            ? `"${text.replace(/"/g, '""')}"`
            : text;
    };
    return [header, ...body]
        .map(row => Array.from({ length: header.length }, (_, index) => encodeCell(row[index])).join(','))
        .join('\n');
}
exports.stringifyCSV = stringifyCSV;
/**
 * Write `data` to the file located by joining `pathParts`, then log the saved path.
 *
 * @param data - content handed to fs-extra's `outputFile`
 * @param pathParts - path segments combined with `path.join`
 * @returns the joined path the data was written to
 */
async function saveFile(data, ...pathParts) {
    const { join } = path_1;
    const target = join(...pathParts);
    const { outputFile } = fs_extra_1;
    await outputFile(target, data);
    console.log(`[save] ${target}`);
    return target;
}
exports.saveFile = saveFile;
/**
 * Precision levels accepted by `getCSSSelector`.
 * Two-way enum: maps each name to its number (`Low` = 0, `Medium` = 1, `High` = 2)
 * and each number back to its name.
 */
var CSSSelectorPrecision;
(function (precision) {
    ['Low', 'Medium', 'High'].forEach((label, level) => {
        precision[label] = level;
        precision[level] = label;
    });
})(CSSSelectorPrecision = exports.CSSSelectorPrecision || (exports.CSSSelectorPrecision = {}));
/**
 * Build a CSS selector path (`a > b > c`) that leads down to `toElement`.
 *
 * Each step is the lower-cased tag name plus the element's classes. A step without classes
 * gets `:nth-child(n)` instead when `precision` is `High`, or when it is `Medium` and the
 * step is `toElement` itself.
 *
 * The walk goes upward from `toElement` and stops just below `fromElement`. It also stops
 * when it runs out of ancestors or reaches a non-element node such as the document, so a
 * `fromElement` that is not an ancestor yields the path from the topmost element instead
 * of throwing.
 *
 * @param toElement - element the selector should point at; it is always part of the path
 * @param fromElement - ancestor at which the path stops (exclusive); defaults to the root node
 * @param precision - one of `CSSSelectorPrecision`; defaults to `Low`
 * @returns selector steps joined with ` > `
 */
function getCSSSelector(toElement, fromElement = toElement.getRootNode(), precision = CSSSelectorPrecision.Low) {
    const ELEMENT_NODE = 1;
    const selectors = [];
    let node = toElement;
    do {
        const { tagName, className, parentNode } = node;
        const classes = className.split(/\s+/).filter(Boolean);
        const needsPosition = precision === CSSSelectorPrecision.High ||
            (precision === CSSSelectorPrecision.Medium && !selectors[0]);
        let suffix = '';
        if (classes.length)
            suffix = '.' + classes.join('.');
        else if (needsPosition && parentNode)
            // A parentless root has no siblings to count, so it gets no position.
            suffix = `:nth-child(${[...parentNode.children].indexOf(node) + 1})`;
        selectors.unshift(tagName.toLowerCase() + suffix);
        node = parentNode;
    } while (node && node !== fromElement && node.nodeType === ELEMENT_NODE);
    return selectors.join(' > ');
}
exports.getCSSSelector = getCSSSelector;
/**
 * Find the nearest ancestor of `first` that also contains `second`.
 *
 * @param first - node whose ancestors are searched, starting from its parent
 * @param second - node that the ancestor must contain
 * @returns the closest such ancestor, or `undefined` when no ancestor of `first` contains `second`
 */
function sameParentOf(first, second) {
    // Testing the ancestor itself ends the walk cleanly at the top of the tree.
    for (let ancestor = first.parentNode; ancestor; ancestor = ancestor.parentNode)
        if (ancestor.contains(second))
            return ancestor;
}
exports.sameParentOf = sameParentOf;
/**
 * Scrape a page and asynchronously yield one record per matched event container.
 *
 * Every record has `title` and `start` (trimmed text). The container must hold both
 * elements; a missing one raises a TypeError. The other fields are filled only when their
 * selector is given:
 * - `address`: trimmed text, `undefined` when the element is missing;
 * - `banner`: `URL` taken from the first `data-*` value starting with `http`, else from a
 *   non-empty `src`;
 * - `link`: `URL` built from a non-empty `href`, with every `utm_*` query parameter removed;
 * - `tags`: array of trimmed texts.
 *
 * Nothing is yielded when `list` matches no element.
 *
 * @param source - Web URL (loaded with `JSDOM.fromURL`) or a loaded object exposing `window.document`
 * @param list - CSS Selector of Event container
 * @param title - CSS Selector of Event title
 * @param start - CSS Selector of Event start date
 * @param address - CSS Selector of Event address
 * @param banner - CSS Selector of Event banner image
 * @param link - CSS Selector of Event URL
 * @param tags - CSS Selector of Event tags
 */
function eventList(source, list, title, start, address, banner, link, tags) {
    var _a;
    return __asyncGenerator(this, arguments, function* eventList_1() {
        const { window: { document } } = typeof source === 'string' ? yield __await(jsdom_1.JSDOM.fromURL(source)) : source;
        const group = document.querySelectorAll(list);
        if (!group[0])
            return yield __await(void 0);
        console.warn(document.URL);
        for (const item of group) {
            const data = {
                title: item.querySelector(title).textContent.trim(),
                start: item.querySelector(start).textContent.trim()
            };
            let _banner_, _link_;
            if (address)
                data.address = (_a = item.querySelector(address)) === null || _a === void 0 ? void 0 : _a.textContent.trim();
            if (banner && (_banner_ = item.querySelector(banner))) {
                const { dataset, src } = _banner_;
                for (const key in dataset)
                    if (dataset[key].startsWith('http')) {
                        data.banner = new url_1.URL(dataset[key]);
                        break;
                    }
                // An empty `src` cannot be parsed as a URL, so leave `banner` unset.
                if (!data.banner && src)
                    data.banner = new url_1.URL(src);
            }
            if (link && (_link_ = item.querySelector(link)) && _link_.href) {
                const { searchParams } = (data.link = new url_1.URL(_link_.href));
                // Iterate over a snapshot: deleting while walking the live key iterator
                // skips the key that follows each deleted one.
                for (const key of Array.from(searchParams.keys()))
                    if (key.startsWith('utm_'))
                        searchParams.delete(key);
            }
            if (tags)
                data.tags = Array.from(item.querySelectorAll(tags), tag => tag.textContent.trim());
            yield yield __await(data);
        }
    });
}
exports.eventList = eventList;
/**
 * Collect the fields of `New` that should replace those of `Old`.
 *
 * - `start` / `end`: taken when the new value, read as a `Date`, is later than the old one;
 * - any other key: taken when the new value's `length` is greater than the old value's `length`.
 *
 * Because comparisons against a missing value are always false, keys present on only one
 * side are never reported.
 *
 * @param Old - previously stored event
 * @param New - freshly crawled event
 * @returns object holding the replaced fields, or `undefined` when there are none
 */
function diffEvent(Old, New) {
    const dateKeys = ['start', 'end'];
    const sizeOf = value => (value === null || value === undefined ? undefined : value.length);
    const diff = {};
    const keys = new Set(Object.keys(Old).concat(Object.keys(New)));
    keys.forEach(key => {
        const before = Old[key], after = New[key];
        const changed = dateKeys.includes(key)
            ? new Date(before) < new Date(after)
            : sizeOf(before) < sizeOf(after);
        if (changed)
            diff[key] = after;
    });
    return Object.keys(diff).length ? diff : undefined;
}
exports.diffEvent = diffEvent;
/**
 * Promise-returning form of `setTimeout`, created with `util.promisify`.
 * `mergeStream` awaits it with a duration in milliseconds.
 */
exports.delay = (0, util_1.promisify)(setTimeout);
/**
 * Merge several async iterators into one ordered stream.
 *
 * One pending value is kept per iterator. On every round the empty slots are refilled
 * with `next()`, the non-null pending values are sorted with `sorter`, the first one is
 * yielded and its slot is cleared. The stream ends when no pending value is left.
 *
 * @param list - Iterators to merge
 * @param sorter - https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/sort#Parameters
 * @param interval - Seconds to wait after each yielded value
 *
 * @yield Data from `list` of Iterators
 */
function mergeStream(list, sorter, interval) {
    return __asyncGenerator(this, arguments, function* mergeStream_1() {
        const heads = Array(list.length);
        for (;;) {
            let slot = 0;
            while (slot < heads.length) {
                if (heads[slot] === undefined) {
                    const step = yield __await(list[slot].next());
                    heads[slot] = step.value;
                }
                slot++;
            }
            const ranked = heads.filter(head => head != null);
            ranked.sort(sorter);
            const top = ranked[0];
            if (top === undefined)
                break;
            heads[heads.indexOf(top)] = undefined;
            yield yield __await(top);
            yield __await((0, exports.delay)(interval * 1000));
        }
    });
}
exports.mergeStream = mergeStream;
/**
 * Sort comparator that orders records by `start`, latest first.
 *
 * @returns numeric value of the second record's `start` minus that of the first
 */
function descendDate({ start: A }, { start: B }) {
    const later = +B;
    const earlier = +A;
    return later - earlier;
}
exports.descendDate = descendDate;