mongodb-simplecrawler-queue
Version:
MongoDB FetchQueue Implementation for Simplecrawler
113 lines • 5.51 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const typings_1 = require("../typings");
/**
 * Operations - stateless, one-shot maintenance tasks that run against the crawler queue.
 */
class Operations {
    /**
     * Single monitoring task: collects statistics about the current state of the queue
     * and stores them as one new document in the statistic collection.
     *
     * The stored document contains:
     *  - `totalCount` / `fetchedCount` - document counts (all / `fetched: true`);
     *  - `<metric>Max|Min|Avg` for every `stateData` metric, computed over fetched items
     *    (absent when no item is fetched yet);
     *  - one property per distinct `status` value holding the number of items in that status;
     *  - `crawlers` - `{ crawlerName, modifiedCount }` per distinct `modifiedBy` value;
     *  - `timestamp` / `timestampFinish` - epoch milliseconds taken before the queries
     *    start and after all of them have completed.
     *
     * @param queueCollection source QueueCollection of {@link QueueItem}
     * @param statisticCollection destination Statistic Collection of {@link AggregationResult} items
     * @returns the statistic document that was passed to `insertOne`
     * @throws {Error} any query/insert failure is logged and rethrown (non-Error values are wrapped)
     */
    static async monitorTask(queueCollection, statisticCollection) {
        const currentTime = Date.now();
        // Build `<metric><Op>: { $<op>: '$stateData.<metric>' }` for every metric/operator pair.
        const metrics = ['actualDataSize', 'contentLength', 'downloadTime', 'requestLatency', 'requestTime'];
        const accumulators = ['Max', 'Min', 'Avg'];
        // The constant key 'null' collapses all matched documents into a single group.
        const fetchedStatsGroup = { _id: 'null' };
        for (const accumulator of accumulators) {
            for (const metric of metrics) {
                fetchedStatsGroup[`${metric}${accumulator}`] = {
                    [`$${accumulator.toLowerCase()}`]: `$stateData.${metric}`,
                };
            }
        }
        try {
            // aggregate() only hands back a cursor; the query runs when the cursor is consumed.
            // Consuming the cursors right here lets all five queries run concurrently and
            // guarantees every one of them is finished before `timestampFinish` is taken.
            const [totalCount, fetchedCount, fetchedStats, statusCounts, crawlerCounts] = await Promise.all([
                queueCollection.countDocuments(),
                queueCollection.countDocuments({ fetched: true }),
                queueCollection
                    .aggregate([{ $match: { fetched: true } }, { $group: fetchedStatsGroup }])
                    .next(),
                queueCollection.aggregate([{ $group: { _id: '$status', total: { $sum: 1 } } }]).toArray(),
                queueCollection.aggregate([{ $group: { _id: '$modifiedBy', total: { $sum: 1 } } }]).toArray(),
            ]);
            // next() yields nothing when no item has been fetched yet.
            const aggregationResult = fetchedStats || {};
            aggregationResult.totalCount = totalCount;
            aggregationResult.fetchedCount = fetchedCount;
            aggregationResult.timestamp = currentTime;
            aggregationResult.timestampFinish = Date.now();
            for (const { _id: status, total } of statusCounts) {
                aggregationResult[status] = total;
            }
            aggregationResult.crawlers = crawlerCounts.map(({ _id: crawlerName, total: modifiedCount }) => ({
                crawlerName,
                modifiedCount,
            }));
            // Drop the grouping key so it is not used as the _id of the statistic document.
            delete aggregationResult._id;
            await statisticCollection.insertOne(aggregationResult);
            return aggregationResult;
        }
        catch (error) {
            console.log(error);
            throw error instanceof Error ? error : new Error(error);
        }
    }
    /**
     * Single garbage collector task: rolls back every item that is not fetched, is not in
     * the Queued status and whose `modificationTimestamp` is older than `invalidPeriodMs`.
     * Matching items are set back to the Queued status and their `modificationTimestamp`
     * is refreshed to the current time.
     *
     * @param queueCollection source QueueCollection of {@link QueueItem}
     * @param invalidPeriodMs invalid period in milliseconds, after this amount of ms item is considered as "old"
     * @returns the result of the `updateMany` call on `queueCollection`
     */
    static async gcTask(queueCollection, invalidPeriodMs) {
        const currentTime = Date.now();
        const staleItemsFilter = {
            $and: [
                { fetched: { $ne: true } },
                { status: { $ne: typings_1.QueueItemStatus.Queued } },
                { modificationTimestamp: { $lt: currentTime - invalidPeriodMs } },
            ],
        };
        const requeueUpdate = {
            $set: { status: typings_1.QueueItemStatus.Queued, modificationTimestamp: currentTime },
        };
        return queueCollection.updateMany(staleItemsFilter, requeueUpdate);
    }
}
exports.Operations = Operations;
//# sourceMappingURL=operations.js.map