UNPKG

mongodb-simplecrawler-queue

Version:
113 lines 5.51 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const typings_1 = require("../typings"); /** * Operations class - single tasks operation */ class Operations { /** * Single Monitoring task operation - collect all statistic data about state of the queue * and put it to statistic collection as new Item * * @param queueCollection source QueueCollection of {@link QueueItem} * @param statisticCollection destination Statistic Collection of {@link AggregationResult} items */ static async monitorTask(queueCollection, statisticCollection) { const currentTime = new Date().getTime(); const totalCountPromise = queueCollection.countDocuments(); const fetchedCountPromise = queueCollection.countDocuments({ fetched: true }); const aggregationResultPromise = queueCollection.aggregate([ { $match: { fetched: true } }, { $group: { _id: 'null', actualDataSizeMax: { $max: '$stateData.actualDataSize' }, contentLengthMax: { $max: '$stateData.contentLength' }, downloadTimeMax: { $max: '$stateData.downloadTime' }, requestLatencyMax: { $max: '$stateData.requestLatency' }, requestTimeMax: { $max: '$stateData.requestTime' }, actualDataSizeMin: { $min: '$stateData.actualDataSize' }, contentLengthMin: { $min: '$stateData.contentLength' }, downloadTimeMin: { $min: '$stateData.downloadTime' }, requestLatencyMin: { $min: '$stateData.requestLatency' }, requestTimeMin: { $min: '$stateData.requestTime' }, actualDataSizeAvg: { $avg: '$stateData.actualDataSize' }, contentLengthAvg: { $avg: '$stateData.contentLength' }, downloadTimeAvg: { $avg: '$stateData.downloadTime' }, requestLatencyAvg: { $avg: '$stateData.requestLatency' }, requestTimeAvg: { $avg: '$stateData.requestTime' }, }, }, ]); const aggregationResultArrPromise = queueCollection.aggregate([ { $group: { _id: '$status', total: { $sum: 1 }, }, }, ]); const aggregationResultCrawlersPromise = queueCollection.aggregate([ { $group: { _id: '$modifiedBy', total: { $sum: 1 }, }, }, ]); try { const [totalCountPromiseRes, fetchedCountPromiseRes, aggregationResultPromiseRes, aggregationResultArrPromiseRes, aggregationResultCrawlersRes,] = await Promise.all([ totalCountPromise, fetchedCountPromise, aggregationResultPromise, aggregationResultArrPromise, aggregationResultCrawlersPromise, ]); let aggregationResult = await aggregationResultPromiseRes.next(); if (!aggregationResult) { aggregationResult = {}; } aggregationResult.totalCount = totalCountPromiseRes; aggregationResult.fetchedCount = fetchedCountPromiseRes; aggregationResult.timestamp = currentTime; aggregationResult.timestampFinish = new Date().getTime(); const aggregationResultArr = await aggregationResultArrPromiseRes.toArray(); for (let i = 0; i < aggregationResultArr.length; i += 1) { aggregationResult[aggregationResultArr[i]._id] = aggregationResultArr[i].total; } const aggregationResultCrawlers = await aggregationResultCrawlersRes.toArray(); aggregationResult.crawlers = []; for (let i = 0; i < aggregationResultCrawlers.length; i += 1) { aggregationResult.crawlers.push({ crawlerName: aggregationResultCrawlers[i]._id, modifiedCount: aggregationResultCrawlers[i].total, }); } delete aggregationResult._id; await statisticCollection.insertOne(aggregationResult); return aggregationResult; } catch (error) { console.log(error); throw error instanceof Error ? error : new Error(error); } } /** * Single garbage collector task. Roll back all spooled, but not fetched items with "old" modification timestamp * * @param queueCollection source QueueCollection of {@link QueueItem} * @param invalidPeriodMs invalid period in milliseconds, after this amount of ms item is considered as "old" */ static async gcTask(queueCollection, invalidPeriodMs) { const currentTime = new Date().getTime(); return queueCollection.updateMany({ $and: [ { fetched: { $ne: true } }, { status: { $ne: typings_1.QueueItemStatus.Queued } }, { modificationTimestamp: { $lt: currentTime - invalidPeriodMs } }, ], }, { $set: { status: typings_1.QueueItemStatus.Queued, modificationTimestamp: currentTime } }); } } exports.Operations = Operations; //# sourceMappingURL=operations.js.map