/*
 * mongodb-simplecrawler-queue
 * Version:
 * MongoDB FetchQueue Implementation for Simplecrawler
 * 106 lines • 5.13 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
const mongodb_1 = require("mongodb");
const operations_1 = require("./operations");
/*
Millisecond durations of common time units.
Maps both ways: TimeUnit.SECOND === 1000 and TimeUnit[1000] === "SECOND".
*/
var TimeUnit = TimeUnit || {};
[
    ['SECOND', 1000],
    ['MINUTE', 60000],
].forEach(([label, milliseconds]) => {
    // forward mapping: unit name -> milliseconds
    TimeUnit[label] = milliseconds;
    // reverse mapping: milliseconds -> unit name
    TimeUnit[milliseconds] = label;
});
/**
 * Maintenance helpers for the MongoDB-backed crawler queue: a periodic
 * statistics monitor, a periodic garbage-collection task, and a helper that
 * drops the queue and statistic collections.
 */
class Utils {
    /**
     * Resolves after the given delay.
     *
     * @param {number} ms - Delay in milliseconds.
     * @returns {Promise<void>} Promise fulfilled (with no value) once the timer fires.
     */
    static wait(ms) {
        return new Promise((resolve) => {
            setTimeout(resolve, ms);
        });
    }

    /**
     * Runs `Operations.monitorTask` against the queue once per minute until the
     * stop condition is met.
     *
     * Does nothing (besides logging) when `connConfig.monitorConfig.statisticCollectionName`
     * is not configured.
     *
     * @param {object} connConfig - Connection settings. Reads `url`, `dbName`
     *   (default `'crawler'`), `collectionName` (default `'queue'`) and
     *   `monitorConfig.statisticCollectionName`.
     * @param {number} [countToFinish=0] - Number of consecutive iterations in which
     *   the total count equals the fetched count before the monitor stops.
     *   `0` means "never stop".
     * @returns {Promise<void>} Resolves when the monitor stops; rejects if the
     *   connection or a monitor iteration fails (the client is closed either way).
     */
    static async runMonitoring(connConfig, countToFinish = 0) {
        if (!connConfig || !connConfig.monitorConfig || !connConfig.monitorConfig.statisticCollectionName) {
            console.log('Monitor Config configured to not run monitoring tasks');
            return;
        }
        const client = new mongodb_1.MongoClient(connConfig.url, { useNewUrlParser: true });
        try {
            await client.connect();
            const db = client.db(connConfig.dbName || 'crawler');
            const queue = db.collection(connConfig.collectionName || 'queue');
            const statisticCollection = db.collection(connConfig.monitorConfig.statisticCollectionName);
            let noChangesIterations = 0;
            for (;;) {
                // eslint-disable-next-line no-await-in-loop
                const aggregationResult = await operations_1.Operations.monitorTask(queue, statisticCollection);
                if (aggregationResult.totalCount === aggregationResult.fetchedCount) {
                    noChangesIterations += 1;
                }
                else {
                    noChangesIterations = 0;
                }
                if ((countToFinish !== 0) && (noChangesIterations === countToFinish)) {
                    // NOTE: the message text says "15 minutes"; the real duration is
                    // countToFinish iterations of one minute each.
                    const template = 'INFO: Count of total items equal to count of fetched items for 15 minutes. Stop the monitor';
                    console.log(`[${aggregationResult.timestamp}]: ${template}`);
                    // stop right away instead of sleeping one more interval
                    break;
                }
                // wait for 1 minute
                // eslint-disable-next-line no-await-in-loop
                await Utils.wait(TimeUnit.MINUTE);
            }
        }
        finally {
            // always release the connection, even when an iteration throws
            await client.close();
        }
    }

    /**
     * Runs `Operations.gcTask` against the queue every `GCConfig.msInterval`
     * milliseconds (default: 10 minutes) until the stop condition is met.
     * Judging by the log message, the task rolls stale documents back to the
     * queued status (the task itself is defined in ./operations).
     *
     * Does nothing (besides logging) when `connectionConfig.GCConfig.run` is falsy.
     *
     * @param {object} connectionConfig - Connection settings. Reads `url`, `dbName`
     *   (default `'crawler'`), `collectionName` (default `'queue'`),
     *   `GCConfig.run` and `GCConfig.msInterval`.
     * @param {number} [countToFinish=0] - Number of consecutive iterations in which
     *   the total count equals the fetched count before the GC stops.
     *   `0` means "never stop".
     * @returns {Promise<void>} Resolves when the GC stops; rejects if the
     *   connection or a GC iteration fails (the client is closed either way).
     */
    static async runGC(connectionConfig, countToFinish = 0) {
        if (!connectionConfig || !connectionConfig.GCConfig || !connectionConfig.GCConfig.run) {
            console.log('GC Config configured to not run GC tasks');
            return;
        }
        // invalid period in milliseconds (defaults to 10 minutes); also used as
        // the pause between two GC iterations
        const invalidPeriod = connectionConfig.GCConfig.msInterval || TimeUnit.MINUTE * 10;
        const client = new mongodb_1.MongoClient(connectionConfig.url, { useNewUrlParser: true });
        try {
            await client.connect();
            const db = client.db(connectionConfig.dbName || 'crawler');
            const queueCollection = db.collection(connectionConfig.collectionName || 'queue');
            let noChangesIterations = 0;
            for (;;) {
                // the two counts are independent, so fetch them concurrently
                // eslint-disable-next-line no-await-in-loop
                const [totalCount, fetchedCount] = await Promise.all([
                    queueCollection.countDocuments(),
                    queueCollection.countDocuments({ fetched: true }),
                ]);
                // The result must be awaited: without it `res` is a pending promise,
                // the log below never fires and a failure goes unhandled.
                // eslint-disable-next-line no-await-in-loop
                const res = await operations_1.Operations.gcTask(queueCollection, invalidPeriod);
                if (res && res.result && res.result.ok === 1) {
                    console.log(`${res.result.nModified} document were rolled back to Queued status`);
                }
                if (totalCount === fetchedCount) {
                    noChangesIterations += 1;
                }
                else {
                    noChangesIterations = 0;
                }
                if ((countToFinish !== 0) && (noChangesIterations === countToFinish)) {
                    // NOTE: the message text says "15 minutes"; the real duration is
                    // countToFinish iterations of `invalidPeriod` milliseconds each.
                    const msg = ' INFO: Count of total items equal to count of fetched items for 15 minutes. Stop the GC';
                    console.log(`[${new Date().getTime()}]: ${msg}`);
                    // stop right away instead of sleeping one more interval
                    break;
                }
                // eslint-disable-next-line no-await-in-loop
                await Utils.wait(invalidPeriod);
            }
        }
        finally {
            // always release the connection, even when an iteration throws
            await client.close();
        }
    }

    /**
     * Drops the queue collection and the statistic collection.
     *
     * Both collections are created first, presumably so that `drop()` does not
     * fail on a collection that does not exist yet.
     *
     * NOTE(review): unlike runMonitoring/runGC, `dbName` and `collectionName`
     * get no `'crawler'` / `'queue'` fallback here — confirm whether that is intended.
     *
     * @param {object} connectionConfig - Connection settings. Reads `url`, `dbName`,
     *   `collectionName` and the optional `monitorConfig.statisticCollectionName`
     *   (default `'statistic'`).
     * @returns {Promise<void>} Resolves once both collections are dropped and the
     *   client is closed.
     */
    static async dropQueue(connectionConfig) {
        // monitorConfig is optional elsewhere in this class, so do not assume it exists
        const monitorConfig = connectionConfig.monitorConfig || {};
        const statisticCollName = monitorConfig.statisticCollectionName || 'statistic';
        const client = new mongodb_1.MongoClient(connectionConfig.url, { useNewUrlParser: true });
        try {
            await client.connect();
            const db = client.db(connectionConfig.dbName);
            await db.createCollection(connectionConfig.collectionName);
            await db.createCollection(statisticCollName);
            const queueCollection = db.collection(connectionConfig.collectionName);
            const statisticCollection = db.collection(statisticCollName);
            await statisticCollection.drop();
            await queueCollection.drop();
        }
        finally {
            // always release the connection, even when a step throws
            await client.close();
        }
    }
}
exports.Utils = Utils;
//# sourceMappingURL=util.js.map