mwoffliner
Version:
MediaWiki ZIM scraper
185 lines • 6.21 kB
JavaScript
/**
 * Key/value store backed by a single Redis hash.
 *
 * Every entry is a field of the hash named `dbName`; values are stored as JSON strings. An optional key mapping
 * renames the top-level properties of object values when they are written ("dehydrated") and restores the
 * original names when they are read back ("hydrated").
 */
export default class RedisKvs {
    redisClient;
    dbName;
    dehydratedKeys;
    hydratedKeys;
    /**
     * @param redisClient Client exposing the hash commands used below (hGet, hSet, hmGet, hDel, hScan, multi, ...).
     * @param dbName Name of the Redis hash holding all entries of this store.
     * @param keyMapping Optional object whose keys are the stored (dehydrated) property names and whose values
     * are the matching in-memory (hydrated) property names.
     */
    constructor(redisClient, dbName, keyMapping) {
        this.redisClient = redisClient;
        this.dbName = dbName;
        if (keyMapping) {
            this.hydratedKeys = Object.values(keyMapping);
            this.dehydratedKeys = Object.keys(keyMapping);
        }
    }
    /**
     * Tell whether a value is a non-null, non-array object, i.e. something whose property names can be remapped.
     * `typeof` alone is not enough: it also reports 'object' for null and for arrays.
     */
    static isRecord(value) {
        return value !== null && typeof value === 'object' && !Array.isArray(value);
    }
    /**
     * Return a shallow copy of "obj" where every top-level key found in "fromKeys" is replaced by the key at the
     * same index in "toKeys". Keys without a mapping are copied unchanged.
     */
    static renameKeys(obj, fromKeys, toKeys) {
        const renamed = {};
        for (const key of Object.keys(obj)) {
            const mapIndex = fromKeys.indexOf(key);
            renamed[mapIndex !== -1 ? toKeys[mapIndex] : key] = obj[key];
        }
        return renamed;
    }
    /**
     * Read one entry.
     * @returns The hydrated value, or the falsy reply of the client when nothing is stored under "prop".
     */
    async get(prop) {
        const val = await this.redisClient.hGet(this.dbName, prop);
        return this.hydrateObject(val);
    }
    /**
     * Read several entries at once.
     * @param prop Array of field names.
     * @returns Object mapping each requested field name to its hydrated value.
     */
    async getMany(prop) {
        // Same short-circuit as setMany(): HMGET needs at least one field, so do not send an empty command.
        if (prop.length === 0) {
            return {};
        }
        const replies = await this.redisClient.hmGet(this.dbName, prop);
        const result = {};
        for (let u = 0; u < prop.length; u += 1) {
            result[prop[u]] = this.hydrateObject(replies[u]);
        }
        return result;
    }
    /** Check whether an entry exists; returns the reply of the client's hExists. */
    exists(prop) {
        return this.redisClient.hExists(this.dbName, prop);
    }
    /**
     * Check the existence of several entries at once by queuing one hExists per key on a multi() batch.
     * @param prop Array of field names.
     * @param blocking Negated and passed to exec(). NOTE(review): in node-redis v4 that exec() argument selects
     * pipeline execution instead of a MULTI/EXEC transaction - confirm against the client version in use.
     * @returns Object mapping each field name to the matching hExists reply.
     */
    async existsMany(prop, blocking = false) {
        // array of keys
        const multi = this.redisClient.multi();
        prop.forEach((index) => {
            multi.hExists(this.dbName, index);
        });
        const replies = await multi.exec(!blocking);
        const result = {};
        for (let u = 0; u < prop.length; u += 1) {
            result[prop[u]] = replies[u];
        }
        return result;
    }
    /** Write one entry; the value is dehydrated (keys renamed, then serialized) before being stored. */
    set(prop, val) {
        return this.redisClient.hSet(this.dbName, prop, this.dehydrateObject(val));
    }
    /**
     * Write several entries at once.
     * @param val Object mapping field names to values.
     * @returns 0 without contacting Redis when "val" has no keys, otherwise the reply of hSet.
     */
    async setMany(val) {
        const keys = Object.keys(val);
        if (!keys.length) {
            return 0;
        }
        const data = {};
        for (const key of keys) {
            data[key] = this.dehydrateObject(val[key]);
        }
        return this.redisClient.hSet(this.dbName, data);
    }
    /** Delete one entry; returns the reply of the client's hDel. */
    delete(prop) {
        return this.redisClient.hDel(this.dbName, prop);
    }
    /**
     * Delete several entries at once.
     * @param prop Array of field names.
     * @returns Promise of 0 without contacting Redis for an empty array, otherwise the reply of hDel.
     */
    deleteMany(prop) {
        // Same short-circuit as setMany(): HDEL needs at least one field, so do not send an empty command.
        if (Array.isArray(prop) && prop.length === 0) {
            return Promise.resolve(0);
        }
        return this.redisClient.hDel(this.dbName, prop);
    }
    /** List the field names of the hash. */
    keys() {
        return this.redisClient.hKeys(this.dbName);
    }
    /** Number of fields in the hash. */
    len() {
        return this.redisClient.hLen(this.dbName);
    }
    /**
     * Iteratively call function passed with batches of items from the underlying KVS hash, with a given number of workers
     * working in parallel.
     *
     * This function scans the underlying KVS hash for items to batch. Batch size is "around" 10 items (Redis does
     * not guarantee exact batch size for SCAN operation). For every batch retrieved, passed "func" is called with the
     * "items" retrieved.
     *
     * "numWorkers" are started in parallel, each processing a batch of items. Number of "runningWorkers" is passed to
     * "func" called, mostly useful for logging / debugging purposes.
     *
     * This function returns when all items have been batched and passed to processing function. The returned promise
     * resolves with null, or rejects with the first error raised by a scan or by "func".
     */
    iterateItems(numWorkers, func) {
        return new Promise((resolve, reject) => {
            let runningWorkers = 0;
            let isScanning = false;
            let done = false;
            let isResolved = false;
            let scanCursor = 0;
            const scan = async () => {
                // Only one scan at a time, never more batches in flight than workers allowed, nothing once settled.
                if (runningWorkers >= numWorkers || isScanning || isResolved) {
                    return;
                }
                if (done) {
                    // Cursor is exhausted: settle once the last in-flight worker has finished.
                    if (!runningWorkers) {
                        isResolved = true;
                        resolve(null);
                    }
                    return;
                }
                isScanning = true;
                try {
                    // The worker slot is reserved before the scan so concurrent scan() calls see it as taken.
                    runningWorkers += 1;
                    const { cursor, items } = await this.scan(scanCursor);
                    scanCursor = cursor;
                    // A cursor of 0 in the reply marks the end of the iteration.
                    if (scanCursor === 0) {
                        done = true;
                    }
                    setImmediate(workerFunc, items);
                }
                catch (err) {
                    if (!isResolved) {
                        isResolved = true;
                        reject(err);
                    }
                }
                isScanning = false;
                // Try to start another batch right away if a worker slot is still free.
                setImmediate(scan);
            };
            const workerFunc = async (items) => {
                try {
                    await func(items, runningWorkers);
                    runningWorkers -= 1;
                    // A slot was freed: fetch the next batch, or resolve if everything is done.
                    setImmediate(scan);
                }
                catch (err) {
                    if (!isResolved) {
                        isResolved = true;
                        reject(err);
                    }
                }
            };
            scan();
        });
    }
    /**
     * Fetch one batch of entries with hScan, starting at "scanCursor".
     * @returns The cursor to pass to the next call and an object mapping field names to hydrated values.
     */
    async scan(scanCursor) {
        const { cursor, tuples } = await this.redisClient.hScan(this.dbName, scanCursor);
        const items = {};
        for (const { field, value } of tuples) {
            items[field] = this.hydrateObject(value);
        }
        return {
            cursor,
            items,
        };
    }
    /** Remove the whole hash, and therefore every entry of this store. */
    flush() {
        return this.redisClient.del(this.dbName);
    }
    /**
     * Turn a stored string back into a value.
     *
     * Falsy input (missing field, empty string) is returned as is. Anything else is parsed as JSON, so it throws
     * a SyntaxError on invalid JSON. When a key mapping is configured and the parsed value is a non-null,
     * non-array object, its top-level keys are renamed from their stored names to their in-memory names;
     * null, arrays and primitives are returned untouched.
     */
    hydrateObject(value) {
        if (!value) {
            return value;
        }
        const obj = JSON.parse(value);
        if (this.hydratedKeys && RedisKvs.isRecord(obj)) {
            return RedisKvs.renameKeys(obj, this.dehydratedKeys, this.hydratedKeys);
        }
        return obj;
    }
    /**
     * Turn a value into the string stored in Redis.
     *
     * When a key mapping is configured and the value is a non-null, non-array object, its top-level keys are
     * renamed from their in-memory names to their stored names before serialization. Strings are returned
     * unchanged (not JSON-encoded), so a string must already contain valid JSON to be readable through
     * hydrateObject(). Everything else goes through JSON.stringify.
     */
    dehydrateObject(obj) {
        let mappedVal = obj;
        if (this.dehydratedKeys && RedisKvs.isRecord(obj)) {
            mappedVal = RedisKvs.renameKeys(obj, this.hydratedKeys, this.dehydratedKeys);
        }
        return typeof mappedVal === 'string' ? mappedVal : JSON.stringify(mappedVal);
    }
}
//# sourceMappingURL=RedisKvs.js.map