UNPKG

crawfishcloud

Version:
84 lines (83 loc) 4.05 kB
import { Readable } from 'stream';
import { isMatch } from 'micromatch';
import { asVfile, asVinyl, asS3 } from './exporters';
import { s3urlToConfigWfilters, s3ConfigToUrl, loadObjectList } from './utils';

/**
 * Create a crawler over one or more S3 locations.
 *
 * @param {object} input - Crawler options. `input.s3c` is the client whose
 *   `listObjectsV2(...).promise()` is used for listing; `input.maxkeys`
 *   (default 1000) is sent as `MaxKeys` on each list request. Any other
 *   property of `input` is spread over the internal config, so an explicit
 *   `input.MaxKeys` or `input.BucketsPrefixes` overrides the computed value.
 * @param {...string} filters - Default filters, each converted with
 *   `s3urlToConfigWfilters`. They are used whenever a method is called
 *   without per-call filters.
 * @returns {object} An object exposing `iter`, `all`, `stream`, `reduce` and
 *   the `vfile*` / `vinyl*` / `s3*` convenience variants.
 */
export const crawler = function (input, ...filters) {
    const config = {
        filters,
        body: true,
        MaxKeys: input.maxkeys != null ? input.maxkeys : 1000,
        BucketsPrefixes: filters.map(s3urlToConfigWfilters),
        // Spread last on purpose: caller-supplied keys win over the defaults above.
        ...input,
    };
    const { s3c } = input;
    const { MaxKeys } = config;

    /**
     * Async generator over the matching objects.
     *
     * @param {object} i - Per-call options: `body` (when falsy the object
     *   bodies are not loaded and `Body` is set to `''`), `using` (mapper called
     *   with `(s3Object, index)`; its awaited result is what gets yielded) and
     *   an optional `NextContinuationToken` used to resume a listing.
     * @param {...string} filters - Per-call filters; when empty, the filters
     *   given to `crawler` are used.
     */
    const iter = async function* (i, ...filters) {
        const bucketPrefixes = filters.length > 0
            ? filters.map(s3urlToConfigWfilters)
            : config.BucketsPrefixes;

        // Walk the list back-to-front, matching the original traversal order.
        for (let j = bucketPrefixes.length - 1; j >= 0; j--) {
            const { Bucket, Key, prefix, suffix } = bucketPrefixes[j];
            const objListResp = await s3c.listObjectsV2({
                Bucket,
                MaxKeys,
                Prefix: prefix,
                ContinuationToken: i.NextContinuationToken,
            }).promise();

            const keyList = objListResp.Contents != null ? objListResp.Contents : [];
            const pattern = `${prefix}${suffix}`;
            // The listing is narrowed by Prefix only; the glob is applied client-side.
            const matched = keyList.filter((entry) => isMatch(entry.Key != null ? entry.Key : '', pattern, { bash: true }));

            let mapped;
            if (!i.body) {
                mapped = await Promise.all(matched.map((s3obj, k) => i.using({ ...s3obj, Bucket, Body: '' }, k)));
            }
            else {
                // NOTE(review): loadObjectList is assumed to resolve to the same
                // entries with `Body` populated - confirm against ./utils.
                const loaded = await loadObjectList(s3c, Bucket, ...matched);
                mapped = await Promise.all(loaded.map((s3ObjwBody, k) => i.using({ ...s3ObjwBody, Bucket, Body: s3ObjwBody.Body }, k)));
            }
            yield* mapped;

            // More pages available: recurse for this bucket/key with the continuation token.
            if (objListResp.NextContinuationToken) {
                yield* iter({
                    body: i.body,
                    using: i.using,
                    NextContinuationToken: objListResp.NextContinuationToken,
                }, s3ConfigToUrl({ Bucket, Key }));
            }
        }
    };

    /** Same items as `iter`, exposed as an object-mode Readable stream. */
    const stream = (i, ...filters) => Readable.from(iter(i, ...filters), { objectMode: true });

    /** Drain `iter` into an array. */
    const all = async (i, ...filters) => {
        const acc = [];
        for await (const f of iter(i, ...filters)) {
            acc.push(f);
        }
        return acc;
    };

    /**
     * Fold over the items of `iter` (always with `body: true`).
     * `reducer` is called as `reducer(accumulator, element, index)`.
     */
    const reduce = async (init, using, reducer, ...filters) => {
        let acc = init;
        let j = 0;
        for await (const elem of iter({ body: true, using }, ...filters)) {
            acc = reducer(acc, elem, j);
            j++;
        }
        return acc;
    };

    // The convenience variants call this instance's own iter/stream/all so that
    // the filters given to `crawler` still apply when none are passed per call.
    // (Building a fresh `crawler(input)` here would drop those default filters.)
    return {
        iter,
        all,
        stream,
        reduce,
        vfileStream: (...filters) => stream({ body: true, using: asVfile }, ...filters),
        vinylStream: (...filters) => stream({ body: true, using: asVinyl }, ...filters),
        s3Stream: (...filters) => stream({ body: true, using: asS3 }, ...filters),
        vfileIter: (...filters) => iter({ body: true, using: asVfile }, ...filters),
        vinylIter: (...filters) => iter({ body: true, using: asVinyl }, ...filters),
        s3Iter: (...filters) => iter({ body: true, using: asS3 }, ...filters),
        vfileArray: (...filters) => all({ body: true, using: asVfile }, ...filters),
        vinylArray: (...filters) => all({ body: true, using: asVinyl }, ...filters),
        s3Array: (...filters) => all({ body: true, using: asS3 }, ...filters),
    };
};

export default crawler;