@near-lake/framework
JS library to connect to NEAR Lake S3 and stream the data
"use strict";
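// TypeScript downlevel helpers for async/await and async generators, emitted by the compiler.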
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
var __await = (this && this.__await) || function (v) { return this instanceof __await ? (this.v = v, this) : new __await(v); }
var __asyncGenerator = (this && this.__asyncGenerator) || function (thisArg, _arguments, generator) {
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
var g = generator.apply(thisArg, _arguments || []), i, q = [];
return i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i;
function verb(n) { if (g[n]) i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; }
function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }
function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }
function fulfill(value) { resume("next", value); }
function reject(value) { resume("throw", value); }
function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }
};
var __asyncValues = (this && this.__asyncValues) || function (o) {
if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
var m = o[Symbol.asyncIterator], i;
return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.startStream = exports.stream = void 0;
const client_s3_1 = require("@aws-sdk/client-s3");
const s3fetchers_1 = require("./s3fetchers");
const primitives_1 = require("@near-lake/primitives");
const utils_1 = require("./utils");
const FATAL_ERRORS = ['CredentialsProviderError'];
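/**
 * Async generator that lists block heights in the NEAR Lake S3 bucket and yields
 * them in batches. Each yielded item is an array of promises (one per block height)
 * that resolve to the corresponding streamer messages.
 *
 * Fields read from `config`: `credentials`, `s3RegionName`, `s3Endpoint`,
 * `s3ForcePathStyle`, `s3BucketName`, `startBlockHeight`, `blocksPreloadPoolSize`.
 */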
function batchStream(config) {
return __asyncGenerator(this, arguments, function* batchStream_1() {
const s3Client = new client_s3_1.S3Client({
credentials: config.credentials,
region: config.s3RegionName,
endpoint: config.s3Endpoint,
forcePathStyle: config.s3ForcePathStyle
});
let startBlockHeight = config.startBlockHeight;
while (true) {
let blockHeights;
try {
blockHeights = yield __await((0, s3fetchers_1.listBlocks)(s3Client, config.s3BucketName, startBlockHeight, config.blocksPreloadPoolSize));
}
catch (err) {
if (FATAL_ERRORS.includes(err.name)) {
throw err;
}
console.error('Failed to list blocks. Retrying.', err);
continue;
}
if (blockHeights.length === 0) {
// Throttling when there are no new blocks
const NO_NEW_BLOCKS_THROTTLE_MS = 700;
yield __await((0, utils_1.sleep)(NO_NEW_BLOCKS_THROTTLE_MS));
continue;
}
yield yield __await(blockHeights.map(blockHeight => (0, s3fetchers_1.fetchStreamerMessage)(s3Client, config.s3BucketName, blockHeight)));
// eslint-disable-next-line prefer-spread
startBlockHeight = Math.max.apply(Math, blockHeights) + 1;
}
});
}
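/**
 * Keeps up to `stepsAhead` pending `next()` calls of the given async iterable in
 * flight, so upcoming batches are already being fetched while the current one is
 * still being consumed.
 */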
function fetchAhead(seq, stepsAhead = 10) {
return __asyncGenerator(this, arguments, function* fetchAhead_1() {
const queue = [];
while (true) {
while (queue.length < stepsAhead) {
queue.push(seq[Symbol.asyncIterator]().next());
}
const { value, done } = yield __await(queue.shift());
if (done)
return yield __await(void 0);
yield yield __await(value);
}
});
}
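/**
 * Flattens the batches produced by `batchStream` into a single stream of streamer
 * messages. Every block's `prevHash` is checked against the hash of the previously
 * processed block; on a mismatch, or on any non-fatal error, the data is refetched
 * from S3 starting at the last processed height after a short delay.
 */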
function stream(config) {
return __asyncGenerator(this, arguments, function* stream_1() {
var _a, e_1, _b, _c;
let lastProcessedBlockHash;
let startBlockHeight = config.startBlockHeight;
while (true) {
try {
try {
for (var _d = true, _e = (e_1 = void 0, __asyncValues(fetchAhead(batchStream(Object.assign(Object.assign({}, config), { startBlockHeight }))))), _f; _f = yield __await(_e.next()), _a = _f.done, !_a;) {
_c = _f.value;
_d = false;
try {
const promises = _c;
for (const promise of promises) {
const streamerMessage = yield __await(promise);
// Check if we have `lastProcessedBlockHash` (it is unset only on the very first iteration)
// and compare it with `streamerMessage.block.header.prevHash` of the current block,
// to ensure we never skip blocks even if there is an incident on the Lake Indexer side.
// If the hashes don't match, refetch the data from S3 and repeat the main loop step.
if (lastProcessedBlockHash &&
lastProcessedBlockHash !== streamerMessage.block.header.prevHash) {
throw new Error(`The hash of the last processed block ${lastProcessedBlockHash} doesn't match the prevHash ${streamerMessage.block.header.prevHash} of the new one ${streamerMessage.block.header.hash}.`);
}
yield yield __await(streamerMessage);
lastProcessedBlockHash = streamerMessage.block.header.hash;
startBlockHeight = streamerMessage.block.header.height + 1;
}
}
finally {
_d = true;
}
}
}
catch (e_1_1) { e_1 = { error: e_1_1 }; }
finally {
try {
if (!_d && !_a && (_b = _e.return)) yield __await(_b.call(_e));
}
finally { if (e_1) throw e_1.error; }
}
}
catch (e) {
if (FATAL_ERRORS.includes(e.name)) {
throw e;
}
// TODO: Should there be a limit on retries?
console.error('Failed to fetch blocks. Refetching the data from S3 in 200ms.', e);
yield __await((0, utils_1.sleep)(200));
}
}
});
}
exports.stream = stream;
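/**
 * Entry point for consumers: iterates over `stream(config)`, wraps each streamer
 * message into a `Block` from `@near-lake/primitives`, and calls
 * `onStreamerMessageReceived(block, context)` for each one, keeping the number of
 * in-flight callbacks bounded by a small queue.
 */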
function startStream(config, onStreamerMessageReceived) {
var _a, e_2, _b, _c;
return __awaiter(this, void 0, void 0, function* () {
let context = new primitives_1.LakeContext();
const queue = [];
try {
for (var _d = true, _e = __asyncValues(stream(config)), _f; _f = yield _e.next(), _a = _f.done, !_a;) {
_c = _f.value;
_d = false;
try {
const streamerMessage = _c;
// `queue` is used for throttling: without it the streamer would run ahead unbounded,
// and starting from genesis would spawn millions of `onStreamerMessageReceived` callbacks at once.
// The queue also acts as a pipeline: data is fetched from S3 while `onStreamerMessageReceived`
// is being processed, so even a queue size of 1 already gives a benefit.
// TODO: Reliable error propagation for onStreamerMessageReceived?
let block = primitives_1.Block.fromStreamerMessage(streamerMessage);
queue.push(onStreamerMessageReceived(block, context));
if (queue.length > 10) {
yield queue.shift();
}
}
finally {
_d = true;
}
}
}
catch (e_2_1) { e_2 = { error: e_2_1 }; }
finally {
try {
if (!_d && !_a && (_b = _e.return)) yield _b.call(_e);
}
finally { if (e_2) throw e_2.error; }
}
});
}
exports.startStream = startStream;
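
Usage sketch: a minimal way to consume this module from application code, assuming it is installed from npm as `@near-lake/framework`. The bucket name, region, and start block height below are illustrative values only and are not defined in this file; AWS credentials are resolved through the SDK's default provider chain unless `credentials` is supplied in the config.

const { startStream } = require('@near-lake/framework');

const lakeConfig = {
    s3BucketName: 'near-lake-data-mainnet', // illustrative bucket name
    s3RegionName: 'eu-central-1',           // illustrative region
    startBlockHeight: 80000000,             // illustrative starting height
};

// The handler receives a `Block` built from the streamer message and a `LakeContext`.
async function handleBlock(block, context) {
    console.log('Received a block', block);
}

(async () => {
    await startStream(lakeConfig, handleBlock);
})();

Alternatively, `stream(lakeConfig)` can be consumed directly with `for await ... of` to receive the raw streamer messages.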