// @attestate/crawler
// Version:
// @attestate/crawler is a tool chain to retrieve on-chain data from Ethereum.
// 618 lines (559 loc) • 13.9 kB
// JavaScript
//@format
import { constants, writeFileSync, unlinkSync } from "fs";
import { readdir, access, unlink, rmdir } from "fs/promises";
import { join, resolve, dirname } from "path";
import { fileURLToPath } from "url";
import EventEmitter from "events";
import test from "ava";
import { inDataDir, fileExists } from "../src/disc.mjs";
import {
extract,
transform,
load,
EXTRACTOR_CODES,
prepareMessages,
tidy,
} from "../src/lifecycle.mjs";
import * as lifecycle from "../src/lifecycle.mjs";
import {
ValidationError,
NotFoundError,
NotImplementedError,
} from "../src/errors.mjs";
// Directory containing this test file. It is derived from `import.meta.url`
// via `fileURLToPath` and `dirname`.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Commissioner name shared by the tests below: it is used as the strategy or
// module name handed to `extract` and as the commissioner handed to
// `prepareMessages`.
const mockMessageCommissioner = "mockCommissioner";
// Message fixture reused across the extract and prepareMessages tests. The
// prepareMessages test below expects this to be the only one of its three
// inputs that survives filtering.
const mockMessage = {
  type: "https",
  version: "0.0.1",
  options: {
    url: "https://attestate.com",
    method: "GET",
  },
};
// Calls lifecycle.walk with two strategies whose coordinators are wrapped in
// Proxies that record every read of `interval`. The test passes when both
// intervals (2000 ms and 200 ms) were read within one second of the call.
test.serial(
  "walk: schedules multiple strategies independent of run-time and interval delay",
  async (t) => {
    // Names of the strategies whose coordinator `interval` has been read.
    const intervalReads = new Set();

    // Builds a strategy with every stage disabled and a coordinator that
    // reports reads of its `interval` property into `intervalReads`.
    const makeStrategy = (name, interval, module) => ({
      name,
      extractor: false,
      transformer: false,
      loader: false,
      coordinator: new Proxy(
        { interval, module },
        {
          get(target, prop) {
            if (prop === "interval") {
              intervalReads.add(name);
            }
            return target[prop];
          },
        }
      ),
    });

    const strategy1 = makeStrategy("testStrategy1", 2000, "testModule1");
    const strategy2 = makeStrategy("testStrategy2", 200, "testModule2");
    const worker = {};
    const messageRouter = {};

    // walk is not awaited; the test only waits one second before checking
    // which intervals have been read.
    lifecycle.walk(worker, { path: [strategy1, strategy2] }, messageRouter);
    await new Promise((done) => setTimeout(done, 1000));

    t.true(intervalReads.has("testStrategy1"));
    t.true(intervalReads.has("testStrategy2"));
  }
);
// Hands lifecycle.run a reinvocation callback that counts its own calls and
// keeps calling the callback it receives as fifth argument until the counter
// reaches five. The test passes when exactly five invocations were counted.
test.serial("run: periodically called", async (t) => {
  let invocations = 0;

  const reinvocation = async (
    currentStrategy,
    currentWorker,
    currentRouter,
    currentConfig,
    next
  ) => {
    invocations += 1;
    if (invocations >= 5) {
      return undefined;
    }
    const outcome = await next(
      currentStrategy,
      currentWorker,
      currentRouter,
      currentConfig,
      next
    );
    return outcome;
  };

  const strategy = {
    name: "testStrategy",
    extractor: false,
    transformer: false,
    loader: false,
    coordinator: { interval: 100, module: "testModule" },
  };
  const worker = {};
  const messageRouter = {};
  const config = {};

  await lifecycle.run(strategy, worker, messageRouter, config, reinvocation);

  t.is(invocations, 5);
});
// tidy must not reject when the file it is asked to tidy is absent from the
// data directory.
test.serial("tidy: non-existent file", async (t) => {
  process.env.DATA_DIR = resolve("test/fixtures");
  const missingFile = "non-existent-file.txt";

  // Precondition: the file really is absent.
  const isPresent = await fileExists(inDataDir(missingFile));
  t.false(isPresent);

  // Run tidy without archiving and expect it to settle without an error.
  const archive = false;
  await t.notThrowsAsync(async () => tidy(missingFile, archive));
});
// With `archive` set to false, tidy is expected to remove the file from the
// data directory.
test.serial("tidy: file exists and archive is false", async (t) => {
  process.env.DATA_DIR = resolve("test/fixtures");
  const filePath = "test-file.txt";
  const fullFilePath = inDataDir(filePath);
  // Create the fixture that tidy should delete.
  writeFileSync(fullFilePath, "Hello, world!");
  try {
    const archive = false;
    await tidy(filePath, archive);
    t.false(await fileExists(fullFilePath));
  } finally {
    // Runs even when tidy rejects or the assertion throws, so a failing run
    // cannot leak the fixture into the following serial test.
    try {
      unlinkSync(fullFilePath);
    } catch (err) {
      // ENOENT is the expected case: tidy already removed the file. Any other
      // failure is reported but kept best-effort so it does not mask the
      // actual test outcome.
      if (err.code !== "ENOENT") {
        t.log(`could not remove ${fullFilePath}: ${err.message}`);
      }
    }
  }
});
// With `archive` set to true, tidy is expected to leave a file whose name
// ends in `_test-file.txt` in the data directory.
test.serial("tidy: file exists and archive is true", async (t) => {
  // Set the data directory here as well, so the test does not depend on an
  // earlier serial test having run first.
  process.env.DATA_DIR = resolve("test/fixtures");
  const filePath = "test-file.txt";
  const fullFilePath = inDataDir(filePath);
  const isArchiveOfFile = (file) => file.endsWith(`_${filePath}`);

  // Deletes every archived copy of the fixture, not just the first match.
  const removeArchives = async () => {
    const entries = await readdir(process.env.DATA_DIR);
    const archives = entries.filter(isArchiveOfFile);
    await Promise.all(archives.map((file) => unlink(inDataDir(file))));
  };

  // Start from a clean directory: an archive left behind by an earlier run
  // would otherwise satisfy the assertion even if tidy archived nothing.
  await removeArchives();
  writeFileSync(fullFilePath, "Hello, world!");
  try {
    const archive = true;
    await tidy(filePath, archive);
    const files = await readdir(process.env.DATA_DIR);
    t.true(files.some(isArchiveOfFile));
  } finally {
    // Clean up regardless of the outcome so no archive outlives the test.
    await removeArchives();
  }
});
// load is called with a loader configuration whose input file name is
// "non-existent-file"; the test passes as long as the awaited call does not
// reject.
test("load function without existent input file", async (t) => {
  const loaderConfig = { input: { name: "non-existent-file" } };
  await load("abc", loaderConfig);
  t.pass();
});
// Runs load with a mocked database and a `direct` generator that yields one
// record per element of each parsed input line, keyed by `[elem.primary]`.
// The six planned assertions are: two openDB name checks, two truthy values,
// and two key checks (["a"], then ["b"]).
test("direct load function", async (t) => {
  t.plan(6);
  const knownDbNames = ["test-strategy:direct", "test-strategy:order"];
  const expectedKeys = [["a"], ["b"]];
  let putCalls = 0;

  const dbMock = {
    openDB(name) {
      t.true(knownDbNames.includes(name));
      return {
        async put(key, value) {
          t.truthy(value);
          // Only the first two writes have an expected key.
          if (putCalls < expectedKeys.length) {
            t.deepEqual(key, expectedKeys[putCalls]);
          }
          putCalls += 1;
        },
      };
    },
  };

  const loader = {
    input: { name: "../fixtures/file1.data" },
    output: { name: "../fixtures/file1.output" },
    module: {
      // The order index stays empty in this test.
      *order() {},
      *direct({ state }) {
        for (const elem of JSON.parse(state.line)) {
          yield { key: [elem.primary], value: elem };
        }
      },
    },
  };

  const state = {};
  await load("test-strategy", loader, dbMock, state);
});
// Runs load with a mocked database and an `order` generator that yields one
// record per element of each parsed input line, keyed by
// `[elem.primary, elem.secondary]`. The six planned assertions are: two
// openDB name checks, two truthy values, and two key checks (["a", "c"],
// then ["b", "a"]).
test("order load function", async (t) => {
  t.plan(6);
  const knownDbNames = ["test-strategy:direct", "test-strategy:order"];
  const expectedKeys = [
    ["a", "c"],
    ["b", "a"],
  ];
  let putCalls = 0;

  const dbMock = {
    openDB(name) {
      t.true(knownDbNames.includes(name));
      return {
        async put(key, value) {
          t.truthy(value);
          // Only the first two writes have an expected key.
          if (putCalls < expectedKeys.length) {
            t.deepEqual(key, expectedKeys[putCalls]);
          }
          putCalls += 1;
        },
      };
    },
  };

  const loader = {
    input: { name: "../fixtures/file1.data" },
    output: { name: "../fixtures/file1.output" },
    module: {
      // The direct index stays empty in this test.
      *direct() {},
      *order({ state }) {
        for (const elem of JSON.parse(state.line)) {
          yield { key: [elem.primary, elem.secondary], value: elem.primary };
        }
      },
    },
  };

  const state = {};
  await load("test-strategy", loader, dbMock, state);
});
// transform is given an input file name that does not exist; the test passes
// when the awaited call produces a falsy result.
test("if function transform gracefully returns when sourceFile doesn't exist", async (t) => {
  const strategyName = "test-strategy";
  const transformer = {
    input: { name: "doesn't exist" },
    module: {
      onLine() {},
    },
  };

  const outcome = await transform(strategyName, transformer);

  t.falsy(outcome);
});
// Runs transform over ../fixtures/file0.data with an onLine handler that
// checks the first two lines ("line0", "line1") and asks for "hello world" to
// be written. Afterwards the output file must be readable; it is deleted
// again in every case.
test("reading a file by line using the line reader", async (t) => {
  t.plan(3);
  const expectedLines = ["line0", "line1"];
  let handledLines = 0;

  const lineHandlerMock = ({ state }) => {
    // Only the first two lines have an expected value.
    if (handledLines < expectedLines.length) {
      t.is(state.line, expectedLines[handledLines]);
    }
    handledLines += 1;
    return { write: "hello world", messages: [] };
  };

  const transformer = {
    module: {
      onLine: lineHandlerMock,
      onClose: () => {},
    },
    args: { arg1: "argument1" },
    input: { name: "../fixtures/file0.data" },
    output: { name: "../fixtures/file0.output" },
  };

  const state = {};
  await transform("test-strategy", transformer, state);
  t.is(handledLines, 2);

  const outputPath = inDataDir(transformer.output.name);
  try {
    await access(outputPath, constants.R_OK);
  } catch (err) {
    // The output file is missing or unreadable.
    t.log(err);
    t.fail();
  } finally {
    await unlink(outputPath);
  }
});
// The strategy's `init` returns `false` instead of a result object, and the
// test expects extract to reject.
test("if extract rejects result if it is invalid", async (t) => {
  const mockStrategy = {
    module: {
      name: "mockMessage",
      init: () => {
        return false;
      },
    },
  };
  // Worker stand-in: every posted message is emitted on the router under the
  // `<commissioner>-extraction` event.
  class Worker extends EventEmitter {
    postMessage(message) {
      return router.emit(`${message.commissioner}-extraction`, message);
    }
  }
  const worker = new Worker();
  const router = new EventEmitter();
  const state = {};
  const config = {
    environment: {},
  };
  // The rejection reaches throwsAsync directly; a try/catch that only
  // rethrows would add nothing.
  await t.throwsAsync(async () => {
    await extract(
      mockStrategy.module.name,
      mockStrategy,
      worker,
      router,
      state,
      config
    );
  });
});
// `init` returns one message and `update` returns `false`; the test expects
// extract to reject and the router to have no event names registered
// afterwards.
test("if extract function can handle bad results from update", async (t) => {
  const router = new EventEmitter();
  // Worker stand-in: every posted message is emitted on the router under the
  // `<commissioner>-extraction` event.
  class Worker extends EventEmitter {
    postMessage(message) {
      const { commissioner } = message;
      return router.emit(`${commissioner}-extraction`, message);
    }
  }
  const worker = new Worker();

  const mockStrategy = {
    output: { name: "output" },
    module: {
      name: mockMessageCommissioner,
      init: () => ({ messages: [mockMessage], write: null }),
      update: () => false,
    },
  };
  const state = {};
  const config = { environment: {} };

  const runExtract = async () => {
    return await extract(
      mockStrategy.module.name,
      mockStrategy,
      worker,
      router,
      state,
      config
    );
  };
  await t.throwsAsync(runExtract);

  const registeredEvents = router.eventNames();
  t.is(registeredEvents.length, 0);
});
// `init` returns a message that carries an `error` field, and `update` fails
// the test if it is ever called. The test expects extract to resolve with
// code SHUTDOWN_IN_UPDATE and the router to have no event names registered
// afterwards.
test("if extract function can handle lifecycle errors", async (t) => {
  const router = new EventEmitter();
  // Worker stand-in: every posted message is emitted on the router under the
  // `<commissioner>-extraction` event.
  class Worker extends EventEmitter {
    postMessage(message) {
      const { commissioner } = message;
      return router.emit(`${commissioner}-extraction`, message);
    }
  }
  const worker = new Worker();

  const failingMessage = { ...mockMessage, error: "this is an error" };
  const extractor = {
    output: { name: "name" },
    args: {},
    module: {
      init: () => ({ messages: [failingMessage], write: null }),
      update: () => {
        t.fail();
      },
    },
  };
  const state = {};
  const config = { environment: {} };

  const result = await extract(
    mockMessageCommissioner,
    extractor,
    worker,
    router,
    state,
    config
  );

  t.is(result.code, EXTRACTOR_CODES.SHUTDOWN_IN_UPDATE);
  t.is(router.eventNames().length, 0);
});
// `init` returns one message and `update` returns no further messages; the
// test expects extract to resolve and the router to have no event names
// registered afterwards.
test("if extract() resolves the promise and removes the listener on no new messages", async (t) => {
  const router = new EventEmitter();
  // Worker stand-in: every posted message is emitted on the router under the
  // `<commissioner>-extraction` event.
  class Worker extends EventEmitter {
    postMessage(message) {
      const { commissioner } = message;
      return router.emit(`${commissioner}-extraction`, message);
    }
  }
  const worker = new Worker();

  const extractor = {
    output: { name: "name" },
    args: {},
    module: {
      init: () => ({ messages: [mockMessage], write: null }),
      update: () => ({ messages: [], write: null }),
    },
  };
  const state = {};
  const config = { environment: {} };

  await extract(
    mockMessageCommissioner,
    extractor,
    worker,
    router,
    state,
    config
  );

  const registeredEvents = router.eventNames();
  t.deepEqual(registeredEvents, []);
  t.pass();
});
// `init` returns no messages at all; the test expects extract to resolve
// with code SHUTDOWN_IN_INIT and the router to have no event names
// registered afterwards.
test("if extract() resolves the promise and removes the listener on no message from init", async (t) => {
  const router = new EventEmitter();
  // Worker stand-in: every posted message is emitted on the router under the
  // `<commissioner>-extraction` event.
  class Worker extends EventEmitter {
    postMessage(message) {
      const { commissioner } = message;
      return router.emit(`${commissioner}-extraction`, message);
    }
  }
  const worker = new Worker();

  const strategyName = "a name";
  const extractor = {
    output: { name: "name" },
    args: {},
    module: {
      init: () => ({ messages: [], write: null }),
      update: () => ({ messages: [], write: null }),
    },
  };
  const state = {};
  const config = { environment: {} };

  const result = await extract(
    strategyName,
    extractor,
    worker,
    router,
    state,
    config
  );

  t.is(result.code, EXTRACTOR_CODES.SHUTDOWN_IN_INIT);
  t.deepEqual(router.eventNames(), []);
  t.pass();
});
// `init` returns no messages but asks for "some-test-data" to be written;
// the test expects the configured output file to exist after extract has
// resolved, and then removes it.
test("if extract function can write to the correct output name", async (t) => {
  const router = new EventEmitter();
  // Worker stand-in: every posted message is emitted on the router under the
  // `<commissioner>-extraction` event.
  class Worker extends EventEmitter {
    postMessage(message) {
      const { commissioner } = message;
      return router.emit(`${commissioner}-extraction`, message);
    }
  }
  const worker = new Worker();

  const strategyName = "a name";
  const extractor = {
    output: { name: "../fixtures/file3.output" },
    args: {},
    module: {
      init: () => ({ messages: [], write: "some-test-data" }),
      update: () => null,
    },
  };
  const state = {};
  const config = { environment: {} };

  await extract(strategyName, extractor, worker, router, state, config);

  const outputPath = inDataDir(extractor.output.name);
  const wasWritten = await fileExists(outputPath);
  t.is(wasWritten, true);
  await unlink(outputPath);
});
// Of the three inputs (the valid fixture, an empty object, and a copy with
// type "invalid-type"), exactly one prepared message is expected, and it
// must carry the commissioner name.
test("if prepareMessages filters invalid message and prepare message for worker", async (t) => {
  const emptyMessage = {};
  const wrongTypeMessage = { ...mockMessage, type: "invalid-type" };
  const candidates = [mockMessage, emptyMessage, wrongTypeMessage];

  const prepared = prepareMessages(candidates, mockMessageCommissioner);

  t.is(prepared.length, 1);
  const [first] = prepared;
  t.is(first.commissioner, mockMessageCommissioner);
});
// prepareMessages is expected to throw synchronously when given `null`.
// NOTE(review): the title names filterValidWorkerMessages, but the call under
// test is prepareMessages — presumably a leftover from a rename; confirm.
test("if filterValidWorkerMessages throws error on invalid input", async (t) => {
  const callWithNull = () => prepareMessages(null);
  t.throws(callWithNull);
});