// @discoveryjs/discovery
// Frontend framework for rapid data (JSON) analysis, shareable serverless
// reports and dashboards
import { Observer } from "../observer.js";
import { normalizeEncodings } from "../encodings/utils.js";
import * as buildinEncodings from "../encodings/index.js";
import { consumeStreamAsTypedArray, defaultStreamTransformers, ProgressTransformer, StreamTransformSelector } from "./load-data-streams.js";
// Registry of the available data loading methods, keyed by source kind.
// Each value is a factory producing a load-data state (see createLoadDataState).
export const dataSource = {
    stream: loadDataFromStream,
    event: loadDataFromEvent,
    file: loadDataFromFile,
    url: loadDataFromUrl,
    push: loadDataFromPush
};
// Returns true when `url` resolves to the current origin.
// Malformed URLs — or environments without a global `location` — count
// as cross-origin (false).
function isSameOrigin(url) {
    try {
        const resolved = new URL(url, location.origin);

        return resolved.origin === location.origin;
    } catch {
        return false;
    }
}
// Default predicate deciding whether a fetch Response is usable as a payload.
function defaultFetchOk(response) {
    const { ok } = response;

    return ok;
}
// Default extractor for the encoded (on-the-wire) payload size.
// The custom "x-file-encoded-size" header wins over "content-length".
function defaultFetchContentEncodedSize(response) {
    const headers = response.headers;

    return headers.get("x-file-encoded-size") || headers.get("content-length");
}
// Default extractor for the decoded payload size.
// Prefers the custom "x-file-size" header; otherwise "content-length" is
// trusted only for same-origin responses with no content-encoding applied
// (since then content-length equals the decoded size).
function defaultFetchContentSize(response) {
    const explicitSize = response.headers.get("x-file-size");

    if (explicitSize) {
        return explicitSize;
    }

    const uncompressed = !response.headers.get("content-encoding");

    return isSameOrigin(response.url) && uncompressed
        ? response.headers.get("content-length")
        : void 0;
}
// Default extractor for the payload's creation timestamp: the custom
// "x-file-created-at" header, then "last-modified", else undefined.
function defaultFetchContentCreatedAt(response) {
    const created = response.headers.get("x-file-created-at") || response.headers.get("last-modified");

    return created || void 0;
}
// Detects the legacy discovery CLI payload wrapper: an object with exactly
// the four keys { name, createdAt, elapsedTime, data }.
function isDiscoveryCliLegacyDataWrapper(input) {
    if (!input) {
        return false;
    }

    const expectedKeys = new Set(["name", "createdAt", "elapsedTime", "data"]);
    const keys = Object.keys(input);

    return keys.length === expectedKeys.size && keys.every((key) => expectedKeys.has(key));
}
/**
 * Normalizes decoded data plus a raw resource descriptor into a dataset record.
 *
 * When the payload is a legacy discovery CLI wrapper
 * ({ name, createdAt, elapsedTime, data }), it is unwrapped: the inner `data`
 * becomes the dataset's data, the remaining wrapper fields become `meta`, and
 * the wrapper's `createdAt` is promoted into the resource descriptor.
 *
 * @param {*} rawData - decoded payload (possibly a legacy wrapper)
 * @param {Object|undefined} rawResource - descriptor of where the data came from
 * @param {Object} info - measured stream facts: { size, compression, encoding }
 * @returns {{ resource: Object, meta: Object, data: * }}
 */
function buildDataset(rawData, rawResource, { size, compression, encoding }) {
    let rawMeta = null;

    if (isDiscoveryCliLegacyDataWrapper(rawData)) {
        const { data: unwrappedData, ...rawDataMeta } = rawData;

        rawData = unwrappedData;
        // FIX: use the wrapper's own createdAt (captured in rawDataMeta).
        // Previously this read createdAt from the unwrapped inner data
        // (`data2.createdAt`), which normally does not carry that field,
        // so the wrapper's timestamp was silently dropped.
        rawResource = { ...rawResource, createdAt: rawDataMeta.createdAt };
        rawMeta = rawDataMeta;
    }

    const data = rawData;
    const meta = rawMeta || {};
    // Pull out fields that are recomputed here; `encoding` and `size` from the
    // raw resource are intentionally discarded in favor of the measured values.
    const { type, name, encoding: ignore1, size: ignore2, encodedSize, createdAt, ...restResource } = rawResource || {};
    const resource = {
        type: type || "unknown",
        name: name || "unknown",
        compression,
        encoding,
        // Only record sizes that are finite numbers
        ...Number.isFinite(size) ? { size } : null,
        ...Number.isFinite(encodedSize) ? { encodedSize } : null,
        // Accept an ISO/date string or a numeric timestamp; fall back to "now"
        createdAt: new Date((typeof createdAt === "string" ? Date.parse(createdAt) : createdAt) || Date.now()),
        ...restResource
    };

    return {
        resource,
        meta,
        data
    };
}
export async function dataFromStream(stream, extraEncodings, totalSize, setStageProgress) {
const streamStartTime = Date.now();
const encodings = [
...normalizeEncodings(extraEncodings),
buildinEncodings.jsonxl,
buildinEncodings.json
];
let decodingTime = 0;
let compression = false;
let encoding = "unknown";
let size = 0;
await setStageProgress("receiving", getProgressState(false));
const streamPipeline = stream.pipeThrough(new TransformStream(new ProgressTransformer(setProgress))).pipeThrough(new TransformStream(new StreamTransformSelector(defaultStreamTransformers, (name) => compression = name)));
const reader = streamPipeline.getReader();
try {
const firstChunk = await reader.read();
const { value, done } = firstChunk;
if (done) {
throw new Error("Empty payload");
}
for (const { name, test, streaming, decode } of encodings) {
if (test(value)) {
encoding = name;
const readerIterator = createReaderIterator(reader, firstChunk);
const decodeRequest = streaming ? decode(readerIterator) : consumeStreamAsTypedArray(readerIterator).then(measureDecodingTime(decode));
const data = await decodeRequest;
return { data, compression, encoding, size, decodingTime };
}
}
throw new Error("No matched encoding found for the payload");
} finally {
reader.releaseLock();
}
function getProgressState(done) {
return {
done,
elapsed: Date.now() - streamStartTime,
units: "bytes",
completed: size,
total: totalSize
};
}
async function setProgress(done, sizeDelta = 0) {
size += sizeDelta;
await setStageProgress("receiving", getProgressState(done));
}
async function* createReaderIterator(reader2, firstChunk) {
while (true) {
const { value, done } = firstChunk || await reader2.read();
firstChunk = void 0;
if (done) {
break;
}
const startDecodingTime = performance.now();
yield value;
decodingTime += performance.now() - startDecodingTime;
}
}
function measureDecodingTime(decode) {
return async (payload) => {
await setStageProgress("decoding", void 0, encoding);
const startDecodingTime = performance.now();
try {
return await decode(payload);
} finally {
decodingTime = performance.now() - startDecodingTime;
}
};
}
}
// Core loading routine shared by all dataSource methods: performs the request,
// decodes the stream (unless data is provided explicitly), builds the dataset
// and publishes stage transitions through the state tracker.
// On failure the error is logged, published as the "error" stage and rethrown.
async function loadDataFromStreamInternal(request, loadDataStateTracker) {
    try {
        await loadDataStateTracker.asyncSet({ stage: "request" });

        const requestStart = /* @__PURE__ */ new Date();
        const {
            method,
            stream,
            resource: rawResource,
            options,
            data: explicitData
        } = await request();
        const responseStart = /* @__PURE__ */ new Date();
        const payloadSize = rawResource?.size;
        const { encodings } = options || {};
        // When explicit data is supplied, skip stream consumption entirely;
        // compression/encoding/size/decodingTime then keep their defaults.
        const {
            data: rawData,
            compression = false,
            encoding = "unknown",
            size = void 0,
            decodingTime = 0
        } = explicitData ? { data: explicitData } : await dataFromStream(
            stream,
            encodings,
            Number(payloadSize) || 0,
            (stage, progress, step) => loadDataStateTracker.asyncSet({ stage, progress, step })
        );

        await loadDataStateTracker.asyncSet({ stage: "received" });

        const { data, resource, meta } = buildDataset(rawData, rawResource, { size, compression, encoding });
        const finishedTime = /* @__PURE__ */ new Date();
        // Total wall time, request start → finish
        const time = Number(finishedTime) - Number(requestStart);
        const roundedDecodingTime = Math.round(decodingTime || 0);

        return {
            loadMethod: method,
            resource,
            meta,
            data,
            timings: {
                time,
                start: requestStart,
                end: finishedTime,
                // loading vs decoding split of the total time
                loadingTime: time - roundedDecodingTime,
                decodingTime: roundedDecodingTime,
                requestTime: Number(responseStart) - Number(requestStart),
                requestStart,
                requestEnd: responseStart,
                responseTime: Number(finishedTime) - Number(responseStart),
                responseStart,
                responseEnd: finishedTime
            }
        };
    } catch (error) {
        console.error("[Discovery] Error loading data:", error);
        await loadDataStateTracker.asyncSet({ stage: "error", error });
        throw error;
    }
}
// Builds a load-data state: an observable `state` for progress tracking plus
// the (async) `dataset` produced by the factory, merged with any extras.
export function createLoadDataState(datasetFactory, extra) {
    const state = new Observer({ stage: "inited" });
    // encapsulate logic into separate function since it's async,
    // but we need to return observer for progress tracking purposes
    const dataset = datasetFactory(state);

    return {
        state,
        dataset,
        ...extra
    };
}
// Adapts a request thunk into a dataset factory compatible with createLoadDataState().
export function createDatasetFactoryFromStreamRequest(request) {
    return function datasetFactory(state) {
        return loadDataFromStreamInternal(request, state);
    };
}
// Loads data from an existing ReadableStream.
export function loadDataFromStream(stream, options) {
    const makeRequest = () => ({
        method: "stream",
        stream,
        resource: options?.resource,
        options
    });

    return createLoadDataState(createDatasetFactoryFromStreamRequest(makeRequest));
}
// Loads data from a File object, deriving resource metadata from the file itself.
export function loadDataFromFile(file, options) {
    const extractedResource = extractResourceMetadata(file);
    const makeRequest = () => ({
        method: "file",
        stream: file.stream(),
        // options.resource takes precedence over an extracted resource
        resource: options?.resource || extractedResource,
        options
    });

    return createLoadDataState(
        createDatasetFactoryFromStreamRequest(makeRequest),
        { title: "Load data from file: " + (extractedResource?.name || "unknown") }
    );
}
// Loads data from a DOM event carrying a file: drag'n'drop (dataTransfer),
// clipboard paste (clipboardData) or a file input (target). The event is
// always stopped/prevented, even when no file can be extracted.
export function loadDataFromEvent(event, options) {
    const fileSource = event.dataTransfer || event.clipboardData || event.target;
    const file = fileSource?.files?.[0];

    event.stopPropagation();
    event.preventDefault();

    if (!file) {
        throw new Error("Can't extract a file from an event object");
    }

    return loadDataFromFile(file, options);
}
// Loads data by fetching a URL. A non-OK response (or one without a body)
// is converted into an Error carrying status info; JSON error payloads are
// parsed so their `error` field becomes the message.
export function loadDataFromUrl(url, options) {
    options = options || {};

    return createLoadDataState(
        createDatasetFactoryFromStreamRequest(async () => {
            const response = await fetch(url, options.fetch);
            const extractedResource = extractResourceMetadata(response, options);

            if (extractedResource && response.body) {
                return {
                    method: "fetch",
                    stream: response.body,
                    // options.resource takes precedence over an extracted resource
                    resource: options.resource || extractedResource,
                    options
                };
            }

            // Failure path: surface the response payload as an error
            const contentType = response.headers.get("content-type") || "";
            let errorPayload = await response.text();

            if (contentType.toLowerCase().startsWith("application/json")) {
                try {
                    const json = JSON.parse(errorPayload);
                    errorPayload = json.error || json;
                } catch {
                }
            }

            const fetchError = new Error(errorPayload);

            fetchError.isFetchError = true;
            fetchError.status = response.status;
            fetchError.statusText = response.statusText;
            fetchError.stack = null;

            throw fetchError;
        }),
        { title: `Load data from url: ${url}` }
    );
}
// Creates a load-data state fed by explicit calls rather than an existing
// stream. Usage: start(resource?) → push(chunk)* → finish(encodedSize?).
export function loadDataFromPush(options) {
    let controller;
    const stream = new ReadableStream({
        start(controller_) {
            // Capture the controller so push()/finish() can enqueue/close
            controller = controller_;
        },
        cancel() {
            controller = null;
        }
    });
    let resolveRequest;
    let pushResource = null;
    // The request stays pending until start() (or the first push()) runs
    const request = new Promise((resolve) => {
        resolveRequest = (resource) => resolve({
            method: "push",
            stream,
            // resource takes precedence over options.resource
            resource: resource ? pushResource = resource : options?.resource,
            options
        });
    });

    options = options || {};

    return createLoadDataState(
        createDatasetFactoryFromStreamRequest(() => request),
        {
            start(resource) {
                resolveRequest(resource);
            },
            push(chunk) {
                // Resolving again is a no-op when start() already ran
                resolveRequest();
                controller?.enqueue(chunk);
            },
            // error(error) {
            //     rejectRequest(error);
            // },
            finish(encodedSize) {
                controller?.close();
                controller = null;

                // Patch the pushed resource with the final encoded size, if known
                if (encodedSize !== void 0 && isFinite(encodedSize) && pushResource !== null) {
                    pushResource.encodedSize = Number(encodedSize);
                }

                pushResource = null;
            }
        }
    );
}
// Mirrors a loader's state into a progressbar. Resolves with the dataset once
// the "received" stage is reached; rejects (and unsubscribes) on "error".
export function syncLoaderWithProgressbar({ dataset, state }, progressbar) {
    return new Promise((resolve, reject) => {
        state.subscribeSync(async (loadDataState, unsubscribe) => {
            const { stage } = loadDataState;

            if (stage === "error") {
                unsubscribe();
                reject(loadDataState.error);
                return;
            }

            const { progress, step } = loadDataState;

            await progressbar.setState({ stage, progress }, step);

            if (stage === "received") {
                unsubscribe();
                resolve(dataset);
            }
        });
    });
}
export function extractResourceMetadata(source, options) {
if (source instanceof Response) {
const isResponseOk = options?.isResponseOk || defaultFetchOk;
const getContentSize = options?.getContentSize || defaultFetchContentSize;
const getContentEncodedSize = options?.getContentEncodedSize || defaultFetchContentEncodedSize;
const getContentCreatedAt = options?.getContentCreatedAt || defaultFetchContentCreatedAt;
if (isResponseOk(source)) {
return {
type: "url",
name: source.url,
size: Number(getContentSize(source)) || null,
encodedSize: Number(getContentEncodedSize(source)),
createdAt: getContentCreatedAt(source)
};
}
}
if (source instanceof File) {
return {
type: "file",
name: source.name,
size: source.size,
createdAt: source.lastModified
};
}
if (source instanceof Blob) {
return {
size: source.size
};
}
if (ArrayBuffer.isView(source)) {
return {
size: source.byteLength
};
}
if (typeof source === "string") {
return {
size: source.length
};
}
}
// Converts a source into a Blob when feasible.
// Single-part values (strings, typed-array views, ArrayBuffers, and plain
// non-iterable objects) are wrapped into a one-element parts array first;
// sync-iterable values are passed to Blob as-is. Anything else (including
// async-only iterables) is returned unchanged.
export function convertToBlobIfPossible(source) {
    const isSinglePart =
        typeof source === "string" ||
        ArrayBuffer.isView(source) ||
        source instanceof ArrayBuffer ||
        (source && !(Symbol.iterator in source) && !(Symbol.asyncIterator in source));
    const blobParts = isSinglePart ? [source] : source;

    if (blobParts && Symbol.iterator in blobParts) {
        return new Blob(blobParts);
    }

    return source;
}
/**
 * Normalizes a source value into a ReadableStream.
 *
 * Resolution order: ReadableStream (returned as-is) → Response (its body) →
 * anything Blob-convertible (Blob#stream()) → async-iterable (pumped via a
 * custom ReadableStream). Any other value throws.
 *
 * @param {*} source
 * @returns {ReadableStream}
 * @throws {Error} for a body-less Response or an unconvertible value
 */
export function getReadableStreamFromSource(source) {
    if (source instanceof ReadableStream) {
        return source;
    }

    if (source instanceof Response) {
        if (source.body === null) {
            throw new Error("Response has no body");
        }

        return source.body;
    }

    source = convertToBlobIfPossible(source);

    if (source instanceof Blob) {
        return source.stream();
    }

    // Last resort: treat the source as an async iterable and pump it
    return new ReadableStream({
        start() {
            const createIterator = source !== null && typeof source === "object" && Symbol.asyncIterator in source
                ? source[Symbol.asyncIterator]
                : void 0;

            if (typeof createIterator !== "function") {
                throw new Error("Bad value type (can't convert to a stream)");
            }

            // FIX: invoke with `source` as the receiver. The previous unbound
            // call (`generator()`) left `this` undefined inside the method, so
            // sources whose [Symbol.asyncIterator] returns `this` (e.g. async
            // generator objects) produced `undefined` and pull() crashed.
            this.iterator = createIterator.call(source);
        },
        async pull(controller) {
            const { value, done } = await this.iterator.next();

            if (done) {
                this.iterator = null;
                controller.close();
            } else {
                controller.enqueue(value);
            }
        },
        cancel() {
            this.iterator = null;
        }
    });
}