UNPKG

@discoveryjs/discovery

Version:

Frontend framework for rapid data (JSON) analysis, shareable serverless reports and dashboards

421 lines (420 loc) 13.6 kB
// Data loading pipeline: turns streams, files, URLs, drag/paste events and
// push-style producers into a { resource, meta, data } dataset, while
// reporting progress ("request" → "receiving" → "decoding" → "received")
// through an Observer-based state tracker.
import { Observer } from "../observer.js";
import { normalizeEncodings } from "../encodings/utils.js";
import * as buildinEncodings from "../encodings/index.js";
import {
    consumeStreamAsTypedArray,
    defaultStreamTransformers,
    ProgressTransformer,
    StreamTransformSelector
} from "./load-data-streams.js";

// Registry mapping a source kind to its loader.
export const dataSource = {
    stream: loadDataFromStream,
    event: loadDataFromEvent,
    file: loadDataFromFile,
    url: loadDataFromUrl,
    push: loadDataFromPush
};

// True when `url` resolves to the current page's origin; malformed URLs count as foreign.
function isSameOrigin(url) {
    try {
        return new URL(url, location.origin).origin === location.origin;
    } catch {
        return false;
    }
}

// Default predicates/extractors used by extractResourceMetadata() for fetch Responses.
// Each can be overridden via options.
function defaultFetchOk(response) {
    return response.ok;
}

function defaultFetchContentEncodedSize(response) {
    return response.headers.get("x-file-encoded-size") || response.headers.get("content-length");
}

function defaultFetchContentSize(response) {
    // content-length only reflects the decoded size for same-origin,
    // non-content-encoded responses; otherwise the size is unknown.
    return response.headers.get("x-file-size") || (
        isSameOrigin(response.url) && !response.headers.get("content-encoding")
            ? response.headers.get("content-length")
            : void 0
    );
}

function defaultFetchContentCreatedAt(response) {
    return response.headers.get("x-file-created-at") ||
        response.headers.get("last-modified") ||
        void 0;
}

// Detects the legacy discovery-cli payload shape:
// exactly the four keys { name, createdAt, elapsedTime, data }.
function isDiscoveryCliLegacyDataWrapper(input) {
    const keys = input ? Object.keys(input) : [];
    const expectedKeys = ["name", "createdAt", "elapsedTime", "data"];

    if (keys.length !== 4 || keys.some((key) => !expectedKeys.includes(key))) {
        return false;
    }

    return true;
}

// Normalizes raw payload + raw resource descriptor into { resource, meta, data }.
function buildDataset(rawData, rawResource, { size, compression, encoding }) {
    let rawMeta = null;

    if (isDiscoveryCliLegacyDataWrapper(rawData)) {
        // Unwrap the legacy wrapper: its non-data keys become the dataset meta.
        const { data: unwrapped, ...legacyMeta } = rawData;

        rawData = unwrapped;
        // NOTE(review): createdAt is read from the unwrapped payload, not from
        // legacyMeta.createdAt — mirrors the original; confirm against upstream intent.
        rawResource = { ...rawResource, createdAt: unwrapped.createdAt };
        rawMeta = legacyMeta;
    }

    const data = rawData;
    const meta = rawMeta || {};
    // Pull known fields out of the raw resource; encoding/size from the raw
    // resource are discarded in favor of the values measured during decoding.
    const {
        type,
        name,
        encoding: discardedEncoding,
        size: discardedSize,
        encodedSize,
        createdAt,
        ...restResource
    } = rawResource || {};
    const resource = {
        type: type || "unknown",
        name: name || "unknown",
        compression,
        encoding,
        ...(Number.isFinite(size) ? { size } : null),
        ...(Number.isFinite(encodedSize) ? { encodedSize } : null),
        createdAt: new Date((typeof createdAt === "string" ? Date.parse(createdAt) : createdAt) || Date.now()),
        ...restResource
    };

    return { resource, meta, data };
}

// Consumes a ReadableStream, auto-detecting compression (via the default
// stream transformers) and encoding (extra encodings first, then built-in
// jsonxl/json). Reports progress through setStageProgress(stage, progress, step).
// Resolves to { data, compression, encoding, size, decodingTime }.
export async function dataFromStream(stream, extraEncodings, totalSize, setStageProgress) {
    const streamStartTime = Date.now();
    // Caller-provided encodings take precedence over the built-in ones.
    const encodings = [
        ...normalizeEncodings(extraEncodings),
        buildinEncodings.jsonxl,
        buildinEncodings.json
    ];
    let decodingTime = 0;
    let compression = false;
    let encoding = "unknown";
    let size = 0;

    await setStageProgress("receiving", getProgressState(false));

    // Progress tracking first, then transparent decompression selection.
    const streamPipeline = stream
        .pipeThrough(new TransformStream(new ProgressTransformer(setProgress)))
        .pipeThrough(new TransformStream(new StreamTransformSelector(
            defaultStreamTransformers,
            (name) => compression = name
        )));
    const reader = streamPipeline.getReader();

    try {
        const firstChunk = await reader.read();
        const { value, done } = firstChunk;

        if (done) {
            throw new Error("Empty payload");
        }

        // Probe encodings against the first chunk; the first match wins.
        for (const { name, test, streaming, decode } of encodings) {
            if (test(value)) {
                encoding = name;

                const readerIterator = createReaderIterator(reader, firstChunk);
                // Streaming decoders consume chunks directly; otherwise the whole
                // payload is buffered into a typed array before decoding.
                const decodeRequest = streaming
                    ? decode(readerIterator)
                    : consumeStreamAsTypedArray(readerIterator).then(measureDecodingTime(decode));
                const data = await decodeRequest;

                return { data, compression, encoding, size, decodingTime };
            }
        }

        throw new Error("No matched encoding found for the payload");
    } finally {
        reader.releaseLock();
    }

    function getProgressState(done) {
        return {
            done,
            elapsed: Date.now() - streamStartTime,
            units: "bytes",
            completed: size,
            total: totalSize
        };
    }

    async function setProgress(done, sizeDelta = 0) {
        size += sizeDelta;
        await setStageProgress("receiving", getProgressState(done));
    }

    // Replays the already-read first chunk, then drains the reader.
    // Time spent by the consumer between yields is attributed to decoding.
    async function* createReaderIterator(streamReader, pendingChunk) {
        while (true) {
            const { value, done } = pendingChunk || await streamReader.read();

            pendingChunk = void 0;

            if (done) {
                break;
            }

            const startDecodingTime = performance.now();
            yield value;
            decodingTime += performance.now() - startDecodingTime;
        }
    }

    function measureDecodingTime(decodeFn) {
        return async (payload) => {
            await setStageProgress("decoding", void 0, encoding);

            const startDecodingTime = performance.now();

            try {
                return await decodeFn(payload);
            } finally {
                // NOTE(review): assignment (not +=) replaces any time accumulated
                // while receiving — mirrors the original; confirm upstream intent.
                decodingTime = performance.now() - startDecodingTime;
            }
        };
    }
}

// Core loader: runs `request()` to obtain a stream (or explicit data), decodes
// it, builds the dataset and a detailed timings record, mirroring every stage
// into the state tracker. Rethrows after recording failures as stage "error".
async function loadDataFromStreamInternal(request, loadDataStateTracker) {
    try {
        await loadDataStateTracker.asyncSet({ stage: "request" });

        const requestStart = new Date();
        const { method, stream, resource: rawResource, options, data: explicitData } = await request();
        const responseStart = new Date();
        const payloadSize = rawResource?.size;
        const { encodings } = options || {};
        // Explicit data skips stream consumption entirely.
        const {
            data: rawData,
            compression = false,
            encoding = "unknown",
            size = void 0,
            decodingTime = 0
        } = explicitData
            ? { data: explicitData }
            : await dataFromStream(
                stream,
                encodings,
                Number(payloadSize) || 0,
                (stage, progress, step) => loadDataStateTracker.asyncSet({ stage, progress, step })
            );

        await loadDataStateTracker.asyncSet({ stage: "received" });

        const { data, resource, meta } = buildDataset(rawData, rawResource, { size, compression, encoding });
        const finishedTime = new Date();
        const time = Number(finishedTime) - Number(requestStart);
        const roundedDecodingTime = Math.round(decodingTime || 0);

        return {
            loadMethod: method,
            resource,
            meta,
            data,
            timings: {
                time,
                start: requestStart,
                end: finishedTime,
                loadingTime: time - roundedDecodingTime,
                decodingTime: roundedDecodingTime,
                requestTime: Number(responseStart) - Number(requestStart),
                requestStart,
                requestEnd: responseStart,
                responseTime: Number(finishedTime) - Number(responseStart),
                responseStart,
                responseEnd: finishedTime
            }
        };
    } catch (error) {
        console.error("[Discovery] Error loading data:", error);
        await loadDataStateTracker.asyncSet({ stage: "error", error });
        throw error;
    }
}

// Wraps a dataset factory with an observable load state so consumers can
// track progress while awaiting the dataset.
export function createLoadDataState(datasetFactory, extra) {
    const state = new Observer({ stage: "inited" });

    return {
        state,
        // encapsulate logic into separate function since it's async,
        // but we need to return observer for progress tracking purposes
        dataset: datasetFactory(state),
        ...extra
    };
}

export function createDatasetFactoryFromStreamRequest(request) {
    return (state) => loadDataFromStreamInternal(request, state);
}

// Load from an already-open ReadableStream.
export function loadDataFromStream(stream, options) {
    return createLoadDataState(
        createDatasetFactoryFromStreamRequest(() => ({
            method: "stream",
            stream,
            resource: options?.resource,
            options
        }))
    );
}

// Load from a File (or Blob-like with .stream()/.name).
export function loadDataFromFile(file, options) {
    const resource = extractResourceMetadata(file);

    return createLoadDataState(
        createDatasetFactoryFromStreamRequest(() => ({
            method: "file",
            stream: file.stream(),
            resource: options?.resource || resource, // options.resource takes precedence over an extracted resource
            options
        })),
        { title: "Load data from file: " + (resource?.name || "unknown") }
    );
}

// Load the first file carried by a drop/paste/input-change event.
export function loadDataFromEvent(event, options) {
    const source = event.dataTransfer || event.clipboardData || event.target;
    const file = source?.files?.[0];

    event.stopPropagation();
    event.preventDefault();

    if (!file) {
        throw new Error("Can't extract a file from an event object");
    }

    return loadDataFromFile(file, options);
}

// Fetch a URL and load its body; non-OK responses become a rich Error
// (isFetchError/status/statusText) built from the response text, unwrapping
// a JSON { error } payload when the content type is application/json.
export function loadDataFromUrl(url, options) {
    options = options || {};

    return createLoadDataState(
        createDatasetFactoryFromStreamRequest(async () => {
            const response = await fetch(url, options.fetch);
            const resource = extractResourceMetadata(response, options);

            if (resource && response.body) {
                return {
                    method: "fetch",
                    stream: response.body,
                    resource: options.resource || resource, // options.resource takes precedence over an extracted resource
                    options
                };
            }

            const contentType = response.headers.get("content-type") || "";
            let error = await response.text();

            if (contentType.toLowerCase().startsWith("application/json")) {
                try {
                    const json = JSON.parse(error);

                    error = json.error || json;
                } catch {}
            }

            error = new Error(error);
            error.isFetchError = true;
            error.status = response.status;
            error.statusText = response.statusText;
            error.stack = null;

            throw error;
        }),
        { title: `Load data from url: ${url}` }
    );
}

// Push-style loading: returns { start, push, finish } so a producer can feed
// chunks imperatively. start() (optionally with a resource descriptor) or the
// first push() kicks off decoding.
export function loadDataFromPush(options) {
    let controller;
    const stream = new ReadableStream({
        start(streamController) {
            controller = streamController;
        },
        cancel() {
            controller = null;
        }
    });
    let resolveRequest;
    let pushResource = null;
    const request = new Promise((resolve) => {
        resolveRequest = (resource) => resolve({
            method: "push",
            stream,
            resource: resource ? pushResource = resource : options?.resource, // resource takes precedence over options.resource
            options
        });
    });

    options = options || {};

    return createLoadDataState(
        createDatasetFactoryFromStreamRequest(() => request),
        {
            start(resource) {
                resolveRequest(resource);
            },
            push(chunk) {
                resolveRequest();
                controller?.enqueue(chunk);
            },
            // error(error) {
            //     rejectRequest(error);
            // },
            finish(encodedSize) {
                controller?.close();
                controller = null;

                // Back-fill the encoded size once the producer knows it.
                if (encodedSize !== void 0 && isFinite(encodedSize) && pushResource !== null) {
                    pushResource.encodedSize = Number(encodedSize);
                }

                pushResource = null;
            }
        }
    );
}

// Bridges a load-data state observer to a progressbar; resolves with the
// dataset once stage "received" is reached, rejects on stage "error".
export function syncLoaderWithProgressbar({ dataset, state }, progressbar) {
    return new Promise((resolve, reject) =>
        state.subscribeSync(async (loadDataState, unsubscribe) => {
            if (loadDataState.stage === "error") {
                unsubscribe();
                reject(loadDataState.error);
                return;
            }

            const { stage, progress, step } = loadDataState;

            await progressbar.setState({ stage, progress }, step);

            if (stage === "received") {
                unsubscribe();
                resolve(dataset);
            }
        })
    );
}

// Derives a resource descriptor from a Response, File, Blob, typed array or
// string; returns undefined for anything else (including non-OK responses).
export function extractResourceMetadata(source, options) {
    if (source instanceof Response) {
        const isResponseOk = options?.isResponseOk || defaultFetchOk;
        const getContentSize = options?.getContentSize || defaultFetchContentSize;
        const getContentEncodedSize = options?.getContentEncodedSize || defaultFetchContentEncodedSize;
        const getContentCreatedAt = options?.getContentCreatedAt || defaultFetchContentCreatedAt;

        if (isResponseOk(source)) {
            return {
                type: "url",
                name: source.url,
                size: Number(getContentSize(source)) || null,
                encodedSize: Number(getContentEncodedSize(source)),
                createdAt: getContentCreatedAt(source)
            };
        }
    }

    if (source instanceof File) {
        return {
            type: "file",
            name: source.name,
            size: source.size,
            createdAt: source.lastModified
        };
    }

    if (source instanceof Blob) {
        return { size: source.size };
    }

    if (ArrayBuffer.isView(source)) {
        return { size: source.byteLength };
    }

    if (typeof source === "string") {
        return { size: source.length };
    }
}

// Wraps single values (string / typed array / ArrayBuffer / plain non-iterable
// object) into a one-element parts array, then builds a Blob from any sync
// iterable of parts; anything else is returned untouched.
export function convertToBlobIfPossible(source) {
    let blobParts = source;

    if (
        typeof blobParts === "string" ||
        ArrayBuffer.isView(blobParts) ||
        blobParts instanceof ArrayBuffer ||
        blobParts && Symbol.iterator in blobParts === false && Symbol.asyncIterator in blobParts === false
    ) {
        blobParts = [blobParts];
    }

    if (blobParts && Symbol.iterator in blobParts) {
        return new Blob(blobParts);
    }

    return source;
}

// Coerces a value into a ReadableStream: passes streams through, unwraps
// Response bodies, converts Blob-able values via Blob#stream(), and adapts
// async-iterable objects; throws for anything else.
export function getReadableStreamFromSource(source) {
    if (source instanceof ReadableStream) {
        return source;
    }

    if (source instanceof Response) {
        if (source.body === null) {
            throw new Error("Response has no body");
        }

        return source.body;
    }

    source = convertToBlobIfPossible(source);

    if (source instanceof Blob) {
        return source.stream();
    }

    return new ReadableStream({
        start() {
            const generator = source !== null && typeof source === "object" && Symbol.asyncIterator in source
                ? source[Symbol.asyncIterator]
                : void 0;

            if (typeof generator !== "function") {
                throw new Error("Bad value type (can't convert to a stream)");
            }

            // NOTE(review): generator is invoked unbound, so `this` inside
            // [Symbol.asyncIterator]() is undefined — mirrors the original;
            // confirm this is intended for sources whose iterator relies on `this`.
            this.iterator = generator();
        },
        async pull(controller) {
            const { value, done } = await this.iterator.next();

            if (done) {
                this.iterator = null;
                controller.close();
            } else {
                controller.enqueue(value);
            }
        },
        cancel() {
            this.iterator = null;
        }
    });
}