UNPKG

@puls-atlas/cli

Version:

The Puls Atlas CLI tool for managing Atlas projects

205 lines 5.65 kB
/**
 * Schema inference helpers.
 *
 * Derives a per-field type summary from a sample of documents and maps that
 * summary onto search-index field definitions and BigQuery field definitions.
 */

/** Type name reported for an empty array, which carries no element information. */
const UNKNOWN_ARRAY_TYPE = 'array<unknown>';

/** Inferred type name -> search field type, for fields with one concrete type. */
const SEARCH_FIELD_TYPES = new Map([
  ['string', 'string'],
  ['integer', 'int64'],
  ['timestamp', 'int64'],
  ['float', 'float'],
  ['boolean', 'bool'],
  ['object', 'object'],
  ['array<object>', 'object[]'],
  ['array<string>', 'string[]']
]);

/** Inferred type name -> BigQuery mode/type, for fields with one concrete type. */
const BIGQUERY_FIELD_TYPES = new Map([
  ['string', { mode: 'NULLABLE', type: 'STRING' }],
  ['integer', { mode: 'NULLABLE', type: 'INT64' }],
  ['float', { mode: 'NULLABLE', type: 'FLOAT64' }],
  ['boolean', { mode: 'NULLABLE', type: 'BOOL' }],
  ['timestamp', { mode: 'NULLABLE', type: 'TIMESTAMP' }],
  ['object', { mode: 'NULLABLE', type: 'JSON' }],
  ['array<string>', { mode: 'REPEATED', type: 'STRING' }],
  ['array<integer>', { mode: 'REPEATED', type: 'INT64' }],
  ['array<float>', { mode: 'REPEATED', type: 'FLOAT64' }],
  ['array<boolean>', { mode: 'REPEATED', type: 'BOOL' }]
]);

/**
 * Tells whether a value is an object exposing a `toDate()` or `toMillis()` method.
 *
 * Uses native `typeof` checks, so no helper library is required.
 *
 * @param {*} value - Any value.
 * @returns {boolean} True for timestamp-like objects.
 */
const isTimestampLike = value =>
  Boolean(value) &&
  typeof value === 'object' &&
  (typeof value.toDate === 'function' || typeof value.toMillis === 'function');

/**
 * Computes the inferred type name of an array.
 *
 * @param {*} value - Candidate array.
 * @returns {string|null} `array<unknown>` for an empty array, `array<T>` when
 *   every element resolves to the same type `T`, `array<mixed>` otherwise, and
 *   `null` when `value` is not an array.
 */
const inferArrayType = value => {
  if (!Array.isArray(value)) {
    return null;
  }

  if (value.length === 0) {
    return UNKNOWN_ARRAY_TYPE;
  }

  const elementTypes = [...new Set(value.map(element => resolveValueType(element)))];

  return elementTypes.length === 1 ? `array<${elementTypes[0]}>` : 'array<mixed>';
};

/**
 * Resolves the inferred type name of a single field value.
 *
 * @param {*} value - Field value.
 * @returns {string} One of `null`, `array<...>`, `timestamp`, `boolean`,
 *   `integer`, `float`, `string`, `object` or `unknown`.
 */
const resolveValueType = value => {
  if (value === null || value === undefined) {
    return 'null';
  }

  if (Array.isArray(value)) {
    return inferArrayType(value);
  }

  // Must run before the generic `typeof` dispatch, which would report 'object'.
  if (isTimestampLike(value) || value instanceof Date) {
    return 'timestamp';
  }

  switch (typeof value) {
    case 'boolean':
      return 'boolean';
    case 'number':
      return Number.isInteger(value) ? 'integer' : 'float';
    case 'string':
      return 'string';
    case 'object':
      return 'object';
    default:
      return 'unknown';
  }
};

/**
 * Extracts the field payload from a sampled document.
 *
 * @param {*} document - Either a `{ id, data }` envelope or a plain field object.
 * @returns {object|null} `document.data` when it is a non-array object; `null`
 *   for non-objects, arrays, and envelopes that have an `id` but no `data`;
 *   otherwise the document itself.
 */
const normalizeDocumentPayload = document => {
  if (!document || typeof document !== 'object') {
    return null;
  }

  const { data } = document;

  if (data && typeof data === 'object' && !Array.isArray(data)) {
    return data;
  }

  // An envelope carrying only an id has no fields to contribute.
  if (document.id && data === undefined) {
    return null;
  }

  return Array.isArray(document) ? null : document;
};

/**
 * Converts the accumulated per-field state into the public schema entries.
 *
 * @param {Map<string, {presentCount: number, totalDocumentCount: number, types: Set<string>}>} fieldState
 *   Accumulated state keyed by field name.
 * @returns {Array<{name: string, optional: boolean, sampleCount: number, types: string[]}>}
 *   Entries sorted by name, each with its type names sorted.
 */
const normalizeEntries = fieldState =>
  [...fieldState.entries()]
    .map(([name, state]) => ({
      name,
      // A field is optional when at least one sampled payload lacks it.
      optional: state.presentCount < state.totalDocumentCount,
      sampleCount: state.presentCount,
      types: [...state.types].sort((left, right) => left.localeCompare(right))
    }))
    .sort((left, right) => left.name.localeCompare(right.name));

/**
 * Infers a field schema from a sample of documents.
 *
 * @param {Array<*>} documents - Sampled documents (envelopes or plain objects).
 * @returns {Array<{name: string, optional: boolean, sampleCount: number, types: string[]}>}
 *   One entry per top-level field; an empty array when `documents` is not a
 *   non-empty array or contains no usable payload.
 */
export const inferSchemaFromDocuments = documents => {
  if (!Array.isArray(documents) || documents.length === 0) {
    return [];
  }

  const payloads = documents
    .map(normalizeDocumentPayload)
    .filter(payload => payload && typeof payload === 'object');

  if (payloads.length === 0) {
    return [];
  }

  const fieldState = new Map();

  for (const payload of payloads) {
    for (const [fieldName, value] of Object.entries(payload)) {
      let state = fieldState.get(fieldName);

      if (!state) {
        state = {
          presentCount: 0,
          totalDocumentCount: payloads.length,
          types: new Set()
        };
        fieldState.set(fieldName, state);
      }

      state.presentCount += 1;
      state.types.add(resolveValueType(value));
    }
  }

  return normalizeEntries(fieldState);
};

/**
 * Reduces a list of inferred type names to the ones that constrain the field.
 *
 * `null` never constrains the type. `array<unknown>` (an empty array) is also
 * dropped when a typed array was observed for the same field, because an empty
 * array is compatible with any element type.
 *
 * @param {string[]} types - Inferred type names of one field.
 * @returns {Set<string>} The constraining type names.
 */
const collectConcreteTypes = types => {
  const concreteTypes = new Set(types.filter(typeName => typeName !== 'null'));

  if (concreteTypes.has(UNKNOWN_ARRAY_TYPE)) {
    const hasTypedArray = [...concreteTypes].some(
      typeName => typeName !== UNKNOWN_ARRAY_TYPE && typeName.startsWith('array<')
    );

    if (hasTypedArray) {
      concreteTypes.delete(UNKNOWN_ARRAY_TYPE);
    }
  }

  return concreteTypes;
};

/**
 * Tells whether the set is exactly `{integer, float}`, which widens to a float.
 *
 * @param {Set<string>} concreteTypes - Constraining type names.
 * @returns {boolean} True for an integer/float mix.
 */
const isIntegerFloatMix = concreteTypes =>
  concreteTypes.size === 2 && concreteTypes.has('integer') && concreteTypes.has('float');

/**
 * Maps the inferred type names of a field to a search field type.
 *
 * @param {string[]} types - Inferred type names of one field.
 * @returns {string} The mapped type, or `auto` when no single type applies.
 */
const resolveSearchFieldType = types => {
  const concreteTypes = collectConcreteTypes(types);

  if (concreteTypes.size === 1) {
    const [onlyType] = concreteTypes;
    return SEARCH_FIELD_TYPES.get(onlyType) || 'auto';
  }

  return isIntegerFloatMix(concreteTypes) ? 'float' : 'auto';
};

/**
 * Maps an inferred schema to search field definitions.
 *
 * @param {Array<{name: string, optional: boolean, types: string[]}>} inferredSchema
 *   Output of `inferSchemaFromDocuments`.
 * @returns {Array<{name: string, optional: boolean, type: string}>} Field
 *   definitions; a field is optional when it was missing from a payload or
 *   was observed holding `null`.
 */
export const mapInferredSchemaToSearchFields = inferredSchema =>
  inferredSchema.map(entry => ({
    name: entry.name,
    optional: entry.optional || entry.types.includes('null'),
    type: resolveSearchFieldType(entry.types)
  }));

/**
 * Maps the inferred type names of a field to a BigQuery mode and type.
 *
 * @param {string[]} types - Inferred type names of one field.
 * @returns {{mode: string, type: string}} `NULLABLE STRING` when no single
 *   type applies; the mode is only ever `NULLABLE` or `REPEATED`.
 */
const resolveBigQueryFieldType = types => {
  const concreteTypes = collectConcreteTypes(types);

  if (concreteTypes.size === 1) {
    const [onlyType] = concreteTypes;
    const mapped = BIGQUERY_FIELD_TYPES.get(onlyType);

    if (mapped) {
      // Copy so callers never share (or mutate) the lookup-table entry.
      return { ...mapped };
    }
  } else if (isIntegerFloatMix(concreteTypes)) {
    return { mode: 'NULLABLE', type: 'FLOAT64' };
  }

  return { mode: 'NULLABLE', type: 'STRING' };
};

/**
 * Maps an inferred schema to BigQuery field definitions.
 *
 * @param {Array<{name: string, optional: boolean, types: string[]}>} inferredSchema
 *   Output of `inferSchemaFromDocuments`.
 * @returns {Array<{mode: string, name: string, type: string}>} Field definitions.
 */
export const mapInferredSchemaToBigQueryFields = inferredSchema =>
  inferredSchema.map(entry => {
    // The resolved mode is already NULLABLE for every non-REPEATED field, so
    // `entry.optional` cannot change it; REQUIRED is never emitted.
    const { mode, type } = resolveBigQueryFieldType(entry.types);

    return { mode, name: entry.name, type };
  });