@puls-atlas/cli
Version:
The Puls Atlas CLI tool for managing Atlas projects
205 lines • 5.65 kB
JavaScript
import { isFunction } from 'es-toolkit/compat';
/**
 * Checks whether a value looks like a timestamp object, i.e. a non-null
 * object that exposes a `toDate()` or a `toMillis()` method.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` for timestamp-like objects, `false` for anything else.
 */
const isTimestampLike = value =>
  // Coerce explicitly so falsy inputs (null, undefined, 0, '') produce `false`
  // rather than being returned as-is from the `&&` chain.
  Boolean(value) &&
  typeof value === 'object' &&
  (isFunction(value.toDate) || isFunction(value.toMillis));
/**
 * Infers a type label for an array from the resolved types of its elements.
 *
 * @param {unknown} value - Candidate array.
 * @returns {string|null} `null` when `value` is not an array;
 *   `'array<unknown>'` for an empty array; `'array<T>'` when every element
 *   resolves to the same type `T`; `'array<float>'` when the elements are a
 *   mix of integers and floats; `'array<mixed>'` otherwise.
 */
const inferArrayType = value => {
  if (!Array.isArray(value)) {
    return null;
  }
  if (value.length === 0) {
    return 'array<unknown>';
  }
  const elementTypes = new Set(value.map(element => resolveValueType(element)));
  if (elementTypes.size === 1) {
    const [onlyType] = elementTypes;
    return `array<${onlyType}>`;
  }
  // JavaScript cannot tell 1.0 from 1, so a float array that happens to
  // contain whole numbers resolves to both 'integer' and 'float'. Widen it to
  // float, mirroring how the field-level resolvers treat an integer/float mix.
  if (elementTypes.size === 2 && elementTypes.has('integer') && elementTypes.has('float')) {
    return 'array<float>';
  }
  return 'array<mixed>';
};
/**
 * Maps a runtime value to the type label used during schema inference.
 *
 * Checks run in this order: nullish values, arrays (delegated to
 * `inferArrayType`), timestamp-like objects and `Date` instances, then
 * primitives and plain objects by `typeof`.
 *
 * @param {unknown} value - Value to classify.
 * @returns {string|null} `'null'`, an array label from `inferArrayType`,
 *   `'timestamp'`, `'boolean'`, `'integer'`, `'float'`, `'string'`,
 *   `'object'`, or `'unknown'` for any other `typeof` result.
 */
const resolveValueType = value => {
  if (value == null) {
    return 'null';
  }
  if (Array.isArray(value)) {
    return inferArrayType(value);
  }
  if (isTimestampLike(value) || value instanceof Date) {
    return 'timestamp';
  }

  const kind = typeof value;
  if (kind === 'number') {
    // Non-integers, including NaN and Infinity, are all reported as 'float'.
    return Number.isInteger(value) ? 'integer' : 'float';
  }
  if (kind === 'boolean' || kind === 'string' || kind === 'object') {
    return kind;
  }
  return 'unknown';
};
/**
 * Picks the object whose fields should be inspected for a document-like input.
 *
 * @param {unknown} document - Either a bare field object or a wrapper that
 *   carries the fields under a `data` property.
 * @returns {object|null}
 *   - `document.data` when it is a non-array object;
 *   - `null` when the input is not an object, is an array, or has a truthy
 *     `id` while `data` is `undefined`;
 *   - the input object itself in every other case.
 */
const normalizeDocumentPayload = document => {
  const isObjectLike = Boolean(document) && typeof document === 'object';
  if (!isObjectLike) {
    return null;
  }

  const data = document.data;
  const hasObjectData = Boolean(data) && typeof data === 'object' && !Array.isArray(data);
  if (hasObjectData) {
    return data;
  }

  // A wrapper that has an id but no data carries no fields to inspect.
  const isEmptyWrapper = data === undefined && Boolean(document.id);
  if (isEmptyWrapper || Array.isArray(document)) {
    return null;
  }
  return document;
};
/**
 * Converts the per-field accumulator map into a sorted list of schema entries.
 *
 * @param {Map<string, {presentCount: number, totalDocumentCount: number, types: Set<string>}>} fieldState
 *   Accumulated statistics keyed by field name.
 * @returns {Array<{name: string, optional: boolean, sampleCount: number, types: string[]}>}
 *   One entry per field, ordered by name via `localeCompare`. `optional` is
 *   `true` when the field was present in fewer documents than the total;
 *   `types` is sorted via `localeCompare`.
 */
const normalizeEntries = fieldState => {
  const compareText = (left, right) => left.localeCompare(right);
  const compareByName = (left, right) => left.name.localeCompare(right.name);

  const entries = [];
  for (const [name, state] of fieldState.entries()) {
    entries.push({
      name,
      optional: state.presentCount < state.totalDocumentCount,
      sampleCount: state.presentCount,
      types: Array.from(state.types).sort(compareText)
    });
  }
  return entries.sort(compareByName);
};
/**
 * Infers a field-level schema from a list of sample documents.
 *
 * Each document is first reduced to its payload with
 * `normalizeDocumentPayload`; documents without a usable payload are ignored.
 * For every field seen, the number of payloads containing it and the set of
 * value types observed (via `resolveValueType`) are collected, then handed to
 * `normalizeEntries` to build the result.
 *
 * @param {unknown} documents - Sample documents; anything other than a
 *   non-empty array yields an empty schema.
 * @returns {Array<object>} The entries produced by `normalizeEntries`, or `[]`
 *   when there is nothing to analyse.
 */
export const inferSchemaFromDocuments = documents => {
  const hasDocuments = Array.isArray(documents) && documents.length > 0;
  if (!hasDocuments) {
    return [];
  }

  const payloads = [];
  for (const document of documents) {
    const payload = normalizeDocumentPayload(document);
    if (payload && typeof payload === 'object') {
      payloads.push(payload);
    }
  }
  if (payloads.length === 0) {
    return [];
  }

  // Only documents with a usable payload count towards the total, so the
  // "optional" decision made later is relative to analysed documents.
  const totalDocumentCount = payloads.length;
  const fieldState = new Map();
  payloads.forEach(payload => {
    Object.entries(payload).forEach(([fieldName, value]) => {
      let state = fieldState.get(fieldName);
      if (state === undefined) {
        state = {
          presentCount: 0,
          totalDocumentCount,
          types: new Set()
        };
        fieldState.set(fieldName, state);
      }
      state.presentCount += 1;
      state.types.add(resolveValueType(value));
    });
  });
  return normalizeEntries(fieldState);
};
/**
 * Chooses a search-index field type for a set of inferred type labels.
 *
 * `'null'` labels are ignored. A single remaining label is mapped directly;
 * an integer/float mix widens to `'float'`; everything else falls back to
 * `'auto'`.
 *
 * NOTE(review): the returned names (`int64`, `bool`, `object[]`, `auto`, …)
 * look like Typesense field types — confirm against the search backend.
 *
 * @param {string[]} types - Inferred type labels observed for one field.
 * @returns {string} The search field type name.
 */
const resolveSearchFieldType = types => {
  const normalizedTypes = new Set(types.filter(typeName => typeName !== 'null'));
  if (normalizedTypes.size === 0) {
    return 'auto';
  }
  if (normalizedTypes.size === 1) {
    const [singleType] = normalizedTypes;
    switch (singleType) {
      case 'string':
        return 'string';
      case 'integer':
      case 'timestamp':
        return 'int64';
      case 'float':
        return 'float';
      case 'boolean':
        return 'bool';
      case 'object':
        return 'object';
      case 'array<object>':
        return 'object[]';
      case 'array<string>':
        return 'string[]';
      // Homogeneous primitive arrays get an explicit array type, matching the
      // array cases handled by resolveBigQueryFieldType.
      case 'array<integer>':
        return 'int64[]';
      case 'array<float>':
        return 'float[]';
      case 'array<boolean>':
        return 'bool[]';
      default:
        return 'auto';
    }
  }
  if (normalizedTypes.size === 2 && normalizedTypes.has('integer') && normalizedTypes.has('float')) {
    return 'float';
  }
  return 'auto';
};
/**
 * Converts inferred schema entries into search field definitions.
 *
 * @param {Array<{name: string, optional: boolean, types: string[]}>} inferredSchema
 *   Entries as produced by schema inference.
 * @returns {Array<{name: string, optional: boolean, type: string}>} One
 *   definition per entry. A field is optional when the entry is already
 *   optional or `'null'` is among its types; `type` comes from
 *   `resolveSearchFieldType`.
 */
export const mapInferredSchemaToSearchFields = inferredSchema => {
  const toSearchField = ({ name, optional, types }) => ({
    name,
    optional: optional || types.includes('null'),
    type: resolveSearchFieldType(types)
  });
  return inferredSchema.map(entry => toSearchField(entry));
};
/**
 * Chooses a BigQuery column mode and type for a set of inferred type labels.
 *
 * `'null'` labels are ignored. A single remaining label is looked up in a
 * fixed table; an integer/float mix widens to `FLOAT64`; every other case
 * (no labels, unrecognised label, other mixes) falls back to a nullable
 * `STRING`.
 *
 * @param {string[]} types - Inferred type labels observed for one field.
 * @returns {{mode: string, type: string}} A freshly created descriptor whose
 *   `mode` is `'NULLABLE'` or `'REPEATED'`.
 */
const resolveBigQueryFieldType = types => {
  // Inferred label -> [mode, type]. Labels not listed here use the fallback.
  const descriptorsByLabel = new Map([
    ['string', ['NULLABLE', 'STRING']],
    ['integer', ['NULLABLE', 'INT64']],
    ['float', ['NULLABLE', 'FLOAT64']],
    ['boolean', ['NULLABLE', 'BOOL']],
    ['timestamp', ['NULLABLE', 'TIMESTAMP']],
    ['object', ['NULLABLE', 'JSON']],
    ['array<string>', ['REPEATED', 'STRING']],
    ['array<integer>', ['REPEATED', 'INT64']],
    ['array<float>', ['REPEATED', 'FLOAT64']],
    ['array<boolean>', ['REPEATED', 'BOOL']]
  ]);
  const describe = (mode, type) => ({ mode, type });
  const fallback = () => describe('NULLABLE', 'STRING');

  const distinctLabels = new Set(types.filter(label => label !== 'null'));

  if (distinctLabels.size === 1) {
    const [onlyLabel] = distinctLabels;
    const match = descriptorsByLabel.get(onlyLabel);
    return match ? describe(match[0], match[1]) : fallback();
  }

  const isNumericMix =
    distinctLabels.size === 2 && distinctLabels.has('integer') && distinctLabels.has('float');
  return isNumericMix ? describe('NULLABLE', 'FLOAT64') : fallback();
};
/**
 * Converts inferred schema entries into BigQuery field definitions.
 *
 * The mode and type come from `resolveBigQueryFieldType`. When an entry is
 * optional and the resolved mode is not `'REPEATED'`, the mode is forced to
 * `'NULLABLE'`.
 *
 * NOTE(review): the resolver in this file only ever returns `'NULLABLE'` or
 * `'REPEATED'`, so that override currently has no visible effect.
 *
 * @param {Array<{name: string, optional: boolean, types: string[]}>} inferredSchema
 *   Entries as produced by schema inference.
 * @returns {Array<{mode: string, name: string, type: string}>} One definition
 *   per entry, in input order.
 */
export const mapInferredSchemaToBigQueryFields = inferredSchema => {
  const toBigQueryField = entry => {
    const { mode: resolvedMode, type } = resolveBigQueryFieldType(entry.types);
    const forceNullable = entry.optional && resolvedMode !== 'REPEATED';
    return {
      mode: forceNullable ? 'NULLABLE' : resolvedMode,
      name: entry.name,
      type
    };
  };
  return inferredSchema.map(entry => toBigQueryField(entry));
};