UNPKG

@jackchuka/gql-ingest

Version:

A CLI tool for ingesting data from CSV files into a GraphQL API

490 lines (428 loc) 15.2 kB
import fs from "fs";
import path from "path";
import { parse, DocumentNode, VariableDefinitionNode } from "graphql";
import { DataReaderFactory, DataRow } from "./readers";
import { GraphQLClientWrapper } from "./graphql-client";
import { MetricsCollector } from "./metrics";
import { ParallelProcessingConfig, RetryConfig } from "./config";

/**
 * Contents of one mapping JSON file: which data file to read, which GraphQL
 * document to execute, and how data fields map onto GraphQL variables.
 */
export interface MappingConfig {
  // Legacy CSV support
  csvFile?: string;
  // New flexible data file support
  dataFile?: string;
  dataFormat?: string;
  graphqlFile: string;
  mapping: Record<string, string | any>;
}

/**
 * Reads mapping configurations, loads the data rows they point at, and
 * executes one GraphQL mutation per row, recording successes and failures in
 * a {@link MetricsCollector}.
 */
export class DataMapper {
  private client: GraphQLClientWrapper;
  private basePath: string;
  private metrics: MetricsCollector;
  private verbose: boolean;
  private formatOverride?: string;

  /**
   * @param client - Client used to execute every mutation.
   * @param basePath - Directory that config paths are resolved against.
   * @param metrics - Collector to record into; a new one is created when omitted.
   * @param verbose - When true, this class suppresses its own progress and
   *   per-row error output and logs unknown-type notices instead.
   * @param formatOverride - Data format that takes precedence over the
   *   `dataFormat` declared in a mapping file.
   */
  constructor(
    client: GraphQLClientWrapper,
    basePath: string = process.cwd(),
    metrics?: MetricsCollector,
    verbose: boolean = false,
    formatOverride?: string
  ) {
    this.client = client;
    this.basePath = basePath;
    this.metrics = metrics || new MetricsCollector();
    this.verbose = verbose;
    this.formatOverride = formatOverride;
  }

  /**
   * Lists the `*.json` mapping files under `<basePath>/<configDir>/mappings`.
   *
   * @param configDir - Config directory, relative to the base path.
   * @param entityFilter - Optional entity names (file names without `.json`)
   *   to keep; names with no matching file only produce a warning.
   * @returns Paths of the form `<configDir>/mappings/<file>`, sorted
   *   alphabetically, or an empty array when the directory cannot be read.
   */
  discoverMappings(configDir: string, entityFilter?: string[]): string[] {
    const mappingsPath = path.resolve(this.basePath, configDir, "mappings");

    try {
      const files = fs.readdirSync(mappingsPath);
      let jsonFiles = files.filter((file) => file.endsWith(".json"));

      // Apply entity filter if provided
      if (entityFilter && entityFilter.length > 0) {
        const requestedEntities = new Set(entityFilter);
        const foundEntities = new Set<string>();

        jsonFiles = jsonFiles.filter((file) => {
          const entityName = path.basename(file, ".json");
          if (requestedEntities.has(entityName)) {
            foundEntities.add(entityName);
            return true;
          }
          return false;
        });

        // Check for requested entities that were not found
        const notFound = entityFilter.filter((e) => !foundEntities.has(e));
        if (notFound.length > 0) {
          console.warn(
            `Warning: The following entities were not found in mappings: ${notFound.join(
              ", "
            )}`
          );
        }
      }

      jsonFiles.sort(); // Alphabetical order for consistent processing

      console.log(
        `Discovered ${jsonFiles.length} mapping files: ${jsonFiles.join(", ")}`
      );
      return jsonFiles.map((file) => path.join(configDir, "mappings", file));
    } catch (error) {
      console.error(`Error reading mappings directory ${mappingsPath}:`, error);
      return [];
    }
  }

  /**
   * Processes a single mapping file: loads its data rows and its GraphQL
   * document, then executes the document once per row.
   *
   * @param configPath - Mapping file path, relative to the base path.
   * @param parallelConfig - Rows are processed in concurrent chunks only when
   *   `concurrency` is greater than 1; otherwise one at a time.
   * @param retryConfig - Passed through to the client for every mutation.
   * @throws Error when the mapping declares neither `dataFile` nor `csvFile`.
   *   Failures while reading or parsing the mapping, data, or GraphQL file
   *   also propagate to the caller.
   */
  async processEntity(
    configPath: string,
    parallelConfig?: ParallelProcessingConfig,
    retryConfig?: RetryConfig
  ): Promise<void> {
    const entityName = path.basename(configPath, ".json");
    console.log(`Processing entity: ${configPath}`);

    this.metrics.startEntityProcessing(entityName);

    // Read mapping configuration
    const configFullPath = path.resolve(this.basePath, configPath);
    const config: MappingConfig = JSON.parse(
      fs.readFileSync(configFullPath, "utf8")
    );

    // Extract config directory (parent of mappings directory)
    const configDir = path.dirname(path.dirname(configFullPath));

    // Determine data file path (support both legacy csvFile and new dataFile)
    const dataFile = config.dataFile || config.csvFile;
    if (!dataFile) {
      throw new Error(
        `No data file specified in mapping config: ${configPath}`
      );
    }
    const dataPath = path.resolve(configDir, dataFile);

    // Get appropriate reader (prioritize CLI format override, then config format)
    const format = this.formatOverride || config.dataFormat;
    const reader = DataReaderFactory.getReader(dataPath, format);
    const data = await reader.readFile(dataPath);

    // Read GraphQL mutation (relative to config directory)
    const graphqlPath = path.resolve(configDir, config.graphqlFile);
    const mutation = fs.readFileSync(graphqlPath, "utf8");

    // Process rows with optional parallelization
    if (parallelConfig && parallelConfig.concurrency > 1) {
      await this.processRowsConcurrently(
        data,
        mutation,
        config.mapping,
        entityName,
        parallelConfig,
        retryConfig
      );
    } else {
      await this.processRowsSequentially(
        data,
        mutation,
        config.mapping,
        entityName,
        retryConfig
      );
    }

    this.metrics.finishEntityProcessing(entityName);
  }

  /**
   * Executes the mutation for each row in order, awaiting every request
   * before starting the next.
   *
   * A row that fails, whether while being mapped to variables or while the
   * mutation executes, is recorded as a failure and does not stop the
   * remaining rows.
   */
  private async processRowsSequentially(
    data: DataRow[],
    mutation: string,
    mapping: Record<string, string | any>,
    entityName: string,
    retryConfig?: RetryConfig
  ): Promise<void> {
    const totalRows = data.length;
    const variableTypes = this.extractVariableTypes(mutation);
    // Report roughly every 10% of rows, and at least on every row for tiny inputs.
    const progressStep = Math.max(1, Math.floor(totalRows / 10));

    for (let i = 0; i < totalRows; i++) {
      const row = data[i];

      try {
        // Mapping happens inside the try so that one malformed row is counted
        // as a failed row instead of aborting the whole entity.
        const variables = this.mapRowToVariables(row, mapping, variableTypes);
        await this.client.executeMutation(mutation, variables, retryConfig);
        this.metrics.recordSuccess(entityName);

        // Show progress every 10% or at the end (only in non-verbose mode)
        if (
          !this.verbose &&
          ((i + 1) % progressStep === 0 || i === totalRows - 1)
        ) {
          const progress = (((i + 1) / totalRows) * 100).toFixed(1);
          console.log(`📊 Progress: ${i + 1}/${totalRows} (${progress}%) ✓`);
        }
      } catch (error) {
        this.metrics.recordFailure(entityName);
        if (!this.verbose) {
          console.error(
            `✗ Failed to create entity for row ${i + 1}:`,
            row,
            error
          );
        }
      }
    }
  }

  /**
   * Executes the mutation for all rows in chunks of `concurrency` rows; the
   * rows of one chunk run concurrently and the next chunk starts only after
   * every row of the current chunk has settled.
   *
   * A row that fails, whether while being mapped to variables or while the
   * mutation executes, is recorded as a failure and does not affect the
   * other rows.
   */
  private async processRowsConcurrently(
    data: DataRow[],
    mutation: string,
    mapping: Record<string, string | any>,
    entityName: string,
    parallelConfig: ParallelProcessingConfig,
    retryConfig?: RetryConfig
  ): Promise<void> {
    const concurrency = parallelConfig.concurrency;
    console.log(
      `Processing ${data.length} rows with concurrency: ${concurrency}`
    );

    // Extract variable types once for all rows
    const variableTypes = this.extractVariableTypes(mutation);

    // Split data into chunks for concurrent processing
    const chunks = this.chunkArray(data, concurrency);
    let processedCount = 0;
    const totalRows = data.length;

    for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
      const chunk = chunks[chunkIndex];

      const promises = chunk.map(async (row) => {
        try {
          // Mapping happens inside the try so that a malformed row is recorded
          // in the metrics like any other failed row.
          const variables = this.mapRowToVariables(
            row,
            mapping,
            variableTypes
          );
          const result = await this.client.executeMutation(
            mutation,
            variables,
            retryConfig
          );
          this.metrics.recordSuccess(entityName);
          return { success: true, result, row };
        } catch (error) {
          this.metrics.recordFailure(entityName);
          return { success: false, error, row };
        }
      });

      const results = await Promise.allSettled(promises);
      processedCount += chunk.length;

      // Count successes and failures in this chunk
      let chunkSuccesses = 0;
      let chunkFailures = 0;

      results.forEach((result) => {
        if (result.status === "fulfilled") {
          const { success, error, row } = result.value;
          if (success) {
            chunkSuccesses++;
          } else {
            chunkFailures++;
            if (!this.verbose) {
              console.error(`✗ Failed to create entity for row:`, row, error);
            }
          }
        } else {
          // Defensive: only reachable if the failure handling above itself throws.
          chunkFailures++;
          if (!this.verbose) {
            console.error(`✗ Promise rejected:`, result.reason);
          }
        }
      });

      // Show progress update (only in non-verbose mode)
      if (!this.verbose) {
        const progress = ((processedCount / totalRows) * 100).toFixed(1);
        console.log(
          `📊 Progress: ${processedCount}/${totalRows} (${progress}%) - Chunk ${
            chunkIndex + 1
          }: ${chunkSuccesses} ✓, ${chunkFailures} ✗`
        );
      }
    }
  }

  /**
   * Splits an array into consecutive slices of at most `chunkSize` elements.
   *
   * @param array - Items to split; not modified.
   * @param chunkSize - Desired slice length. Fractional sizes are rounded
   *   down; sizes below 1 (and NaN) are treated as 1 so the loop always
   *   advances.
   * @returns The slices in original order; empty when `array` is empty.
   */
  private chunkArray<T>(array: T[], chunkSize: number): T[][] {
    // `chunkSize >= 1` is false for NaN, 0 and negatives, all of which would
    // otherwise keep the loop index from ever advancing.
    const size = chunkSize >= 1 ? Math.floor(chunkSize) : 1;
    const chunks: T[][] = [];
    for (let i = 0; i < array.length; i += size) {
      chunks.push(array.slice(i, i + size));
    }
    return chunks;
  }

  /**
   * Builds the GraphQL variables object for one data row.
   *
   * Supported mapping values, checked in this order:
   * - `"$"`: the whole row becomes the variable value.
   * - `"$.a.b"`: the value at that dotted path inside the row.
   * - any other string: the row field with that name.
   * - an object or array: a nested structure built by `mapNestedObject`.
   *
   * Variables whose source value is `undefined` are left out of the result.
   *
   * @param row - The data row to read from.
   * @param mapping - Variable name to mapping value, from the mapping file.
   * @param variableTypes - Declared GraphQL type name per variable, used to
   *   convert string values of path-based and flat mappings.
   */
  private mapRowToVariables(
    row: DataRow,
    mapping: Record<string, string | any>,
    variableTypes: Record<string, string>
  ): Record<string, any> {
    const variables: Record<string, any> = {};

    for (const [graphqlVar, mappingValue] of Object.entries(mapping)) {
      // Handle direct mapping for nested data (e.g., "input": "$")
      if (mappingValue === "$") {
        // Use the entire row as the variable value
        variables[graphqlVar] = row;
      }
      // Handle path-based mapping for nested data (e.g., "input.name": "$.product.name")
      else if (
        typeof mappingValue === "string" &&
        mappingValue.startsWith("$.")
      ) {
        const fieldPath = mappingValue.substring(2); // Remove '$.'
        const value = this.getValueByPath(row, fieldPath);
        if (value !== undefined) {
          const type = variableTypes[graphqlVar];
          variables[graphqlVar] = this.convertValue(value, type, graphqlVar);
        }
      }
      // Handle traditional flat mapping (e.g., "name": "product_name")
      else if (
        typeof mappingValue === "string" &&
        row[mappingValue] !== undefined
      ) {
        const rawValue = row[mappingValue];
        const type = variableTypes[graphqlVar];
        variables[graphqlVar] = this.convertValue(rawValue, type, graphqlVar);
      }
      // Handle complex mapping object
      else if (typeof mappingValue === "object" && mappingValue !== null) {
        variables[graphqlVar] = this.mapNestedObject(
          row,
          mappingValue,
          variableTypes
        );
      }
    }

    return variables;
  }

  /**
   * Follows a dot-separated path through nested objects.
   *
   * @returns The value at the path, or `undefined` as soon as a segment is
   *   missing or the current value is not an object.
   */
  private getValueByPath(obj: any, path: string): any {
    const parts = path.split(".");
    let current = obj;

    for (const part of parts) {
      if (current && typeof current === "object" && part in current) {
        current = current[part];
      } else {
        return undefined;
      }
    }

    return current;
  }

  /**
   * Recursively resolves a nested mapping structure against a row.
   *
   * Arrays are resolved element by element. Inside objects, `"$.path"`
   * strings are replaced by the value at that path, other strings naming an
   * existing row field are replaced by that field, nested objects are
   * resolved recursively, and everything else is kept as a literal.
   *
   * Values resolved here are not passed through `convertValue`;
   * `variableTypes` is only forwarded to recursive calls.
   */
  private mapNestedObject(
    row: DataRow,
    mappingObj: any,
    variableTypes: Record<string, string>
  ): any {
    if (Array.isArray(mappingObj)) {
      return mappingObj.map((item) =>
        this.mapNestedObject(row, item, variableTypes)
      );
    }

    if (typeof mappingObj === "object" && mappingObj !== null) {
      const result: any = {};

      for (const [key, value] of Object.entries(mappingObj)) {
        if (typeof value === "string" && value.startsWith("$.")) {
          const fieldPath = value.substring(2);
          let fieldValue = this.getValueByPath(row, fieldPath);

          // Handle special case for array fields (e.g., comma-separated values)
          if (
            key === "values" &&
            typeof fieldValue === "string" &&
            fieldValue.includes(",")
          ) {
            fieldValue = fieldValue.split(",").map((v) => v.trim());
          }

          result[key] = fieldValue;
        } else if (typeof value === "string" && row[value] !== undefined) {
          result[key] = row[value];
        } else if (typeof value === "object") {
          result[key] = this.mapNestedObject(row, value, variableTypes);
        } else {
          result[key] = value;
        }
      }

      return result;
    }

    return mappingObj;
  }

  /**
   * Parses a GraphQL document and collects the declared type name of every
   * operation variable.
   *
   * Only named types, optionally wrapped in a single non-null (`String`,
   * `Int!`), produce an entry; list types are skipped, so values for those
   * variables are later passed through unconverted.
   *
   * @returns Variable name (without `$`) to type name. The map is empty when
   *   the document cannot be parsed; the parse error is logged, not thrown.
   */
  private extractVariableTypes(mutation: string): Record<string, string> {
    const types: Record<string, string> = {};

    try {
      const document: DocumentNode = parse(mutation);

      // Find the operation (mutation/query) and extract variable definitions
      for (const definition of document.definitions) {
        if (
          definition.kind === "OperationDefinition" &&
          definition.variableDefinitions
        ) {
          for (const variableDef of definition.variableDefinitions) {
            const varName = variableDef.variable.name.value;
            const typeName = this.extractTypeName(variableDef);
            if (typeName) {
              types[varName] = typeName;
            }
          }
        }
      }
    } catch (error) {
      console.error("Error parsing GraphQL mutation:", error);
    }

    return types;
  }

  /**
   * Returns the named type of a variable definition, looking through one
   * non-null wrapper.
   *
   * @returns The type name, or `null` for list types.
   */
  private extractTypeName(variableDef: VariableDefinitionNode): string | null {
    const type = variableDef.type;

    if (type.kind === "NonNullType") {
      // Handle non-null types like String!
      if (type.type.kind === "NamedType") {
        return type.type.name.value;
      }
    } else if (type.kind === "NamedType") {
      // Handle nullable types like String
      return type.name.value;
    }

    return null;
  }

  /**
   * Converts a string value to the JavaScript type matching a GraphQL
   * built-in scalar.
   *
   * - `Int`: whole numbers only.
   * - `Float`: any finite number.
   * - `Boolean`: `true`/`false`/`1`/`0`, case-insensitive.
   * - `String` and unknown types: returned unchanged.
   *
   * Non-string values and values without type information are returned
   * unchanged. A string that cannot be converted, including a blank one for
   * `Int`/`Float`, produces a warning and is returned unchanged.
   *
   * @param value - Raw value read from the data row.
   * @param type - GraphQL type name of the target variable, if known.
   * @param varName - Variable name, used only in log messages.
   */
  private convertValue(
    value: any,
    type: string | undefined,
    varName: string
  ): any {
    if (!type) {
      // No type information available, keep as is
      return value;
    }

    // For non-string values (objects, arrays), return as is
    if (typeof value !== "string") {
      return value;
    }

    const trimmedValue = value.trim();

    switch (type) {
      case "Int": {
        // Number("") is 0, so a blank cell must be rejected explicitly or it
        // would silently be ingested as 0.
        const intValue = trimmedValue === "" ? NaN : Number(trimmedValue);
        // Number.isInteger is false for NaN, Infinity and fractional values.
        if (!Number.isInteger(intValue)) {
          console.warn(
            `Warning: Cannot convert "${value}" to Int for variable $${varName}. Expected a valid integer. Using original value.`
          );
          return value;
        }
        return intValue;
      }

      case "Float": {
        // Number() is more strict than parseFloat() - it requires the entire
        // string to be valid. Blank input is rejected because Number("") is 0.
        const floatValue = trimmedValue === "" ? NaN : Number(trimmedValue);
        if (!Number.isFinite(floatValue)) {
          console.warn(
            `Warning: Cannot convert "${value}" to Float for variable $${varName}. Expected a valid number. Using original value.`
          );
          return value;
        }
        return floatValue;
      }

      case "Boolean": {
        const lowerValue = trimmedValue.toLowerCase();
        if (lowerValue === "true" || lowerValue === "1") return true;
        if (lowerValue === "false" || lowerValue === "0") return false;
        console.warn(
          `Warning: Cannot convert "${value}" to Boolean for variable $${varName}. Expected "true", "false", "1", or "0". Using original value.`
        );
        return value;
      }

      case "String":
        return value;

      default:
        // Unknown scalar type - keep as string for safety
        if (this.verbose) {
          console.log(
            `Unknown GraphQL type "${type}" for variable $${varName}. Keeping value as string.`
          );
        }
        return value;
    }
  }

  /** Returns the collector that this mapper records row results into. */
  getMetrics(): MetricsCollector {
    return this.metrics;
  }
}