UNPKG

@forzalabs/remora

Version:

A powerful CLI tool for seamless data translation.

1,071 lines 55.7 kB
{ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Consumer Schema", "description": "Schema for defining data consumers", "type": "object", "properties": { "$schema": { "type": "string", "format": "uri" }, "name": { "type": "string", "description": "The name of the consumer" }, "description": { "type": "string", "description": "Optional description of the consumer" }, "producers": { "type": "array", "description": "The 'FROM' elements of the SQL query (only the first one is in the FROM clause, others are joined in)", "items": { "type": "object", "properties": { "name": { "type": "string", "description": "The name of an available producer/consumer" }, "joins": { "type": "array", "items": { "type": "object", "properties": { "otherName": { "type": "string", "description": "The name of the other producer to JOIN with from the list of producers" }, "relationship": { "type": "string", "enum": [ "one-to-one", "one-to-many", "many-to-one" ], "description": "The type of join relationship" }, "sql": { "type": "string", "description": "The SQL to use to perform the join" } }, "required": [ "otherName", "relationship", "sql" ], "additionalProperties": false } }, "union": { "type": "boolean", "description": "Merges the data from the various producers into a single dataset. They must have the same output dimensions. If true, then you can't set any joins on any producer, since all producers are merged into a single dataset." }, "isOptional": { "type": "boolean", "description": "If true and the data for this producer is not available when executing the consumer (e.g. the file is not present), the consumer will not fail, but will just create a placeholder dataset and continue execution." 
} }, "required": [ "name" ], "additionalProperties": false }, "minItems": 1 }, "fields": { "type": "array", "description": "The fields to include in the consumer", "items": { "type": "object", "properties": { "key": { "type": "string", "description": "The dimension/measure of the producer OR '*'. If '*' then return all dimensions" }, "alias": { "type": "string", "description": "If set, changes the key returned by the consumer to this alias" }, "from": { "type": "string", "description": "If the key is ambiguous, set a 'from' with the name of the producer/consumer to get the key from" }, "grouping": { "type": "object", "description": "If set, group resulting items that have the same value for the 'groupingKey'", "properties": { "groupingKey": { "type": "string", "description": "Used to create hierarchical data structure (parent -> children)" }, "subFields": { "type": "array", "description": "Recursive structure defining fields that should be pulled inside this grouped field", "items": { "$ref": "#/definitions/consumerField" } } }, "required": [ "groupingKey", "subFields" ], "additionalProperties": false }, "transform": { "description": "Optional list of transformations to apply to the dataset before returning the data", "oneOf": [ { "$ref": "#/definitions/singleTransformation" }, { "type": "array", "items": { "$ref": "#/definitions/singleTransformation" }, "description": "A list of transformations to be applied in sequence" } ] }, "validate": { "type": "object", "description": "Rules to check field value compliance and data quality", "properties": { "min": { "type": "number", "description": "Minimum value for numeric fields" }, "max": { "type": "number", "description": "Maximum value for numeric fields" }, "regex": { "type": "string", "description": "Regular expression pattern to validate string fields" }, "required": { "type": "boolean", "description": "Whether the field is required" } }, "additionalProperties": false }, "onError": { "type": "string", "description": 
"Action to take if an error occurs during transformations or validation", "enum": ["set_default", "skip", "fail"] }, "default": { "description": "Default value of the field if it is missing (or on error if specified)" }, "hidden": { "type": "boolean", "description": "If set, the field is kept and used during processing, but omitted when exporting the data" }, "fixed": { "type": "boolean", "description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop." } }, "required": [ "key" ], "additionalProperties": false }, "minItems": 1 }, "outputs": { "type": "array", "description": "Output formats and configurations for the consumer", "items": { "type": "object", "properties": { "format": { "type": "string", "enum": [ "SQL", "API", "CSV", "PARQUET", "JSON" ], "description": "The output format of the consumer" }, "accelerated": { "type": "boolean", "description": "If true and supported, the consumer will be materialized to improve query performance" }, "direct": { "type": "boolean", "description": "If true, no view is created on the consumer side due to permission limitations" }, "exportDestination": { "type": "string", "description": "The name of the source where the consumer will export its data when deployed/run" }, "exportName": { "type": "string", "description": "If the format is a file, forces the same name in the export file (extension is auto-added)" }, "trigger": { "type": "object", "description": "Triggers to perform the export (not just the usual 'Deploy')", "properties": { "type": { "type": "string", "enum": [ "CRON", "API", "QUEUE" ], "description": "The type of trigger schedule. CRON: time-based scheduling. API: HTTP endpoint trigger. QUEUE: SQS queue message trigger (supports shared queues with message type filtering)." }, "value": { "type": "string", "description": "The value for the trigger. For CRON: cron expression (e.g., '0 0 * * *'). For API: endpoint path. 
For QUEUE: SQS queue URL or queue name (will construct full URL using metadata.region and metadata.accountId if needed)." }, "metadata": { "type": "object", "description": "Additional metadata for the trigger (e.g., AWS credentials, message type filter)", "properties": { "messageType": { "type": "string", "description": "Optional message type filter for QUEUE triggers. Only messages with matching 'type', 'messageType', or 'eventType' fields will be processed by this consumer. Messages without a matching type will be left in the queue for other consumers, enabling shared queue usage." }, "region": { "type": "string", "description": "AWS region for the queue (for QUEUE triggers)" }, "accountId": { "type": "string", "description": "AWS account ID for constructing queue URL (for QUEUE triggers)" }, "accessKeyId": { "type": "string", "description": "AWS access key ID for queue authentication (for QUEUE triggers)" }, "secretAccessKey": { "type": "string", "description": "AWS secret access key for queue authentication (for QUEUE triggers)" }, "sessionToken": { "type": "string", "description": "AWS session token for temporary credentials (for QUEUE triggers)" } }, "additionalProperties": { "type": "string" } } }, "required": [ "type", "value" ], "additionalProperties": false }, "onSuccess": { "type": "array", "description": "Actions to perform when the output operation completes successfully", "items": { "$ref": "#/definitions/consumerOutputOnFinish" } }, "onError": { "type": "array", "description": "Actions to perform when the output operation fails", "items": { "$ref": "#/definitions/consumerOutputOnFinish" } } }, "required": [ "format" ], "additionalProperties": false }, "minItems": 1 }, "filters": { "type": "array", "description": "Filters to apply to the consumer data", "items": { "type": "object", "properties": { "sql": { "type": "string", "description": "The SQL to use to filter on the included dimension" }, "rule": { "type": "object", "description": "Non-SQL based 
filtering logic", "properties": { "member": { "type": "string", "description": "The field to filter on. Use the \"alias\" if specified" }, "operator": { "type": "string", "enum": [ "equals", "notEquals", "contains", "notContains", "startsWith", "endsWith", "greaterThan", "greaterThanOrEquals", "lessThan", "lessThanOrEquals", "in", "notIn", "between", "notBetween", "isNull", "isNotNull", "true", "false", "matches", "notMatches", "sql" ], "description": "The filter operator to apply" }, "values": { "type": "array", "items": { "type": "string" }, "description": "The values to filter against" } }, "required": [ "member", "operator", "values" ], "additionalProperties": false } }, "additionalProperties": false } }, "metadata": { "type": "object", "description": "Any tags to add to this consumer for sorting and organizing", "additionalProperties": { "type": "string" } }, "project": { "type": "string", "description": "Tag that groups consumers together to make them accessible by a single api_key" }, "options": { "type": "object", "description": "Additional options for the consumer", "properties": { "distinct": { "type": "boolean", "description": "If true, then the result set will only contain DISTINCT values" }, "distinctOn": { "type": "object", "description": "Performs a distinct operation on specific key(s) and applies collision resolution rules to determine which record to keep when duplicates are found", "properties": { "keys": { "type": "array", "items": { "type": "string" }, "minItems": 1, "description": "The field(s) to use for determining uniqueness. Use the 'alias' if specified. Can be a single field or multiple fields for composite keys." }, "resolution": { "type": "object", "description": "Rules to determine which record to keep when duplicates are found", "properties": { "strategy": { "type": "string", "enum": ["first", "last", "min", "max"], "description": "Strategy for resolving which record to keep. 'first': Keep the first record based on orderBy field. 
'last': Keep the last record based on orderBy field. 'min': Keep the record with the minimum value in orderBy field. 'max': Keep the record with the maximum value in orderBy field." }, "orderBy": { "type": "string", "description": "Required for 'first', 'last', 'min', 'max' strategies. The field to use for ordering/comparison when selecting the record to keep." }, "direction": { "type": "string", "enum": ["asc", "desc"], "default": "asc", "description": "For 'first' and 'last' strategies, the sort direction." } }, "required": ["strategy"], "additionalProperties": false } }, "required": ["keys", "resolution"], "additionalProperties": false } }, "additionalProperties": false }, "schema": { "type": "string", "description": "The name of the JSON schema in this project that this consumer needs to adhere to" }, "_version": { "type": "number", "description": "Version number of the consumer configuration" } }, "required": [ "name", "producers", "fields", "outputs" ], "additionalProperties": false, "definitions": { "consumerField": { "type": "object", "properties": { "key": { "type": "string", "description": "The dimension/measure of the producer OR '*'. 
If '*' then return all dimensions" }, "alias": { "type": "string", "description": "If set, changes the key returned by the consumer to this alias" }, "from": { "type": "string", "description": "If the key is ambiguous, set a 'from' with the name of the producer/consumer to get the key from" }, "grouping": { "type": "object", "description": "If set, group resulting items that have the same value for the 'groupingKey'", "properties": { "groupingKey": { "type": "string", "description": "Used to create hierarchical data structure (parent -> children)" }, "subFields": { "type": "array", "description": "Recursive structure defining fields that should be pulled inside this grouped field", "items": { "$ref": "#/definitions/consumerField" } } }, "required": [ "groupingKey", "subFields" ], "additionalProperties": false }, "transform": { "description": "Optional list of transformations to apply to the dataset before returning the data", "oneOf": [ { "$ref": "#/definitions/singleTransformation" }, { "type": "array", "items": { "$ref": "#/definitions/singleTransformation" }, "description": "A list of transformations to be applied in sequence" } ] }, "validate": { "type": "object", "description": "Rules to check field value compliance and data quality", "properties": { "min": { "type": "number", "description": "Minimum value for numeric fields" }, "max": { "type": "number", "description": "Maximum value for numeric fields" }, "regex": { "type": "string", "description": "Regular expression pattern to validate string fields" }, "required": { "type": "boolean", "description": "Whether the field is required" } }, "additionalProperties": false }, "onError": { "type": "string", "description": "Action to take if an error occurs during transformations or validation", "enum": ["set_default", "skip", "fail"] }, "default": { "description": "Default value of the field if it is missing (or on error if specified)" }, "hidden": { "type": "boolean", "description": "If set, the field is kept and 
used during processing, but omitted when exporting the data" }, "fixed": { "type": "boolean", "description": "If set, \"default\" must have a value. This field is not searched in the underlying dataset, but is a fixed value set by the \"default\" prop." } }, "required": [ "key" ], "additionalProperties": false }, "singleTransformation": { "oneOf": [ { "type": "object", "properties": { "cast": { "type": "string", "description": "Cast the value to a specific type", "enum": ["string", "number", "datetime", "boolean"] }, "format": { "type": "string", "description": "Optional format for date parsing or string formatting (e.g. YYYY-MM-DD, DD/MM/YY)" } }, "required": ["cast"], "additionalProperties": false }, { "type": "object", "properties": { "multiply": { "type": "number", "description": "Multiply the numeric value by this factor" } }, "required": ["multiply"], "additionalProperties": false }, { "type": "object", "properties": { "add": { "type": "number", "description": "Add this number to the numeric value" } }, "required": ["add"], "additionalProperties": false }, { "type": "object", "properties": { "extract": { "type": "string", "enum": ["year", "month", "day", "hour", "minute"], "description": "Extract a component from a date value" } }, "required": ["extract"], "additionalProperties": false }, { "type": "object", "properties": { "concat": { "type": "object", "properties": { "separator": { "type": "string", "description": "The separator to use when joining array elements" } }, "required": ["separator"], "additionalProperties": false } }, "required": ["concat"], "additionalProperties": false }, { "type": "object", "properties": { "split": { "type": "object", "properties": { "separator": { "type": "string", "description": "The separator to use when splitting the string" }, "index": { "type": "number", "description": "The index of the split part to keep" } }, "required": ["separator", "index"], "additionalProperties": false } }, "required": ["split"], 
"additionalProperties": false }, { "type": "object", "properties": { "regex_match": { "type": "object", "properties": { "pattern": { "type": "string", "description": "The regex pattern to test against" }, "flags": { "type": "string", "description": "Regex flags (e.g., 'i' for case-insensitive)" } }, "required": ["pattern"], "additionalProperties": false } }, "required": ["regex_match"], "additionalProperties": false }, { "type": "object", "properties": { "regex_replace": { "type": "object", "properties": { "pattern": { "type": "string", "description": "The regex pattern to match" }, "replacement": { "type": "string", "description": "The replacement string" }, "flags": { "type": "string", "description": "Regex flags (e.g., 'g' for global)" } }, "required": ["pattern", "replacement"], "additionalProperties": false } }, "required": ["regex_replace"], "additionalProperties": false }, { "type": "object", "properties": { "regex_extract": { "type": "object", "properties": { "pattern": { "type": "string", "description": "The regex pattern to extract with" }, "group": { "type": "number", "description": "The capture group index to extract (0 for full match)" }, "flags": { "type": "string", "description": "Regex flags (e.g., 'i' for case-insensitive)" } }, "required": ["pattern", "group"], "additionalProperties": false } }, "required": ["regex_extract"], "additionalProperties": false }, { "type": "object", "properties": { "trim": { "type": "boolean", "description": "Trim whitespace from both ends of the string" } }, "required": ["trim"], "additionalProperties": false }, { "type": "object", "properties": { "to_lowercase": { "type": "boolean", "description": "Convert string to lowercase" } }, "required": ["to_lowercase"], "additionalProperties": false }, { "type": "object", "properties": { "to_uppercase": { "type": "boolean", "description": "Convert string to uppercase" } }, "required": ["to_uppercase"], "additionalProperties": false }, { "type": "object", "properties": { 
"capitalize": { "type": "boolean", "description": "Capitalize the first letter of the string" } }, "required": ["capitalize"], "additionalProperties": false }, { "type": "object", "properties": { "substring": { "type": "object", "properties": { "start": { "type": "number", "description": "Starting position of substring" }, "end": { "type": "number", "description": "Optional ending position of substring" } }, "required": ["start"], "additionalProperties": false } }, "required": ["substring"], "additionalProperties": false }, { "type": "object", "properties": { "pad_start": { "type": "object", "properties": { "length": { "type": "number", "description": "Desired string length after padding" }, "char": { "type": "string", "description": "Character to pad with", "minLength": 1, "maxLength": 1 } }, "required": ["length", "char"], "additionalProperties": false } }, "required": ["pad_start"], "additionalProperties": false }, { "type": "object", "properties": { "pad_end": { "type": "object", "properties": { "length": { "type": "number", "description": "Desired string length after padding" }, "char": { "type": "string", "description": "Character to pad with", "minLength": 1, "maxLength": 1 } }, "required": ["length", "char"], "additionalProperties": false } }, "required": ["pad_end"], "additionalProperties": false }, { "type": "object", "properties": { "prepend": { "type": "string", "description": "String to add at the beginning" } }, "required": ["prepend"], "additionalProperties": false }, { "type": "object", "properties": { "append": { "type": "string", "description": "String to add at the end" } }, "required": ["append"], "additionalProperties": false }, { "type": "object", "properties": { "combine_fields": { "type": "object", "properties": { "fields": { "type": "array", "items": { "type": "string" }, "description": "Array of field names to combine", "minItems": 1 }, "separator": { "type": "string", "description": "Optional separator between fields (default: empty 
string)" }, "template": { "type": "string", "description": "Optional template string with placeholders like '{field1} - {field2}'" } }, "required": ["fields"], "additionalProperties": false } }, "required": ["combine_fields"], "additionalProperties": false }, { "type": "object", "description": "Apply conditional logic to transform values based on comparison conditions.", "properties": { "conditional": { "type": "object", "properties": { "clauses": { "type": "array", "description": "Array of if-then clauses evaluated in order. First matching clause wins.", "items": { "type": "object", "properties": { "if": { "$ref": "#/definitions/comparisonCondition" }, "then": { "description": "The value to return if the condition matches", "oneOf": [ { "type": "string" }, { "type": "number" }, { "type": "boolean" } ] } }, "required": ["if", "then"], "additionalProperties": false }, "minItems": 1 }, "else": { "description": "Default value if no clause matches. If not specified, the original value is kept.", "oneOf": [ { "type": "string" }, { "type": "number" }, { "type": "boolean" }, { "type": "null" } ] } }, "required": ["clauses"], "additionalProperties": false } }, "required": ["conditional"], "additionalProperties": false }, { "type": "object", "properties": { "mask": { "type": "string", "enum": ["hash", "mask", "crypt", "random", "seeded-random", "none"], "description": "Apply masking to the field value for data privacy. Options: 'hash' (one-way hash), 'mask' (replace with asterisks), 'crypt' (reversible encryption), 'random' (random value), 'seeded-random' (consistent random value per input), 'none' (no masking)" } }, "required": ["mask"], "additionalProperties": false } ] }, "comparisonCondition": { "description": "A condition to compare a field value against. 
Exactly one comparison operator must be specified.", "oneOf": [ { "type": "object", "description": "Check if the numeric value is greater than the specified number", "properties": { "greater_than": { "type": "number", "description": "The value must be greater than this number" } }, "required": ["greater_than"], "additionalProperties": false }, { "type": "object", "description": "Check if the numeric value is greater than or equal to the specified number", "properties": { "greater_than_or_equal": { "type": "number", "description": "The value must be greater than or equal to this number" } }, "required": ["greater_than_or_equal"], "additionalProperties": false }, { "type": "object", "description": "Check if the numeric value is less than the specified number", "properties": { "less_than": { "type": "number", "description": "The value must be less than this number" } }, "required": ["less_than"], "additionalProperties": false }, { "type": "object", "description": "Check if the numeric value is less than or equal to the specified number", "properties": { "less_than_or_equal": { "type": "number", "description": "The value must be less than or equal to this number" } }, "required": ["less_than_or_equal"], "additionalProperties": false }, { "type": "object", "description": "Check if the value equals the specified value (strict equality)", "properties": { "equals": { "description": "The value must equal this exactly", "oneOf": [ { "type": "string" }, { "type": "number" }, { "type": "boolean" } ] } }, "required": ["equals"], "additionalProperties": false }, { "type": "object", "description": "Check if the value does not equal the specified value", "properties": { "not_equals": { "description": "The value must not equal this", "oneOf": [ { "type": "string" }, { "type": "number" }, { "type": "boolean" } ] } }, "required": ["not_equals"], "additionalProperties": false }, { "type": "object", "description": "Check if the value is in the specified list of values", "properties": { 
"in": { "type": "array", "description": "The value must be one of these values", "items": { "oneOf": [ { "type": "string" }, { "type": "number" }, { "type": "boolean" } ] }, "minItems": 1 } }, "required": ["in"], "additionalProperties": false }, { "type": "object", "description": "Check if the value is not in the specified list of values", "properties": { "not_in": { "type": "array", "description": "The value must not be any of these values", "items": { "oneOf": [ { "type": "string" }, { "type": "number" }, { "type": "boolean" } ] }, "minItems": 1 } }, "required": ["not_in"], "additionalProperties": false }, { "type": "object", "description": "Check if the string value starts with the specified prefix", "properties": { "starts_with": { "type": "string", "description": "The string value must start with this prefix" } }, "required": ["starts_with"], "additionalProperties": false }, { "type": "object", "description": "Check if the string value ends with the specified suffix", "properties": { "ends_with": { "type": "string", "description": "The string value must end with this suffix" } }, "required": ["ends_with"], "additionalProperties": false }, { "type": "object", "description": "Check if the string value contains t