UNPKG

@forzalabs/remora

Version:

A powerful CLI tool for seamless data translation.

815 lines 33.9 kB
{ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Consumer Schema", "description": "Schema for defining data consumers", "type": "object", "properties": { "$schema": { "type": "string", "format": "uri" }, "name": { "type": "string", "description": "The name of the consumer" }, "description": { "type": "string", "description": "Optional description of the consumer" }, "producers": { "type": "array", "description": "The 'FROM' elements of the SQL query (only the first one is in the FROM clause, others are joined in)", "items": { "type": "object", "properties": { "name": { "type": "string", "description": "The name of an available producer/consumer" }, "joins": { "type": "array", "items": { "type": "object", "properties": { "otherName": { "type": "string", "description": "The name of the other producer to JOIN with from the list of producers" }, "relationship": { "type": "string", "enum": [ "one-to-one", "one-to-many", "many-to-one" ], "description": "The type of join relationship" }, "sql": { "type": "string", "description": "The SQL to use to perform the join" } }, "required": [ "otherName", "relationship", "sql" ], "additionalProperties": false } }, "custom": { "type": "object", "description": "Custom code to extract fields from producers, transform them, and return results", "properties": { "language": { "type": "string", "enum": [ "js", "python" ], "description": "The language of the custom code" }, "code": { "type": "string", "description": "The custom code to execute" } }, "required": [ "language", "code" ], "additionalProperties": false } }, "required": [ "name" ], "additionalProperties": false }, "minItems": 1 }, "fields": { "type": "array", "description": "The fields to include in the consumer", "items": { "type": "object", "properties": { "key": { "type": "string",
"description": "The dimension/measure of the producer OR '*'. If '*' then return all dimensions" }, "alias": { "type": "string", "description": "If set, changes the key returned by the consumer to this alias" }, "from": { "type": "string", "description": "If the key is ambiguous, set a 'from' with the name of the producer/consumer to get the key from" }, "grouping": { "type": "object", "description": "If set, group resulting items that have the same value for the 'groupingKey'", "properties": { "groupingKey": { "type": "string", "description": "Used to create hierarchical data structure (parent -> children)" }, "subFields": { "type": "array", "description": "Recursive structure defining fields that should be pulled inside this grouped field", "items": { "$ref": "#/definitions/consumerField" } } }, "required": [ "groupingKey", "subFields" ], "additionalProperties": false }, "transform": { "description": "Optional list of transformations to apply to the dataset before returning the data", "oneOf": [ { "$ref": "#/definitions/singleTransformation" }, { "type": "array", "items": { "$ref": "#/definitions/singleTransformation" }, "description": "A list of transformations to be applied in sequence" } ] }, "validate": { "type": "object", "description": "Rules to check field value compliance and data quality", "properties": { "min": { "type": "number", "description": "Minimum value for numeric fields" }, "max": { "type": "number", "description": "Maximum value for numeric fields" }, "regex": { "type": "string", "description": "Regular expression pattern to validate string fields" }, "required": { "type": "boolean", "description": "Whether the field is required" } }, "additionalProperties": false }, "onError": { "type": "string", "description": "Action to take if an error occurs during transformations or validation", "enum": ["set_default", "skip", "fail"] }, "default": { "description": "Default value of the field if it is missing (or on error if specified)" } }, "required": [ "key" ], "additionalProperties": false }, "minItems": 1 },
"outputs": { "type": "array", "description": "Output formats and configurations for the consumer", "items": { "type": "object", "properties": { "format": { "type": "string", "enum": [ "SQL", "API", "CSV", "PARQUET", "JSON" ], "description": "The output format of the consumer" }, "accellerated": { "type": "boolean", "description": "If true and supported, the consumer will be materialized to improve query performance" }, "direct": { "type": "boolean", "description": "If true, no view is created on the consumer side due to permission limitations" }, "exportDestination": { "type": "string", "description": "The name of the source where the consumer will export its data when deployed/run" }, "trigger": { "type": "object", "description": "Triggers to perform the export (not just the usual 'Deploy')", "properties": { "type": { "type": "string", "enum": [ "CRON", "API" ], "description": "The type of trigger schedule" }, "value": { "type": "string", "description": "The value for the trigger (e.g., cron expression or API endpoint)" } }, "required": [ "type", "value" ], "additionalProperties": false } }, "required": [ "format" ], "additionalProperties": false }, "minItems": 1 }, "filters": { "type": "array", "description": "Filters to apply to the consumer data", "items": { "type": "object", "properties": { "sql": { "type": "string", "description": "The SQL to use to filter on the included dimension" }, "rule": { "type": "object", "description": "Non-SQL based filtering logic", "properties": { "member": { "type": "string", "description": "The field to filter on" }, "operator": { "type": "string", "enum": [ "equals", "notEquals", "contains", "notContains", "startsWith", "endsWith", "greaterThan", "greaterThanOrEquals", "lessThan", "lessThanOrEquals", "in", "notIn", "between", "notBetween", "isNull", "isNotNull", "true", "false", "matches", "notMatches", "sql" ], "description": "The filter operator to apply" }, "values": { "type": "array", "items": { "type": "string" }, 
"description": "The values to filter against" } }, "required": [ "member", "operator", "values" ], "additionalProperties": false } }, "additionalProperties": false } }, "metadata": { "type": "object", "description": "Any tags to add to this consumer for sorting and organizing", "additionalProperties": { "type": "string" } }, "project": { "type": "string", "description": "Tag that groups consumers together to make them accessible by a singular api_key" }, "schema": { "type": "string", "description": "The name of the JSON schema in this project that this consumer needs to adhere to" }, "_version": { "type": "number", "description": "Version number of the consumer configuration" } }, "required": [ "name", "producers", "fields", "outputs", "_version" ], "additionalProperties": false, "definitions": { "consumerField": { "type": "object", "properties": { "key": { "type": "string", "description": "The dimension/measure of the producer OR '*'. If '*' then return all dimensions" }, "alias": { "type": "string", "description": "If set, changes the key returned by the consumer to this alias" }, "from": { "type": "string", "description": "If the key is ambiguous, set a 'from' with the name of the producer/consumer to get the key from" }, "grouping": { "type": "object", "description": "If set, group resulting items that have the same value for the 'groupingKey'", "properties": { "groupingKey": { "type": "string", "description": "Used to create hierarchical data structure (parent -> children)" }, "subFields": { "type": "array", "description": "Recursive structure defining fields that should be pulled inside this grouped field", "items": { "$ref": "#/definitions/consumerField" } } }, "required": [ "groupingKey", "subFields" ], "additionalProperties": false }, "transform": { "description": "Optional list of transformations to apply to the dataset before returning the data", "oneOf": [ { "$ref": "#/definitions/singleTransformation" }, { "type": "array", "items": { "$ref": 
"#/definitions/singleTransformation" }, "description": "A list of transformations to be applied in sequence" } ] }, "validate": { "type": "object", "description": "Rules to check field value compliance and data quality", "properties": { "min": { "type": "number", "description": "Minimum value for numeric fields" }, "max": { "type": "number", "description": "Maximum value for numeric fields" }, "regex": { "type": "string", "description": "Regular expression pattern to validate string fields" }, "required": { "type": "boolean", "description": "Whether the field is required" } }, "additionalProperties": false }, "onError": { "type": "string", "description": "Action to take if an error occurs during transformations or validation", "enum": ["set_default", "skip", "fail"] }, "default": { "description": "Default value of the field if it is missing (or on error if specified)" } }, "required": [ "key" ], "additionalProperties": false }, "singleTransformation": { "oneOf": [ { "type": "object", "properties": { "cast": { "type": "string", "description": "Cast the value to a specific type", "enum": ["string", "number", "date", "boolean"] } }, "required": ["cast"], "additionalProperties": false }, { "type": "object", "properties": { "multiply": { "type": "number", "description": "Multiply the numeric value by this factor" } }, "required": ["multiply"], "additionalProperties": false }, { "type": "object", "properties": { "add": { "type": "number", "description": "Add this number to the numeric value" } }, "required": ["add"], "additionalProperties": false }, { "type": "object", "properties": { "extract": { "type": "string", "enum": ["year", "month", "day", "hour", "minute"], "description": "Extract a component from a date value" } }, "required": ["extract"], "additionalProperties": false }, { "type": "object", "properties": { "concat": { "type": "object", "properties": { "separator": { "type": "string", "description": "The separator to use when joining array elements" } }, 
"required": ["separator"], "additionalProperties": false } }, "required": ["concat"], "additionalProperties": false }, { "type": "object", "properties": { "split": { "type": "object", "properties": { "separator": { "type": "string", "description": "The separator to use when splitting the string" }, "index": { "type": "number", "description": "The index of the split part to keep" } }, "required": ["separator", "index"], "additionalProperties": false } }, "required": ["split"], "additionalProperties": false }, { "type": "object", "properties": { "regex_match": { "type": "object", "properties": { "pattern": { "type": "string", "description": "The regex pattern to test against" }, "flags": { "type": "string", "description": "Regex flags (e.g., 'i' for case-insensitive)" } }, "required": ["pattern"], "additionalProperties": false } }, "required": ["regex_match"], "additionalProperties": false }, { "type": "object", "properties": { "regex_replace": { "type": "object", "properties": { "pattern": { "type": "string", "description": "The regex pattern to match" }, "replacement": { "type": "string", "description": "The replacement string" }, "flags": { "type": "string", "description": "Regex flags (e.g., 'g' for global)" } }, "required": ["pattern", "replacement"], "additionalProperties": false } }, "required": ["regex_replace"], "additionalProperties": false }, { "type": "object", "properties": { "regex_extract": { "type": "object", "properties": { "pattern": { "type": "string", "description": "The regex pattern to extract with" }, "group": { "type": "number", "description": "The capture group index to extract (0 for full match)" }, "flags": { "type": "string", "description": "Regex flags (e.g., 'i' for case-insensitive)" } }, "required": ["pattern", "group"], "additionalProperties": false } }, "required": ["regex_extract"], "additionalProperties": false }, { "type": "object", "properties": { "trim": { "type": "boolean", "description": "Trim whitespace from both ends of the 
string" } }, "required": ["trim"], "additionalProperties": false }, { "type": "object", "properties": { "to_lowercase": { "type": "boolean", "description": "Convert string to lowercase" } }, "required": ["to_lowercase"], "additionalProperties": false }, { "type": "object", "properties": { "to_uppercase": { "type": "boolean", "description": "Convert string to uppercase" } }, "required": ["to_uppercase"], "additionalProperties": false }, { "type": "object", "properties": { "capitalize": { "type": "boolean", "description": "Capitalize the first letter of the string" } }, "required": ["capitalize"], "additionalProperties": false }, { "type": "object", "properties": { "substring": { "type": "object", "properties": { "start": { "type": "number", "description": "Starting position of substring" }, "end": { "type": "number", "description": "Optional ending position of substring" } }, "required": ["start"], "additionalProperties": false } }, "required": ["substring"], "additionalProperties": false }, { "type": "object", "properties": { "pad_start": { "type": "object", "properties": { "length": { "type": "number", "description": "Desired string length after padding" }, "char": { "type": "string", "description": "Character to pad with", "minLength": 1, "maxLength": 1 } }, "required": ["length", "char"], "additionalProperties": false } }, "required": ["pad_start"], "additionalProperties": false }, { "type": "object", "properties": { "pad_end": { "type": "object", "properties": { "length": { "type": "number", "description": "Desired string length after padding" }, "char": { "type": "string", "description": "Character to pad with", "minLength": 1, "maxLength": 1 } }, "required": ["length", "char"], "additionalProperties": false } }, "required": ["pad_end"], "additionalProperties": false }, { "type": "object", "properties": { "prepend": { "type": "string", "description": "String to add at the beginning" } }, "required": ["prepend"], "additionalProperties": false }, { "type": 
"object", "properties": { "append": { "type": "string", "description": "String to add at the end" } }, "required": ["append"], "additionalProperties": false } ] } }, "examples": [ { "name": "UserOrderAnalytics", "description": "Consumer that combines user data with their order history", "producers": [ { "name": "UserData", "joins": [ { "otherName": "OrderData", "relationship": "one-to-many", "sql": "${P.id} = ${OrderData.user_id}" } ] }, { "name": "OrderData" } ], "fields": [ { "key": "id", "from": "UserData" }, { "key": "name", "alias": "user_name", "from": "UserData" }, { "key": "email", "from": "UserData" }, { "key": "orders", "grouping": { "groupingKey": "order_items", "subFields": [ { "key": "id", "from": "OrderData", "alias": "order_id" }, { "key": "created_at", "from": "OrderData" }, { "key": "total_amount", "from": "OrderData" } ] } } ], "outputs": [ { "format": "API" }, { "format": "JSON", "exportDestination": "Data Lake", "trigger": { "type": "CRON", "value": "0 0 * * *" } } ], "filters": [ { "rule": { "member": "total_amount", "operator": "greaterThan", "values": [ "100" ] } } ], "metadata": { "department": "Sales", "owner": "analytics-team" }, "project": "sales-analytics", "_version": 1 } ] }