@forzalabs/remora
Version:
A powerful CLI tool for seamless data translation.
815 lines • 33.9 kB
JSON
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Consumer Schema",
"description": "Schema for defining data consumers",
"type": "object",
"properties": {
"$schema": {
"type": "string",
"format": "uri"
},
"name": {
"type": "string",
"description": "The name of the consumer"
},
"description": {
"type": "string",
"description": "Optional description of the consumer"
},
"producers": {
"type": "array",
"description": "The 'FROM' elements of the SQL query (only the first one is in the FROM clause, others are joined in)",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of an available producer/consumer"
},
"joins": {
"type": "array",
"items": {
"type": "object",
"properties": {
"otherName": {
"type": "string",
"description": "The name of the other producer to JOIN with from the list of producers"
},
"relationship": {
"type": "string",
"enum": [
"one-to-one",
"one-to-many",
"many-to-one"
],
"description": "The type of join relationship"
},
"sql": {
"type": "string",
"description": "The SQL to use to perform the join"
}
},
"required": [
"otherName",
"relationship",
"sql"
],
"additionalProperties": false
}
},
"custom": {
"type": "object",
"description": "Custom code to extract fields from producers, transform them, and return results",
"properties": {
"language": {
"type": "string",
"enum": [
"js",
"python"
],
"description": "The language of the custom code"
},
"code": {
"type": "string",
"description": "The custom code to execute"
}
},
"required": [
"language",
"code"
],
"additionalProperties": false
}
},
"required": [
"name"
],
"additionalProperties": false
},
"minItems": 1
},
"fields": {
"type": "array",
"description": "The fields to include in the consumer",
"items": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "The dimension/measure of the producer OR '*'. If '*' then return all dimensions"
},
"alias": {
"type": "string",
"description": "If set, changes the key returned by the consumer to this alias"
},
"from": {
"type": "string",
"description": "If the key is ambiguous, set a 'from' with the name of the producer/consumer to get the key from"
},
"grouping": {
"type": "object",
"description": "If set, group resulting items that have the same value for the 'groupingKey'",
"properties": {
"groupingKey": {
"type": "string",
"description": "Used to create hierarchical data structure (parent -> children)"
},
"subFields": {
"type": "array",
"description": "Recursive structure defining fields that should be pulled inside this grouped field",
"items": {
"$ref": "#/definitions/consumerField"
}
}
},
"required": [
"groupingKey",
"subFields"
],
"additionalProperties": false
},
"transform": {
"description": "Optional list of transformations to apply to the dataset before returning the data",
"oneOf": [
{
"$ref": "#/definitions/singleTransformation"
},
{
"type": "array",
"items": {
"$ref": "#/definitions/singleTransformation"
},
"description": "A list of transformations to be applied in sequence"
}
]
},
"validate": {
"type": "object",
"description": "Rules to check field value compliance and data quality",
"properties": {
"min": {
"type": "number",
"description": "Minimum value for numeric fields"
},
"max": {
"type": "number",
"description": "Maximum value for numeric fields"
},
"regex": {
"type": "string",
"description": "Regular expression pattern to validate string fields"
},
"required": {
"type": "boolean",
"description": "Whether the field is required"
}
},
"additionalProperties": false
},
"onError": {
"type": "string",
"description": "Action to take if an error occurs during transformations or validation",
"enum": ["set_default", "skip", "fail"]
},
"default": {
"description": "Default value of the field if it is missing (or on error if specified)"
}
},
"required": [
"key"
],
"additionalProperties": false
},
"minItems": 1
},
"outputs": {
"type": "array",
"description": "Output formats and configurations for the consumer",
"items": {
"type": "object",
"properties": {
"format": {
"type": "string",
"enum": [
"SQL",
"API",
"CSV",
"PARQUET",
"JSON"
],
"description": "The output format of the consumer"
},
"accellerated": {
"type": "boolean",
"description": "If true and supported, the consumer will be materialized to improve query performance"
},
"direct": {
"type": "boolean",
"description": "If true, no view is created on the consumer side due to permission limitations"
},
"exportDestination": {
"type": "string",
"description": "The name of the source where the consumer will export its data when deployed/run"
},
"trigger": {
"type": "object",
"description": "Triggers to perform the export (not just the usual 'Deploy')",
"properties": {
"type": {
"type": "string",
"enum": [
"CRON",
"API"
],
"description": "The type of trigger schedule"
},
"value": {
"type": "string",
"description": "The value for the trigger (e.g., cron expression or API endpoint)"
}
},
"required": [
"type",
"value"
],
"additionalProperties": false
}
},
"required": [
"format"
],
"additionalProperties": false
},
"minItems": 1
},
"filters": {
"type": "array",
"description": "Filters to apply to the consumer data",
"items": {
"type": "object",
"properties": {
"sql": {
"type": "string",
"description": "The SQL to use to filter on the included dimension"
},
"rule": {
"type": "object",
"description": "Non-SQL based filtering logic",
"properties": {
"member": {
"type": "string",
"description": "The field to filter on"
},
"operator": {
"type": "string",
"enum": [
"equals",
"notEquals",
"contains",
"notContains",
"startsWith",
"endsWith",
"greaterThan",
"greaterThanOrEquals",
"lessThan",
"lessThanOrEquals",
"in",
"notIn",
"between",
"notBetween",
"isNull",
"isNotNull",
"true",
"false",
"matches",
"notMatches",
"sql"
],
"description": "The filter operator to apply"
},
"values": {
"type": "array",
"items": {
"type": "string"
},
"description": "The values to filter against"
}
},
"required": [
"member",
"operator",
"values"
],
"additionalProperties": false
}
},
"additionalProperties": false
}
},
"metadata": {
"type": "object",
"description": "Any tags to add to this consumer for sorting and organizing",
"additionalProperties": {
"type": "string"
}
},
"project": {
"type": "string",
"description": "Tag that groups consumers together to make them accessible by a singular api_key"
},
"schema": {
"type": "string",
"description": "The name of the JSON schema in this project that this consumer needs to adhere to"
},
"_version": {
"type": "number",
"description": "Version number of the consumer configuration"
}
},
"required": [
"name",
"producers",
"fields",
"outputs",
"_version"
],
"additionalProperties": false,
"definitions": {
"consumerField": {
"type": "object",
"properties": {
"key": {
"type": "string",
"description": "The dimension/measure of the producer OR '*'. If '*' then return all dimensions"
},
"alias": {
"type": "string",
"description": "If set, changes the key returned by the consumer to this alias"
},
"from": {
"type": "string",
"description": "If the key is ambiguous, set a 'from' with the name of the producer/consumer to get the key from"
},
"grouping": {
"type": "object",
"description": "If set, group resulting items that have the same value for the 'groupingKey'",
"properties": {
"groupingKey": {
"type": "string",
"description": "Used to create hierarchical data structure (parent -> children)"
},
"subFields": {
"type": "array",
"description": "Recursive structure defining fields that should be pulled inside this grouped field",
"items": {
"$ref": "#/definitions/consumerField"
}
}
},
"required": [
"groupingKey",
"subFields"
],
"additionalProperties": false
},
"transform": {
"description": "Optional list of transformations to apply to the dataset before returning the data",
"oneOf": [
{
"$ref": "#/definitions/singleTransformation"
},
{
"type": "array",
"items": {
"$ref": "#/definitions/singleTransformation"
},
"description": "A list of transformations to be applied in sequence"
}
]
},
"validate": {
"type": "object",
"description": "Rules to check field value compliance and data quality",
"properties": {
"min": {
"type": "number",
"description": "Minimum value for numeric fields"
},
"max": {
"type": "number",
"description": "Maximum value for numeric fields"
},
"regex": {
"type": "string",
"description": "Regular expression pattern to validate string fields"
},
"required": {
"type": "boolean",
"description": "Whether the field is required"
}
},
"additionalProperties": false
},
"onError": {
"type": "string",
"description": "Action to take if an error occurs during transformations or validation",
"enum": ["set_default", "skip", "fail"]
},
"default": {
"description": "Default value of the field if it is missing (or on error if specified)"
}
},
"required": [
"key"
],
"additionalProperties": false
},
"singleTransformation": {
"oneOf": [
{
"type": "object",
"properties": {
"cast": {
"type": "string",
"description": "Cast the value to a specific type",
"enum": ["string", "number", "date", "boolean"]
}
},
"required": ["cast"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"multiply": {
"type": "number",
"description": "Multiply the numeric value by this factor"
}
},
"required": ["multiply"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"add": {
"type": "number",
"description": "Add this number to the numeric value"
}
},
"required": ["add"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"extract": {
"type": "string",
"enum": ["year", "month", "day", "hour", "minute"],
"description": "Extract a component from a date value"
}
},
"required": ["extract"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"concat": {
"type": "object",
"properties": {
"separator": {
"type": "string",
"description": "The separator to use when joining array elements"
}
},
"required": ["separator"],
"additionalProperties": false
}
},
"required": ["concat"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"split": {
"type": "object",
"properties": {
"separator": {
"type": "string",
"description": "The separator to use when splitting the string"
},
"index": {
"type": "number",
"description": "The index of the split part to keep"
}
},
"required": ["separator", "index"],
"additionalProperties": false
}
},
"required": ["split"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"regex_match": {
"type": "object",
"properties": {
"pattern": {
"type": "string",
"description": "The regex pattern to test against"
},
"flags": {
"type": "string",
"description": "Regex flags (e.g., 'i' for case-insensitive)"
}
},
"required": ["pattern"],
"additionalProperties": false
}
},
"required": ["regex_match"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"regex_replace": {
"type": "object",
"properties": {
"pattern": {
"type": "string",
"description": "The regex pattern to match"
},
"replacement": {
"type": "string",
"description": "The replacement string"
},
"flags": {
"type": "string",
"description": "Regex flags (e.g., 'g' for global)"
}
},
"required": ["pattern", "replacement"],
"additionalProperties": false
}
},
"required": ["regex_replace"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"regex_extract": {
"type": "object",
"properties": {
"pattern": {
"type": "string",
"description": "The regex pattern to extract with"
},
"group": {
"type": "number",
"description": "The capture group index to extract (0 for full match)"
},
"flags": {
"type": "string",
"description": "Regex flags (e.g., 'i' for case-insensitive)"
}
},
"required": ["pattern", "group"],
"additionalProperties": false
}
},
"required": ["regex_extract"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"trim": {
"type": "boolean",
"description": "Trim whitespace from both ends of the string"
}
},
"required": ["trim"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"to_lowercase": {
"type": "boolean",
"description": "Convert string to lowercase"
}
},
"required": ["to_lowercase"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"to_uppercase": {
"type": "boolean",
"description": "Convert string to uppercase"
}
},
"required": ["to_uppercase"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"capitalize": {
"type": "boolean",
"description": "Capitalize the first letter of the string"
}
},
"required": ["capitalize"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"substring": {
"type": "object",
"properties": {
"start": {
"type": "number",
"description": "Starting position of substring"
},
"end": {
"type": "number",
"description": "Optional ending position of substring"
}
},
"required": ["start"],
"additionalProperties": false
}
},
"required": ["substring"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"pad_start": {
"type": "object",
"properties": {
"length": {
"type": "number",
"description": "Desired string length after padding"
},
"char": {
"type": "string",
"description": "Character to pad with",
"minLength": 1,
"maxLength": 1
}
},
"required": ["length", "char"],
"additionalProperties": false
}
},
"required": ["pad_start"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"pad_end": {
"type": "object",
"properties": {
"length": {
"type": "number",
"description": "Desired string length after padding"
},
"char": {
"type": "string",
"description": "Character to pad with",
"minLength": 1,
"maxLength": 1
}
},
"required": ["length", "char"],
"additionalProperties": false
}
},
"required": ["pad_end"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"prepend": {
"type": "string",
"description": "String to add at the beginning"
}
},
"required": ["prepend"],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"append": {
"type": "string",
"description": "String to add at the end"
}
},
"required": ["append"],
"additionalProperties": false
}
]
}
},
"examples": [
{
"name": "UserOrderAnalytics",
"description": "Consumer that combines user data with their order history",
"producers": [
{
"name": "UserData",
"joins": [
{
"otherName": "OrderData",
"relationship": "one-to-many",
"sql": "${P.id} = ${OrderData.user_id}"
}
]
},
{
"name": "OrderData"
}
],
"fields": [
{
"key": "id",
"from": "UserData"
},
{
"key": "name",
"alias": "user_name",
"from": "UserData"
},
{
"key": "email",
"from": "UserData"
},
{
"key": "orders",
"grouping": {
"groupingKey": "order_items",
"subFields": [
{
"key": "id",
"from": "OrderData",
"alias": "order_id"
},
{
"key": "created_at",
"from": "OrderData"
},
{
"key": "total_amount",
"from": "OrderData"
}
]
}
}
],
"outputs": [
{
"format": "API"
},
{
"format": "JSON",
"exportDestination": "Data Lake",
"trigger": {
"type": "CRON",
"value": "0 0 * * *"
}
}
],
"filters": [
{
"rule": {
"member": "total_amount",
"operator": "greaterThan",
"values": [
"100"
]
}
}
],
"metadata": {
"department": "Sales",
"owner": "analytics-team"
},
"project": "sales-analytics",
"_version": 1
}
]
}