# @datastax/astra-mongoose
# Version:
# Astra's NodeJS Mongoose compatibility client
# 1,061 lines (966 loc) • 57.5 kB
# YAML
# DSE Config Version: 6.9.13
# Memory limit for DSE In-Memory tables as a fraction of system memory. When not set,
# the default is 0.2 (20% of system memory).
# Specify max_memory_to_lock_fraction or max_memory_to_lock_mb, not both.
# max_memory_to_lock_fraction: 0.20
# Memory limit for DSE In-Memory tables as a maximum in MB. When not set,
# max_memory_to_lock_fraction is used. The max_memory_to_lock_fraction
# value is ignored if max_memory_to_lock_mb is set to a non-zero value.
# Specify max_memory_to_lock_fraction or max_memory_to_lock_mb, not both.
# max_memory_to_lock_mb: 10240
##########################
# Authentication options
#
# These options are used if the authenticator option in cassandra.yaml is set to
# com.datastax.bdp.cassandra.auth.DseAuthenticator
#
# The enabled option controls whether the DseAuthenticator will authenticate users. If
# set to true users will be authenticated, if set to false they will not.
# When not set enabled is false.
#
# DseAuthenticator allows multiple authentication schemes to be used at the same time.
# The schemes to be used are controlled by the default_scheme and other_schemes options.
# A driver can select the scheme to use during authentication.
#
# The default_scheme option selects which authentication scheme will be used if the driver
# does not request a specific scheme. This can be one of the following values:
# internal - plain text authentication using the internal password authenticator
# ldap - plain text authentication using the passthrough LDAP authenticator
# kerberos - GSSAPI authentication using the Kerberos authenticator
# The other_schemes option is a list of schemes that can also be selected for use by a
# driver and can be a list of the above schemes.
#
# The scheme_permissions option controls whether roles need to have permission granted to
# them in order to use specific authentication schemes. These permissions can be granted
# only when the DseAuthorizer is used.
#
# The allow_digest_with_kerberos option controls whether Digest-MD5 authentication is also
# allowed when Kerberos is one of the authentication schemes. If set to false, it will not
# be allowed. You must set allow_digest_with_kerberos to true in analytics clusters to use Hadoop
# inter-node authentication with Hadoop and Spark jobs.
#
# The plain_text_without_ssl controls how the DseAuthenticator reacts to plain text
# authentication requests over unencrypted client connections. It can be one of:
# block - block the request with an authentication error
# warn - log a warning about the request but allow it to continue
# allow - allow the request without any warning
#
# The transitional_mode option allows the DseAuthenticator to operate in a transitional
# mode during setup of authentication in a cluster. This can be one of the following values:
# disabled - transitional mode is disabled
# permissive - Only super users are authenticated and logged in, all other
# authentication attempts will be logged in as the anonymous user
# normal - If credentials are passed they are authenticated. If the
# authentication is successful then the user is logged in, otherwise
# the user is logged in as anonymous. If no credentials are passed,
# then the user is logged in as anonymous
# strict - If credentials are passed they are authenticated. If the
# authentication is successful, the user is logged in. If the
# authentication fails, an authentication error is returned. If no
# credentials are passed, the user is logged in as anonymous
# Settings for the DseAuthenticator. Every key below is nested under
# authentication_options so that it is not parsed as an unrelated top-level key.
authentication_options:
  enabled: true
  # Scheme used when the driver does not request a specific one.
  default_scheme: internal
  # Left empty (null): no additional schemes are listed.
  other_schemes:
  scheme_permissions: false
  allow_digest_with_kerberos: true
  # warn = log a warning about plain text authentication over an unencrypted
  # client connection, but allow the request to continue.
  plain_text_without_ssl: warn
  transitional_mode: disabled
##########################
# Role management options
#
# These options are used when the role_manager option in cassandra.yaml is set to
# com.datastax.bdp.cassandra.auth.DseRoleManager
#
# mode can be one of:
# internal - the granting and revoking of roles is managed internally
# using the GRANT ROLE and REVOKE ROLE statements
# ldap - the granting and revoking of roles is managed by an external
# LDAP server configured using the ldap_options.
#
# stats is a boolean option (false by default). If set to true, the DSE server will keep
# track of role creation and password change timestamps in the dse_security.role_stats table.
# This data will be updated based on newly executed CREATE / ALTER / DROP role CQL statements.
#
# mode_by_authentication allows overriding the defined mode based on the authentication scheme.
# The allowed values are either internal or ldap. If not set, the defined mode will be used.
#
# role_management_options:
#   mode: internal
#   stats: false
#   mode_by_authentication:
#     internal:
#     ldap:
#     kerberos:
##########################
# Authorization options
#
# These options are used if the authorization option in cassandra.yaml is set to
# com.datastax.bdp.cassandra.auth.DseAuthorizer
#
# The enabled option controls whether the DseAuthorizer will perform authorization. If
# set to true authorization is performed, if set to false it is not.
# When not set, enabled is false.
#
# The transitional_mode option allows the DseAuthorizer to operate in a transitional
# mode during setup of authorization in a cluster. This can be one of the following values:
# disabled - transitional mode is disabled, all connections must provide valid credentials and
# map to a login-enabled role
# normal - allow all connections that provide credentials, permissions can be granted to
# resources but are not enforced
# strict - permissions can be granted to resources and are enforced on
# authenticated users. They are not enforced against anonymous
# users
#
# allow_row_level_security - To use row level security, set to true for the entire system.
# Use the same setting on all nodes.
# authorization_options:
#   enabled: false
#   transitional_mode: disabled
#   allow_row_level_security: false
##########################
# Kerberos options
#
# keytab is <path_to_keytab>/dse.keytab
# The keytab file must contain the credentials for both of the fully resolved principal names, which
# replace _HOST with the fully qualified domain name (FQDN) of the host in the service_principal and
# http_principal settings. The UNIX user running DSE must also have read permissions on the keytab.
#
# The service_principal that the DataStax Enterprise process runs under must use the form
# <dse_user>/_HOST@<REALM>
#
# The http_principal is used by the Tomcat application container to run DSE Search.
#
# The qop is the Quality of Protection (QOP) values that clients and servers
# can use for each connection. Valid values are:
# auth - (default) authentication only
# auth-int - authentication plus integrity protection of all transmitted data
# auth-conf - authentication plus integrity protection and encryption of all
# transmitted data
#
# Warning - Encryption using auth-conf is separate and completely independent
# of whether encryption is done using SSL. If auth-conf is selected here
# and SSL is enabled, the transmitted data is encrypted twice.
# Kerberos settings. The child keys are nested under kerberos_options so that
# they are not parsed as separate top-level settings.
kerberos_options:
  # The UNIX user running DSE needs read permission on this keytab.
  keytab: resources/dse/conf/dse.keytab
  # _HOST is replaced with the fully qualified domain name of the host.
  # NOTE(review): REALM looks like a placeholder for the Kerberos realm - confirm
  # it is substituted before Kerberos is enabled.
  service_principal: dse/_HOST@REALM
  http_principal: HTTP/_HOST@REALM
  # auth = authentication only.
  qop: auth
##########################
# LDAP options
#
# These options are only used when the com.datastax.bdp.cassandra.auth.DseAuthenticator
# is configured as the authenticator in cassandra.yaml and 'ldap' scheme is selected in
# authentication_options and/or role_management_options above.
# ldap_options:
# # LDAP server address or comma separated list of alternative addresses. Each address may include port after a colon.
# # If port is not defined per single address, server_port will be used by default. All servers are equivalent and
# # the one which is chosen for a certain request depends on the current state (recent failures, number of idle
# # connections in the pool).
# server_host:
#
# # The port on which the LDAP server listens, usually port 389 for unencrypted
# # connections and port 636 for SSL-encrypted connections. If use_tls is set to true, use the
# # unencrypted port
# server_port: 389
#
# # The distinguished name (DN) of an account that is used to search for other users on the
# # LDAP server. This user should have only the necessary permissions to do the search
# # If not present then an anonymous bind is used for the search
# search_dn:
#
# # Password of the search_dn account
# search_password:
#
# # Set to true to use an SSL encrypted connection. In this case the server_port needs
# # to be set to the LDAP port for the server
# use_ssl: false
#
# # Set to true to initiate a TLS encrypted connection on the default ldap port
# use_tls: false
#
# truststore_path:
# truststore_password:
# truststore_type: jks
#
# ssl_protocol: TLS
#
# user_search_base:
# extra_user_search_bases: []
# user_search_filter: (uid={0})
#
# # Set to true to enable hostname verification. This is only effective if one of use_ssl or use_tls is true
# # and truststore_path has a valid trust store.
# hostname_verification: false
#
# # Set to the attribute on the user entry containing group membership information.
# user_memberof_attribute: memberof
#
# # The group_search_type defines how group membership will be determined for a user. It can be one of:
# # directory_search - each group entry has the attribute whose values refer to that group's members;
# # the search is performed by finding those groups whose member is the given user;
# # for that purpose, group_search_base and group_search_filter are used;
# #
# # memberof_search - each entry has the attribute (indicated by user_memberof_attribute parameter) whose
# # values refer to the groups that entry is a member of; that is, the membership information
# # is attached directly to the user entry;
# #
# # Note that group_search_base and group_search_filter are also used to find the group entry by its name; we need
# # that to discover group hierarchy - when we have a role, which is mapped to some LDAP group, and we need to find
# # all roles that role belongs to, we need to find that role first - if that role was a user, we would use
# # user_search_base and user_search_filter; when it is a group, we use group_search_base and group_search_filter;
# # Conclusion is that in case of memberof_search, group_search_filter must be able to find a group by that group's
# # name, and in case of directory_search, group_search_filter must be able to find a group by that group's name or
# # any of that group's members DN, for example: (|(member={0})(cn={0}))
# group_search_type: directory_search
# group_search_base:
# extra_group_search_bases: []
# group_search_filter: (uniquemember={0})
#
# # The attribute in the group entry that holds the group name.
# group_name_attribute: cn
#
# # The following options are for optimized searching all parent groups, including inherited ones if the LDAP server
# # supports such queries. Such a query can retrieve all parent groups with a single request with no need to
# # traverse group hierarchy level by level;
# # Similarly to group_search_type, we can use either memberof_search or directory_search, depending on the LDAP
# # server which is used, or leave it empty to disable this mechanism if LDAP server does not support it;
# # When we use memberof_search, we only need to define all_parent_groups_memberof_attribute, which is a special attribute
# # that includes information about all parent groups, including transitive ones, for example Oracle LDAP supports
# # this mechanism with attribute named isMemberOf
# # When we use directory_search, we only need to define all_parent_groups_search_filter, which should allow to find all
# # the groups the role belongs to; for example ActiveDirectory server supports so called
# # LDAP_MATCHING_RULE_IN_CHAIN search operator which can be defined as (member:1.2.840.113556.1.4.1941:={0})
# all_parent_groups_search_type:
# all_parent_groups_memberof_attribute: isMemberOf
# all_parent_groups_search_filter: (member:1.2.840.113556.1.4.1941:={0})
#
# # Validity period for the credentials cache in milli-seconds (remote bind is an expensive
# # operation). Defaults to 0, set to 0 to disable.
# credentials_validity_in_ms: 0
#
# # Validity period for the search cache in seconds. Defaults to 0, set to 0 to disable.
# search_validity_in_seconds: 0
#
# # Validity period for the groups cache in milliseconds. The groups cache holds ldap group entries
# # and is used as a source for the roles cache.
# # Defaults to the search cache validity period (in milliseconds) for backward compatibility.
# # Set to 0 to disable.
# groups_validity_in_ms: 0
#
# # Refresh interval for groups cache (if enabled). After this interval, cache entries become eligible
# # for refresh. Upon next access, an async reload is scheduled and the old value returned until it completes.
# # If groups_validity_in_ms is non-zero, then this must be also.
# # Defaults to the same value as groups_validity_in_ms.
# groups_update_interval_in_ms: 0
#
# # Connection pool settings - note that a separate connection pool will be created for each provided server address
# connection_pool:
# max_active: 8
# max_idle: 8
#
# # DNS service discovery configuration. By default, when it is disabled (it is disabled when fqdn is empty), LDAP
# # servers list is taken from server_host param. When fqdn is defined, we will try to retrieve LDAP servers from
# # the SRV records of the provided domain. In this case, the servers provided in server_host are used as a fallback
# # list of servers, when DNS is not available or returns no SRV records.
# dns_service_discovery:
# # fully qualified domain name to get the SRV records from; leave empty to disable DNS service discovery.
# # example: _ldap._tcp.example.com
# fqdn:
#
# # timeout in ms for querying DNS; it has to be between 0 and 1 hour equivalent
# lookup_timeout_ms: 5000
#
# # for how long the old results should be retained/cached (value between 0 and 10 days equivalent);
# # note that it is applicable only if automatic polling is enabled (see polling_interval_ms)
# retention_duration_ms: 600000
#
# # how often we should try to refresh the list of obtained servers, leave 0 to disable periodical refreshing
# # (value between 0 and 10 days equivalent). Note that if automatic polling is enabled, it does not make
# # sense to set retention_duration_ms to a value lower than the polling_interval_ms as our cached results
# # would be expired by the time we try to refresh the results.
# polling_interval_ms: 0
# To ensure that records with TTLs are purged from DSE Search indexes when they expire, DSE
# periodically checks all indexes for expired documents and deletes them. These settings
# control the scheduling and execution of those checks.
# Scheduling of the periodic purge of expired (TTL) documents from DSE Search
# indexes. All settings are nested under ttl_index_rebuild_options.
ttl_index_rebuild_options:
  # By default, schedule a check every 300 seconds:
  fixed_rate_period: 300
  # The number of seconds to delay the first check to speed up startup time:
  initial_delay: 20
  # All documents determined to be expired are deleted from the index during each check, but
  # to avoid memory pressure, their unique keys are retrieved and deletes issued in batches.
  # This determines the maximum number of documents per batch:
  max_docs_per_batch: 4096
  # Maximum number of search indexes that can execute TTL cleanup concurrently:
  thread_pool_size: 1
# DSE Search resource upload size limit in MB. A value of '0' disables resource uploading.
solr_resource_upload_limit_mb: 10

# Transport options for inter-node communication between DSE Search nodes.
shard_transport_options:
  # The cumulative shard request timeout, in milliseconds, defines the internal timeout for all
  # search queries to prevent long running queries. Default is 60000 (1 minute).
  netty_client_request_timeout: 60000
# ---- DSE Search index encryption options
# solr_encryption_options:
# # Whether to allocate shared index decryption cache off JVM heap.
# # Default is off heap allocation (true).
# decryption_cache_offheap_allocation: true
# # The maximum size of shared DSE Search decryption cache, in MB.
# # Default is 256 MB.
# decryption_cache_size_in_mb: 256
# ---- DSE Search indexing settings
# # The maximum number of queued partitions during search index rebuilding. (This serves primarily
# # as a safeguard against excessive heap usage by the indexing queue.) If set lower than the
# # number of TPC threads, not all TPC threads can be actively indexing.
# #
# # Default: 1024
# back_pressure_threshold_per_core: 1024
#
# # The max time to wait for flushing of index updates during re-index.
# # Flushing should always complete successfully, in order to fully sync search indexes
# # with DSE data. DataStax recommends to always set at a reasonably high value.
# #
# # Default: 5 minutes
# flush_max_time_per_core: 5
#
# # The maximum time to wait for each search index to load on startup and create/reload search index operations.
# # Only change this advanced option if any exceptions happen during search index loading.
# #
# # Default: 5 minutes
# load_max_time_per_core: 5
#
# # Applies the configured Cassandra disk failure policy to index write failures.
# # Default is disabled (false).
# enable_index_disk_failure_policy: false
# # The directory to store search index data. Each DSE Search index is stored under
# # a solrconfig_data_dir/keyspace.table directory.
# # Default is a solr.data directory inside Cassandra data directory, or as specified
# # by the dse.solr.data.dir system property.
# solr_data_dir: /MyDir
# # The Lucene field cache has been deprecated. Instead set docValues="true" on the field
# # in the schema.xml file. After changing the schema, reload and reindex the search index.
# # Default: false
# solr_field_cache_enabled: false
# # Global Lucene RAM buffer usage thresholds (separate for heap and off-heap) at which DSE will force segment flush.
# # Setting this too low may induce a state of constant flushing during periods of ongoing write activity. For
# # NRT, these forced segment flushes will also de-schedule pending auto-soft commits to avoid potentially
# # flushing too many small segments.
# # Default: 1024
# ram_buffer_heap_space_in_mb: 1024
# # Default: 1024
# ram_buffer_offheap_space_in_mb: 1024
# ---- DSE Search CQL query options
# # Maximum time in milliseconds to wait for all rows
# # to be read from the database during CQL Solr queries.
# # Default is 10000 (10 seconds).
# cql_solr_query_row_timeout: 10000
##########################
# Global performance service options
# # Number of background threads used by the performance service under normal conditions.
# # Defaults to 4.
# performance_core_threads: 4
# # Maximum number of background threads used by the performance service.
# # Defaults to concurrent_writes specified in cassandra.yaml.
# performance_max_threads: 32
#
# # The number of queued tasks in the backlog when the number of performance_max_threads are busy (minimum 0).
# performance_queue_capacity: 32000
#
# # If the performance service requests more tasks than (performance_max_threads + performance_queue_capacity),
# # a dropped task warning will be issued. This warning indicates that collected statistics may not be up to date
# # because the server couldn't keep up under the current load.
#
# # You can disable some services, reconfigure some services, or increase the queue size.
##########################
# Performance service options
# NOTE(review): ttl_seconds presumably is the retention time, in seconds, of
# graph event records - confirm against the DSE documentation.
graph_events:
  ttl_seconds: 600
# cql_slow_log_options:
# enabled: true
#
# # When t > 1, log queries taking longer than t milliseconds.
# # 0 <= t <= 1, log queries above t percentile
# threshold: 200.0
#
# # Initial number of queries before percentile filter becomes active
# minimum_samples: 100
#
# ttl_seconds: 259200
#
# # Keeps slow queries in-memory only and doesn't write data to the database.
# # WARNING - if this is set to 'false' then set threshold >= 2000, otherwise there will be a
# # high load on the database.
# skip_writing_to_db: true
#
# # The number of slow queries to keep in-memory
# num_slowest_queries: 5
# Performance service collectors. Each collector has its own `enabled` switch
# and `refresh_rate_ms` value; they must be nested under the owning option so
# that the repeated key names do not collide at the top level.
# NOTE(review): refresh_rate_ms presumably is the refresh interval in
# milliseconds - confirm.
cql_system_info_options:
  enabled: false
  refresh_rate_ms: 10000

resource_level_latency_tracking_options:
  enabled: false
  refresh_rate_ms: 10000

db_summary_stats_options:
  enabled: false
  refresh_rate_ms: 10000

cluster_summary_stats_options:
  enabled: false
  refresh_rate_ms: 10000

spark_cluster_info_options:
  enabled: false
  refresh_rate_ms: 10000
# ---- Spark application stats options
# `driver` and `executor` are separate sub-sections; their identically named
# keys (sink, connectorSource, jvmSource) must stay nested to remain distinct.
spark_application_info_options:
  enabled: false
  refresh_rate_ms: 10000

  driver:
    # enables or disables writing of the metrics collected at Spark Driver to Cassandra
    sink: false
    # enables or disables Spark Cassandra Connector metrics at Spark Driver
    connectorSource: false
    # enables or disables JVM heap and GC metrics at Spark Driver
    jvmSource: false
    # enables or disables application state metrics
    stateSource: false

  executor:
    # enables or disables writing of the metrics collected at executors to Cassandra
    sink: false
    # enables or disables Spark Cassandra Connector metrics at executors
    connectorSource: false
    # enables or disables JVM heap and GC metrics at executors
    jvmSource: false
# Table Histogram data tables options
histogram_data_options:
  enabled: false
  refresh_rate_ms: 10000
  # NOTE(review): presumably the number of histogram snapshots retained - confirm.
  retention_count: 3
# User/Resource latency tracking settings
user_level_latency_tracking_options:
  enabled: false
  refresh_rate_ms: 10000
  top_stats_limit: 100
  quantiles: false
# ---- DSE Search Performance Objects
# Each object carries its own enabled / ttl_seconds settings, nested under the
# owning option so that the repeated key names do not collide.
solr_slow_sub_query_log_options:
  enabled: false
  ttl_seconds: 604800
  async_writers: 1
  threshold_ms: 3000

solr_update_handler_metrics_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000

solr_request_handler_metrics_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000

solr_index_stats_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000

solr_cache_stats_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000

solr_latency_snapshot_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000
# Node health is a score-based representation of how fit a node is to handle queries. The score is a
# function of how long a node has been up and the rate of dropped mutations in the recent past.
node_health_options:
  refresh_rate_ms: 60000
  # The amount of continuous uptime required for the node to reach the maximum uptime score. If you
  # are concerned with consistency during repair after a period of downtime, you may want to
  # temporarily increase this time to the expected time it will take to complete repair.
  #
  # Default - 10800 seconds (3 hours)
  uptime_ramp_up_period_seconds: 10800
  # The time window in the past over which the rate of dropped mutations affects the node health score.
  # Default - 30 minutes
  dropped_mutation_window_minutes: 30

# If enabled (true), replica selection for distributed DSE Search queries takes node health into account
# when multiple candidates exist for a particular token range. Set to false to ignore
# node health when choosing replicas.
#
# Health-based routing allows us to make a trade-off between index consistency and query throughput. If
# the primary concern is query performance, it may make sense to set this to "false".
#
# Default is enabled (true).
enable_health_based_routing: true

# If enabled (true), DSE Search reindexing of bootstrapped data will happen asynchronously, and the node
# will join the ring straight after bootstrap.
#
# Default is disabled (false). The node will wait for reindexing of bootstrapped data to finish before
# joining the ring.
async_bootstrap_reindex: false
# Lease metrics. Enable these metrics to help monitor the performance of the lease subsystem.
# ttl_seconds controls how long the log of lease holder changes persists.
lease_metrics_options:
  enabled: false
  ttl_seconds: 604800

# The directory where system keys are kept.
#
# Keys used for SSTable encryption must be distributed to all nodes.
# DSE must be able to read and write to the directory.
#
# This directory should have 700 permissions and belong to the dse user.
system_key_directory: /etc/dse/conf
# If this is set to true, DSE requires the following config values to be encrypted:
#   resources/cassandra/conf/cassandra.yaml:
#     server_encryption_options.keystore_password
#     server_encryption_options.truststore_password
#     client_encryption_options.keystore_password
#     client_encryption_options.truststore_password
#   resources/dse/conf/dse.yaml:
#     ldap_options.search_password
#     ldap_options.truststore_password
#
# It's an error if the passwords aren't encrypted.
# Config values can be encrypted with "dsetool encryptconfigvalue"
config_encryption_active: false
# The name of the system key used to encrypt / decrypt passwords stored
# in configuration files.
#
# If config_encryption_active is true, it's an error if a valid key with
# this name isn't in the system key directory. Keyfiles and KMIP managed
# keys can be created with "dsetool createsystemkey"
config_encryption_key_name: system_key
##########################
# Spark-related settings
# The length, in bits, of the shared secret used to authenticate Spark components and encrypt the
# connections between them.
# Note that this is not the strength of the cipher used for encrypting connections.
spark_shared_secret_bit_length: 256
# Enables Spark security based on shared secret infrastructure. Enables mutual authentication between Spark master
# and worker nodes. If DSE authentication is enabled, Spark security is forced to be enabled and this parameter is ignored.
spark_security_enabled: false
# Enables encryption between Spark master and worker nodes, except for the Web UI. The connection uses the
# Digest-MD5 SASL-based encryption mechanism. This option applies only if spark_security_enabled is true.
# If DSE authentication is enabled, Spark security encryption is forced to be enabled and this parameter is ignored.
spark_security_encryption_enabled: false
# # How often Spark plugin should check for Spark Master / Spark Worker readiness to start. The value is
# # a time (in ms) between subsequent retries.
# spark_daemon_readiness_assertion_interval: 1000
#
# Legacy Resource Manager options
#
# Controls the physical resources that can be used by Spark applications on this node.
# cores_total is the number of cores and memory_total is the total system memory that you can assign to all executors
# that are run by the work pools on this node. The values can be absolute (exact number of cores) or the
# memory size (use metric suffixes like M for mega, and G for giga) or a fraction of physical cores reported by the OS,
# and fraction of available memory, where available memory is calculated as: total physical memory - DSE max heap size.
# cores_total and memory_total replace initial_spark_worker_resources option which was used in earlier DSE versions.
# The default 0.7 for cores and memory corresponds to the default value of initial_spark_worker_resources 0.7.
# DSE does not support setting Spark Worker cores and memory through environment variables SPARK_WORKER_CORES
# and SPARK_WORKER_MEMORY. cores_total and memory_total can be set from environment variables SPARK_WORKER_TOTAL_CORES and
# SPARK_WORKER_TOTAL_MEMORY
# resource_manager_options:
#   worker_options:
#     cores_total: 0.7
#     memory_total: 0.6
#
#     workpools:
#       - name: alwayson_sql
#         cores: 0.25
#         memory: 0.25
# In DSE 5.1 and later: Communication between Spark applications and the resource manager is routed through
# the CQL native protocol. Enabling client encryption in cassandra.yaml will also enable encryption for
# the communication with the DSE Spark Master. To secure the communication between Spark Driver and Spark Executors,
# enable Spark authentication and encryption for that application.
# In contrast, mutual authentication and encryption of communication between DSE Spark Master and Workers are
# managed by spark_security_enabled and spark_security_encryption_enabled in dse.yaml.
# Spark UI options apply to Spark Master and Spark Worker UIs and to Spark daemon UIs in general. Spark UI options do NOT
# apply to user applications even if they run in cluster mode.
# SSL settings for the Spark Master / Worker web UIs. encryption_options is a
# nested section and is only consulted when encryption is set to custom.
spark_ui_options:
  # Valid values are:
  #   inherit - SSL settings are inherited from DSE client encryption options
  #   custom - SSL settings from encryption_options below
  encryption: inherit

  encryption_options:
    enabled: false
    keystore: resources/dse/conf/.ui-keystore
    # NOTE(review): this looks like a stock password kept in plain text - confirm
    # it is replaced before custom encryption is enabled.
    keystore_password: cassandra
    # require_client_auth: false
    # Set truststore and truststore_password if require_client_auth is true
    # truststore: resources/dse/conf/.ui-truststore
    # truststore_password: cassandra
    # More advanced defaults:
    # protocol: TLS
    # algorithm: SunX509
    #
    # Set keystore_type for keystore, valid types can be JKS, JCEKS, PKCS12 or PKCS11
    # for file based keystores prefer PKCS12
    # keystore_type: JKS
    #
    # Set truststore_type for truststore, valid types can be JKS, JCEKS or PKCS12
    # for file based truststores prefer PKCS12
    # truststore_type: JKS
    #
    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]
# Configure how the driver and executor processes are created and managed.
# How Spark driver and executor processes are launched. run_as_runner_options
# is a nested section whose user_slots value is a list of OS user names.
spark_process_runner:
  # Valid options are: default, run_as
  runner_type: default

  # DSE uses sudo to run Spark application components (drivers and executors) as specific OS users.
  # A set of predefined users, called slot users, is used for this purpose. All drivers and executors
  # owned by some DSE user are run as some slot user x. Drivers and executors of any other DSE user
  # use different slots.
  # Setting up slots:
  # 1. Create n users (n = number of slots), call them slot1, slot2, ..., slotn, with no login. Each user
  #    should have primary group the same as its name, so for example slot1:slot1, slot2:slot2, ...
  # 2. Add DSE service user (the user who runs DSE server) to the slot user groups; the DSE service user must be
  #    in all slot user groups.
  # 3. Modify the sudoers files so that:
  #    a) DSE service user can execute any command as any slot user without providing a password
  #    b) umask is overridden to 007 for those commands so that files created by sub-processes will not be accessible
  #       by anyone by default,
  #    For example, if we have two slot users slot1, slot2, and DSE service user dse, add these slot users to sudoers:
  #      Runas_Alias SLOTS = slot1, slot2
  #      Defaults>SLOTS umask=007
  #      Defaults>SLOTS umask_override
  #      dse ALL=(SLOTS) NOPASSWD: ALL
  run_as_runner_options:
    user_slots:
      - slot1
      - slot2
# AlwaysOn SQL options have dependence on workpool setting of resource_manager_options. Set workpool configuration if you
# enable alwayson_sql_options.
# alwayson_sql_options:
# # Set to true to enable the node for AlwaysOn SQL. Only an Analytics node
# # can be enabled as an AlwaysOn SQL node.
# enabled: false
#
# # AlwaysOn SQL Thrift port
# thrift_port: 10000
#
# # AlwaysOn SQL WebUI port
# web_ui_port: 9077
#
# # The waiting time to reserve the Thrift port if it's not available
# reserve_port_wait_time_ms: 100
#
# # The waiting time to check AlwaysOn SQL health status
# alwayson_sql_status_check_wait_time_ms: 500
#
# # The work pool name used by AlwaysOn SQL
# workpool: alwayson_sql
#
# # Location in DSEFS of the log files
# log_dsefs_dir: /spark/log/alwayson_sql
#
# # The role to use for internal communication by AlwaysOn SQL if authentication is enabled
# auth_user: alwayson_sql
#
# # The maximum number of errors that can occur during AlwaysOn SQL service runner thread
# # runs before stopping the service. A service stop requires a manual restart.
# runner_max_errors: 10
#
# # The interval in seconds to update heartbeat of AlwaysOn SQL. If heartbeat is not updated
# # for more than the period of three times of the interval, AlwaysOn SQL malfunctions.
# # AlwaysOn SQL automatically restarts.
# heartbeat_update_interval_seconds: 30
##########################
# DSE File System (DSEFS) options
# dsefs_options:
#
# # Whether to enable DSEFS on this node.
# # If not set, DSEFS is enabled only on the nodes that run a Spark workload.
# enabled: true
#
# # The keyspace where the DSEFS metadata is stored. Optionally configure multiple DSEFS file systems
# # within a cluster by specifying a different keyspace name for each datacenter.
# keyspace_name: dsefs
#
# # The local directory for storing the local node metadata, including the node identifier.
# # The amount of data stored is nominal, and does not require configuration for throughput, latency, or capacity.
# # This directory must not be shared by DSEFS nodes.
# work_dir: /var/lib/dsefs
#
# # The public port on which DSEFS listens for clients. The service on this port is bound to
# # native_transport address.
# public_port: 5598
#
# # Port for inter-node communication, must be not visible from outside of the cluster.
# # It is bound to listen address. Do not open this port to firewalls.
# private_port: 5599
#
# # Mandatory attribute to identify the set of directories. DataStax recommends segregating these data directories
# # on physical devices that are different from the devices that are used for the DSE database.
# # Using multiple directories on JBOD improves performance and capacity.
# data_directories:
# - dir: /var/lib/dsefs/data
#
# # The weighting factor for this location specifies how much data to place in this directory, relative to
# # other directories in the cluster. This soft constraint determines how DSEFS distributes the data.
# storage_weight: 1.0
#
# # Reserved space (in bytes) that is not going to be used for storing blocks
# min_free_space: 268435456
#
# # More advanced settings:
#
# # Wait time before the DSEFS server times out while waiting for services to bootstrap.
# service_startup_timeout_ms: 600000
#
# # Wait time before the DSEFS server times out while waiting for services to close.
# service_close_timeout_ms: 600000
#
# # Wait time that the DSEFS server waits during shutdown before closing all pending connections.
# server_close_timeout_ms: 2147483647 # Integer.MAX_VALUE
#
# # The maximum accepted size of a compression frame defined during file upload.
# compression_frame_max_size: 1048576
#
# # Maximum number of elements in a single DSEFS Server query cache. DSEFS reuses this value for every cache that
# # stores database query results.
# query_cache_size: 2048
#
# # The time to retain the DSEFS Server query cache element in cache. The cache element expires
# # when this time is exceeded.
# query_cache_expire_after_ms: 2000
#
# internode_authentication:
# # If enabled, the servers are obliged to authenticate all messages passed between them on private_port.
# # The authentication protocol is based on HMAC used with a pre-shared secret available only to DSE cluster
# # members (nodes).
# # The actual key is never passed between the nodes.
# # Typically there is no need to turn this authentication off and it doesn't incur any performance overhead.
# # Disabling internode authentication is not recommended, but may be used for debugging purposes
# # to issue internode requests manually with curl.
# # Limitations:
# # Beware that enabling internode authentication does not encrypt the internode traffic.
# # Only HTTP headers are protected with HMAC, so MITM attacks are still possible on the message data.
# # It is also possible to bypass the authentication if the DSE messaging subsystem was not
# # properly secured and the attacker could fake being a part of the DSE cluster in order to obtain
# # the secret key. If you need stronger security, please configure SSL.
# enabled: true
#
# # Algorithm used for key encryption:
# algorithm: HmacSHA256
#
# gossip_options:
# # The delay between gossip rounds
# round_delay_ms: 2000
#
# # How long to wait after registering the Location and reading back all other Locations from the database
# startup_delay_ms: 5000
#
# # How long to wait after announcing shutdown before shutting down the node
# shutdown_delay_ms: 10000
#
# rest_options:
# # How long RestClient is going to wait for a response corresponding to a given request
# request_timeout_ms: 330000
#
# # How long RestClient is going to wait for establishing a new connection
# connection_open_timeout_ms: 10000
#
# # How long RestClient is going to wait until all pending transfers are complete before closing
# client_close_timeout_ms: 60000
#
# # How long to wait for the server rest call to complete
# server_request_timeout_ms: 300000
#
# # Wait time, in milliseconds, before closing idle RestClient - server connection. 0 if disabled.
# # If RestClient does not close connection after this timeout, the server closes the connection after
# # 2 * idle_connection_timeout_ms milliseconds.
# idle_connection_timeout_ms: 60000
#
# # Wait time, in milliseconds, before closing idle internode connection. The internode connections are
# # mainly used to exchange data during replication. Do not set lower than the default value for heavily
# # utilized DSEFS clusters.
# internode_idle_connection_timeout_ms: 120000
#
# # Maximum number of connections to a given host per single CPU core. DSEFS keeps a connection pool for
# # each CPU core.
# core_max_concurrent_connections_per_host: 8
#
# transaction_options:
# # How long to allow a transaction to run before considering it for timing out and rollback
# transaction_timeout_ms: 60000
#
# # How long to wait before retrying a transaction aborted due to a conflict
# conflict_retry_delay_ms: 10
#
# # How many times the transaction is retried in case of a conflict before giving up
# conflict_retry_count: 40
#
# # How long to wait before retrying a failed transaction payload execution
# execution_retry_delay_ms: 1000
#
# # How many times to retry executing the payload before signaling the error to the application
# execution_retry_count: 3
#
# block_allocator_options:
# # The overflow_margin_mb and overflow_factor options control how much additional data can be placed
# # on the local (coordinator) before the local node overflows to the other nodes.
# # A local node is preferred for a new block allocation, if
# # used_size_on_the_local_node < average_used_size_per_node * overflow_factor + overflow_margin.
# # The trade-off is between data locality of writes and balancing the cluster.
# # To disable the preference for allocating blocks on the coordinator node, set these values to 0 MB and 1.0.
# overflow_margin_mb: 1024
# overflow_factor: 1.05
# Insightful Monitoring(Insights) Options
# enable insights_options.
# insights_options:
# # Directory to store insights
# data_dir: /var/lib/cassandra/insights_data
#
# # Directory to store insight logs
# log_dir: /var/log/cassandra/
##########################
# Audit logging options
# All settings below are children of audit_logging_options and must stay indented
# beneath it; the writer-specific settings are nested one level deeper under
# cassandra_audit_writer_options.
audit_logging_options:
  enabled: false

  # The logger used for logging audit information
  # Available loggers are:
  #   CassandraAuditWriter - logs audit info to a cassandra table. This logger can be run synchronously or
  #                          asynchronously. Audit logs are stored in the dse_audit.audit_log table.
  #                          When run synchronously, a query will not execute until it has been written
  #                          to the audit log table successfully. If a failure occurs before an audit event is
  #                          written, and its query is executed, the audit logs might contain queries that were never
  #                          executed.
  #   SLF4JAuditWriter - logs audit info to an SLF4J logger. The logger name is `SLF4JAuditWriter`,
  #                      and can be configured in the logback.xml file.
  logger: SLF4JAuditWriter

  # # Comma-separated list of audit event categories to be included or excluded from the audit log.
  # # When not set, the default includes all categories.
  # # Categories are: QUERY, DML, DDL, DCL, AUTH, ADMIN, ERROR.
  # # Specify either included or excluded categories. Specifying both is an error.
  # included_categories:
  # excluded_categories:

  # # Comma-separated list of keyspaces to be included or excluded from the audit log.
  # # When not set, the default includes all keyspaces.
  # # Specify either included or excluded keyspaces. Specifying both is an error.
  # included_keyspaces:
  # excluded_keyspaces:

  # # Comma-separated list of the roles to be audited or not.
  # # Specify either included or excluded roles. Specifying both is an error.
  # included_roles:
  # excluded_roles:

  # The amount of time, in hours, audit events are retained by supporting loggers.
  # Only the CassandraAuditWriter supports retention time.
  # Values of 0 or less retain events forever.
  retention_time: 0

  # # Whether to render bound variables as CQL literals
  # # - when disabled, primitive values are printed as they are, collections, tuples and UDT values are
  # #   printed as hexadecimal sequences
  # # - when enabled (default since 6.8.2), all values are printed in the form as they would be provided in a CQL statement, including
  # #   quotation marks and escaping (in this case, the content of collections, tuples and UDT values is in human
  # #   readable form)
  # render_cql_literals: true

  cassandra_audit_writer_options:
    # Sets the mode the audit writer runs in.
    #
    # When run synchronously, a query is not executed until the audit event is successfully written.
    #
    # When run asynchronously, audit events are queued for writing to the audit table, but are
    # not necessarily logged before the query executes. A pool of writer threads consumes the
    # audit events from the queue, and writes them to the audit table in batch queries. While
    # this substantially improves performance under load, if there is a failure between when
    # a query is executed, and its audit event is written to the table, the audit table may
    # be missing entries for queries that were executed.
    # valid options are 'sync' and 'async'
    mode: sync

    # The maximum number of events the writer will dequeue before writing them out to the table.
    # If you're seeing warnings in your logs about batches being too large, decrease this value.
    # Increasing guardrails.batch_size_warn_threshold_in_kb in cassandra.yaml is also an option, but make sure you understand
    # the implications before doing so.
    #
    # Only used in async mode. Must be >0
    batch_size: 50

    # The maximum amount of time in milliseconds an event will be dequeued by a writer before being written out. This
    # prevents events from waiting too long before being written to the table when there's not a lot of queries happening.
    #
    # Only used in async mode. Must be >0
    flush_time: 250

    # The size of the queue feeding the asynchronous audit log writer threads. When there are more events being
    # produced than the writers can write out, the queue will fill up, and newer queries will block until there
    # is space on the queue.
    # If a value of 0 is used, the queue size will be unbounded, which can lead to resource exhaustion under
    # heavy query load.
    queue_size: 30000

    # the consistency level used to write audit events
    write_consistency: QUORUM

    # # Where dropped events are logged
    # dropped_event_log: /var/log/cassandra/dropped_audit_events.log

    # # Partition days into hours by default
    # day_partition_millis: 3600000
##########################
# System information encryption settings
#
# If enabled, system tables that might contain sensitive information (system.batchlog,
# system.paxos), hints files, and Cassandra commit logs are encrypted with these
# encryption settings.
#
# If DSE Search index encryption is enabled, DSE Search index files are also encrypted with these settings.
# If backing C* table encryption is enabled, DSE Search commit log is encrypted with these settings.
#
# When enabling system table encryption on a node with existing data, run
# `nodetool upgradesstables -a` on the listed tables to encrypt existing data.
#
# When tracing is enabled, sensitive information is written to the tables in the
# system_traces keyspace. Configure encryption on the tables to encrypt their data
# on disk by using an encrypting compressor.
#
# DataStax recommends using remote encryption keys from a KMIP server when using Transparent Data Encryption (TDE) features.
# Local key support is provided when a KMIP server is not available.
# The settings below are children of system_info_encryption and must stay indented
# beneath it; left flush-left they become top-level keys.
system_info_encryption:
  enabled: false
  cipher_algorithm: AES
  secret_key_strength: 128
  chunk_length_kb: 64

  # # The encryptor will use a KMIP key server to manage its encryption keys. Specify only to use a KMIP key server,
  # # otherwise omit this entry. The default is to use local key encryption.
  # key_provider: KmipKeyProviderFactory

  # # If KmipKeyProviderFactory is used for system_info_encryption, this specifies the kmip host to be used.
  # kmip_host: kmip_host_name
##########################
# KMIP hosts options
#
# Connection settings for key servers supporting the KMIP protocol
# allow DSE encryption features to use encryption and decryption keys that are not stored
# on the same machine running DSE.
#
# Hosts are configured as <kmip_host_name>: {connection_settings}, which maps a user-defined
# name to a set of KMIP hosts and KMIP-defined credentials (keystores and truststores) that are used with a particular
# key server. This name is then used when referring to KMIP hosts. DSE supports multiple KMIP hosts.
# kmip_hosts:
# # The unique name of this KMIP host/cluster which is specified in the table schema.
# host.yourdomain.com:
#
# # Comma-separated list of KMIP hosts host[:port]
# # The current implementation of KMIP connection management supports only failover, so all requests will
# # go through a single KMIP server. There is no load balancing. This is because there aren't many known KMIP servers
# # that support read replication, or other strategies for availability.
# #
# # Hosts are tried in the order they appear, so add KMIP hosts in the intended failover sequence.
# hosts: kmip1.yourdomain.com, kmip2.yourdomain.com
#
# # keystore/truststore info
# keystore_path: /path/to/keystore.jks
# keystore_type: jks
# keystore_password: password
#
# truststore_path: /path/to/truststore.jks,
# truststore_type: jks
# truststore_password: password
#
# # Keys read from the KMIP hosts are cached locally for the period of time specified below.
# # The longer keys are cached, the fewer requests are made to the key server, but the longer
# # it takes for changes (ie: revocation) to propagate to the DSE node.
# key_cache_millis: 300000
#
# # Refresh interval for the KMIP host key cache. After this interval, cache entries become eligible
# # for refresh. Upon next access, an async reload is scheduled and the old value returned until it completes.
# # If key_cache_millis is non-zero, then this must be also.
# # Defaults to the same value as key_cache_millis.
# key_cache_update_millis: 300000
#
# # Socket timeout in milliseconds.
# timeout: 1000
# # driver - DSE Search will use Solr cursor paging (deep paging) when pagination is enabled by the CQL driver.
# #
# # off - DSE Search will ignore the driver's pagination settings and use normal Solr paging unless:
# # - The current workload is an analytics workload (ex. SearchAnalytics).
# # - The query parameter 'paging' is set to