@datastax/astra-mongoose

# DSE Config Version: 6.9.13 # Memory limit for DSE In-Memory tables as a fraction of system memory. When not set, # the default is 0.2 (20% of system memory). # Specify max_memory_to_lock_fraction or max_memory_to_lock_mb, not both. # max_memory_to_lock_fraction: 0.20 # Memory limit for DSE In-Memory tables as a maximum in MB. When not set, # max_memory_to_lock_fraction is used. The max_memory_to_lock_fraction # value is ignored if max_memory_to_lock_mb is set to a non-zero value. # Specify max_memory_to_lock_fraction or max_memory_to_lock_mb, not both. # max_memory_to_lock_mb: 10240 ########################## # Authentication options # # These options are used if the authenticator option in cassandra.yaml is set to # com.datastax.bdp.cassandra.auth.DseAuthenticator # # The enabled option controls whether the DseAuthenticator will authenticate users. If # set to true users will be authenticated, if set to false they will not. # When not set enabled is false. # # DseAuthenticator allows multiple authentication schemes to be used at the same time. # The schemes to be used are controlled by the default_scheme and other_schemes options. # A driver can select the scheme to use during authentication. # # The default_scheme option selects which authentication scheme will be used if the driver # does not request a specific scheme. This can be one of the following values: # internal - plain text authentication using the internal password authenticator # ldap - plain text authentication using the passthrough LDAP authenticator # kerberos - GSSAPI authentication using the Kerberos authenticator # The other_schemes option is a list of schemes that can also be selected for use by a # driver and can be a list of the above schemes. # # The scheme_permissions option controls whether roles need to have permission granted to # them in order to use specific authentication schemes. These permissions can be granted # only when the DseAuthorizer is used. 
# # The allow_digest_with_kerberos option controls whether Digest-MD5 authentication is also # allowed when Kerberos is one of the authentication schemes. If set to false, it will not # be allowed. You must set allow_digest_with_kerberos to true in analytics clusters to use Hadoop # inter-node authentication with Hadoop and Spark jobs. # # The plain_text_without_ssl controls how the DseAuthenticator reacts to plain text # authentication requests over unencrypted client connections. It can be one of: # block - block the request with an authentication error # warn - log a warning about the request but allow it to continue # allow - allow the request without any warning # # The transitional_mode option allows the DseAuthenticator to operate in a transitional # mode during setup of authentication in a cluster. This can be one of the following values: # disabled - transitional mode is disabled # permissive - Only super users are authenticated and logged in, all other # authentication attempts will be logged in as the anonymous user # normal - If credentials are passed they are authenticated. If the # authentication is successful then the user is logged in, otherwise # the user is logged in as anonymous. If no credentials are passed, # then the user is logged in as anonymous # strict - If credentials are passed they are authenticated. If the # authentication is successful, the user is logged in. If the # authentication fails, an authentication error is returned. 
# If no credentials are passed, the user is logged in as anonymous
authentication_options:
  enabled: true
  default_scheme: internal
  other_schemes:  # left empty (null): no additional schemes are listed
  scheme_permissions: false
  allow_digest_with_kerberos: true
  plain_text_without_ssl: warn
  transitional_mode: disabled

##########################
# Role management options
#
# These options are used when the role_manager option in cassandra.yaml is set to
# com.datastax.bdp.cassandra.auth.DseRoleManager
#
# mode can be one of:
#   internal - the granting and revoking of roles is managed internally
#              using the GRANT ROLE and REVOKE ROLE statements
#   ldap - the granting and revoking of roles is managed by an external
#          LDAP server configured using the ldap_options.
#
# stats is a boolean option (false by default). If set to true, the DSE server will keep
# track of role creation and password change timestamps in the dse_security.role_stats table.
# This data will be updated based on newly executed CREATE / ALTER / DROP role CQL statements.
#
# mode_by_authentication allows and overrides the defined mode based on the authentication scheme.
# The allowed values are either internal or ldap. If not set, the defined mode will be used.
#
# role_management_options:
#   mode: internal
#   stats: false
#   mode_by_authentication:
#     internal:
#     ldap:
#     kerberos:

##########################
# Authorization options
#
# These options are used if the authorization option in cassandra.yaml is set to
# com.datastax.bdp.cassandra.auth.DseAuthorizer
#
# The enabled option controls whether the DseAuthorizer will perform authorization. If
# set to true authorization is performed, if set to false it is not.
# When not set, enabled is false.
#
# The transitional_mode option allows the DseAuthorizer to operate in a transitional
# mode during setup of authorization in a cluster.
# This can be one of the following values:
#   disabled - transitional mode is disabled, all connections must provide valid credentials and
#              map to a login-enabled role
#   normal - allow all connections that provide credentials, permissions can be granted to
#            resources but are not enforced
#   strict - permissions can be granted to resources and are enforced on
#            authenticated users. They are not enforced against anonymous
#            users
#
# allow_row_level_security - To use row level security, set to true for the entire system.
#                            Use the same setting on all nodes.

# authorization_options:
#   enabled: false
#   transitional_mode: disabled
#   allow_row_level_security: false

##########################
# Kerberos options
#
# keytab is <path_to_keytab>/dse.keytab
# The keytab file must contain the credentials for both of the fully resolved principal names, which
# replace _HOST with the fully qualified domain name (FQDN) of the host in the service_principal and
# http_principal settings. The UNIX user running DSE must also have read permissions on the keytab.
#
# The service_principal that the DataStax Enterprise process runs under must use the form
# <dse_user>/_HOST@<REALM>
#
# The http_principal is used by the Tomcat application container to run DSE Search.
#
# The qop is the Quality of Protection (QOP) values that clients and servers
# can use for each connection. Valid values are:
#   auth - (default) authentication only
#   auth-int - authentication plus integrity protection of all transmitted data
#   auth-conf - authentication plus integrity protection and encryption of all
#               transmitted data
#
# Warning - Encryption using auth-conf is separate and completely independent
# of whether encryption is done using SSL. If auth-conf is selected here
# and SSL is enabled, the transmitted data is encrypted twice.
kerberos_options:
  keytab: resources/dse/conf/dse.keytab
  service_principal: dse/_HOST@REALM
  http_principal: HTTP/_HOST@REALM
  qop: auth

##########################
# LDAP options
#
# These options are only used when the com.datastax.bdp.cassandra.auth.DseAuthenticator
# is configured as the authenticator in cassandra.yaml and 'ldap' scheme is selected in
# authentication_options and/or role_management_options above.

# ldap_options:
#   # LDAP server address or comma separated list of alternative addresses. Each address may include port after a colon.
#   # If port is not defined per single address, server_port will be used by default. All servers are equivalent and
#   # the one which is chosen for a certain request depends on the current state (recent failures, number of idle
#   # connections in the pool).
#   server_host:
#
#   # The port on which the LDAP server listens, usually port 389 for unencrypted
#   # connections and port 636 for SSL-encrypted connections. If use_tls is set to true, use the
#   # unencrypted port
#   server_port: 389
#
#   # The distinguished name (DN) of an account that is used to search for other users on the
#   # LDAP server. This user should have only the necessary permissions to do the search
#   # If not present then an anonymous bind is used for the search
#   search_dn:
#
#   # Password of the search_dn account
#   search_password:
#
#   # Set to true to use an SSL encrypted connection. In this case the server_port needs
#   # to be set to the LDAP port for the server
#   use_ssl: false
#
#   # Set to true to initiate a TLS encrypted connection on the default ldap port
#   use_tls: false
#
#   truststore_path:
#   truststore_password:
#   truststore_type: jks
#
#   ssl_protocol: TLS
#
#   user_search_base:
#   extra_user_search_bases: []
#   user_search_filter: (uid={0})
#
#   # Set to true to enable hostname verification. This is only effective if one of use_ssl or use_tls is true
#   # and truststore_path has a valid trust store.
# hostname_verification: false # # # Set to the attribute on the user entry containing group membership information. # user_memberof_attribute: memberof # # # The group_search_type defines how group membership will be determined for a user. It can be one of: # # directory_search - each group entry has the attribute whose values refer to that group's members; # # the search is performed by finding those groups whose member is the given user; # # for that purpose, group_search_base and group_search_filter are used; # # # # memberof_search - each entry has the attribute (indicated by user_memberof_attribute parameter) whose # # values refer to the groups that entry is a member of; that is, the membership information # # is attached directly to the user entry; # # # # Note that group_search_base and group_search_filter are also used to find the group entry by its name; we need # # that to discover group hierarchy - when we have a role, which is mapped to some LDAP group, and we need to find # # all roles that role belongs to, we need to find that role first - if that role was a user, we would use # # user_search_base and user_search_filter; when it is a group, we use group_search_base and group_search_filter; # # Conclusion is that in case of memberof_search, group_search_filter must be able to find a group by that group's # # name, and in case of directory_search, group_search_filter must be able to find a group by that group's name or # # any of that group's members DN, for example: (|(member={0})(cn={0})) # group_search_type: directory_search # group_search_base: # extra_group_search_bases: [] # group_search_filter: (uniquemember={0}) # # # The attribute in the group entry that holds the group name. # group_name_attribute: cn # # # The following options are for optimized searching all parent groups, including inherited ones if the LDAP server # # supports such queries. 
Such a query can retrieve all parent groups with a single request with no need to # # traverse group hierarchy level by level; # # Similarly to group_search_type, we can use either memberof_search or directory_search, depending on the LDAP # # server which is used, or leave it empty to disable this mechanism if LDAP server does not support it; # # When we use memberof_search, we only need to define all_parent_groups_memberof_attribute, which is a special attribute # # that includes information about all parent groups, including transitive ones, for example Oracle LDAP supports # # this mechanism with attribute named isMemberOf # # When we use directory_search, we only need to define all_parent_groups_search_filter, which should allow to find all # # the groups the role belongs to; for example ActiveDirectory server supports so called # # LDAP_MATCHING_RULE_IN_CHAIN search operator which can be defined as (member:1.2.840.113556.1.4.1941:={0}) # all_parent_groups_search_type: # all_parent_groups_memberof_attribute: isMemberOf # all_parent_groups_search_filter: (member:1.2.840.113556.1.4.1941:={0}) # # # Validity period for the credentials cache in milli-seconds (remote bind is an expensive # # operation). Defaults to 0, set to 0 to disable. # credentials_validity_in_ms: 0 # # # Validity period for the search cache in seconds. Defaults to 0, set to 0 to disable. # search_validity_in_seconds: 0 # # # Validity period for the groups cache in milliseconds. The groups cache holds ldap group entries # # and is used as a source for the roles cache. # # Defaults to the search cache validity period (in milliseconds) for backward compatibility. # # Set to 0 to disable. # groups_validity_in_ms: 0 # # # Refresh interval for groups cache (if enabled). After this interval, cache entries become eligible # # for refresh. Upon next access, an async reload is scheduled and the old value returned until it completes. # # If groups_validity_in_ms is non-zero, then this must be also. 
#   # Defaults to the same value as groups_validity_in_ms.
#   groups_update_interval_in_ms: 0
#
#   # Connection pool settings - note that a separate connection pool will be created for each provided server address
#   connection_pool:
#     max_active: 8
#     max_idle: 8
#
#   # DNS service discovery configuration. By default, when it is disabled (it is disabled when fqdn is empty), LDAP
#   # servers list is taken from server_host param. When fqdn is defined, we will try to retrieve LDAP servers from
#   # the SRV records of the provided domain. In this case, the servers provided in server_host are used as a fallback
#   # list of servers, when DNS is not available or returns no SRV records.
#   dns_service_discovery:
#     # fully qualified domain name to get the SRV records from; leave empty to disable DNS service discovery.
#     # example: _ldap._tcp.example.com
#     fqdn:
#
#     # timeout in ms for querying DNS; it has to be between 0 and 1 hour equivalent
#     lookup_timeout_ms: 5000
#
#     # for how long the old results should be retained/cached (value between 0 and 10 days equivalent);
#     # note that it is applicable only if automatic polling is enabled (see polling_interval_ms)
#     retention_duration_ms: 600000
#
#     # how often we should try to refresh the list of obtained servers, leave 0 to disable periodical refreshing
#     # (value between 0 and 10 days equivalent). Note that if automatic polling is enabled, it does not make
#     # sense to set retention_duration_ms to a value lower than the polling_interval_ms as our cached results
#     # would be expired by the time we try to refresh the results.
#     polling_interval_ms: 0

# To ensure that records with TTLs are purged from DSE Search indexes when they expire, DSE
# periodically checks all indexes for expired documents and deletes them. These settings
# control the scheduling and execution of those checks.
ttl_index_rebuild_options:
  # By default, schedule a check every 300 seconds:
  fixed_rate_period: 300
  # The number of seconds to delay the first check to speed up startup time:
  initial_delay: 20
  # All documents determined to be expired are deleted from the index during each check, but
  # to avoid memory pressure, their unique keys are retrieved and deletes issued in batches.
  # This determines the maximum number of documents per batch:
  max_docs_per_batch: 4096
  # Maximum number of search indexes that can execute TTL cleanup concurrently:
  thread_pool_size: 1

# DSE Search resource upload size limit in MB. A value of '0' disables resource uploading.
solr_resource_upload_limit_mb: 10

# Transport options for inter-node communication between DSE Search nodes.
shard_transport_options:
  # The cumulative shard request timeout, in milliseconds, defines the internal timeout for all
  # search queries to prevent long running queries. Default is 60000 (1 minute).
  netty_client_request_timeout: 60000

# ---- DSE Search index encryption options

# solr_encryption_options:
#   # Whether to allocate shared index decryption cache off JVM heap.
#   # Default is off heap allocation (true).
#   decryption_cache_offheap_allocation: true
#   # The maximum size of shared DSE Search decryption cache, in MB.
#   # Default is 256 MB.
#   decryption_cache_size_in_mb: 256

# ---- DSE Search indexing settings

# # The maximum number of queued partitions during search index rebuilding. (This serves primarily
# # as a safeguard against excessive heap usage by the indexing queue.) If set lower than the
# # number of TPC threads, not all TPC threads can be actively indexing.
# #
# # Default: 1024
# back_pressure_threshold_per_core: 1024
#
# # The max time to wait for flushing of index updates during re-index.
# # Flushing should always complete successfully, in order to fully sync search indexes
# # with DSE data. DataStax recommends to always set at a reasonably high value.
# # # # Default: 5 minutes # flush_max_time_per_core: 5 # # # The maximum time to wait for each search index to load on startup and create/reload search index operations. # # Only change this advanced option if any exceptions happen during search index loading. # # # # Default: 5 minutes # load_max_time_per_core: 5 # # # Applies the configured Cassandra disk failure policy to index write failures. # # Default is disabled (false). # enable_index_disk_failure_policy: false # # The directory to store search index data. Each DSE Search index is stored under # # a solrconfig_data_dir/keyspace.table directory. # # Default is a solr.data directory inside Cassandra data directory, or as specified # # by the dse.solr.data.dir system property. # solr_data_dir: /MyDir # # The Lucene field cache has been deprecated. Instead set docValues="true" on the field # # in the schema.xml file. After changing the schema, reload and reindex the search index. # # Default: false # solr_field_cache_enabled: false # # Global Lucene RAM buffer usage thresholds (separate for heap and off-heap) at which DSE will force segment flush. # # Setting this too low may induce a state of constant flushing during periods of ongoing write activity. For # # NRT, these forced segment flushes will also de-schedule pending auto-soft commits to avoid potentially # # flushing too many small segments. # # Default: 1024 # ram_buffer_heap_space_in_mb: 1024 # # Default: 1024 # ram_buffer_offheap_space_in_mb: 1024 # ---- DSE Search CQL query options # # Maximum time in milliseconds to wait for all rows # # to be read from the database during CQL Solr queries. # # Default is 10000 (10 seconds). # cql_solr_query_row_timeout: 10000 ########################## # Global performance service options # # Number of background threads used by the performance service under normal conditions. # # Defaults to 4. # performance_core_threads: 4 # # Maximum number of background threads used by the performance service. 
# # Defaults to concurrent_writes specified in cassandra.yaml.
# performance_max_threads: 32
#
# # The number of queued tasks in the backlog when the number of performance_max_threads are busy (minimum 0).
# performance_queue_capacity: 32000
#
# # If the performance service requests more tasks than (performance_max_threads + performance_queue_capacity),
# # a dropped task warning will be issued. This warning indicates that collected statistics may not be up to date
# # because the server couldn't keep up under the current load.
#
# # You can disable some services, reconfigure some services, or increase the queue size.

##########################
# Performance service options

graph_events:
  ttl_seconds: 600

# cql_slow_log_options:
#   enabled: true
#
#   # When t > 1, log queries taking longer than t milliseconds.
#   # 0 <= t <= 1, log queries above t percentile
#   threshold: 200.0
#
#   # Initial number of queries before percentile filter becomes active
#   minimum_samples: 100
#
#   ttl_seconds: 259200
#
#   # Keeps slow queries in-memory only and doesn't write data to the database.
#   # WARNING - if this is set to 'false' then set threshold >= 2000, otherwise there will be a
#   # high load on the database.
#   skip_writing_to_db: true
#
#   # The number of slow queries to keep in-memory
#   num_slowest_queries: 5

cql_system_info_options:
  enabled: false
  refresh_rate_ms: 10000

resource_level_latency_tracking_options:
  enabled: false
  refresh_rate_ms: 10000

db_summary_stats_options:
  enabled: false
  refresh_rate_ms: 10000

cluster_summary_stats_options:
  enabled: false
  refresh_rate_ms: 10000

spark_cluster_info_options:
  enabled: false
  refresh_rate_ms: 10000

# ---- Spark application stats options
spark_application_info_options:
  enabled: false
  refresh_rate_ms: 10000
  driver:
    # enables or disables writing of the metrics collected at Spark Driver to Cassandra
    sink: false
    # enables or disables Spark Cassandra Connector metrics at Spark Driver
    connectorSource: false
    # enables or disables JVM heap and GC metrics at Spark Driver
    jvmSource: false
    # enables or disables application state metrics
    stateSource: false
  executor:
    # enables or disables writing of the metrics collected at executors to Cassandra
    sink: false
    # enables or disables Spark Cassandra Connector metrics at executors
    connectorSource: false
    # enables or disables JVM heap and GC metrics at executors
    jvmSource: false

# Table Histogram data tables options
histogram_data_options:
  enabled: false
  refresh_rate_ms: 10000
  retention_count: 3

# User/Resource latency tracking settings
user_level_latency_tracking_options:
  enabled: false
  refresh_rate_ms: 10000
  top_stats_limit: 100
  quantiles: false

# ---- DSE Search Performance Objects

solr_slow_sub_query_log_options:
  enabled: false
  ttl_seconds: 604800
  async_writers: 1
  threshold_ms: 3000

solr_update_handler_metrics_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000

solr_request_handler_metrics_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000

solr_index_stats_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000

solr_cache_stats_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000

solr_latency_snapshot_options:
  enabled: false
  ttl_seconds: 604800
  refresh_rate_ms: 60000

# Node health is a score-based representation of how fit a node is to handle queries. The score is a
# function of how long a node has been up and the rate of dropped mutations in the recent past.
node_health_options:
  refresh_rate_ms: 60000
  # The amount of continuous uptime required for the node to reach the maximum uptime score. If you
  # are concerned with consistency during repair after a period of downtime, you may want to
  # temporarily increase this time to the expected time it will take to complete repair.
  #
  # Default - 10800 seconds (3 hours)
  uptime_ramp_up_period_seconds: 10800
  # The time window in the past over which the rate of dropped mutations affects the node health score.
  # Default - 30 minutes
  dropped_mutation_window_minutes: 30

# If enabled (true), replica selection for distributed DSE Search queries takes node health into account
# when multiple candidates exist for a particular token range. Set to false to ignore
# node health when choosing replicas.
#
# Health-based routing allows us to make a trade-off between index consistency and query throughput. If
# the primary concern is query performance, it may make sense to set this to "false".
#
# Default is enabled (true).
enable_health_based_routing: true

# If enabled (true), DSE Search reindexing of bootstrapped data will happen asynchronously, and the node will join the ring straight
# after bootstrap.
#
# Default is disabled (false). The node will wait for reindexing of bootstrapped data to finish before joining the ring.
async_bootstrap_reindex: false

# Lease metrics. Enable these metrics to help monitor the performance of the lease subsystem.
# ttl_seconds controls how long the log of lease holder changes persists.
lease_metrics_options:
  enabled: false
  ttl_seconds: 604800

# The directory where system keys are kept.
#
# Keys used for SSTable encryption must be distributed to all nodes.
# DSE must be able to read and write to the directory.
#
# This directory should have 700 permissions and belong to the dse user.
system_key_directory: /etc/dse/conf

# If this is set to true, DSE requires the following config values to be encrypted:
#   resources/cassandra/conf/cassandra.yaml:
#     server_encryption_options.keystore_password
#     server_encryption_options.truststore_password
#     client_encryption_options.keystore_password
#     client_encryption_options.truststore_password
#   resources/dse/conf/dse.yaml:
#     ldap_options.search_password
#     ldap_options.truststore_password
#
# It's an error if the passwords aren't encrypted.
# Config values can be encrypted with "dsetool encryptconfigvalue"
config_encryption_active: false

# The name of the system key used to encrypt / decrypt passwords stored
# in configuration files.
#
# If config_encryption_active is true, it's an error if a valid key with
# this name isn't in the system key directory keyfiles, and KMIP managed
# keys can be created with "dsetool createsystemkey"
config_encryption_key_name: system_key

##########################
# Spark-related settings

# The length of a shared secret used to authenticate Spark components and encrypt the connections between them.
# Note that this is not the strength of the cipher used for encrypting connections.
spark_shared_secret_bit_length: 256

# Enables Spark security based on shared secret infrastructure. Enables mutual authentication between Spark master
# and worker nodes. If DSE authentication is enabled, spark security is forced to be enabled and this parameter is ignored.
spark_security_enabled: false

# Enables encryption between Spark master and worker nodes, except Web UI. The connection uses the
# Digest-MD5 SASL-based encryption mechanism. This option applies only if spark_security_enabled is true.
# If DSE authentication is enabled, spark security encryption is forced to be enabled and this parameter is ignored.
spark_security_encryption_enabled: false

# # How often Spark plugin should check for Spark Master / Spark Worker readiness to start. The value is
# # a time (in ms) between subsequent retries.
# spark_daemon_readiness_assertion_interval: 1000

# Legacy Resource Manager options
#
# Controls the physical resources that can be used by Spark applications on this node.
# cores_total is the number of cores and memory_total is total system memory that you can assign to all executors
# that are run by the work pools on this node. The values can be absolute (exact number of cores) or the
# memory size (use metric suffixes like M for mega, and G for giga) or a fraction of physical cores reported by the OS,
# and fraction of available memory, where available memory is calculated as: total physical memory - DSE max heap size.
# cores_total and memory_total replace initial_spark_worker_resources option which was used in earlier DSE versions.
# The default 0.7 for cores and memory corresponds to the default value of initial_spark_worker_resources 0.7.
# DSE does not support setting Spark Worker cores and memory through environment variables SPARK_WORKER_CORES
# and SPARK_WORKER_MEMORY. cores_total and memory_total can be set from environment variables SPARK_WORKER_TOTAL_CORES and
# SPARK_WORKER_TOTAL_MEMORY
# resource_manager_options:
#   worker_options:
#     cores_total: 0.7
#     memory_total: 0.6
#
#     workpools:
#       - name: alwayson_sql
#         cores: 0.25
#         memory: 0.25

# In DSE 5.1 and later: Communication between Spark applications and the resource manager are routed through
# the CQL native protocol. Enabling client encryption in cassandra.yaml will also enable encryption for
# the communication with the DSE Spark Master. To secure the communication between Spark Driver and Spark Executors,
# enable Spark authentication and encryption for that application.
# In contrast, mutual authentication and encryption of communication between DSE Spark Master and Workers are
# managed by spark_security_enabled and spark_security_encryption_enabled in dse.yaml.

# Spark UI options apply to Spark Master and Spark Worker UIs and to Spark daemon UIs in general. Spark UI options do NOT
# apply to user applications even if they run in cluster mode.
spark_ui_options:
  # Valid values are:
  #   inherit - SSL settings are inherited from DSE client encryption options
  #   custom - SSL settings from encryption_options below
  encryption: inherit

  encryption_options:
    enabled: false
    keystore: resources/dse/conf/.ui-keystore
    # NOTE(review): password is stored here in plain text; replace it before setting enabled: true.
    keystore_password: cassandra
    # require_client_auth: false
    # Set truststore and truststore_password if require_client_auth is true
    # truststore: resources/dse/conf/.ui-truststore
    # truststore_password: cassandra
    # More advanced defaults:
    # protocol: TLS
    # algorithm: SunX509
    #
    # Set keystore_type for keystore, valid types can be JKS, JCEKS, PKCS12 or PKCS11
    # for file based keystores prefer PKCS12
    # keystore_type: JKS
    #
    # Set truststore_type for truststore, valid types can be JKS, JCEKS or PKCS12
    # for file based truststores prefer PKCS12
    # truststore_type: JKS
    #
    # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA]

# Configure how the driver and executor processes are created and managed.
spark_process_runner:
  # Valid options are: default, run_as
  runner_type: default

  # DSE uses sudo to run Spark application components (drivers and executors) as specific OS users.
  # A set of predefined users, called slot users, is used for this purpose. All drivers and executors
  # owned by some DSE user are run as some slot user x. Drivers and executors of any other DSE user
  # use different slots.
  # Setting up slots:
  # 1. Create n users (n = number of slots), call them slot1, slot2, ..., slotn, with no login. Each user
  #    should have primary group the same as its name, so for example slot1:slot1, slot2:slot2, ...
  # 2. Add DSE service user (the user who runs DSE server) to the slot user groups; the DSE service user must be
  #    in all slot user groups.
  # 3. Modify the sudoers files so that:
  #    a) DSE service user can execute any command as any slot user without providing a password
  #    b) umask is overridden to 007 for those commands so that files created by sub-processes will not be accessible
  #       by anyone by default,
  #    For example, if we have two slot users slot1, slot2, and DSE service user dse, add these slot users to sudoers:
  #        Runas_Alias SLOTS = slot1, slot2
  #        Defaults>SLOTS umask=007
  #        Defaults>SLOTS umask_override
  #        dse ALL=(SLOTS) NOPASSWD: ALL
  run_as_runner_options:
    user_slots:
      - slot1
      - slot2

# AlwaysOn SQL options have dependence on workpool setting of resource_manager_options. Set workpool configuration if you
# enable alwayson_sql_options.
# alwayson_sql_options:
#   # Set to true to enable the node for AlwaysOn SQL. Only an Analytics node
#   # can be enabled as an AlwaysOn SQL node.
#   enabled: false
#
#   # AlwaysOn SQL Thrift port
#   thrift_port: 10000
#
#   # AlwaysOn SQL WebUI port
#   web_ui_port: 9077
#
#   # The waiting time to reserve the Thrift port if it's not available
#   reserve_port_wait_time_ms: 100
#
#   # The waiting time to check AlwaysOn SQL health status
#   alwayson_sql_status_check_wait_time_ms: 500
#
#   # The work pool name used by AlwaysOn SQL
#   workpool: alwayson_sql
#
#   # Location in DSEFS of the log files
#   log_dsefs_dir: /spark/log/alwayson_sql
#
#   # The role to use for internal communication by AlwaysOn SQL if authentication is enabled
#   auth_user: alwayson_sql
#
#   # The maximum number of errors that can occur during AlwaysOn SQL service runner thread
#   # runs before stopping the service. A service stop requires a manual restart.
#   runner_max_errors: 10
#
#   # The interval in seconds to update heartbeat of AlwaysOn SQL. If heartbeat is not updated
#   # for more than the period of three times of the interval, AlwaysOn SQL malfunctions.
#   # AlwaysOn SQL automatically restarts.
#   heartbeat_update_interval_seconds: 30

##########################
# DSE File System (DSEFS) options
# dsefs_options:
#
#   # Whether to enable DSEFS on this node.
#   # If not set, DSEFS is enabled only on the nodes that run a Spark workload.
#   enabled: true
#
#   # The keyspace where the DSEFS metadata is stored. Optionally configure multiple DSEFS file systems
#   # within a cluster by specifying a different keyspace name for each datacenter.
#   keyspace_name: dsefs
#
#   # The local directory for storing the local node metadata, including the node identifier.
#   # The amount of data stored is nominal, and does not require configuration for throughput, latency, or capacity.
#   # This directory must not be shared by DSEFS nodes.
#   work_dir: /var/lib/dsefs
#
#   # The public port on which DSEFS listens for clients. The service on this port is bound to
#   # native_transport address.
#   public_port: 5598
#
#   # Port for inter-node communication, must not be visible from outside of the cluster.
#   # It is bound to listen address. Do not open this port to firewalls.
#   private_port: 5599
#
#   # Mandatory attribute to identify the set of directories. DataStax recommends segregating these data directories
#   # on physical devices that are different from the devices that are used for the DSE database.
#   # Using multiple directories on JBOD improves performance and capacity.
#   data_directories:
#     - dir: /var/lib/dsefs/data
#
#       # The weighting factor for this location specifies how much data to place in this directory, relative to
#       # other directories in the cluster. This soft constraint determines how DSEFS distributes the data.
# storage_weight: 1.0 # # # Reserved space (in bytes) that is not going to be used for storing blocks # min_free_space: 268435456 # # # More advanced settings: # # # Wait time before the DSEFS server times out while waiting for services to bootstrap. # service_startup_timeout_ms: 600000 # # # Wait time before the DSEFS server times out while waiting for services to close. # service_close_timeout_ms: 600000 # # # Wait time that the DSEFS server waits during shutdown before closing all pending connections. # server_close_timeout_ms: 2147483647 # Integer.MAX_VALUE # # # The maximum accepted size of a compression frame defined during file upload. # compression_frame_max_size: 1048576 # # # Maximum number of elements in a single DSEFS Server query cache. DSEFS reuses this value for every cache that # # stores database query results. # query_cache_size: 2048 # # # The time to retain the DSEFS Server query cache element in cache. The cache element expires # # when this time is exceeded. # query_cache_expire_after_ms: 2000 # # internode_authentication: # # If enabled, the servers are obliged to authenticate all messages passed between them on private_port. # # The authentication protocol is based on HMAC used with a pre-shared secret available only to DSE cluster # # members (nodes). # # The actual key is never passed between the nodes. # # Typically there is no need to turn this authentication off and it doesn't incur any performance overhead. # # Disabling internode authentication is not recommended, but may be used for debugging purposes # # to issue internode requests manually with curl. # # Limitations: # # Beware that enabling internode authentication does not encrypt the internode traffic. # # Only HTTP headers are protected with HMAC, so MITM attacks are still possible on the message data. 
# # It is also possible to bypass the authentication if the DSE messaging subsystem was not # # properly secured and the attacker could fake being a part of the DSE cluster in order to obtain # # the secret key. If you need stronger security, please configure SSL. # enabled: true # # # Algorithm used for key encryption: # algorithm: HmacSHA256 # # gossip_options: # # The delay between gossip rounds # round_delay_ms: 2000 # # # How long to wait after registering the Location and reading back all other Locations from the database # startup_delay_ms: 5000 # # # How long to wait after announcing shutdown before shutting down the node # shutdown_delay_ms: 10000 # # rest_options: # # How long RestClient is going to wait for a response corresponding to a given request # request_timeout_ms: 330000 # # # How long RestClient is going to wait for establishing a new connection # connection_open_timeout_ms: 10000 # # # How long RestClient is going to wait until all pending transfers are complete before closing # client_close_timeout_ms: 60000 # # # How long to wait for the server rest call to complete # server_request_timeout_ms: 300000 # # # Wait time, in milliseconds, before closing idle RestClient - server connection. 0 if disabled. # # If RestClient does not close connection after this timeout, the server closes the connection after # # 2 * idle_connection_timeout_ms milliseconds. # idle_connection_timeout_ms: 60000 # # # Wait time, in milliseconds, before closing idle internode connection. The internode connections are # # mainly used to exchange data during replication. Do not set lower than the default value for heavily # # utilized DSEFS clusters. # internode_idle_connection_timeout_ms: 120000 # # # Maximum number of connections to a given host per single CPU core. DSEFS keeps a connection pool for # # each CPU core. 
# core_max_concurrent_connections_per_host: 8 # # transaction_options: # # How long to allow a transaction to run before considering it for timing out and rollback # transaction_timeout_ms: 60000 # # # How long to wait before retrying a transaction aborted due to a conflict # conflict_retry_delay_ms: 10 # # # How many times the transaction is retried in case of a conflict before giving up # conflict_retry_count: 40 # # # How long to wait before retrying a failed transaction payload execution # execution_retry_delay_ms: 1000 # # # How many times to retry executing the payload before signaling the error to the application # execution_retry_count: 3 # # block_allocator_options: # # The overflow_margin_mb and overflow_factor options control how much additional data can be placed # # on the local (coordinator) before the local node overflows to the other nodes. # # A local node is preferred for a new block allocation, if # # used_size_on_the_local_node < average_used_size_per_node * overflow_factor + overflow_margin. # # The trade-off is between data locality of writes and balancing the cluster. # # To disable the preference for allocating blocks on the coordinator node, set these values to 0 MB and 1.0. # overflow_margin_mb: 1024 # overflow_factor: 1.05 # Insightful Monitoring(Insights) Options # enable insights_options. # insights_options: # # Directory to store insights # data_dir: /var/lib/cassandra/insights_data # # # Directory to store insight logs # log_dir: /var/log/cassandra/ ########################## # Audit logging options audit_logging_options: enabled: false # The logger used for logging audit information # Available loggers are: # CassandraAuditWriter - logs audit info to a cassandra table. This logger can be run synchronously or # asynchronously. Audit logs are stored in the dse_audit.audit_log table. # When run synchronously, a query will not execute until it has been written # to the audit log table successfully. 
If a failure occurs before an audit event is # written, and its query is executed, the audit logs might contain queries that were never # executed. # SLF4JAuditWriter - logs audit info to an SLF4J logger. The logger name is `SLF4JAuditWriter`, # and can be configured in the logback.xml file. logger: SLF4JAuditWriter # # Comma-separated list of audit event categories to be included or excluded from the audit log. # # When not set, the default includes all categories. # # Categories are: QUERY, DML, DDL, DCL, AUTH, ADMIN, ERROR. # # Specify either included or excluded categories. Specifying both is an error. # included_categories: # excluded_categories: # # Comma-separated list of keyspaces to be included or excluded from the audit log. # # When not set, the default includes all keyspaces. # # Specify either included or excluded keyspaces. Specifying both is an error. # included_keyspaces: # excluded_keyspaces: # # Comma-separated list of the roles to be audited or not. # # Specify either included or excluded roles. Specifying both is an error. # included_roles: # excluded_roles: # The amount of time, in hours, audit events are retained by supporting loggers. # Only the CassandraAuditWriter supports retention time. # Values of 0 or less retain events forever. retention_time: 0 # # Whether to render bound variables as CQL literals # # - when disabled, primitive values are printed as they are, collections, tuples and UDT values are # # printed as hexadecimal sequences # # - when enabled (default since 6.8.2), all values are printed in the form as they would be provided in a CQL statement, including # # quotation marks and escaping (in this case, the content of collections, tuples and UDT values is in human # # readable form) # render_cql_literals: true cassandra_audit_writer_options: # Sets the mode the audit writer runs in. # # When run synchronously, a query is not executed until the audit event is successfully written.
# # When run asynchronously, audit events are queued for writing to the audit table, but are # not necessarily logged before the query executes. A pool of writer threads consumes the # audit events from the queue, and writes them to the audit table in batch queries. While # this substantially improves performance under load, if there is a failure between when # a query is executed, and its audit event is written to the table, the audit table may # be missing entries for queries that were executed. # Valid options are 'sync' and 'async' mode: sync # The maximum number of events the writer will dequeue before writing them out to the table. # If you're seeing warnings in your logs about batches being too large, decrease this value. # Increasing guardrails.batch_size_warn_threshold_in_kb in cassandra.yaml is also an option, but make sure you understand # the implications before doing so. # # Only used in async mode. Must be >0 batch_size: 50 # The maximum amount of time in milliseconds an event will be dequeued by a writer before being written out. This # prevents events from waiting too long before being written to the table when there are not many queries happening. # # Only used in async mode. Must be >0 flush_time: 250 # The size of the queue feeding the asynchronous audit log writer threads. When there are more events being # produced than the writers can write out, the queue will fill up, and newer queries will block until there # is space on the queue. # If a value of 0 is used, the queue size will be unbounded, which can lead to resource exhaustion under # heavy query load.
queue_size: 30000 # the consistency level used to write audit events write_consistency: QUORUM # # Where dropped events are logged # dropped_event_log: /var/log/cassandra/dropped_audit_events.log # # Partition days into hours by default # day_partition_millis: 3600000 ########################## # System information encryption settings # # If enabled, system tables that might contain sensitive information (system.batchlog, # system.paxos), hints files, and Cassandra commit logs are encrypted with these # encryption settings. # # If DSE Search index encryption is enabled, DSE Search index files are also encrypted with these settings. # If backing C* table encryption is enabled, DSE Search commit log is encrypted with these settings. # # When enabling system table encryption on a node with existing data, run # `nodetool upgradesstables -a` on the listed tables to encrypt existing data. # # When tracing is enabled, sensitive information is written to the tables in the # system_traces keyspace. Configure encryption on the tables to encrypt their data # on disk by using an encrypting compressor. # # DataStax recommends using remote encryption keys from a KMIP server when using Transparent Data Encryption (TDE) features. # Local key support is provided when a KMIP server is not available. system_info_encryption: enabled: false cipher_algorithm: AES secret_key_strength: 128 chunk_length_kb: 64 # # The encryptor will use a KMIP key server to manage its encryption keys. Specify only to use a KMIP key server, # # otherwise omit this entry. The default is to use local key encryption. # key_provider: KmipKeyProviderFactory # # If KmipKeyProviderFactory is used for system_info_encryption, this specifies the kmip host to be used. 
# kmip_host: kmip_host_name ########################## # KMIP hosts options # # Connection settings for key servers supporting the KMIP protocol # allow DSE encryption features to use encryption and decryption keys that are not stored # on the same machine running DSE. # # Hosts are configured as <kmip_host_name>: {connection_settings}, which maps a user-defined # name to a set of KMIP hosts and KMIP-defined credentials (keystores and truststores) that are used with a particular # key server. This name is then used when referring to KMIP hosts. DSE supports multiple KMIP hosts. # kmip_hosts: # # The unique name of this KMIP host/cluster which is specified in the table schema. # host.yourdomain.com: # # # Comma-separated list of KMIP hosts host[:port] # # The current implementation of KMIP connection management supports only failover, so all requests will # # go through a single KMIP server. There is no load balancing. This is because there aren't many known KMIP servers # # that support read replication, or other strategies for availability. # # # # Hosts are tried in the order they appear, so add KMIP hosts in the intended failover sequence. # hosts: kmip1.yourdomain.com, kmip2.yourdomain.com # # # keystore/truststore info # keystore_path: /path/to/keystore.jks # keystore_type: jks # keystore_password: password # # truststore_path: /path/to/truststore.jks # truststore_type: jks # truststore_password: password # # # Keys read from the KMIP hosts are cached locally for the period of time specified below. # # The longer keys are cached, the fewer requests are made to the key server, but the longer # # it takes for changes (e.g., revocation) to propagate to the DSE node. # key_cache_millis: 300000 # # # Refresh interval for the KMIP host key cache. After this interval, cache entries become eligible # # for refresh. Upon next access, an async reload is scheduled and the old value returned until it completes. # # If key_cache_millis is non-zero, this value must be non-zero as well.
# # Defaults to the same value as key_cache_millis. # key_cache_update_millis: 300000 # # # Socket timeout in milliseconds. # timeout: 1000 # # driver - DSE Search will use Solr cursor paging (deep paging) when pagination is enabled by the CQL driver. # # # # off - DSE Search will ignore the driver's pagination settings and use normal Solr paging unless: # # - The current workload is an analytics workload (ex. SearchAnalytics). # # - The query parameter 'paging' is set to