diff --git a/Cargo.lock b/Cargo.lock index 8ad15a4e6241a..91245bde2c29f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3185,6 +3185,7 @@ dependencies = [ name = "databend-common-config" version = "0.1.0" dependencies = [ + "anyhow", "clap", "databend-common-base", "databend-common-building", @@ -3196,8 +3197,11 @@ dependencies = [ "log", "pretty_assertions", "serde", + "serde_ignored", "serde_with", "serfig", + "tempfile", + "toml 0.8.19", ] [[package]] @@ -13214,6 +13218,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_ignored" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8e319a36d1b52126a0d608f24e93b2d81297091818cd70625fcf50a15d84ddf" +dependencies = [ + "serde", +] + [[package]] name = "serde_json" version = "1.0.140" diff --git a/Cargo.toml b/Cargo.toml index 5071bd1bae878..d77be6e2aebeb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -457,6 +457,7 @@ scroll = "0.12.0" semver = "1.0.14" serde = { version = "1.0.164", features = ["derive", "rc"] } serde_derive = "1" +serde_ignored = "0.1.10" serde_json = { version = "1.0.85", default-features = false, features = ["preserve_order", "unbounded_depth"] } serde_repr = "0.1.9" serde_stacker = { version = "0.1" } @@ -501,7 +502,7 @@ tikv-jemalloc-sys = "0.6.0" tokio = { version = "1.35.0", features = ["full"] } tokio-stream = "0.1.11" tokio-util = { version = "0.7.13" } -toml = { version = "0.8", default-features = false } +toml = { version = "0.8", features = ["parse"] } tonic = { version = "0.12.3", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } tonic-build = { version = "0.12.3" } tonic-reflection = { version = "0.12.3" } diff --git a/src/query/config/Cargo.toml b/src/query/config/Cargo.toml index 83672276b17c6..c2131cc14ce01 100644 --- a/src/query/config/Cargo.toml +++ b/src/query/config/Cargo.toml @@ -16,6 +16,7 @@ storage-hdfs = ["databend-common-storage/storage-hdfs"] ignored = ["strum"] [dependencies] +anyhow = { workspace = true } clap = { workspace = true } databend-common-base = { workspace = true } databend-common-exception = { workspace = true } @@ -25,11 +26,14 @@ databend-common-storage = { workspace = true } databend-common-tracing = { workspace = true } log = { workspace = true } serde = { workspace = true } +serde_ignored = { workspace = true } serde_with = { workspace = true } serfig = { workspace = true } +toml = { workspace = true } [dev-dependencies] pretty_assertions = { workspace = true } +tempfile = { workspace = true } [build-dependencies] databend-common-building = { workspace = true } diff --git a/src/query/config/src/config.rs b/src/query/config/src/config.rs index 6d6195f7b76f8..493215f0709be 100644 --- a/src/query/config/src/config.rs +++ b/src/query/config/src/config.rs @@ -61,7 +61,6 @@ use serde_with::with_prefix; use serfig::collectors::from_env; use serfig::collectors::from_file; use serfig::collectors::from_self; -use serfig::parsers::Toml; use super::inner; use super::inner::CatalogConfig as InnerCatalogConfig; @@ -73,6 +72,7 @@ use super::inner::QueryConfig as InnerQueryConfig; use crate::builtin::BuiltInConfig; use crate::builtin::UDFConfig; use crate::builtin::UserConfig; +use crate::toml::TomlIgnored; use crate::DATABEND_COMMIT_VERSION; const CATALOG_HIVE: &str = "hive"; @@ -200,7 +200,7 @@ impl Config { // Load from config file first. { - let config_file = if !arg_conf.config_file.is_empty() { + let final_config_file = if !arg_conf.config_file.is_empty() { // TODO: remove this `allow(clippy::redundant_clone)` // as soon as this issue is fixed: // https://github.com/rust-lang/rust-clippy/issues/10940 @@ -212,8 +212,11 @@ impl Config { "".to_string() }; - if !config_file.is_empty() { - builder = builder.collect(from_file(Toml, &config_file)); + if !final_config_file.is_empty() { + let toml = TomlIgnored::new(Box::new(|path| { + log::warn!("unknown field in config: {}", &path); + })); + builder = builder.collect(from_file(toml, &final_config_file)); } } @@ -246,7 +249,10 @@ impl Config { }; if !config_file.is_empty() { - builder = builder.collect(from_file(Toml, &config_file)); + let toml = TomlIgnored::new(Box::new(|path| { + log::warn!("unknown field in config: {}", &path); + })); + builder = builder.collect(from_file(toml, &config_file)); } } @@ -1418,7 +1424,7 @@ impl serde::de::Visitor<'_> for SettingVisitor { /// Query config group. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args)] -#[serde(default, deny_unknown_fields)] +#[serde(default)] pub struct QueryConfig { /// Tenant id for get the information from the MetaSrv. #[clap(long, value_name = "VALUE", default_value = "admin")] @@ -2650,10 +2656,9 @@ impl From for OTLPEndpointConfig { } /// Meta config group. -/// deny_unknown_fields to check unknown field, like the deprecated `address`. /// TODO(xuanwo): All meta_xxx should be rename to xxx. #[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Args)] -#[serde(default, deny_unknown_fields)] +#[serde(default)] pub struct MetaConfig { /// The dir to store persisted meta state for a embedded meta store #[clap(long = "meta-embedded-dir", value_name = "VALUE", default_value_t)] @@ -2859,7 +2864,7 @@ impl TryInto for LocalConfig { } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args)] -#[serde(default, deny_unknown_fields)] +#[serde(default)] pub struct CacheConfig { /// Enable table meta cache. Default is enabled. Set it to false to disable all the table meta caches #[clap(long = "cache-enable-table-meta-cache", default_value = "true")] @@ -3097,7 +3102,7 @@ impl Default for DiskCacheKeyReloadPolicy { } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args)] -#[serde(default, deny_unknown_fields)] +#[serde(default)] pub struct DiskCacheConfig { /// Max bytes of cached raw table data. Default 20GB, set it to 0 to disable it. #[clap( @@ -3135,7 +3140,7 @@ impl Default for DiskCacheConfig { } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args)] -#[serde(default, deny_unknown_fields)] +#[serde(default)] pub struct SpillConfig { /// Path of spill to local disk. disable if it's empty. #[clap(long, value_name = "VALUE", default_value = "")] @@ -3161,7 +3166,7 @@ impl Default for SpillConfig { } #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Args, Default)] -#[serde(default, deny_unknown_fields)] +#[serde(default)] pub struct ResourcesManagementConfig { #[clap(long = "type", value_name = "VALUE", default_value = "self_managed")] #[serde(rename = "type")] diff --git a/src/query/config/src/lib.rs b/src/query/config/src/lib.rs index 9f3534389f5fe..2703e3b7bbe1f 100644 --- a/src/query/config/src/lib.rs +++ b/src/query/config/src/lib.rs @@ -33,6 +33,7 @@ mod global; mod inner; mod mask; mod obsolete; +mod toml; pub use builtin::*; pub use config::CacheStorageTypeConfig; pub use config::Commands; diff --git a/src/query/config/src/toml.rs b/src/query/config/src/toml.rs new file mode 100644 index 0000000000000..652f64aceb248 --- /dev/null +++ b/src/query/config/src/toml.rs @@ -0,0 +1,51 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use anyhow::anyhow; +use anyhow::Result; +use serde::de::DeserializeOwned; +use serfig::Parser; + +/// Toml parser which used with serfig. +/// +/// This parser will ignore unknown fields and call the handler with the path of the unknown field. +pub struct TomlIgnored { + handler: TomlUnknownFieldHandler, +} + +type TomlUnknownFieldHandler = Box; + +impl TomlIgnored { + pub fn new(handler: TomlUnknownFieldHandler) -> Self { + Self { handler } + } +} + +impl std::fmt::Debug for TomlIgnored { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TomlIgnored").finish() + } +} + +impl Parser for TomlIgnored { + fn parse(&mut self, bs: &[u8]) -> Result { + let s = std::str::from_utf8(bs) + .map_err(|err| anyhow!("input value is not valid utf-8: {err:?}"))?; + let de = toml::Deserializer::new(s); + let handler = &self.handler; + Ok(serde_ignored::deserialize(de, move |path| { + handler(path.to_string().as_str()); + })?) + } +} diff --git a/src/query/config/tests/it/config.rs b/src/query/config/tests/it/config.rs new file mode 100644 index 0000000000000..ed8b6bf33171d --- /dev/null +++ b/src/query/config/tests/it/config.rs @@ -0,0 +1,103 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::ffi::OsString; +use std::io::Write; + +use clap::Parser; +use databend_common_config::Config; +use databend_common_config::InnerConfig; +use pretty_assertions::assert_eq; + +/// It's required to make sure setting's default value is the same with clap. +#[test] +fn test_config_default() { + let setting_default = InnerConfig::default(); + let config_default: InnerConfig = Config::parse_from(Vec::::new()) + .try_into() + .expect("parse from args must succeed"); + + assert_eq!( + setting_default, config_default, + "default setting is different from default config, please check again" + ) +} + +#[test] +fn test_load_config() { + // Create a comprehensive test configuration with multiple sections and data types + let conf = r#" +# Query configuration section +[query] +max_active_sessions = 256 +shutdown_wait_timeout_ms = 5000 +flight_record_quota_size = 1048576 +unknown_query_field = "should be ignored" + +# Log configuration section +[log] +file.level = "INFO" +file.dir = "/tmp/databend/logs" +unknown_log_field = 123 + +# Storage configuration section +[storage] +type = "fs" + +[storage.fs] +data_path = "/tmp/databend/data" + +# Meta configuration section +[meta] +endpoint = "localhost:9191" +username = "databend" +password = "databend123" +"#; + + let mut temp_file = tempfile::NamedTempFile::new().unwrap(); + temp_file.write_all(conf.as_bytes()).unwrap(); + let temp_path = temp_file.path().to_str().unwrap().to_string(); + + // Save the original environment variable (if it exists) + let original_env = std::env::var("CONFIG_FILE").ok(); + + // Set the environment variable to our test config file + std::env::set_var("CONFIG_FILE", &temp_path); + + // Use the original load function (without a config_file parameter) + let config = Config::load(false).unwrap(); + + // Restore the original environment variable + match original_env { + Some(val) => std::env::set_var("CONFIG_FILE", val), + None => std::env::remove_var("CONFIG_FILE"), + } + + // Test query configuration values + assert_eq!(config.query.max_active_sessions, 256, "max_active_sessions should be 256"); + assert_eq!(config.query.shutdown_wait_timeout_ms, 5000, "shutdown_wait_timeout_ms should be 5000"); + assert_eq!(config.query.flight_record_quota_size, 1048576, "flight_record_quota_size should be 1048576"); + + // Test log configuration values + assert_eq!(config.log.file.level, "INFO", "log.file.level should be INFO"); + assert_eq!(config.log.file.dir, "/tmp/databend/logs", "log.file.dir should be /tmp/databend/logs"); + + // Test storage configuration values + assert_eq!(config.storage.fs.data_path, "/tmp/databend/data", "storage.fs.data_path should be /tmp/databend/data"); + + // Test meta configuration values + assert_eq!(config.meta.endpoint, "localhost:9191", "meta.endpoint should be localhost:9191"); + assert_eq!(config.meta.username, "databend", "meta.username should be databend"); + assert_eq!(config.meta.password, "databend123", "meta.password should be databend123"); +} diff --git a/src/query/service/Cargo.toml b/src/query/service/Cargo.toml index cac944ede7dae..c3730a60115e7 100644 --- a/src/query/service/Cargo.toml +++ b/src/query/service/Cargo.toml @@ -168,7 +168,7 @@ sysinfo = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true } tokio-stream = { workspace = true, features = ["net"] } -toml = { workspace = true, default-features = false } +toml = { workspace = true, features = ["parse"] } tonic = { workspace = true } typetag = { workspace = true } url = { workspace = true }