Skip to content

Commit c5e7355

Browse files
authored
feat(query): Virtual column support alias name (#17365)
* feat(query): Virtual column support alias name * fix * fix machete * add tests * fix tests * fix comments * fix
1 parent 3bb9e91 commit c5e7355

File tree

43 files changed

+1082
-610
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1082
-610
lines changed

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/meta/api/src/schema_api_impl.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,7 @@ impl<KV: kvapi::KVApi<Error = MetaError> + ?Sized> SchemaApi for KV {
932932
virtual_columns: req.virtual_columns.clone(),
933933
created_on: Utc::now(),
934934
updated_on: None,
935+
auto_generated: req.auto_generated,
935936
};
936937

937938
self.insert_name_value_with_create_option(
@@ -963,6 +964,7 @@ impl<KV: kvapi::KVApi<Error = MetaError> + ?Sized> SchemaApi for KV {
963964
|mut meta| {
964965
meta.virtual_columns = req.virtual_columns.clone();
965966
meta.updated_on = Some(Utc::now());
967+
meta.auto_generated = req.auto_generated;
966968
Some((meta, None))
967969
},
968970
not_found,

src/meta/api/src/schema_api_test_suite.rs

Lines changed: 187 additions & 144 deletions
Large diffs are not rendered by default.

src/meta/app/src/schema/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,4 +144,5 @@ pub use virtual_column::DropVirtualColumnReq;
144144
pub use virtual_column::ListVirtualColumnsReq;
145145
pub use virtual_column::UpdateVirtualColumnReq;
146146
pub use virtual_column::VirtualColumnMeta;
147+
pub use virtual_column::VirtualField;
147148
pub use virtual_column_ident::VirtualColumnIdent;

src/meta/app/src/schema/virtual_column.rs

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,62 @@ use crate::schema::virtual_column_ident::VirtualColumnIdent;
2626
use crate::tenant::Tenant;
2727
use crate::tenant::ToTenant;
2828

29-
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Eq, PartialEq)]
29+
// The virtual field column definition of Variant type.
30+
#[derive(Clone, Debug, Eq, PartialEq)]
31+
pub struct VirtualField {
32+
// Expression that extracts the internal virtual field of the variant value,
33+
// for example:
34+
// `data['key']`, `data[0]`, `data['key1']['key2']`, ..
35+
pub expr: String,
36+
// The data type of internal virtual field.
37+
// If all the rows of a virtual field have the same type,
38+
// the virtual field can be cast to that type.
39+
pub data_type: TableDataType,
40+
// Optional alias name.
41+
pub alias_name: Option<String>,
42+
}
43+
44+
impl Display for VirtualField {
45+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
46+
if let Some(alias_name) = &self.alias_name {
47+
write!(
48+
f,
49+
"{}::{} AS {}",
50+
self.expr,
51+
self.data_type.remove_nullable(),
52+
alias_name
53+
)
54+
} else {
55+
write!(f, "{}::{}", self.expr, self.data_type.remove_nullable())
56+
}
57+
}
58+
}
59+
60+
#[derive(Clone, Debug, Eq, PartialEq)]
3061
pub struct VirtualColumnMeta {
3162
pub table_id: MetaId,
3263

33-
pub virtual_columns: Vec<(String, TableDataType)>,
64+
// The internal virtual field columns of Variant type.
65+
// For example, the data column has the following values:
66+
// `{"id":1,"name":"tom","metas":{"key1":"val1","key2":"val2"}}`
67+
// `{"id":2,"name":"alice","metas":{"key1":"val3","key2":"val4"}}`
68+
// ...
69+
// We can generate virtual columns as follows:
70+
// `data['id']`, `data['name']`, `data['metas']['key1']`, `data['metas']['key2']`
71+
pub virtual_columns: Vec<VirtualField>,
3472
pub created_on: DateTime<Utc>,
3573
pub updated_on: Option<DateTime<Utc>>,
74+
// Whether the virtual columns are auto-generated,
75+
// true for auto-generated, false for user-defined.
76+
pub auto_generated: bool,
3677
}
3778

3879
#[derive(Clone, Debug, PartialEq, Eq)]
3980
pub struct CreateVirtualColumnReq {
4081
pub create_option: CreateOption,
4182
pub name_ident: VirtualColumnIdent,
42-
pub virtual_columns: Vec<(String, TableDataType)>,
83+
pub virtual_columns: Vec<VirtualField>,
84+
pub auto_generated: bool,
4385
}
4486

4587
impl Display for CreateVirtualColumnReq {
@@ -57,7 +99,8 @@ impl Display for CreateVirtualColumnReq {
5799
pub struct UpdateVirtualColumnReq {
58100
pub if_exists: bool,
59101
pub name_ident: VirtualColumnIdent,
60-
pub virtual_columns: Vec<(String, TableDataType)>,
102+
pub virtual_columns: Vec<VirtualField>,
103+
pub auto_generated: bool,
61104
}
62105

63106
impl Display for UpdateVirtualColumnReq {

src/meta/proto-conv/src/util.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[
147147
(115, "2024-12-16: Add: udf.proto: add UDAFScript and UDAFServer"),
148148
(116, "2025-01-09: Add: MarkedDeletedIndexMeta"),
149149
(117, "2025-01-21: Add: config.proto: add disable_list_batch in WebhdfsConfig"),
150-
(118, "2025-01-22: Add: config.proto: add user_name in WebhdfsConfig")
150+
(118, "2025-01-22: Add: config.proto: add user_name in WebhdfsConfig"),
151+
(119, "2025-01-25: Add: virtual_column add alias_names and auto_generated field"),
151152
// Dear developer:
152153
// If you're gonna add a new metadata version, you'll have to add a test for it.
153154
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`)

src/meta/proto-conv/src/virtual_column_from_to_protobuf_impl.rs

Lines changed: 36 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
//! This mod is the key point about compatibility.
1616
//! Every time you update anything in this file, update the `VER` and make sure the tests pass.
1717
18+
use std::collections::BTreeMap;
19+
1820
use chrono::DateTime;
1921
use chrono::Utc;
2022
use databend_common_expression::TableDataType;
@@ -37,30 +39,28 @@ impl FromToProto for mt::VirtualColumnMeta {
3739
fn from_pb(p: Self::PB) -> Result<Self, Incompatible>
3840
where Self: Sized {
3941
reader_check_msg(p.ver, p.min_reader_ver)?;
40-
let virtual_columns = if p.data_types.is_empty() {
41-
p.virtual_columns
42-
.iter()
43-
.map(|v| {
44-
(
45-
v.clone(),
46-
TableDataType::Nullable(Box::new(TableDataType::Variant)),
47-
)
48-
})
49-
.collect()
50-
} else {
51-
if p.virtual_columns.len() != p.data_types.len() {
52-
return Err(Incompatible::new(format!(
53-
"Incompatible virtual columns length is {}, but data types length is {}",
54-
p.virtual_columns.len(),
55-
p.data_types.len()
56-
)));
57-
}
58-
let mut virtual_columns = Vec::new();
59-
for (v, ty) in p.virtual_columns.iter().zip(p.data_types.iter()) {
60-
virtual_columns.push((v.clone(), TableDataType::from_pb(ty.clone())?));
61-
}
62-
virtual_columns
63-
};
42+
if !p.data_types.is_empty() && p.virtual_columns.len() != p.data_types.len() {
43+
return Err(Incompatible::new(format!(
44+
"Incompatible virtual columns length is {}, but data types length is {}",
45+
p.virtual_columns.len(),
46+
p.data_types.len()
47+
)));
48+
}
49+
let mut virtual_columns = Vec::with_capacity(p.virtual_columns.len());
50+
for (i, expr) in p.virtual_columns.iter().enumerate() {
51+
let data_type = if let Some(ty) = p.data_types.get(i) {
52+
TableDataType::from_pb(ty.clone())?
53+
} else {
54+
TableDataType::Nullable(Box::new(TableDataType::Variant))
55+
};
56+
let alias_name = p.alias_names.get(&(i as u64)).cloned();
57+
let virtual_column = mt::VirtualField {
58+
expr: expr.clone(),
59+
data_type,
60+
alias_name,
61+
};
62+
virtual_columns.push(virtual_column);
63+
}
6464

6565
let v = Self {
6666
table_id: p.table_id,
@@ -70,16 +70,21 @@ impl FromToProto for mt::VirtualColumnMeta {
7070
Some(updated_on) => Some(DateTime::<Utc>::from_pb(updated_on)?),
7171
None => None,
7272
},
73+
auto_generated: p.auto_generated,
7374
};
7475
Ok(v)
7576
}
7677

7778
fn to_pb(&self) -> Result<Self::PB, Incompatible> {
78-
let mut data_types = Vec::new();
79-
let mut virtual_columns = Vec::new();
80-
for (v, ty) in self.virtual_columns.iter() {
81-
data_types.push(ty.to_pb()?);
82-
virtual_columns.push(v.clone());
79+
let mut data_types = Vec::with_capacity(self.virtual_columns.len());
80+
let mut virtual_columns = Vec::with_capacity(self.virtual_columns.len());
81+
let mut alias_names = BTreeMap::new();
82+
for (i, virtual_field) in self.virtual_columns.iter().enumerate() {
83+
data_types.push(virtual_field.data_type.to_pb()?);
84+
virtual_columns.push(virtual_field.expr.clone());
85+
if let Some(alias_name) = &virtual_field.alias_name {
86+
alias_names.insert(i as u64, alias_name.clone());
87+
}
8388
}
8489
let p = pb::VirtualColumnMeta {
8590
ver: VER,
@@ -92,6 +97,8 @@ impl FromToProto for mt::VirtualColumnMeta {
9297
None => None,
9398
},
9499
data_types,
100+
alias_names,
101+
auto_generated: self.auto_generated,
95102
};
96103
Ok(p)
97104
}

src/meta/proto-conv/tests/it/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,4 @@ mod v115_add_udaf_script;
116116
mod v116_marked_deleted_index_meta;
117117
mod v117_webhdfs_add_disable_list_batch;
118118
mod v118_webhdfs_add_user_name;
119+
mod v119_virtual_column;

src/meta/proto-conv/tests/it/v041_virtual_column.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use chrono::TimeZone;
1616
use chrono::Utc;
1717
use databend_common_expression::TableDataType;
1818
use databend_common_meta_app::schema::VirtualColumnMeta;
19+
use databend_common_meta_app::schema::VirtualField;
1920
use fastrace::func_name;
2021

2122
use crate::common;
@@ -42,23 +43,27 @@ fn test_decode_v41_virtual_column() -> anyhow::Result<()> {
4243
let want = || {
4344
let table_id = 7;
4445
let virtual_columns = vec![
45-
(
46-
"v:k1:k2".to_string(),
47-
TableDataType::Nullable(Box::new(TableDataType::Variant)),
48-
),
49-
(
50-
"v[1][2]".to_string(),
51-
TableDataType::Nullable(Box::new(TableDataType::Variant)),
52-
),
46+
VirtualField {
47+
expr: "v:k1:k2".to_string(),
48+
data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)),
49+
alias_name: None,
50+
},
51+
VirtualField {
52+
expr: "v[1][2]".to_string(),
53+
data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)),
54+
alias_name: None,
55+
},
5356
];
5457
let created_on = Utc.with_ymd_and_hms(2023, 3, 9, 10, 0, 0).unwrap();
5558
let updated_on = Some(Utc.with_ymd_and_hms(2023, 5, 29, 10, 0, 0).unwrap());
59+
let auto_generated = false;
5660

5761
VirtualColumnMeta {
5862
table_id,
5963
virtual_columns,
6064
created_on,
6165
updated_on,
66+
auto_generated,
6267
}
6368
};
6469

src/meta/proto-conv/tests/it/v112_virtual_column.rs

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use chrono::TimeZone;
1616
use chrono::Utc;
1717
use databend_common_expression::TableDataType;
1818
use databend_common_meta_app::schema::VirtualColumnMeta;
19+
use databend_common_meta_app::schema::VirtualField;
1920
use fastrace::func_name;
2021

2122
use crate::common;
@@ -45,27 +46,32 @@ fn test_decode_v112_virtual_column() -> anyhow::Result<()> {
4546
let want = || {
4647
let table_id = 7;
4748
let virtual_columns = vec![
48-
(
49-
"v:k1:k2".to_string(),
50-
TableDataType::Nullable(Box::new(TableDataType::Variant)),
51-
),
52-
(
53-
"v[1][2]".to_string(),
54-
TableDataType::Nullable(Box::new(TableDataType::Variant)),
55-
),
56-
(
57-
"v:k3:k4".to_string(),
58-
TableDataType::Nullable(Box::new(TableDataType::String)),
59-
),
49+
VirtualField {
50+
expr: "v:k1:k2".to_string(),
51+
data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)),
52+
alias_name: None,
53+
},
54+
VirtualField {
55+
expr: "v[1][2]".to_string(),
56+
data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)),
57+
alias_name: None,
58+
},
59+
VirtualField {
60+
expr: "v:k3:k4".to_string(),
61+
data_type: TableDataType::Nullable(Box::new(TableDataType::String)),
62+
alias_name: None,
63+
},
6064
];
6165
let created_on = Utc.with_ymd_and_hms(2023, 3, 9, 10, 0, 0).unwrap();
6266
let updated_on = Some(Utc.with_ymd_and_hms(2023, 5, 29, 10, 0, 0).unwrap());
67+
let auto_generated = false;
6368

6469
VirtualColumnMeta {
6570
table_id,
6671
virtual_columns,
6772
created_on,
6873
updated_on,
74+
auto_generated,
6975
}
7076
};
7177

0 commit comments

Comments
 (0)