Skip to content

Commit 612ec78

Browse files
authored
Merge branch 'main' into prune-pipeline
2 parents 3672851 + cce8c74 commit 612ec78

File tree

145 files changed

+5184
-8024
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

145 files changed

+5184
-8024
lines changed

Cargo.lock

Lines changed: 247 additions & 254 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,9 @@ arrow-ipc = { version = "53" }
212212
arrow-ord = { version = "53" }
213213
arrow-schema = { version = "53", features = ["serde"] }
214214
arrow-select = { version = "53" }
215-
arrow-udf-js = { git = "https://github.com/arrow-udf/arrow-udf", rev = "80b09d6" }
216-
arrow-udf-python = { git = "https://github.com/arrow-udf/arrow-udf", rev = "80b09d6" }
217-
arrow-udf-wasm = { git = "https://github.com/arrow-udf/arrow-udf", rev = "80b09d6" }
215+
arrow-udf-js = { version = "0.5.0" }
216+
arrow-udf-python = { version = "0.4.0" }
217+
arrow-udf-wasm = { version = "0.4.1" }
218218
async-backtrace = "0.2"
219219
async-channel = "1.7.1"
220220
async-compression = { git = "https://github.com/datafuse-extras/async-compression", rev = "dc81082", features = [
@@ -272,7 +272,7 @@ dyn-clone = "1.0.9"
272272
educe = "0.4" # FIXME: failed to upgrade to educe 0.6
273273
either = "1.9"
274274
enquote = "1.1.0"
275-
enum-as-inner = "0.5" # FIXME: failed to upgrade to enum-as-inner 0.6
275+
enum-as-inner = "0.6"
276276
enum_dispatch = "0.3.13"
277277
enumflags2 = { version = "0.7.7", features = ["serde"] }
278278
ethnum = { version = "1.5.0" }
@@ -485,6 +485,7 @@ tower = { version = "0.5.1", features = ["util"] }
485485
tower-service = "0.3.3"
486486
twox-hash = "1.6.3"
487487
typetag = "0.2.3"
488+
unicase = "2.8.0"
488489
unicode-segmentation = "1.10.1"
489490
unindent = "0.2"
490491
url = "2.3.1"
@@ -609,6 +610,9 @@ overflow-checks = true
609610
rpath = false
610611

611612
[patch.crates-io]
613+
arrow-udf-js = { git = "https://github.com/arrow-udf/arrow-udf", rev = "ade868f" }
614+
arrow-udf-python = { git = "https://github.com/arrow-udf/arrow-udf", rev = "ade868f" }
615+
arrow-udf-wasm = { git = "https://github.com/arrow-udf/arrow-udf", rev = "ade868f" }
612616
async-backtrace = { git = "https://github.com/datafuse-extras/async-backtrace.git", rev = "dea4553" }
613617
async-recursion = { git = "https://github.com/datafuse-extras/async-recursion.git", rev = "a353334" }
614618
backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "72265be", features = [

scripts/ci/deploy/config/databend-query-node-1.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,6 @@ data_cache_storage = "none"
155155
path = "./.databend/_cache"
156156
# max bytes of cached data 20G
157157
max_bytes = 21474836480
158+
159+
[spill]
160+
spill_local_disk_path = "./.databend/temp/_query_spill"

src/common/base/src/runtime/metrics/histogram.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ pub static BUCKET_MILLISECONDS: [f64; 15] = [
3939
300000.0, 600000.0, 1800000.0,
4040
];
4141

42+
pub static BUCKET_ROWS: [f64; 14] = [
43+
1.0, 10.0, 100.0, 500.0, 1000.0, 5000.0, 10000.0, 50000.0, 100000.0, 500000.0, 1000000.0,
44+
5000000.0, 10000000.0, 50000000.0,
45+
];
46+
4247
/// Histogram is a port of prometheus-client's Histogram. The only difference is that
4348
/// we can reset the histogram.
4449
#[derive(Debug)]

src/common/base/src/runtime/metrics/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ pub use registry::register_gauge_family;
3434
pub use registry::register_histogram;
3535
pub use registry::register_histogram_family;
3636
pub use registry::register_histogram_family_in_milliseconds;
37+
pub use registry::register_histogram_family_in_rows;
3738
pub use registry::register_histogram_family_in_seconds;
3839
pub use registry::register_histogram_in_milliseconds;
3940
pub use registry::register_histogram_in_seconds;

src/common/base/src/runtime/metrics/registry.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ use crate::runtime::metrics::family_metrics::FamilyHistogram as InnerFamilyHisto
3838
use crate::runtime::metrics::gauge::Gauge;
3939
use crate::runtime::metrics::histogram::Histogram;
4040
use crate::runtime::metrics::histogram::BUCKET_MILLISECONDS;
41+
use crate::runtime::metrics::histogram::BUCKET_ROWS;
4142
use crate::runtime::metrics::histogram::BUCKET_SECONDS;
4243
use crate::runtime::metrics::process_collector::ProcessCollector;
4344
use crate::runtime::metrics::sample::MetricSample;
@@ -309,6 +310,11 @@ where T: FamilyLabels {
309310
register_histogram_family(name, BUCKET_MILLISECONDS.iter().copied())
310311
}
311312

313+
pub fn register_histogram_family_in_rows<T>(name: &str) -> FamilyHistogram<T>
314+
where T: FamilyLabels {
315+
register_histogram_family(name, BUCKET_ROWS.iter().copied())
316+
}
317+
312318
pub type FamilyGauge<T> = Family<T, InnerFamilyGauge<T>>;
313319
pub type FamilyCounter<T> = Family<T, InnerFamilyCounter<T>>;
314320
pub type FamilyHistogram<T> = Family<T, InnerFamilyHistogram<T>>;

src/common/exception/src/exception_code.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,10 @@ build_exceptions! {
212212
// Geometry errors.
213213
GeometryError(1801),
214214
InvalidGeometryFormat(1802),
215+
216+
// UDF errors.
217+
UDFRuntimeError(1810),
218+
215219
// Tantivy errors.
216220
TantivyError(1901),
217221
TantivyOpenReadError(1902),

src/common/metrics/src/metrics/external_server.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ use std::sync::LazyLock;
1616
use std::time::Duration;
1717

1818
use databend_common_base::runtime::metrics::register_counter_family;
19-
use databend_common_base::runtime::metrics::register_histogram_family_in_seconds;
19+
use databend_common_base::runtime::metrics::register_histogram_family_in_milliseconds;
20+
use databend_common_base::runtime::metrics::register_histogram_family_in_rows;
2021
use databend_common_base::runtime::metrics::FamilyCounter;
2122
use databend_common_base::runtime::metrics::FamilyHistogram;
2223

@@ -28,12 +29,13 @@ const METRIC_RETRY: &str = "external_retry";
2829
const METRIC_ERROR: &str = "external_error";
2930
const METRIC_RUNNING_REQUESTS: &str = "external_running_requests";
3031
const METRIC_REQUESTS: &str = "external_requests";
32+
const METRIC_EXTERNAL_BLOCK_ROWS: &str = "external_block_rows";
3133

3234
static REQUEST_EXTERNAL_DURATION: LazyLock<FamilyHistogram<VecLabels>> =
33-
LazyLock::new(|| register_histogram_family_in_seconds(METRIC_REQUEST_EXTERNAL_DURATION));
35+
LazyLock::new(|| register_histogram_family_in_milliseconds(METRIC_REQUEST_EXTERNAL_DURATION));
3436

3537
static CONNECT_EXTERNAL_DURATION: LazyLock<FamilyHistogram<VecLabels>> =
36-
LazyLock::new(|| register_histogram_family_in_seconds(METRIC_CONNECT_EXTERNAL_DURATION));
38+
LazyLock::new(|| register_histogram_family_in_milliseconds(METRIC_CONNECT_EXTERNAL_DURATION));
3739

3840
static RETRY_EXTERNAL: LazyLock<FamilyCounter<VecLabels>> =
3941
LazyLock::new(|| register_counter_family(METRIC_RETRY));
@@ -47,6 +49,9 @@ static RUNNING_REQUESTS_EXTERNAL: LazyLock<FamilyCounter<VecLabels>> =
4749
static REQUESTS_EXTERNAL_EXTERNAL: LazyLock<FamilyCounter<VecLabels>> =
4850
LazyLock::new(|| register_counter_family(METRIC_REQUESTS));
4951

52+
static EXTERNAL_BLOCK_ROWS: LazyLock<FamilyHistogram<VecLabels>> =
53+
LazyLock::new(|| register_histogram_family_in_rows(METRIC_EXTERNAL_BLOCK_ROWS));
54+
5055
const LABEL_FUNCTION_NAME: &str = "function_name";
5156
const LABEL_ERROR_KIND: &str = "error_kind";
5257

@@ -64,6 +69,13 @@ pub fn record_request_external_duration(function_name: impl Into<String>, durati
6469
.observe(duration.as_millis_f64());
6570
}
6671

72+
pub fn record_request_external_block_rows(function_name: impl Into<String>, rows: usize) {
73+
let labels = &vec![(LABEL_FUNCTION_NAME, function_name.into())];
74+
EXTERNAL_BLOCK_ROWS
75+
.get_or_create(labels)
76+
.observe(rows as f64);
77+
}
78+
6779
pub fn record_retry_external(function_name: impl Into<String>, error_kind: impl Into<String>) {
6880
let labels = &vec![
6981
(LABEL_FUNCTION_NAME, function_name.into()),

src/meta/app/src/principal/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ pub use user_auth::AuthType;
9595
pub use user_auth::PasswordHashMethod;
9696
pub use user_defined_file_format::UserDefinedFileFormat;
9797
pub use user_defined_function::LambdaUDF;
98+
pub use user_defined_function::UDAFScript;
9899
pub use user_defined_function::UDFDefinition;
99100
pub use user_defined_function::UDFScript;
100101
pub use user_defined_function::UDFServer;

src/meta/app/src/principal/user_defined_function.rs

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use std::fmt::Formatter;
1818
use chrono::DateTime;
1919
use chrono::Utc;
2020
use databend_common_expression::types::DataType;
21+
use databend_common_expression::DataField;
2122

2223
#[derive(Clone, Debug, Eq, PartialEq)]
2324
pub struct LambdaUDF {
@@ -44,11 +45,24 @@ pub struct UDFScript {
4445
pub runtime_version: String,
4546
}
4647

48+
#[derive(Clone, Debug, Eq, PartialEq)]
49+
pub struct UDAFScript {
50+
pub code: String,
51+
pub language: String,
52+
// aggregate function input types
53+
pub arg_types: Vec<DataType>,
54+
// aggregate function state fields
55+
pub state_fields: Vec<DataField>,
56+
pub return_type: DataType,
57+
pub runtime_version: String,
58+
}
59+
4760
#[derive(Clone, Debug, Eq, PartialEq)]
4861
pub enum UDFDefinition {
4962
LambdaUDF(LambdaUDF),
5063
UDFServer(UDFServer),
5164
UDFScript(UDFScript),
65+
UDAFScript(UDAFScript),
5266
}
5367

5468
#[derive(Clone, Debug, Eq, PartialEq)]
@@ -160,7 +174,6 @@ impl Display for UDFDefinition {
160174
") RETURNS {return_type} LANGUAGE {language} HANDLER = {handler} ADDRESS = {address}"
161175
)?;
162176
}
163-
164177
UDFDefinition::UDFScript(UDFScript {
165178
code,
166179
arg_types,
@@ -180,6 +193,29 @@ impl Display for UDFDefinition {
180193
") RETURNS {return_type} LANGUAGE {language} RUNTIME_VERSION = {runtime_version} HANDLER = {handler} AS $${code}$$"
181194
)?;
182195
}
196+
UDFDefinition::UDAFScript(UDAFScript {
197+
code,
198+
arg_types,
199+
state_fields,
200+
return_type,
201+
language,
202+
runtime_version,
203+
}) => {
204+
for (i, item) in arg_types.iter().enumerate() {
205+
if i > 0 {
206+
write!(f, ", ")?;
207+
}
208+
write!(f, "{item}")?;
209+
}
210+
write!(f, ") STATE {{ ")?;
211+
for (i, item) in state_fields.iter().enumerate() {
212+
if i > 0 {
213+
write!(f, ", ")?;
214+
}
215+
write!(f, "{} {}", item.name(), item.data_type())?;
216+
}
217+
write!(f, " }} RETURNS {return_type} LANGUAGE {language} RUNTIME_VERSION = {runtime_version} AS $${code}$$")?;
218+
}
183219
}
184220
Ok(())
185221
}

0 commit comments

Comments
 (0)