
Commit d2873e1

Merge remote-tracking branch 'sundyli/tpcds-1g' into iceberg-hdfs

2 parents 6e26b30 + 04c0cc3

15 files changed, +206 -59 lines changed

benchmark/tpcds/README.md

Lines changed: 3 additions & 18 deletions
@@ -2,27 +2,12 @@
 
 ## Preparing the Table and Data
 
-We use [DuckDB](https://github.com/duckdb/duckdb) to generate TPC-DS data.
+We use [DuckDB](https://duckdb.org/docs/installation/) to generate TPC-DS data.
 
 After installing DuckDB, you can use these commands to generate the data ([more information](https://github.com/duckdb/duckdb/tree/master/extension/tpcds)):
 
-```sql
-INSTALL tpcds;
-LOAD tpcds;
-SELECT * FROM dsdgen(sf=0.01) -- sf can be other values, such as 0.1, 1, 10, ...
-EXPORT DATABASE '/tmp/tpcds_0_01/' (FORMAT CSV, DELIMITER '|');
-```
-
-Then, move the data to current directory:
-
-```shell
-mv /tmp/tpcds_0_01/ "$(pwd)/data/"
-```
-
-After that, you can load data to Databend:
-
 ```shell
-./load_data.sh
+./load_data.sh 0.1
 ```
 
 ## Benchmark
@@ -32,5 +17,5 @@ To run the TPC-DS Benchmark, first build `databend-sqllogictests` binary.
 Then, execute the following command in your shell:
 
 ```shell
-databend-sqllogictests --handlers mysql --database tpcds --run_dir tpcds --bench
+databend-sqllogictests --handlers mysql --database tpcds --run_dir tpcds --bench
 ```

benchmark/tpcds/load_data.sh

Lines changed: 37 additions & 24 deletions
@@ -3,33 +3,44 @@
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/shell_env.sh
 
+factor=$1
+
+echo """
+INSTALL tpcds;
+LOAD tpcds;
+SELECT * FROM dsdgen(sf=$factor); -- sf can be other values, such as 0.1, 1, 10, ...
+EXPORT DATABASE '/tmp/tpcds_$factor/' (FORMAT CSV, DELIMITER '|');
+""" | duckdb
+
+mv /tmp/tpcds_$factor/ "$(pwd)/data/"
+
 # Create Database
 echo "CREATE DATABASE IF NOT EXISTS ${MYSQL_DATABASE}" | $BENDSQL_CLIENT_CONNECT_DEFAULT
 
 tables=(
-call_center
-catalog_returns
-customer_address
-customer_demographics
-household_demographics
-inventory
-promotion
-ship_mode
-store_returns
-time_dim
-web_page
+call_center
+catalog_returns
+customer_address
+customer_demographics
+household_demographics
+inventory
+promotion
+ship_mode
+store_returns
+time_dim
+web_page
 web_sales
-catalog_page
-catalog_sales
-customer
-date_dim
-income_band
-item
-reason
-store
-store_sales
-warehouse
-web_returns
+catalog_page
+catalog_sales
+customer
+date_dim
+income_band
+item
+reason
+store
+store_sales
+warehouse
+web_returns
 web_site
 )
 
@@ -43,11 +54,13 @@ done
 cat "$CURDIR"/tpcds.sql | $BENDSQL_CLIENT_CONNECT
 
 # Load Data
+# note: export STORAGE_ALLOW_INSECURE=true to start databend-query
 for t in ${tables[@]}
 do
 echo "$t"
-insert_sql="insert into $MYSQL_DATABASE.$t file_format = (type = CSV skip_header = 0 field_delimiter = '|' record_delimiter = '\n')"
-curl -s -u root: -XPUT "http://localhost:8000/v1/streaming_load" -H "database: tpcds" -H "insert_sql: ${insert_sql}" -F 'upload=@"'${CURDIR}'/data/'$t'.csv"' > /dev/null 2>&1
+fp="`pwd`/data/$t.csv"
+echo "copy into ${MYSQL_DATABASE}.$t from 'fs://${fp}' file_format = (type = CSV skip_header = 1 field_delimiter = '|' record_delimiter = '\n')" | $BENDSQL_CLIENT_CONNECT
+echo "analyze table ${MYSQL_DATABASE}.$t" | $BENDSQL_CLIENT_CONNECT
 done

benchmark/tpch/README.md

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@
 
 ## Preparing the Table and Data
 
+We use [DuckDB](https://duckdb.org/docs/installation/) to generate TPCH data.
 To prepare the table and data for the TPC-H Benchmark, run the following command in your shell:
 
 ```shell

benchmark/tpch/gen_data.sh

Lines changed: 0 additions & 5 deletions
This file was deleted.
Lines changed: 4 additions & 2 deletions
@@ -111,9 +111,11 @@ echo "CREATE TABLE IF NOT EXISTS lineitem
 ) CLUSTER BY(l_shipdate, l_orderkey) ${options}" | $BENDSQL_CLIENT_CONNECT
 
 # insert data to tables
+# note: export STORAGE_ALLOW_INSECURE=true to start databend-query
 for t in customer lineitem nation orders partsupp part region supplier
 do
 echo "$t"
-insert_sql="insert into ${MYSQL_DATABASE}.$t file_format = (type = CSV skip_header = 0 field_delimiter = '|' record_delimiter = '\n')"
-curl -s -u root: -XPUT "http://localhost:${QUERY_HTTP_HANDLER_PORT}/v1/streaming_load" -H "database: tpch" -H "insert_sql: ${insert_sql}" -F 'upload=@"./data/'$t'.tbl"'
+fp="`pwd`/data/$t.tbl"
+echo "copy into ${MYSQL_DATABASE}.$t from 'fs://${fp}' file_format = (type = CSV skip_header = 1 field_delimiter = '|' record_delimiter = '\n')" | $BENDSQL_CLIENT_CONNECT
+echo "analyze table ${MYSQL_DATABASE}.$t" | $BENDSQL_CLIENT_CONNECT
 done

benchmark/tpch/tpch.sh

Lines changed: 11 additions & 4 deletions
@@ -1,13 +1,20 @@
 #!/usr/bin/env bash
 
-# generate tpch data
-sh ./gen_data.sh $1
+
+echo """
+INSTALL tpch;
+LOAD tpch;
+SELECT * FROM dsdgen(sf=1); -- sf can be other values, such as 0.1, 1, 10, ...
+EXPORT DATABASE '/tmp/tpch_1/' (FORMAT CSV, DELIMITER '|');
+""" | duckdb
+
+mv /tmp/tpch_1/ "$(pwd)/data/"
 
 if [[ $2 == native ]]; then
 echo "native"
-sh ./prepare_table.sh "storage_format = 'native' compression = 'lz4'"
+sh ./load_data.sh "storage_format = 'native' compression = 'lz4'"
 else
 echo "fuse"
-sh ./prepare_table.sh ""
+sh ./load_data.sh ""
 fi

src/common/metrics/src/metrics/storage.rs

Lines changed: 7 additions & 2 deletions
@@ -35,6 +35,7 @@ static OMIT_FILTER_ROWS: LazyLock<Counter> = LazyLock::new(|| register_counter(
 struct StorageHttpLabels {
     host: String,
     method: String,
+    bucket: String,
 }
 
 static STORAGE_HTTP_REQUESTS_COUNT: LazyLock<FamilyCounter<StorageHttpLabels>> =
@@ -320,9 +321,13 @@ pub fn metrics_inc_omit_filter_rows(c: u64) {
 }
 
 /// Storage Http metrics.
-pub fn metrics_inc_storage_http_requests_count(host: String, method: String) {
+pub fn metrics_inc_storage_http_requests_count(host: String, method: String, bucket: String) {
     STORAGE_HTTP_REQUESTS_COUNT
-        .get_or_create(&StorageHttpLabels { host, method })
+        .get_or_create(&StorageHttpLabels {
+            host,
+            method,
+            bucket,
+        })
         .inc();
 }
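
The `FamilyCounter<StorageHttpLabels>` above appears to be databend's own alias for a labeled counter family: every distinct label combination becomes its own time series, so adding `bucket` fans the request counter out per bucket. A minimal, self-contained sketch of the same idea, assuming the `prometheus_client` crate directly rather than databend's metrics helpers:

```rust
use prometheus_client::encoding::EncodeLabelSet;
use prometheus_client::metrics::counter::Counter;
use prometheus_client::metrics::family::Family;
use prometheus_client::registry::Registry;

// Each distinct (host, method, bucket) triple becomes a separate counter series.
#[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)]
struct StorageHttpLabels {
    host: String,
    method: String,
    bucket: String,
}

fn main() {
    let mut registry = Registry::default();
    let requests = Family::<StorageHttpLabels, Counter>::default();
    registry.register("storage_http_requests", "Storage HTTP requests", requests.clone());

    requests
        .get_or_create(&StorageHttpLabels {
            host: "s3.us-east-1.amazonaws.com".to_string(),
            method: "GET".to_string(),
            bucket: "my-bucket".to_string(),
        })
        .inc();
}
```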

src/common/storage/src/http_client.rs

Lines changed: 10 additions & 1 deletion
@@ -60,7 +60,16 @@ impl HttpFetch for StorageHttpClient {
            }
            m => m.as_str(),
        };
-        metrics_inc_storage_http_requests_count(host.to_string(), method.to_string());
+        // get first component in path as bucket name
+        let bucket = match url.path_segments() {
+            Some(mut segments) => segments.next().unwrap_or("-"),
+            None => "-",
+        };
+        metrics_inc_storage_http_requests_count(
+            host.to_string(),
+            method.to_string(),
+            bucket.to_string(),
+        );
 
        let (parts, body) = req.into_parts();
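
For reference, `url::Url::path_segments` yields the URL path split on `/` (without the leading slash), so for path-style object-store URLs the first segment is the bucket; it returns `None` only for cannot-be-a-base URLs, hence the `"-"` fallback. A small standalone sketch (the `bucket_of` helper is hypothetical, for illustration only):

```rust
use url::Url;

// Mirror of the extraction logic above: first path segment, or "-" as a fallback.
fn bucket_of(url: &Url) -> &str {
    match url.path_segments() {
        Some(mut segments) => segments.next().unwrap_or("-"),
        None => "-",
    }
}

fn main() {
    // Path-style S3 URL: the bucket is the first path component.
    let url = Url::parse("https://s3.us-east-1.amazonaws.com/my-bucket/path/to/object").unwrap();
    assert_eq!(bucket_of(&url), "my-bucket");
}
```

Note that for virtual-hosted-style URLs the bucket lives in the hostname instead, which is already captured by the `host` label.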

src/common/storage/src/statistics.rs

Lines changed: 17 additions & 0 deletions
@@ -182,4 +182,21 @@ impl Datum {
            ))),
        }
    }
+
+    pub fn can_compare(&self, other: &Self) -> bool {
+        match (self, other) {
+            (Datum::Bool(_), Datum::Bool(_))
+            | (Datum::Int(_), Datum::Int(_))
+            | (Datum::Int(_), Datum::UInt(_))
+            | (Datum::Int(_), Datum::Float(_))
+            | (Datum::UInt(_), Datum::UInt(_))
+            | (Datum::UInt(_), Datum::Int(_))
+            | (Datum::UInt(_), Datum::Float(_))
+            | (Datum::Float(_), Datum::Float(_))
+            | (Datum::Float(_), Datum::Int(_))
+            | (Datum::Float(_), Datum::UInt(_))
+            | (Datum::Bytes(_), Datum::Bytes(_)) => true,
+            _ => false,
+        }
+    }
 }
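
The new `can_compare` treats the three numeric variants as mutually comparable and everything else as comparable only with its own kind. A compressed sketch of the same predicate, using a simplified stand-in for `Datum` (illustration only, not the databend type):

```rust
// Illustration only: simplified stand-in for the Datum enum above.
enum Datum {
    Bool(bool),
    Int(i64),
    UInt(u64),
    Float(f64),
    Bytes(Vec<u8>),
}

impl Datum {
    // Equivalent to the explicit variant-by-variant list above: numeric kinds
    // (Int, UInt, Float) compare with each other; Bool and Bytes only with themselves.
    fn can_compare(&self, other: &Self) -> bool {
        use Datum::*;
        matches!(
            (self, other),
            (Bool(_), Bool(_))
                | (Int(_) | UInt(_) | Float(_), Int(_) | UInt(_) | Float(_))
                | (Bytes(_), Bytes(_))
        )
    }
}

fn main() {
    assert!(Datum::Int(1).can_compare(&Datum::Float(1.5)));
    assert!(!Datum::Bool(true).can_compare(&Datum::Bytes(vec![0x61])));
}
```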

src/query/expression/src/evaluator.rs

Lines changed: 34 additions & 0 deletions
@@ -377,6 +377,40 @@ impl<'a> Evaluator<'a> {
                    .set_span(span))
                }
            }
+            (
+                DataType::Nullable(box DataType::Variant) | DataType::Variant,
+                DataType::Boolean
+                | DataType::Number(_)
+                | DataType::String
+                | DataType::Date
+                | DataType::Timestamp,
+            ) => {
+                // allow cast variant to not null types.
+                let cast_fn = format!("to_{}", dest_type.to_string().to_lowercase());
+                if let Some(new_value) = self.run_simple_cast(
+                    span,
+                    src_type,
+                    &dest_type.wrap_nullable(),
+                    value.clone(),
+                    &cast_fn,
+                    validity.clone(),
+                    options,
+                )? {
+                    let (new_value, has_null) = new_value.remove_nullable();
+                    if has_null {
+                        return Err(ErrorCode::BadArguments(format!(
+                            "unable to cast type `{src_type}` to type `{dest_type}`, result has null values"
+                        ))
+                        .set_span(span));
+                    }
+                    Ok(new_value)
+                } else {
+                    Err(ErrorCode::BadArguments(format!(
+                        "unable to cast type `{src_type}` to type `{dest_type}`"
+                    ))
+                    .set_span(span))
+                }
+            }
            (DataType::Nullable(inner_src_ty), DataType::Nullable(inner_dest_ty)) => match value {
                Value::Scalar(Scalar::Null) => Ok(Value::Scalar(Scalar::Null)),
                Value::Scalar(_) => {
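
The new arm handles casting VARIANT to a non-nullable Boolean, Number, String, Date, or Timestamp: it routes the value through the corresponding `to_<type>` function with a nullable destination, then rejects the whole cast if any row came back NULL. The shape of that control flow, with simplified stand-in types (not the actual databend-query `Evaluator` API):

```rust
// Cast every value through a nullable path, then refuse the result if any value
// ended up NULL — the same "wrap_nullable, then remove_nullable and check" idea.
fn cast_non_nullable<S, T>(
    values: &[S],
    cast_nullable: impl Fn(&S) -> Option<T>, // stand-in for the run_simple_cast step
) -> Result<Vec<T>, String> {
    values
        .iter()
        .map(|v| cast_nullable(v).ok_or_else(|| "result has null values".to_string()))
        .collect()
}

fn main() {
    // e.g. variant strings cast to INT: "abc" converts to NULL, so the cast fails.
    let ok = cast_non_nullable(&["1", "2"], |s| s.parse::<i64>().ok());
    assert_eq!(ok, Ok(vec![1, 2]));

    let err = cast_non_nullable(&["1", "abc"], |s| s.parse::<i64>().ok());
    assert!(err.is_err());
}
```

In SQL terms, this is presumably the path a cast from VARIANT to a non-nullable target takes: convert via `to_<type>`, and error out instead of silently producing NULLs.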

0 commit comments

Comments
 (0)