Skip to content

Commit 54fceaa

Browse files
authored
chore(query): testing tpch/tpcds using 1G scale factor (#17449)
* update * update * update
1 parent 7e3df33 commit 54fceaa

33 files changed

+37577
-4895
lines changed

.github/actions/setup_test/action.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ runs:
3333
with:
3434
python-version: "3.12"
3535

36+
- name: Install Python packages
37+
shell: bash
38+
run: |
39+
pip install rich
40+
pip install databend_driver
41+
pip install duckdb
42+
3643
- uses: actions/setup-java@v4
3744
with:
3845
distribution: temurin

benchmark/tpcds/shell_env.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
22

3-
export MYSQL_DATABASE=${MYSQL_DATABASE:="tpcds"}
3+
# Database used by the benchmark queries. An explicit QUERY_DATABASE wins;
# the legacy MYSQL_DATABASE name is still honoured as a fallback so existing
# callers keep working; otherwise default to "tpcds".
export QUERY_DATABASE=${QUERY_DATABASE:-${MYSQL_DATABASE:-"tpcds"}}
44
export QUERY_MYSQL_HANDLER_HOST=${QUERY_MYSQL_HANDLER_HOST:="127.0.0.1"}
55
export QUERY_MYSQL_HANDLER_PORT=${QUERY_MYSQL_HANDLER_PORT:="3307"}
66
export QUERY_HTTP_HANDLER_PORT=${QUERY_HTTP_HANDLER_PORT:="8000"}

benchmark/tpch/shell_env.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env bash
22

3-
export MYSQL_DATABASE=${MYSQL_DATABASE:="tpch"}
3+
# Database used by the benchmark queries. An explicit QUERY_DATABASE wins;
# the legacy MYSQL_DATABASE name is still honoured as a fallback so existing
# callers keep working; otherwise default to "tpch".
export QUERY_DATABASE=${QUERY_DATABASE:-${MYSQL_DATABASE:-"tpch"}}
44
export QUERY_MYSQL_HANDLER_HOST=${QUERY_MYSQL_HANDLER_HOST:="127.0.0.1"}
55
export QUERY_MYSQL_HANDLER_PORT=${QUERY_MYSQL_HANDLER_PORT:="3307"}
66
export QUERY_HTTP_HANDLER_PORT=${QUERY_HTTP_HANDLER_PORT:="8000"}

benchmark/tpch/tpch.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
echo """
55
INSTALL tpch;
66
LOAD tpch;
7-
SELECT * FROM dsdgen(sf=1); -- sf can be other values, such as 0.1, 1, 10, ...
7+
SELECT * FROM dbgen(sf=1); -- sf can be other values, such as 0.1, 1, 10, ...
88
EXPORT DATABASE '/tmp/tpch_1/' (FORMAT CSV, DELIMITER '|');
99
""" | duckdb
1010

src/query/sql/src/planner/binder/sort.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ impl Binder {
129129
.to_string(),
130130
));
131131
} else {
132-
let rewrite_scalar = self
132+
let mut rewrite_scalar = self
133133
.rewrite_scalar_with_replacement(
134134
bind_context,
135135
&bound_expr,
@@ -148,6 +148,10 @@ impl Binder {
148148
)
149149
.map_err(|e| ErrorCode::SemanticError(e.message()))?;
150150

151+
let mut rewriter =
152+
AggregateRewriter::new(bind_context, self.metadata.clone());
153+
rewriter.visit(&mut rewrite_scalar)?;
154+
151155
if let ScalarExpr::ConstantExpr(..) = rewrite_scalar {
152156
continue;
153157
}

tests/shell_env.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ export AWS_ACCESS_KEY_ID=minioadmin
66
export AWS_SECRET_ACCESS_KEY=minioadmin
77
export AWS_EC2_METADATA_DISABLED=true
88

9-
export MYSQL_DATABASE=${MYSQL_DATABASE:="default"}
9+
export QUERY_DATABASE=${QUERY_DATABASE:="default"}
1010
export QUERY_MYSQL_HANDLER_HOST=${QUERY_MYSQL_HANDLER_HOST:="127.0.0.1"}
1111
export QUERY_MYSQL_HANDLER_PORT=${QUERY_MYSQL_HANDLER_PORT:="3307"}
1212
export QUERY_HTTP_HANDLER_PORT=${QUERY_HTTP_HANDLER_PORT:="8000"}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import duckdb, sys
2+
3+
sf = sys.argv[1]
4+
5+
# Initialize a DuckDB instance
6+
con = duckdb.connect(":memory:")
7+
8+
con.install_extension("tpcds")
9+
con.load_extension("tpcds")
10+
# Execute the commands
11+
con.execute(f"CALL dsdgen(sf={sf})")
12+
con.execute(f"EXPORT DATABASE '/tmp/tpcds_{sf}/' (FORMAT CSV, DELIMITER '|')")
13+
14+
# Close the connection
15+
con.close()
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import duckdb, sys
2+
3+
sf = sys.argv[1]
4+
5+
# Initialize a DuckDB instance
6+
con = duckdb.connect(":memory:")
7+
8+
con.install_extension("tpch")
9+
con.load_extension("tpch")
10+
# Execute the commands
11+
con.execute(f"CALL dbgen(sf={sf})")
12+
con.execute(f"EXPORT DATABASE '/tmp/tpch_{sf}/' (FORMAT CSV, DELIMITER '|')")
13+
14+
# Close the connection
15+
con.close()

tests/sqllogictests/scripts/prepare_spill_data.sh

Lines changed: 0 additions & 4 deletions
This file was deleted.

tests/sqllogictests/scripts/prepare_tpcds_data.sh

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
# shellcheck disable=SC2034
66
target_dir="tests/sqllogictests/"
77

8-
db="tpcds"
8+
db=${1:-"tpcds"}
9+
910

1011
tables=(
1112
call_center
@@ -34,39 +35,30 @@ tables=(
3435
web_site
3536
)
3637

37-
# Clear Data
38-
# shellcheck disable=SC2068
39-
for t in ${tables[@]}; do
40-
echo "DROP TABLE IF EXISTS ${db}.$t" | $BENDSQL_CLIENT_CONNECT
41-
done
4238

43-
echo "CREATE DATABASE IF NOT EXISTS tpcds" | $BENDSQL_CLIENT_CONNECT
39+
force=${2:-"1"}
40+
if [ "$force" == "0" ]; then
41+
res=`echo "SELECT COUNT() from ${db}.call_center" | $BENDSQL_CLIENT_CONNECT`
42+
if [ "$res" != "0" -a "$res" != "" ]; then
43+
echo "Table $db.call_center already exists and is not empty, size: ${res}. Use force=1 to override it."
44+
exit 0
45+
fi
46+
fi
47+
48+
# Recreate the target database. Use ${db} (taken from $1, default "tpcds")
# rather than a hard-coded name so it matches the ${db}.<table> references
# used by the existence check, COPY INTO and ANALYZE statements.
echo "CREATE OR REPLACE DATABASE ${db}" | $BENDSQL_CLIENT_CONNECT
4449

4550
# Create tables
4651
# shellcheck disable=SC2002
4752
cat ${target_dir}/scripts/tpcds.sql | $BENDSQL_CLIENT_CONNECT
48-
49-
# download data
50-
mkdir -p ${target_dir}/data/
51-
if [ ! -d ${target_dir}/data/tpcds.tar.gz ]; then
52-
curl -s -o ${target_dir}/data/tpcds.tar.gz https://ci.databend.com/dataset/stateful/tpcds.tar.gz
53-
fi
54-
55-
tar -zxf ${target_dir}/data/tpcds.tar.gz -C ${target_dir}/data
56-
57-
# insert data to tables
58-
# shellcheck disable=SC2068
53+
python ${target_dir}/scripts/prepare_duckdb_tpcds_data.py 1
5954

6055
stmt "drop stage if exists s1"
61-
stmt "create stage s1 url='fs://${PWD}/${target_dir}/'"
56+
stmt "create stage s1 url='fs:///tmp/tpcds_1/'"
6257

6358
for t in ${tables[@]}; do
6459
echo "$t"
65-
sub_path="data/data/$t.csv"
66-
query "copy into ${db}.${t} from @s1/${sub_path} file_format = (type = CSV skip_header = 0 field_delimiter = '|' record_delimiter = '\n')"
60+
query "copy into ${db}.${t} from @s1/${t}.csv file_format = (type = CSV skip_header = 1 field_delimiter = '|' record_delimiter = '\n')"
6761
query "analyze table $db.$t"
6862
done
6963

70-
if [ -d "tests/sqllogictests/data" ]; then
71-
rm -rf tests/sqllogictests/data
72-
fi
64+
# rm -rf /tmp/tpcds_1

0 commit comments

Comments
 (0)