Skip to content

Commit 319c98e

Browse files
authored
Merge pull request #135 from andrewm4894/add-clickhouse
Add clickhouse
2 parents 0b2c173 + de93bc4 commit 319c98e

File tree

8 files changed

+151
-1
lines changed

8 files changed

+151
-1
lines changed

.example.env

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,10 @@ ANOMSTACK_MOTHERDUCK_TOKEN=
111111
# turso related env vars
112112
ANOMSTACK_TURSO_DATABASE_URL=
113113
ANOMSTACK_TURSO_AUTH_TOKEN=
114+
115+
# clickhouse related env vars
116+
ANOMSTACK_CLICKHOUSE_HOST=localhost
117+
ANOMSTACK_CLICKHOUSE_PORT=8123
118+
ANOMSTACK_CLICKHOUSE_USER=anomstack
119+
ANOMSTACK_CLICKHOUSE_PASSWORD=anomstack
120+
ANOMSTACK_CLICKHOUSE_DATABASE=default

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ Supported sources and databases for your metrics to live in and be queried from:
4747
<th align="center"><a href="./anomstack/fn/run.py" target="_blank">Python</a></th>
4848
<th align="center"><a href="./anomstack/external/gcp/bigquery.py" target="_blank">BigQuery</a></th>
4949
<th align="center"><a href="./anomstack/external/snowflake/snowflake.py" target="_blank">Snowflake</a></th>
50+
<th align="center"><a href="./anomstack/external/clickhouse/clickhouse.py" target="_blank">ClickHouse</a></th>
5051
<th align="center"><a href="./anomstack/external/duckdb/duckdb.py" target="_blank">DuckDB</a></th>
5152
<th align="center"><a href="./anomstack/external/sqlite/sqlite.py" target="_blank">SQLite</a></th>
5253
<th align="center"><a href="./anomstack/external/duckdb/duckdb.py" target="_blank">MotherDuck</a></th>
@@ -63,6 +64,7 @@ Supported sources and databases for your metrics to live in and be queried from:
6364
<td align="center">✅</td>
6465
<td align="center">✅</td>
6566
<td align="center">✅</td>
67+
<td align="center">✅</td>
6668
<td align="center">🚧</td>
6769
</tr>
6870
</tbody>

anomstack/df/save.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from anomstack.external.gcp.bigquery import save_df_bigquery
99
from anomstack.external.snowflake.snowflake import save_df_snowflake
1010
from anomstack.external.sqlite.sqlite import save_df_sqlite
11-
11+
from anomstack.external.clickhouse.clickhouse import save_df_clickhouse
1212

1313
def save_df(
1414
df: pd.DataFrame, db: str, table_key: str, if_exists: str = "append"
@@ -35,6 +35,8 @@ def save_df(
3535
df = save_df_duckdb(df, table_key)
3636
elif db == "sqlite":
3737
df = save_df_sqlite(df, table_key)
38+
elif db == "clickhouse":
39+
df = save_df_clickhouse(df, table_key)
3840
else:
3941
raise ValueError(f"Unknown db: {db}")
4042

anomstack/external/clickhouse/clickhouse.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
"""
2+
Some helper functions for ClickHouse using clickhouse-connect.
3+
"""
4+
5+
import os
6+
import pandas as pd
7+
from dagster import get_dagster_logger
8+
from clickhouse_connect import get_client
9+
10+
11+
def map_dtype(dtype) -> str:
    """
    Map a Pandas dtype to a ClickHouse data type.

    Args:
        dtype: A pandas/numpy dtype (e.g. an entry of ``df.dtypes``).

    Returns:
        str: The ClickHouse column type name used when creating a table.
            Anything that is not integer, float, bool or datetime falls
            back to ``"String"``.
    """
    # Unsigned must be tested before the generic integer check: a signed
    # Int64 column cannot hold uint64 values above 2**63 - 1.
    if pd.api.types.is_unsigned_integer_dtype(dtype):
        return "UInt64"
    if pd.api.types.is_integer_dtype(dtype):
        return "Int64"
    if pd.api.types.is_float_dtype(dtype):
        return "Float64"
    if pd.api.types.is_bool_dtype(dtype):
        return "UInt8"
    if pd.api.types.is_datetime64_any_dtype(dtype):
        return "DateTime"
    return "String"
25+
26+
27+
def get_clickhouse_client():
    """
    Create a ClickHouse client using environment variables for connection parameters.

    Reads ``ANOMSTACK_CLICKHOUSE_HOST``, ``ANOMSTACK_CLICKHOUSE_PORT``,
    ``ANOMSTACK_CLICKHOUSE_USER``, ``ANOMSTACK_CLICKHOUSE_PASSWORD`` and
    ``ANOMSTACK_CLICKHOUSE_DATABASE``, falling back to ``localhost``,
    ``8123``, ``anomstack``, ``anomstack`` and ``default`` respectively.

    Returns:
        The client object returned by ``clickhouse_connect.get_client``.

    Raises:
        ValueError: If ``ANOMSTACK_CLICKHOUSE_PORT`` is set to a non-blank
            value that is not an integer.
    """
    logger = get_dagster_logger()

    host = os.environ.get("ANOMSTACK_CLICKHOUSE_HOST", "localhost")
    # A variable that is present but blank (e.g. `ANOMSTACK_CLICKHOUSE_PORT=`
    # in a .env file) would make int("") raise, so treat blank as unset.
    raw_port = os.environ.get("ANOMSTACK_CLICKHOUSE_PORT", "").strip()
    port = int(raw_port) if raw_port else 8123
    user = os.environ.get("ANOMSTACK_CLICKHOUSE_USER", "anomstack")
    # The password is passed through untouched: a blank value is not
    # replaced by the default.
    password = os.environ.get("ANOMSTACK_CLICKHOUSE_PASSWORD", "anomstack")
    database = os.environ.get("ANOMSTACK_CLICKHOUSE_DATABASE", "default")

    # Credentials are deliberately left out of the log line.
    logger.info(f"ClickHouse connection: {host}:{port}/{database}")

    return get_client(
        host=host,
        port=port,
        username=user,
        password=password,
        database=database,
    )
51+
52+
53+
def read_sql_clickhouse(sql: str) -> pd.DataFrame:
    """
    Read data from ClickHouse using an SQL query.

    Args:
        sql (str): The SQL query to execute.

    Returns:
        pd.DataFrame: The result of the SQL query as a pandas DataFrame.
    """
    client = get_clickhouse_client()
    try:
        result = client.query(sql)
        # Build the DataFrame before the client is closed so the rows are
        # fully read while the connection is still open.
        return pd.DataFrame(result.result_set, columns=result.column_names)
    finally:
        # A new client is created on every call; release it so repeated
        # reads do not accumulate open connections.
        client.close()
67+
68+
69+
def save_df_clickhouse(df: pd.DataFrame, table_key: str) -> pd.DataFrame:
    """
    Save a Pandas DataFrame to ClickHouse.

    The rows are inserted into ``table_key``. If the first insert fails, the
    table is assumed to be missing: it is created from the DataFrame schema
    (via ``map_dtype``) and the insert is retried once. An error from the
    retry propagates to the caller.

    Args:
        df (pd.DataFrame): The DataFrame to save.
        table_key (str): The table name to save the DataFrame as.

    Returns:
        pd.DataFrame: The input DataFrame.
    """
    client = get_clickhouse_client()
    # Cast to object before extracting rows. A plain `df.values` coerces all
    # columns to one common dtype, so integer columns next to float columns
    # become floats and an all-datetime frame becomes raw integers - neither
    # matches the column types produced by `map_dtype`.
    data = df.astype(object).values.tolist()
    columns = list(df.columns)

    try:
        try:
            client.insert(table=table_key, data=data, column_names=columns)
        except Exception as e:
            logger = get_dagster_logger()
            logger.info(
                f"Table {table_key} may not exist. Attempting to create table. Error: {e}"
            )
            # Construct a CREATE TABLE statement based on the DataFrame schema.
            # Use backticks around column names in case of reserved words.
            columns_str = ", ".join(
                f"`{col}` {map_dtype(dtype)}" for col, dtype in df.dtypes.items()
            )
            create_sql = (
                f"CREATE TABLE IF NOT EXISTS {table_key} ({columns_str}) "
                "ENGINE = MergeTree() ORDER BY tuple()"
            )
            client.command(create_sql)
            # Insert the data after creating the table. If the original
            # failure had another cause, it resurfaces here.
            client.insert(table=table_key, data=data, column_names=columns)
    finally:
        # A new client is created on every call; always release it.
        client.close()

    return df
108+
109+
110+
def run_sql_clickhouse(sql: str) -> None:
    """
    Execute a non-returning SQL statement in ClickHouse.

    Args:
        sql (str): The SQL statement to execute.

    Returns:
        None

    Raises:
        Exception: Any error raised while executing the statement is logged
            and re-raised unchanged.
    """
    client = get_clickhouse_client()
    try:
        client.command(sql)
    except Exception as e:
        logger = get_dagster_logger()
        logger.error(f"Error executing SQL statement in ClickHouse: {e}")
        raise
    finally:
        # A new client is created on every call; release it on both the
        # success and the failure path.
        client.close()

anomstack/sql/read.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
from anomstack.external.gcp.bigquery import read_sql_bigquery
1313
from anomstack.external.snowflake.snowflake import read_sql_snowflake
1414
from anomstack.external.sqlite.sqlite import read_sql_sqlite, run_sql_sqlite
15+
from anomstack.external.clickhouse.clickhouse import (
16+
read_sql_clickhouse,
17+
run_sql_clickhouse,
18+
)
1519
from anomstack.sql.translate import db_translate
1620

1721
pd.options.display.max_columns = 10
@@ -62,6 +66,12 @@ def read_sql(sql: str, db: str, returns_df: bool = True) -> pd.DataFrame:
6266
elif not returns_df:
6367
run_sql_sqlite(sql)
6468
df = pd.DataFrame()
69+
elif db == "clickhouse":
70+
if returns_df:
71+
df = read_sql_clickhouse(sql)
72+
elif not returns_df:
73+
run_sql_clickhouse(sql)
74+
df = pd.DataFrame()
6575
else:
6676
raise ValueError(f"Unknown db: {db}")
6777

metrics/examples/python/python_ingest_simple/python_ingest_simple.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
db: "clickhouse"
12
metric_batch: "python_ingest_simple"
23
alert_methods: "email,slack"
34
ingest_cron_schedule: "*/2 * * * *"

requirements.compile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
anomaly-agent
22
boto3
3+
clickhouse-connect
34
dagit
45
dagster
56
dagster-docker

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
anomaly-agent
22
boto3
3+
clickhouse-connect
34
dagit
45
dagster
56
dagster-docker

0 commit comments

Comments
 (0)