33different database connectors.
44"""
55
6- import re
76
87import pandas as pd
9- import sqlglot
108from dagster import get_dagster_logger
119
1210from anomstack .df .utils import log_df_info
13- from anomstack .external .duckdb .duckdb import read_sql_duckdb
11+ from anomstack .external .duckdb .duckdb import read_sql_duckdb , run_sql_duckdb
1412from anomstack .external .gcp .bigquery import read_sql_bigquery
1513from anomstack .external .snowflake .snowflake import read_sql_snowflake
16- from anomstack .external .sqlite .sqlite import read_sql_sqlite
14+ from anomstack .external .sqlite .sqlite import read_sql_sqlite , run_sql_sqlite
15+ from anomstack .sql .translate import db_translate
1716
1817pd .options .display .max_columns = 10
1918
2019
def db_translate(sql: str, db: str) -> str:
    """
    Transpile a SQL query to a target dialect and apply db-specific fix-ups.

    The query is first passed through ``sqlglot.transpile`` with ``db`` as the
    dialect to write. For "sqlite" and "bigquery", a few literal replacements
    are then applied to the generated text; for any other ``db`` the
    transpiled SQL is returned unchanged.

    Args:
        sql (str): The SQL query to be translated.
        db (str): The name of the database to which the query will be sent.

    Returns:
        str: The translated SQL query.
    """
    # NOTE(review): only the first statement returned by sqlglot is kept, so
    # a multi-statement input would lose everything after the first one.
    sql = sqlglot.transpile(sql, write=db, identify=True, pretty=True)[0]

    # Replace some functions with their db-specific equivalents.
    if db == "sqlite":
        sql = sql.replace("GET_CURRENT_TIMESTAMP()", "DATETIME('now')")
    elif db == "bigquery":
        sql = sql.replace("GET_CURRENT_TIMESTAMP()", "CURRENT_TIMESTAMP()")
        # Rewrite DATE('now', '<n> day') into a BigQuery DATE_ADD call. The
        # replacement is a raw string so that \1 is a valid reference to the
        # captured (possibly negative) day offset.
        sql = re.sub(
            r"DATE\('now', '(-?\d+) day'\)",
            r"DATE_ADD(CURRENT_DATE(), INTERVAL \1 DAY)",
            sql,
        )

    return sql
46-
47-
48- def read_sql (sql : str , db : str ) -> pd .DataFrame :
20+ def read_sql (sql : str , db : str , returns_df : bool = True ) -> pd .DataFrame :
4921 """
5022 Read data from SQL.
5123
5224 Args:
5325 sql (str): SQL query to execute.
5426 db (str): Name of the database to connect to.
27+ returns_df (bool, optional): Whether the query expects a DataFrame as a result.
5528
5629 Returns:
5730 pd.DataFrame: A pandas DataFrame containing the results of the SQL query.
@@ -64,13 +37,31 @@ def read_sql(sql: str, db: str) -> pd.DataFrame:
6437 logger .debug (f"-- read_sql() is about to read this qry:\n { sql } " )
6538
6639 if db == "bigquery" :
67- df = read_sql_bigquery (sql )
40+ if returns_df :
41+ df = read_sql_bigquery (sql )
42+ elif not returns_df :
43+ raise NotImplementedError (
44+ "BigQuery not yet implemented for non-returns_df queries."
45+ )
6846 elif db == "snowflake" :
69- df = read_sql_snowflake (sql )
47+ if returns_df :
48+ df = read_sql_snowflake (sql )
49+ elif not returns_df :
50+ raise NotImplementedError (
51+ "Snowflake not yet implemented for non-returns_df queries."
52+ )
7053 elif db == "duckdb" :
71- df = read_sql_duckdb (sql )
54+ if returns_df :
55+ df = read_sql_duckdb (sql )
56+ elif not returns_df :
57+ run_sql_duckdb (sql )
58+ df = pd .DataFrame ()
7259 elif db == "sqlite" :
73- df = read_sql_sqlite (sql )
60+ if returns_df :
61+ df = read_sql_sqlite (sql )
62+ elif not returns_df :
63+ run_sql_sqlite (sql )
64+ df = pd .DataFrame ()
7465 else :
7566 raise ValueError (f"Unknown db: { db } " )
7667
0 commit comments