Skip to content

Commit 4048a27

Browse files
committed
updated get_lakehouse_tables to use list_tables
1 parent 18c196e commit 4048a27

File tree

3 files changed

+52
-78
lines changed

3 files changed

+52
-78
lines changed

src/sempy_labs/lakehouse/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
from ._materialized_lake_views import (
3535
refresh_materialized_lake_views,
3636
)
37+
from ._schemas import (
38+
list_schemas,
39+
schema_exists,
40+
)
3741

3842
__all__ = [
3943
"get_lakehouse_columns",
@@ -56,4 +60,6 @@
5660
"load_table",
5761
"refresh_materialized_lake_views",
5862
"list_lakehouses",
63+
"list_schemas",
64+
"schema_exists",
5965
]

src/sempy_labs/lakehouse/_get_lakehouse_tables.py

Lines changed: 5 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,9 @@
44
from datetime import datetime
55
from sempy_labs._helper_functions import (
66
_get_column_aggregate,
7-
resolve_workspace_name_and_id,
87
resolve_lakehouse_name_and_id,
98
save_as_delta_table,
10-
_base_api,
11-
_create_dataframe,
9+
resolve_workspace_id,
1210
_read_delta_table,
1311
_get_delta_table,
1412
_mount,
@@ -24,6 +22,7 @@
2422
import sempy_labs._icons as icons
2523
from sempy._utils._log import log
2624
from uuid import UUID
25+
from sempy_labs.lakehouse._schemas import list_tables
2726

2827

2928
@log
@@ -70,84 +69,14 @@ def get_lakehouse_tables(
7069
Shows the tables/columns within a lakehouse and their properties.
7170
"""
7271

73-
columns = {
74-
"Workspace Name": "string",
75-
"Lakehouse Name": "string",
76-
"Schema Name": "string",
77-
"Table Name": "string",
78-
"Format": "string",
79-
"Type": "string",
80-
"Location": "string",
81-
}
82-
df = _create_dataframe(columns=columns)
83-
84-
(workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
72+
workspace_id = resolve_workspace_id(workspace)
8573
(lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
8674
lakehouse=lakehouse, workspace=workspace_id
8775
)
8876

89-
# Test if valid lakehouse:
90-
x = _base_api(f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}")
77+
df = list_tables(lakehouse=lakehouse, workspace=workspace)
9178

92-
if count_rows: # Setting countrows defaults to extended=True
93-
extended = True
94-
95-
API_called = True
96-
try:
97-
responses = _base_api(
98-
request=f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables",
99-
uses_pagination=True,
100-
client="fabric_sp",
101-
)
102-
103-
except Exception:
104-
API_called = False
105-
106-
rows = []
107-
local_path = None
108-
if API_called:
109-
if not responses[0].get("data"):
110-
return df
111-
112-
for r in responses:
113-
for i in r.get("data", []):
114-
rows.append(
115-
{
116-
"Workspace Name": workspace_name,
117-
"Lakehouse Name": lakehouse_name,
118-
"Schema Name": "",
119-
"Table Name": i.get("name"),
120-
"Format": i.get("format"),
121-
"Type": i.get("type"),
122-
"Location": i.get("location"),
123-
}
124-
)
125-
else:
126-
local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
127-
tables_path = os.path.join(local_path, "Tables")
128-
list_schema = os.listdir(tables_path)
129-
130-
for schema_name in list_schema:
131-
schema_table_path = os.path.join(local_path, "Tables", schema_name)
132-
list_tables = os.listdir(schema_table_path)
133-
for table_name in list_tables:
134-
location_path = create_abfss_path(
135-
lakehouse_id, workspace_id, table_name, schema_name
136-
)
137-
rows.append(
138-
{
139-
"Workspace Name": workspace_name,
140-
"Lakehouse Name": lakehouse_name,
141-
"Schema Name": schema_name,
142-
"Table Name": table_name,
143-
"Format": "delta",
144-
"Type": "Managed",
145-
"Location": location_path,
146-
}
147-
)
148-
149-
if rows:
150-
df = pd.DataFrame(rows, columns=list(columns.keys()))
79+
local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
15180

15281
if extended:
15382
sku_value = get_sku_size(workspace_id)
@@ -161,7 +90,6 @@ def get_lakehouse_tables(
16190
df["Row Count"] = None
16291

16392
for i, r in df.iterrows():
164-
use_schema = True
16593
schema_name = r["Schema Name"]
16694
table_name = r["Table Name"]
16795
if r["Type"] == "Managed" and r["Format"] == "delta":

src/sempy_labs/lakehouse/_schemas.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,24 @@
1717
def list_schemas(
1818
lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
1919
) -> pd.DataFrame:
20+
"""
21+
Lists the schemas within a Fabric lakehouse.
22+
23+
Parameters
24+
----------
25+
lakehouse : str | uuid.UUID, default=None
26+
The Fabric lakehouse name or ID.
27+
Defaults to None which resolves to the lakehouse attached to the notebook.
28+
workspace : str | uuid.UUID, default=None
29+
The Fabric workspace name or ID used by the lakehouse.
30+
Defaults to None which resolves to the workspace of the attached lakehouse
31+
or, if no lakehouse is attached, resolves to the workspace of the notebook.
32+
33+
Returns
34+
-------
35+
pandas.DataFrame
36+
Shows the schemas within a lakehouse.
37+
"""
2038

2139
columns = {
2240
"Schema Name": "str",
@@ -84,6 +102,8 @@ def list_tables(
84102
)
85103
# Loop through tables
86104
for t in response.json().get("tables", []):
105+
location = t.get("storage_location", {})
106+
location = f'abfss://{location.split(".microsoft.com/")[1]}'
87107
rows.append(
88108
{
89109
"Workspace Name": workspace_name,
@@ -92,7 +112,7 @@ def list_tables(
92112
"Schema Name": schema_name,
93113
"Format": t.get("data_source_format", {}).capitalize(),
94114
"Type": "Managed",
95-
"Location": t.get("storage_location", {}),
115+
"Location": location,
96116
}
97117
)
98118
else:
@@ -130,6 +150,26 @@ def schema_exists(
130150
lakehouse: Optional[str | UUID] = None,
131151
workspace: Optional[str | UUID] = None,
132152
) -> bool:
153+
"""
154+
Indicates whether the specified schema exists within a Fabric lakehouse.
155+
156+
Parameters
157+
----------
158+
schema : str
159+
The name of the schema.
160+
lakehouse : str | uuid.UUID, default=None
161+
The Fabric lakehouse name or ID.
162+
Defaults to None which resolves to the lakehouse attached to the notebook.
163+
workspace : str | uuid.UUID, default=None
164+
The Fabric workspace name or ID used by the lakehouse.
165+
Defaults to None which resolves to the workspace of the attached lakehouse
166+
or, if no lakehouse is attached, resolves to the workspace of the notebook.
167+
168+
Returns
169+
-------
170+
bool
171+
Indicates whether the specified schema exists within the lakehouse.
172+
"""
133173

134174
df = list_schemas(lakehouse=lakehouse, workspace=workspace)
135175
return schema in df["Schema Name"].values

0 commit comments

Comments
 (0)