
Commit 1ea138b

Merge branch 'm-kovalsky/onelakeapis'

2 parents 749c5db + 4048a27

File tree

4 files changed: +209 -85 lines changed

    src/sempy_labs/_helper_functions.py
    src/sempy_labs/lakehouse/__init__.py
    src/sempy_labs/lakehouse/_get_lakehouse_tables.py
    src/sempy_labs/lakehouse/_schemas.py

src/sempy_labs/_helper_functions.py

Lines changed: 13 additions & 8 deletions
@@ -2249,12 +2249,12 @@ def get_token(self, *scopes, **kwargs) -> AccessToken:
     elif client == "fabric_sp":
         token = auth.token_provider.get() or FabricDefaultCredential()
         c = fabric.FabricRestClient(credential=token)
-    elif client in ["azure", "graph"]:
+    elif client in ["azure", "graph", "onelake"]:
         pass
     else:
         raise ValueError(f"{icons.red_dot} The '{client}' client is not supported.")

-    if client not in ["azure", "graph"]:
+    if client not in ["azure", "graph", "onelake"]:
         if method == "get":
             response = c.get(request)
         elif method == "delete":
@@ -2268,13 +2268,18 @@ def get_token(self, *scopes, **kwargs) -> AccessToken:
         else:
             raise NotImplementedError
     else:
-        headers = _get_headers(auth.token_provider.get(), audience=client)
-        if client == "graph":
-            url = f"https://graph.microsoft.com/v1.0/{request}"
-        elif client == "azure":
-            url = request
+        if client == "onelake":
+            import notebookutils
+
+            token = notebookutils.credentials.getToken("storage")
+            headers = {"Authorization": f"Bearer {token}"}
+            url = f"https://onelake.table.fabric.microsoft.com/delta/{request}"
         else:
-            raise NotImplementedError
+            headers = _get_headers(auth.token_provider.get(), audience=client)
+            if client == "graph":
+                url = f"https://graph.microsoft.com/v1.0/{request}"
+            elif client == "azure":
+                url = request
     response = requests.request(
         method.upper(),
         url,
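The effect of the new branch is that `_base_api(..., client="onelake")` signs the call with a storage-audience token and targets the OneLake table endpoint instead of the Fabric or Graph hosts. Below is a minimal standalone sketch of that request pattern, assuming a Fabric notebook runtime where `notebookutils` is available; the `request` path mirrors the Unity Catalog route that the new `_schemas.py` module passes, with placeholder IDs.

```python
# Minimal sketch of the request pattern the new "onelake" branch implements.
# Assumes a Fabric notebook runtime (notebookutils available); <workspace_id>
# and <lakehouse_id> are placeholders for real item GUIDs.
import requests
import notebookutils

request = "<workspace_id>/<lakehouse_id>/api/2.1/unity-catalog/schemas?catalog_name=<lakehouse_id>"

# OneLake table APIs are authorized with a storage-audience token rather than
# the Fabric REST token used by the other clients.
token = notebookutils.credentials.getToken("storage")
headers = {"Authorization": f"Bearer {token}"}
url = f"https://onelake.table.fabric.microsoft.com/delta/{request}"

response = requests.request("GET", url, headers=headers)
response.raise_for_status()
print(response.json())
```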

src/sempy_labs/lakehouse/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -34,6 +34,10 @@
 from ._materialized_lake_views import (
     refresh_materialized_lake_views,
 )
+from ._schemas import (
+    list_schemas,
+    schema_exists,
+)

 __all__ = [
     "get_lakehouse_columns",
@@ -56,4 +60,6 @@
     "load_table",
     "refresh_materialized_lake_views",
     "list_lakehouses",
+    "list_schemas",
+    "schema_exists",
 ]
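With these exports in place, both helpers become importable from the `sempy_labs.lakehouse` namespace. A hedged usage sketch; the lakehouse and workspace names are placeholders.

```python
# Usage sketch for the newly exported helpers; "MyLakehouse" and "MyWorkspace"
# are placeholder names, not items that necessarily exist.
from sempy_labs.lakehouse import list_schemas, schema_exists

schemas_df = list_schemas(lakehouse="MyLakehouse", workspace="MyWorkspace")
print(schemas_df)

if schema_exists("dbo", lakehouse="MyLakehouse", workspace="MyWorkspace"):
    print("Schema 'dbo' exists in MyLakehouse.")
```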

src/sempy_labs/lakehouse/_get_lakehouse_tables.py

Lines changed: 5 additions & 77 deletions
@@ -4,11 +4,9 @@
 from datetime import datetime
 from sempy_labs._helper_functions import (
     _get_column_aggregate,
-    resolve_workspace_name_and_id,
     resolve_lakehouse_name_and_id,
     save_as_delta_table,
-    _base_api,
-    _create_dataframe,
+    resolve_workspace_id,
     _read_delta_table,
     _get_delta_table,
     _mount,
@@ -24,6 +22,7 @@
 import sempy_labs._icons as icons
 from sempy._utils._log import log
 from uuid import UUID
+from sempy_labs.lakehouse._schemas import list_tables


 @log
@@ -70,84 +69,14 @@ def get_lakehouse_tables(
     Shows the tables/columns within a lakehouse and their properties.
     """

-    columns = {
-        "Workspace Name": "string",
-        "Lakehouse Name": "string",
-        "Schema Name": "string",
-        "Table Name": "string",
-        "Format": "string",
-        "Type": "string",
-        "Location": "string",
-    }
-    df = _create_dataframe(columns=columns)
-
-    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    workspace_id = resolve_workspace_id(workspace)
     (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
         lakehouse=lakehouse, workspace=workspace_id
     )

-    # Test if valid lakehouse:
-    x = _base_api(f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}")
+    df = list_tables(lakehouse=lakehouse, workspace=workspace)

-    if count_rows:  # Setting countrows defaults to extended=True
-        extended = True
-
-    API_called = True
-    try:
-        responses = _base_api(
-            request=f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables",
-            uses_pagination=True,
-            client="fabric_sp",
-        )
-
-    except Exception:
-        API_called = False
-
-    rows = []
-    local_path = None
-    if API_called:
-        if not responses[0].get("data"):
-            return df
-
-        for r in responses:
-            for i in r.get("data", []):
-                rows.append(
-                    {
-                        "Workspace Name": workspace_name,
-                        "Lakehouse Name": lakehouse_name,
-                        "Schema Name": "",
-                        "Table Name": i.get("name"),
-                        "Format": i.get("format"),
-                        "Type": i.get("type"),
-                        "Location": i.get("location"),
-                    }
-                )
-    else:
-        local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)
-        tables_path = os.path.join(local_path, "Tables")
-        list_schema = os.listdir(tables_path)
-
-        for schema_name in list_schema:
-            schema_table_path = os.path.join(local_path, "Tables", schema_name)
-            list_tables = os.listdir(schema_table_path)
-            for table_name in list_tables:
-                location_path = create_abfss_path(
-                    lakehouse_id, workspace_id, table_name, schema_name
-                )
-                rows.append(
-                    {
-                        "Workspace Name": workspace_name,
-                        "Lakehouse Name": lakehouse_name,
-                        "Schema Name": schema_name,
-                        "Table Name": table_name,
-                        "Format": "delta",
-                        "Type": "Managed",
-                        "Location": location_path,
-                    }
-                )
-
-    if rows:
-        df = pd.DataFrame(rows, columns=list(columns.keys()))
+    local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id)

     if extended:
         sku_value = get_sku_size(workspace_id)
@@ -161,7 +90,6 @@ def get_lakehouse_tables(
         df["Row Count"] = None

         for i, r in df.iterrows():
-            use_schema = True
             schema_name = r["Schema Name"]
             table_name = r["Table Name"]
             if r["Type"] == "Managed" and r["Format"] == "delta":
src/sempy_labs/lakehouse/_schemas.py

Lines changed: 185 additions & 0 deletions
@@ -0,0 +1,185 @@
+from typing import Optional, List
+from uuid import UUID
+from sempy._utils._log import log
+import pandas as pd
+from sempy_labs._helper_functions import (
+    resolve_lakehouse_name_and_id,
+    resolve_workspace_id,
+    resolve_lakehouse_id,
+    _create_dataframe,
+    _base_api,
+    resolve_workspace_name_and_id,
+)
+import sempy_labs._icons as icons
+
+
+@log
+def list_schemas(
+    lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None
+) -> pd.DataFrame:
+    """
+    Lists the schemas within a Fabric lakehouse.
+
+    Parameters
+    ----------
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Shows the schemas within a lakehouse.
+    """
+
+    columns = {
+        "Schema Name": "str",
+    }
+    df = _create_dataframe(columns=columns)
+    workspace_id = resolve_workspace_id(workspace)
+    item_id = resolve_lakehouse_id(lakehouse, workspace)
+    response = _base_api(
+        request=f"{workspace_id}/{item_id}/api/2.1/unity-catalog/schemas?catalog_name={item_id}",
+        client="onelake",
+    )
+
+    rows = []
+    for s in response.json().get("schemas", []):
+        rows.append(
+            {
+                "Schema Name": s.get("name", None),
+            }
+        )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+    return df
+
+
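`list_schemas` issues one Unity Catalog call per lakehouse (with the lakehouse ID doubling as the `catalog_name`) and keeps only each schema's `name`. The sketch below replays that parsing step on a payload shaped the way the code reads it; the example schema names are made up.

```python
# Standalone replay of the parsing step in list_schemas. The payload shape
# (a "schemas" array whose items carry "name") is exactly what the function
# reads above; the schema names here are illustrative only.
import pandas as pd

payload = {"schemas": [{"name": "dbo"}, {"name": "sales"}]}

rows = [{"Schema Name": s.get("name", None)} for s in payload.get("schemas", [])]
df = pd.DataFrame(rows, columns=["Schema Name"])
print(df)
```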
+def list_tables(
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    schema: Optional[str | List[str]] = None,
+) -> pd.DataFrame:
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_lakehouse_name_and_id(lakehouse, workspace)
+
+    response = _base_api(f"/v1/workspaces/{workspace_id}/lakehouses/{item_id}")
+    default_schema = response.json().get("properties", {}).get("defaultSchema", None)
+    schema_enabled = True if default_schema else False
+
+    columns = {
+        "Workspace Name": "str",
+        "Lakehouse Name": "str",
+        "Table Name": "str",
+        "Schema Name": "str",
+        "Format": "str",
+        "Type": "str",
+        "Location": "str",
+    }
+    df = _create_dataframe(columns=columns)
+
+    rows = []
+    if schema_enabled:
+        schemas = list_schemas(lakehouse=lakehouse, workspace=workspace)
+        if schema:
+            if isinstance(schema, str):
+                schema = [schema]
+            schemas = schemas[schemas["Schema Name"].isin(schema)]
+
+        # Loop through schemas
+        for _, r in schemas.iterrows():
+            schema_name = r["Schema Name"]
+            response = _base_api(
+                request=f"{workspace_id}/{item_id}/api/2.1/unity-catalog/tables?catalog_name={item_id}&schema_name={schema_name}",
+                client="onelake",
+            )
+            # Loop through tables
+            for t in response.json().get("tables", []):
+                location = t.get("storage_location", {})
+                location = f'abfss://{location.split(".microsoft.com/")[1]}'
+                rows.append(
+                    {
+                        "Workspace Name": workspace_name,
+                        "Lakehouse Name": item_name,
+                        "Table Name": t.get("name", {}),
+                        "Schema Name": schema_name,
+                        "Format": t.get("data_source_format", {}).capitalize(),
+                        "Type": "Managed",
+                        "Location": location,
+                    }
+                )
+    else:
+        if schema:
+            print(
+                f"{icons.info} The schema parameter has been ignored as the '{item_name}' lakehouse within the '{workspace_name}' workspace has schemas disabled."
+            )
+        responses = _base_api(
+            request=f"v1/workspaces/{workspace_id}/lakehouses/{item_id}/tables",
+            uses_pagination=True,
+            client="fabric_sp",
+        )
+        for r in responses:
+            for i in r.get("data", []):
+                rows.append(
+                    {
+                        "Workspace Name": workspace_name,
+                        "Lakehouse Name": item_name,
+                        "Schema Name": None,
+                        "Table Name": i.get("name"),
+                        "Format": i.get("format"),
+                        "Type": i.get("type"),
+                        "Location": i.get("location"),
+                    }
+                )
+
+    if rows:
+        df = pd.DataFrame(rows, columns=list(columns.keys()))
+
+    return df
+
+
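`list_tables` first checks the lakehouse's `defaultSchema` property to decide between the schema-aware Unity Catalog route and the plain Fabric REST route, and it accepts the `schema` filter as either a single name or a list. A hedged usage sketch with placeholder names:

```python
# Hedged usage sketch for list_tables; item and schema names are placeholders.
# On a schema-enabled lakehouse the filter narrows the Unity Catalog calls;
# on a schema-disabled one it is ignored with an informational message.
from sempy_labs.lakehouse._schemas import list_tables

# All tables across every schema.
all_tables = list_tables(lakehouse="MyLakehouse", workspace="MyWorkspace")

# Only the "dbo" and "sales" schemas; a single string such as "dbo" also works.
filtered = list_tables(
    lakehouse="MyLakehouse",
    workspace="MyWorkspace",
    schema=["dbo", "sales"],
)
print(filtered[["Schema Name", "Table Name", "Location"]])
```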
+def schema_exists(
+    schema: str,
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+) -> bool:
+    """
+    Indicates whether the specified schema exists within a Fabric lakehouse.
+
+    Parameters
+    ----------
+    schema : str
+        The name of the schema.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The Fabric workspace name or ID used by the lakehouse.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+
+    Returns
+    -------
+    bool
+        Indicates whether the specified schema exists within the lakehouse.
+    """
+
+    df = list_schemas(lakehouse=lakehouse, workspace=workspace)
+    return schema in df["Schema Name"].values
+
+    # (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    # (item_name, item_id) = resolve_lakehouse_name_and_id(lakehouse, workspace)
+    # response = _base_api(
+    #     request=f"{workspace_id}/{item_id}/api/2.1/unity-catalog/schemas/{schema}",
+    #     client="onelake",
+    #     method="head",
+    # )
+
+    # response.json()
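`schema_exists` is a thin membership test over `list_schemas`; the commented-out block preserves an alternative that would probe the Unity Catalog schema endpoint directly with a HEAD request. A hedged usage sketch with placeholder names:

```python
# Hedged usage sketch for schema_exists; names are placeholders.
from sempy_labs.lakehouse import schema_exists

if not schema_exists("staging", lakehouse="MyLakehouse", workspace="MyWorkspace"):
    print("Schema 'staging' not found.")
```

Listing and testing membership costs one GET and sidesteps mapping HTTP status codes to a boolean, which the commented-out HEAD variant would have to do.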
