Skip to content

Commit 47fe873

Browse files
committed
GH-48254: [Python][Parquet] Support extension types in read_schema
1 parent 8ee7aeb commit 47fe873

File tree

1 file changed

+15
-4
lines changed

1 file changed

+15
-4
lines changed

python/pyarrow/parquet/core.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ class ParquetFile:
259259
If nothing passed, will be inferred based on path.
260260
Path will try to be found in the local on-disk filesystem otherwise
261261
it will be parsed as a URI to determine the filesystem.
262+
arrow_extensions_enabled : bool, default True
263+
If True, read Parquet logical types as Arrow extension types where
264+
possible.
262265
page_checksum_verification : bool, default False
263266
If True, verify the checksum for each page read from the file.
264267
arrow_extensions_enabled : bool, default True
@@ -2347,6 +2350,10 @@ def read_metadata(where, memory_map=False, decryption_properties=None,
23472350
If nothing passed, will be inferred based on path.
23482351
Path will try to be found in the local on-disk filesystem otherwise
23492352
it will be parsed as a URI to determine the filesystem.
2353+
arrow_extensions_enabled : bool, default True
2354+
If True, read Parquet logical types as Arrow extension types where
2355+
possible (e.g. UUID as the canonical `arrow.uuid` extension type).
2356+
If False, use the underlying storage types instead.
23502357
23512358
Returns
23522359
-------
@@ -2382,7 +2389,7 @@ def read_metadata(where, memory_map=False, decryption_properties=None,
23822389

23832390

23842391
def read_schema(where, memory_map=False, decryption_properties=None,
2385-
filesystem=None):
2392+
filesystem=None, arrow_extensions_enabled=True):
23862393
"""
23872394
Read effective Arrow schema from Parquet file metadata.
23882395
@@ -2422,11 +2429,15 @@ def read_schema(where, memory_map=False, decryption_properties=None,
24222429

24232430
with file_ctx:
24242431
file = ParquetFile(
2425-
where, memory_map=memory_map,
2426-
decryption_properties=decryption_properties)
2432+
where,
2433+
memory_map=memory_map,
2434+
decryption_properties=decryption_properties,
2435+
arrow_extensions_enabled=arrow_extensions_enabled,
2436+
)
2437+
if arrow_extensions_enabled:
2438+
return file.schema_arrow
24272439
return file.schema.to_arrow_schema()
24282440

2429-
24302441
__all__ = (
24312442
"ColumnChunkMetaData",
24322443
"ColumnSchema",

0 commit comments

Comments
 (0)