diff --git a/dev/dev-requirements.txt b/dev/dev-requirements.txt index 4ed964e..2e59c8a 100755 --- a/dev/dev-requirements.txt +++ b/dev/dev-requirements.txt @@ -28,3 +28,4 @@ pytz>=2018.3 pytest~=7.0 duckdb>=0.5.0,<2.0.0 ray~=2.10.0 +polars~=1.15.0 diff --git a/paimon_python_api/table_read.py b/paimon_python_api/table_read.py index f0a7b59..8a4881d 100644 --- a/paimon_python_api/table_read.py +++ b/paimon_python_api/table_read.py @@ -17,6 +17,7 @@ ################################################################################# import pandas as pd +import polars as pl import pyarrow as pa import ray @@ -41,6 +42,10 @@ def to_arrow_batch_reader(self, splits: List[Split]) -> pa.RecordBatchReader: def to_pandas(self, splits: List[Split]) -> pd.DataFrame: """Read data from splits and converted to pandas.DataFrame format.""" + @abstractmethod + def to_polars(self, splits: List[Split]) -> pl.DataFrame: + """Read data from splits and converted to polars.DataFrame format.""" + @abstractmethod def to_duckdb( self, diff --git a/paimon_python_java/pypaimon.py b/paimon_python_java/pypaimon.py index 803540c..fa1fca0 100644 --- a/paimon_python_java/pypaimon.py +++ b/paimon_python_java/pypaimon.py @@ -18,6 +18,7 @@ import duckdb import pandas as pd +import polars as pl import pyarrow as pa import ray @@ -164,6 +165,9 @@ def to_arrow_batch_reader(self, splits): def to_pandas(self, splits: List[Split]) -> pd.DataFrame: return self.to_arrow(splits).to_pandas() + def to_polars(self, splits: List[Split]) -> pl.DataFrame: + return pl.from_arrow(self.to_arrow(splits)) + def to_duckdb( self, splits: List[Split], diff --git a/paimon_python_java/tests/test_write_and_read.py b/paimon_python_java/tests/test_write_and_read.py index e2c631d..7947cf6 100644 --- a/paimon_python_java/tests/test_write_and_read.py +++ b/paimon_python_java/tests/test_write_and_read.py @@ -22,8 +22,10 @@ import unittest import pandas as pd import pyarrow as pa +import polars as pl from py4j.protocol import Py4JJavaError +from polars import testing as pl_testing from paimon_python_api import Schema from paimon_python_java import Catalog from paimon_python_java.java_gateway import get_gateway @@ -297,6 +299,10 @@ def testAllWriteAndReadApi(self): pd.testing.assert_frame_equal( actual.reset_index(drop=True), all_data.reset_index(drop=True)) + # to_polars + pl_df = table_read.to_polars(splits) + pl_testing.assert_frame_equal(pl_df, pl.from_pandas(all_data)) + # to_duckdb duckdb_con = table_read.to_duckdb(splits, 'duckdb_table') # select *