Skip to content

Commit ce973e5

Browse files
authored
Merge pull request #733 from sfu-db/memleak
fix memory leak
2 parents a72a83c + 0f98a95 commit ce973e5

File tree

10 files changed

+88
-101
lines changed

10 files changed

+88
-101
lines changed

connectorx-python/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,14 @@ license = "MIT"
1818
maintainers = ["Weiyuan Wu <[email protected]>"]
1919
name = "connectorx"
2020
readme = "README.md" # Markdown files are supported
21+
version = "0.4.1-alpha1"
2122

2223
[project]
2324
name = "connectorx" # Target file name of maturin build
2425
readme = "README.md"
2526
license = { text = "MIT" }
2627
requires-python = ">=3.10"
27-
version = "0.4.1-alpha1"
28+
dynamic = ["version"]
2829

2930
[tool.poetry.dependencies]
3031
dask = {version = "^2021", optional = true, extras = ["dataframe"]}

connectorx-python/src/pandas/destination.rs

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
use super::{
22
pandas_columns::{
3-
ArrayBlock, BooleanBlock, BytesBlock, DateTimeBlock, Float64Block, HasPandasColumn,
4-
Int64Block, PandasColumn, PandasColumnObject, PyBytes, StringBlock,
3+
ArrayBlock, BooleanBlock, BytesBlock, DateTimeBlock, ExtractBlockFromBound, Float64Block,
4+
HasPandasColumn, Int64Block, PandasColumn, PandasColumnObject, PyBytes, StringBlock,
55
},
66
pystring::PyString,
77
typesystem::{PandasArrayType, PandasBlockType, PandasTypeSystem},
@@ -215,7 +215,7 @@ impl<'py> Destination for PandasDestination<'py> {
215215
let buf = &self.block_datas[idx];
216216
match block.dt {
217217
PandasBlockType::Boolean(_) => {
218-
let bblock = buf.extract::<BooleanBlock>()?;
218+
let bblock = BooleanBlock::extract_block(buf)?;
219219

220220
let bcols = bblock.split()?;
221221
for (&cid, bcol) in block.cids.iter().zip_eq(bcols) {
@@ -227,7 +227,7 @@ impl<'py> Destination for PandasDestination<'py> {
227227
}
228228
}
229229
PandasBlockType::Float64 => {
230-
let fblock = buf.extract::<Float64Block>()?;
230+
let fblock = Float64Block::extract_block(buf)?;
231231
let fcols = fblock.split()?;
232232
for (&cid, fcol) in block.cids.iter().zip_eq(fcols) {
233233
partitioned_columns[cid] = fcol
@@ -238,7 +238,7 @@ impl<'py> Destination for PandasDestination<'py> {
238238
}
239239
}
240240
PandasBlockType::BooleanArray => {
241-
let bblock = buf.extract::<ArrayBlock<bool>>()?;
241+
let bblock = ArrayBlock::<bool>::extract_block(buf)?;
242242
let bcols = bblock.split()?;
243243
for (&cid, bcol) in block.cids.iter().zip_eq(bcols) {
244244
partitioned_columns[cid] = bcol
@@ -249,7 +249,7 @@ impl<'py> Destination for PandasDestination<'py> {
249249
}
250250
}
251251
PandasBlockType::Float64Array => {
252-
let fblock = buf.extract::<ArrayBlock<f64>>()?;
252+
let fblock = ArrayBlock::<f64>::extract_block(buf)?;
253253
let fcols = fblock.split()?;
254254
for (&cid, fcol) in block.cids.iter().zip_eq(fcols) {
255255
partitioned_columns[cid] = fcol
@@ -260,7 +260,7 @@ impl<'py> Destination for PandasDestination<'py> {
260260
}
261261
}
262262
PandasBlockType::Int64Array => {
263-
let fblock = buf.extract::<ArrayBlock<i64>>()?;
263+
let fblock = ArrayBlock::<i64>::extract_block(buf)?;
264264
let fcols = fblock.split()?;
265265
for (&cid, fcol) in block.cids.iter().zip_eq(fcols) {
266266
partitioned_columns[cid] = fcol
@@ -271,7 +271,7 @@ impl<'py> Destination for PandasDestination<'py> {
271271
}
272272
}
273273
PandasBlockType::Int64(_) => {
274-
let ublock = buf.extract::<Int64Block>()?;
274+
let ublock = Int64Block::extract_block(buf)?;
275275
let ucols = ublock.split()?;
276276
for (&cid, ucol) in block.cids.iter().zip_eq(ucols) {
277277
partitioned_columns[cid] = ucol
@@ -282,7 +282,7 @@ impl<'py> Destination for PandasDestination<'py> {
282282
}
283283
}
284284
PandasBlockType::String => {
285-
let sblock = buf.extract::<StringBlock>()?;
285+
let sblock = StringBlock::extract_block(buf)?;
286286
let scols = sblock.split()?;
287287
for (&cid, scol) in block.cids.iter().zip_eq(scols) {
288288
partitioned_columns[cid] = scol
@@ -293,7 +293,7 @@ impl<'py> Destination for PandasDestination<'py> {
293293
}
294294
}
295295
PandasBlockType::Bytes => {
296-
let bblock = buf.extract::<BytesBlock>()?;
296+
let bblock = BytesBlock::extract_block(buf)?;
297297
let bcols = bblock.split()?;
298298
for (&cid, bcol) in block.cids.iter().zip_eq(bcols) {
299299
partitioned_columns[cid] = bcol
@@ -304,7 +304,7 @@ impl<'py> Destination for PandasDestination<'py> {
304304
}
305305
}
306306
PandasBlockType::DateTime => {
307-
let dblock = buf.extract::<DateTimeBlock>()?;
307+
let dblock = DateTimeBlock::extract_block(buf)?;
308308
let dcols = dblock.split()?;
309309
for (&cid, dcol) in block.cids.iter().zip_eq(dcols) {
310310
partitioned_columns[cid] = dcol

connectorx-python/src/pandas/pandas_columns/array.rs

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
1-
use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject, GIL_MUTEX};
1+
use super::{
2+
check_dtype, ExtractBlockFromBound, HasPandasColumn, PandasColumn, PandasColumnObject,
3+
GIL_MUTEX,
4+
};
25
use crate::errors::ConnectorXPythonError;
36
use anyhow::anyhow;
47
use fehler::throws;
58
use ndarray::{ArrayViewMut2, Axis, Ix2};
6-
use numpy::{Element, PyArray, PyArrayDescr};
7-
use pyo3::{Bound, FromPyObject, Py, PyAny, PyResult, Python, ToPyObject};
9+
use numpy::{Element, PyArray, PyArrayDescr, PyArrayMethods};
10+
use pyo3::{types::PyAnyMethods, Bound, Py, PyAny, PyResult, Python, ToPyObject};
811
use std::any::TypeId;
912
use std::marker::PhantomData;
1013

@@ -30,8 +33,8 @@ pub struct ArrayBlock<'a, V> {
3033
_value_type: PhantomData<V>,
3134
}
3235

33-
impl<'a, V> FromPyObject<'a> for ArrayBlock<'a, V> {
34-
fn extract(ob: &'a PyAny) -> PyResult<Self> {
36+
impl<'a, V> ExtractBlockFromBound<'a> for ArrayBlock<'a, V> {
37+
fn extract_block<'b: 'a>(ob: &'b pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
3538
check_dtype(ob, "object")?;
3639
let array = ob.downcast::<PyArray<PyList, Ix2>>()?;
3740
let data = unsafe { array.as_array_mut() };
@@ -41,10 +44,6 @@ impl<'a, V> FromPyObject<'a> for ArrayBlock<'a, V> {
4144
_value_type: PhantomData,
4245
})
4346
}
44-
45-
fn extract_bound(ob: &pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
46-
Self::extract(ob.clone().into_gil_ref())
47-
}
4847
}
4948

5049
impl<'a, V> ArrayBlock<'a, V> {

connectorx-python/src/pandas/pandas_columns/boolean.rs

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,25 @@
1-
use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject};
1+
use super::{
2+
check_dtype, ExtractBlockFromBound, HasPandasColumn, PandasColumn, PandasColumnObject,
3+
};
24
use crate::errors::ConnectorXPythonError;
35
use anyhow::anyhow;
46
use fehler::throws;
57
use ndarray::{ArrayViewMut1, ArrayViewMut2, Axis, Ix2};
6-
use numpy::{PyArray, PyArray1};
7-
use pyo3::{types::PyTuple, FromPyObject, PyAny, PyResult};
8+
use numpy::{PyArray, PyArray1, PyArrayMethods};
9+
use pyo3::{
10+
types::{PyAnyMethods, PyTuple, PyTupleMethods},
11+
PyAny, PyResult,
12+
};
813
use std::any::TypeId;
914

1015
// Boolean
1116
pub enum BooleanBlock<'a> {
1217
NumPy(ArrayViewMut2<'a, bool>),
1318
Extention(ArrayViewMut1<'a, bool>, ArrayViewMut1<'a, bool>),
1419
}
15-
impl<'a> FromPyObject<'a> for BooleanBlock<'a> {
16-
fn extract(ob: &'a PyAny) -> PyResult<Self> {
20+
21+
impl<'a> ExtractBlockFromBound<'a> for BooleanBlock<'a> {
22+
fn extract_block<'b: 'a>(ob: &'b pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
1723
if let Ok(array) = ob.downcast::<PyArray<bool, Ix2>>() {
1824
// if numpy array
1925
check_dtype(ob, "bool")?;
@@ -22,8 +28,8 @@ impl<'a> FromPyObject<'a> for BooleanBlock<'a> {
2228
} else {
2329
// if extension array
2430
let tuple = ob.downcast::<PyTuple>()?;
25-
let data = tuple.get_item(0)?;
26-
let mask = tuple.get_item(1)?;
31+
let data = tuple.as_slice().get(0).unwrap();
32+
let mask = tuple.as_slice().get(1).unwrap();
2733
check_dtype(data, "bool")?;
2834
check_dtype(mask, "bool")?;
2935

@@ -33,10 +39,6 @@ impl<'a> FromPyObject<'a> for BooleanBlock<'a> {
3339
))
3440
}
3541
}
36-
37-
fn extract_bound(ob: &pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
38-
Self::extract(ob.clone().into_gil_ref())
39-
}
4042
}
4143

4244
impl<'a> BooleanBlock<'a> {

connectorx-python/src/pandas/pandas_columns/bytes.rs

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
1-
use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject, GIL_MUTEX};
1+
use super::{
2+
check_dtype, ExtractBlockFromBound, HasPandasColumn, PandasColumn, PandasColumnObject,
3+
GIL_MUTEX,
4+
};
25
use crate::errors::ConnectorXPythonError;
36
use anyhow::anyhow;
47
use fehler::throws;
58
use ndarray::{ArrayViewMut2, Axis, Ix2};
6-
use numpy::{Element, PyArray, PyArrayDescr};
7-
use pyo3::{Bound, FromPyObject, Py, PyAny, PyResult, Python};
9+
use numpy::{Element, PyArray, PyArrayDescr, PyArrayMethods};
10+
use pyo3::{types::PyAnyMethods, Bound, Py, PyAny, PyResult, Python};
811
use std::any::TypeId;
912

1013
#[derive(Clone)]
@@ -28,8 +31,8 @@ pub struct BytesBlock<'a> {
2831
buf_size_mb: usize,
2932
}
3033

31-
impl<'a> FromPyObject<'a> for BytesBlock<'a> {
32-
fn extract(ob: &'a PyAny) -> PyResult<Self> {
34+
impl<'a> ExtractBlockFromBound<'a> for BytesBlock<'a> {
35+
fn extract_block<'b: 'a>(ob: &'b pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
3336
check_dtype(ob, "object")?;
3437
let array = ob.downcast::<PyArray<PyBytes, Ix2>>()?;
3538
let data = unsafe { array.as_array_mut() };
@@ -38,10 +41,6 @@ impl<'a> FromPyObject<'a> for BytesBlock<'a> {
3841
buf_size_mb: 16, // in MB
3942
})
4043
}
41-
42-
fn extract_bound(ob: &pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
43-
Self::extract(ob.clone().into_gil_ref())
44-
}
4544
}
4645

4746
impl<'a> BytesBlock<'a> {

connectorx-python/src/pandas/pandas_columns/datetime.rs

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
1-
use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject};
1+
use super::{
2+
check_dtype, ExtractBlockFromBound, HasPandasColumn, PandasColumn, PandasColumnObject,
3+
};
24
use crate::errors::ConnectorXPythonError;
35
use anyhow::anyhow;
46
use chrono::{DateTime, Utc};
57
use fehler::throws;
68
use ndarray::{ArrayViewMut2, Axis, Ix2};
7-
use numpy::PyArray;
8-
use pyo3::{FromPyObject, PyAny, PyResult};
9+
use numpy::{PyArray, PyArrayMethods};
10+
use pyo3::{types::PyAnyMethods, PyAny, PyResult};
911
use std::any::TypeId;
1012

1113
// datetime64 is represented in int64 in numpy
@@ -14,17 +16,13 @@ pub struct DateTimeBlock<'a> {
1416
data: ArrayViewMut2<'a, i64>,
1517
}
1618

17-
impl<'a> FromPyObject<'a> for DateTimeBlock<'a> {
18-
fn extract(ob: &'a PyAny) -> PyResult<Self> {
19+
impl<'a> ExtractBlockFromBound<'a> for DateTimeBlock<'a> {
20+
fn extract_block<'b: 'a>(ob: &'b pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
1921
check_dtype(ob, "int64")?;
2022
let array = ob.downcast::<PyArray<i64, Ix2>>()?;
2123
let data = unsafe { array.as_array_mut() };
2224
Ok(DateTimeBlock { data })
2325
}
24-
25-
fn extract_bound(ob: &pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
26-
Self::extract(ob.clone().into_gil_ref())
27-
}
2826
}
2927

3028
impl<'a> DateTimeBlock<'a> {

connectorx-python/src/pandas/pandas_columns/float64.rs

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,26 @@
1-
use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject};
1+
use super::{
2+
check_dtype, ExtractBlockFromBound, HasPandasColumn, PandasColumn, PandasColumnObject,
3+
};
24
use crate::errors::ConnectorXPythonError;
35
use anyhow::anyhow;
46
use fehler::throws;
57
use ndarray::{ArrayViewMut2, Axis, Ix2};
6-
use numpy::PyArray;
7-
use pyo3::{FromPyObject, PyAny, PyResult};
8+
use numpy::{PyArray, PyArrayMethods};
9+
use pyo3::{types::PyAnyMethods, PyAny, PyResult};
810
use std::any::TypeId;
911

1012
// Float
1113
pub struct Float64Block<'a> {
1214
data: ArrayViewMut2<'a, f64>,
1315
}
1416

15-
impl<'a> FromPyObject<'a> for Float64Block<'a> {
16-
fn extract(ob: &'a PyAny) -> PyResult<Self> {
17+
impl<'a> ExtractBlockFromBound<'a> for Float64Block<'a> {
18+
fn extract_block<'b: 'a>(ob: &'b pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
1719
check_dtype(ob, "float64")?;
18-
let array = ob.downcast::<PyArray<f64, Ix2>>()?;
19-
let data = unsafe { array.as_array_mut() };
20+
let array: &pyo3::Bound<'a, PyArray<f64, Ix2>> = ob.downcast()?;
21+
let data: ArrayViewMut2<'a, f64> = unsafe { array.as_array_mut() };
2022
Ok(Float64Block { data })
2123
}
22-
23-
fn extract_bound(ob: &pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
24-
Self::extract(ob.clone().into_gil_ref())
25-
}
2624
}
2725

2826
impl<'a> Float64Block<'a> {

connectorx-python/src/pandas/pandas_columns/int64.rs

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,39 +1,41 @@
1-
use super::{check_dtype, HasPandasColumn, PandasColumn, PandasColumnObject};
1+
use super::{
2+
check_dtype, ExtractBlockFromBound, HasPandasColumn, PandasColumn, PandasColumnObject,
3+
};
24
use crate::errors::ConnectorXPythonError;
35
use anyhow::anyhow;
46
use fehler::throws;
57
use ndarray::{ArrayViewMut1, ArrayViewMut2, Axis, Ix2};
6-
use numpy::{PyArray, PyArray1};
7-
use pyo3::{types::PyTuple, FromPyObject, PyAny, PyResult};
8+
use numpy::{PyArray, PyArray1, PyArrayMethods};
9+
use pyo3::{
10+
types::{PyAnyMethods, PyTuple, PyTupleMethods},
11+
PyAny, PyResult,
12+
};
813
use std::any::TypeId;
914

1015
pub enum Int64Block<'a> {
1116
NumPy(ArrayViewMut2<'a, i64>),
1217
Extention(ArrayViewMut1<'a, i64>, ArrayViewMut1<'a, bool>),
1318
}
14-
impl<'a> FromPyObject<'a> for Int64Block<'a> {
15-
fn extract(ob: &'a PyAny) -> PyResult<Self> {
19+
20+
impl<'a> ExtractBlockFromBound<'a> for Int64Block<'a> {
21+
fn extract_block<'b: 'a>(ob: &'b pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
1622
if let Ok(array) = ob.downcast::<PyArray<i64, Ix2>>() {
1723
check_dtype(ob, "int64")?;
1824
let data = unsafe { array.as_array_mut() };
1925
Ok(Int64Block::NumPy(data))
2026
} else {
2127
let tuple = ob.downcast::<PyTuple>()?;
22-
let data = tuple.get_item(0)?;
23-
let mask = tuple.get_item(1)?;
28+
// let data = tuple.get_borrowed_item(0)?;
29+
let data = tuple.as_slice().get(0).unwrap();
30+
let mask = tuple.as_slice().get(1).unwrap();
2431
check_dtype(data, "int64")?;
2532
check_dtype(mask, "bool")?;
26-
2733
Ok(Int64Block::Extention(
2834
unsafe { data.downcast::<PyArray1<i64>>()?.as_array_mut() },
2935
unsafe { mask.downcast::<PyArray1<bool>>()?.as_array_mut() },
3036
))
3137
}
3238
}
33-
34-
fn extract_bound(ob: &pyo3::Bound<'a, PyAny>) -> PyResult<Self> {
35-
Self::extract(ob.clone().into_gil_ref())
36-
}
3739
}
3840

3941
impl<'a> Int64Block<'a> {

0 commit comments

Comments
 (0)