From 1a2026283b59e1d98ab39f9af68e80f3e64c13b0 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Mon, 14 Oct 2024 18:03:11 +0200 Subject: [PATCH 01/15] Add format generation from levels. --- pixi.toml | 2 +- sparse/mlir_backend/_common.py | 40 ++++++- sparse/mlir_backend/_constructors.py | 40 ++----- sparse/mlir_backend/_levels.py | 165 +++++++++++++++++++++++++++ 4 files changed, 213 insertions(+), 34 deletions(-) create mode 100644 sparse/mlir_backend/_levels.py diff --git a/pixi.toml b/pixi.toml index 52e57fb3..1b914fab 100644 --- a/pixi.toml +++ b/pixi.toml @@ -28,7 +28,7 @@ mkdocs-jupyter = "*" [feature.tests.tasks] test = "pytest --pyargs sparse -n auto" test-mlir = { cmd = "pytest --pyargs sparse/mlir_backend -vvv", env = { SPARSE_BACKEND = "MLIR" } } -test-finch = { cmd = "pytest --pyargs sparse/tests -n auto", env = { SPARSE_BACKEND = "Finch" }, depends-on = ["precompile"] } +test-finch = { cmd = "pytest --pyargs sparse/tests -n auto -vvv", env = { SPARSE_BACKEND = "Finch", PYTHONFAULTHANDLER = "${HOME}/faulthandler.log" }, depends-on = ["precompile"] } [feature.tests.dependencies] pytest = ">=3.5" diff --git a/sparse/mlir_backend/_common.py b/sparse/mlir_backend/_common.py index f78ac991..9d79c811 100644 --- a/sparse/mlir_backend/_common.py +++ b/sparse/mlir_backend/_common.py @@ -4,8 +4,44 @@ import weakref from dataclasses import dataclass +import mlir.runtime as rt from mlir import ir +import numpy as np + +from ._core import libc +from ._dtypes import DType, asdtype + + +def fn_cache(f, maxsize: int | None = None): + return functools.wraps(f)(functools.lru_cache(maxsize=maxsize)(f)) + + +@fn_cache +def get_nd_memref_descr(rank: int, dtype: type[DType]) -> ctypes.Structure: + return rt.make_nd_memref_descriptor(rank, dtype.to_ctype()) + + +def numpy_to_ranked_memref(arr: np.ndarray) -> ctypes.Structure: + memref = rt.get_ranked_memref_descriptor(arr) + memref_descr = get_nd_memref_descr(arr.ndim, asdtype(arr.dtype)) + # Required due to ctypes type checks + return memref_descr( + allocated=memref.allocated, + aligned=memref.aligned, + offset=memref.offset, + shape=memref.shape, + strides=memref.strides, + ) + + +def ranked_memref_to_numpy(ref: ctypes.Structure) -> np.ndarray: + return rt.ranked_memref_to_numpy([ref]) + + +def free_memref(obj: ctypes.Structure) -> None: + libc.free(ctypes.cast(obj.allocated, ctypes.c_void_p)) + class MlirType(abc.ABC): @classmethod @@ -27,10 +63,6 @@ def __len__(self): return len(self.contents) -def fn_cache(f, maxsize: int | None = None): - return functools.wraps(f)(functools.lru_cache(maxsize=maxsize)(f)) - - def _hold_self_ref_in_ret(fn): @functools.wraps(fn) def wrapped(self, *a, **kw): diff --git a/sparse/mlir_backend/_constructors.py b/sparse/mlir_backend/_constructors.py index 1f301908..c9717cc5 100644 --- a/sparse/mlir_backend/_constructors.py +++ b/sparse/mlir_backend/_constructors.py @@ -2,15 +2,23 @@ from collections.abc import Iterable from typing import Any -import mlir.runtime as rt from mlir import ir from mlir.dialects import sparse_tensor import numpy as np import scipy.sparse as sps -from ._common import PackedArgumentTuple, _hold_self_ref_in_ret, _take_owneship, fn_cache -from ._core import ctx, libc +from ._common import ( + PackedArgumentTuple, + _hold_self_ref_in_ret, + _take_owneship, + fn_cache, + free_memref, + get_nd_memref_descr, + numpy_to_ranked_memref, + ranked_memref_to_numpy, +) +from ._core import ctx from ._dtypes import DType, asdtype ########### @@ -18,32 +26,6 
@@ ########### -@fn_cache -def get_nd_memref_descr(rank: int, dtype: type[DType]) -> type: - return rt.make_nd_memref_descriptor(rank, dtype.to_ctype()) - - -def numpy_to_ranked_memref(arr: np.ndarray) -> ctypes.Structure: - memref = rt.get_ranked_memref_descriptor(arr) - memref_descr = get_nd_memref_descr(arr.ndim, asdtype(arr.dtype)) - # Required due to ctypes type checks - return memref_descr( - allocated=memref.allocated, - aligned=memref.aligned, - offset=memref.offset, - shape=memref.shape, - strides=memref.strides, - ) - - -def ranked_memref_to_numpy(ref: ctypes.Structure) -> np.ndarray: - return rt.ranked_memref_to_numpy([ref]) - - -def free_memref(obj: ctypes.Structure) -> None: - libc.free(ctypes.cast(obj.allocated, ctypes.c_void_p)) - - ########### # Formats # ########### diff --git a/sparse/mlir_backend/_levels.py b/sparse/mlir_backend/_levels.py new file mode 100644 index 00000000..5f43a2b3 --- /dev/null +++ b/sparse/mlir_backend/_levels.py @@ -0,0 +1,165 @@ +import ctypes +import dataclasses +import enum +import itertools +import re +import typing + +import mlir.runtime as rt +from mlir import ir +from mlir.dialects import sparse_tensor + +import numpy as np + +from ._common import ( + PackedArgumentTuple, + _take_owneship, + fn_cache, + numpy_to_ranked_memref, + ranked_memref_to_numpy, +) +from ._dtypes import DType, asdtype + +_CAMEL_TO_SNAKE = [re.compile("(.)([A-Z][a-z]+)"), re.compile("([a-z0-9])([A-Z])")] + + +def _camel_to_snake(name: str) -> str: + for exp in _CAMEL_TO_SNAKE: + name = exp.sub(r"\1_\2", name) + + return name.lower() + + +@fn_cache +def get_nd_memref_descr(rank: int, dtype: type[DType]) -> type: + return rt.make_nd_memref_descriptor(rank, dtype.to_ctype()) + + +class LevelProperties(enum.Flag): + NonOrdered = enum.auto() + NonUnique = enum.auto() + + def build(self) -> list[sparse_tensor.LevelProperty]: + return [getattr(sparse_tensor.LevelProperty, _camel_to_snake(p.name)) for p in type(self) if p in self] + + +class LevelFormat(enum.Enum): + Dense = "dense" + Compressed = "compressed" + Singleton = "singleton" + + def build(self) -> sparse_tensor.LevelFormat: + return getattr(sparse_tensor.LevelFormat, self.value) + + +@dataclasses.dataclass(eq=True, frozen=True, kw_only=True) +class Level: + format: LevelFormat + properties: LevelProperties = LevelProperties(0) + + def build(self): + sparse_tensor.EncodingAttr.build_level_type(self.format.build(), self.properties.build()) + + +@dataclasses.dataclass(kw_only=True) +class StorageFormat: + levels: tuple[Level, ...] + order: typing.Literal["C", "F"] | tuple[int, ...] 
+ pos_width: int + crd_width: int + dtype: type[DType] + + @property + def storage_rank(self) -> int: + return len(self.levels) + + @property + def rank(self) -> int: + return self.storage_rank + + def __post_init__(self): + rank = self.storage_rank + self.dtype = asdtype(self.dtype) + if self.order == "C": + self.order = tuple(range(rank)) + return + + if self.order == "F": + self.order = tuple(reversed(range(rank))) + return + + if sorted(self.order) != list(range(rank)): + raise ValueError(f"`sorted(self.order) != list(range(rank))`, {self.order=}, {rank=}.") + + self.order = tuple(self.order) + + @fn_cache + def get_mlir_type(self, *, shape: tuple[int, ...]) -> ir.RankedTensorType: + if len(shape) != self.rank: + raise ValueError(f"`len(shape) != self.rank`, {shape=}, {self.rank=}") + mlir_levels = [level.build() for level in self.levels] + mlir_order = list(self.order) + mlir_reverse_order = [0] * self.rank + for i, r in enumerate(mlir_order): + mlir_reverse_order[r] = i + + dtype = self.dtype.get_mlir_type() + encoding = sparse_tensor.EncodingAttr.get( + mlir_levels, mlir_order, mlir_reverse_order, self.pos_width, self.crd_width + ) + return ir.RankedTensorType.get(list(shape), dtype, encoding) + + @fn_cache + def get_ctypes_type(self): + ptr_dtype = asdtype(getattr(np, f"uint{self.pos_width}")) + idx_dtype = asdtype(getattr(np, f"uint{self.crd_width}")) + + def get_fields(): + fields = [] + compressed_counter = 0 + for level, next_level in itertools.zip_longest(self.levels, self.levels[1:]): + if LevelFormat.Compressed == level.format: + compressed_counter += 1 + fields.append((f"pointers_to_{compressed_counter}", get_nd_memref_descr(1, ptr_dtype))) + if next_level is not None and LevelFormat.Singleton == next_level.format: + fields.append((f"indices_{compressed_counter}", get_nd_memref_descr(2, idx_dtype))) + else: + fields.append((f"indices_{compressed_counter}", get_nd_memref_descr(1, idx_dtype))) + + fields.append(("values", get_nd_memref_descr(1, self.dtype.np_dtype))) + return fields + + storage_format = self + + class Format(ctypes.Structure): + _fields_ = get_fields() + + def get_mlir_type(self, *, shape: tuple[int, ...]): + return self.get_storage_format().get_mlir_type(shape=shape) + + def to_module_arg(self) -> list: + return [ctypes.pointer(ctypes.pointer(f) for f in self.get__fields_())] + + def get__fields_(self) -> list: + return [getattr(self, field[0]) for field in self._fields_] + + def to_constituent_arrays(self) -> PackedArgumentTuple: + return PackedArgumentTuple(tuple(ranked_memref_to_numpy(field) for field in self.get__fields_())) + + def get_storage_format(self) -> StorageFormat: + return storage_format + + @classmethod + def from_constituent_arrays(cls, arrs: list[np.ndarray]) -> "Format": + inst = cls(*(numpy_to_ranked_memref(arr) for arr in arrs)) + for arr in arrs: + _take_owneship(inst, arr) + return inst + + return Format + + def __hash__(self): + return hash(id(self)) + + def __eq__(self, value): + return self is value From 07216a9966df053ecf4c8adbd0cc5e5f64311f5e Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:38:34 +0200 Subject: [PATCH 02/15] Add SciPy conversions. 
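
This patch replaces the ad-hoc per-format ctypes classes with conversions
driven by the level/StorageFormat machinery. A usage sketch of the new
functions (illustrative shapes and values; assumes the MLIR backend is
selected via `SPARSE_BACKEND=MLIR` so that `sparse` re-exports it, as the
test suite does):

    import numpy as np
    import scipy.sparse as sps
    import sparse

    # Wrap SciPy buffers, run an MLIR-compiled kernel, convert back.
    csr = sps.random_array((10, 10), density=0.5, format="csr")
    arr = sparse.asarray(csr)
    out = sparse.add(arr, arr)
    sps_out = sparse.to_scipy(out)
    np.testing.assert_allclose(sps_out.todense(), (csr + csr).todense())

    # Dense NumPy arrays round-trip through to_numpy().
    dense = sparse.asarray(np.arange(9.0).reshape(3, 3))
    np.testing.assert_array_equal(sparse.to_numpy(dense),
                                  np.arange(9.0).reshape(3, 3))
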
--- pixi.toml | 4 +- sparse/mlir_backend/__init__.py | 12 +- sparse/mlir_backend/_array.py | 45 +++ sparse/mlir_backend/_common.py | 18 +- sparse/mlir_backend/_constructors.py | 414 ----------------------- sparse/mlir_backend/_conversions.py | 180 ++++++++++ sparse/mlir_backend/_dtypes.py | 7 +- sparse/mlir_backend/_levels.py | 138 +++++--- sparse/mlir_backend/_ops.py | 83 +---- sparse/mlir_backend/tests/test_simple.py | 172 ++-------- 10 files changed, 366 insertions(+), 707 deletions(-) create mode 100644 sparse/mlir_backend/_array.py delete mode 100644 sparse/mlir_backend/_constructors.py create mode 100644 sparse/mlir_backend/_conversions.py diff --git a/pixi.toml b/pixi.toml index 1b914fab..d54050dd 100644 --- a/pixi.toml +++ b/pixi.toml @@ -27,8 +27,8 @@ mkdocs-jupyter = "*" [feature.tests.tasks] test = "pytest --pyargs sparse -n auto" -test-mlir = { cmd = "pytest --pyargs sparse/mlir_backend -vvv", env = { SPARSE_BACKEND = "MLIR" } } -test-finch = { cmd = "pytest --pyargs sparse/tests -n auto -vvv", env = { SPARSE_BACKEND = "Finch", PYTHONFAULTHANDLER = "${HOME}/faulthandler.log" }, depends-on = ["precompile"] } +test-mlir = { cmd = "pytest --pyargs sparse/mlir_backend -v", env = { SPARSE_BACKEND = "MLIR" } } +test-finch = { cmd = "pytest --pyargs sparse/tests -n auto -v", env = { SPARSE_BACKEND = "Finch", PYTHONFAULTHANDLER = "${HOME}/faulthandler.log" }, depends-on = ["precompile"] } [feature.tests.dependencies] pytest = ">=3.5" diff --git a/sparse/mlir_backend/__init__.py b/sparse/mlir_backend/__init__.py index 86b42965..de19598a 100644 --- a/sparse/mlir_backend/__init__.py +++ b/sparse/mlir_backend/__init__.py @@ -7,24 +7,20 @@ "to enable MLIR backend." ) from e -from ._constructors import ( - PackedArgumentTuple, - asarray, -) +from ._common import PackedArgumentTuple +from ._conversions import asarray, to_numpy, to_scipy from ._dtypes import ( asdtype, ) from ._ops import ( add, - broadcast_to, - reshape, ) __all__ = [ "add", - "broadcast_to", "asarray", "asdtype", - "reshape", "PackedArgumentTuple", + "to_numpy", + "to_scipy", ] diff --git a/sparse/mlir_backend/_array.py b/sparse/mlir_backend/_array.py new file mode 100644 index 00000000..0a7659b4 --- /dev/null +++ b/sparse/mlir_backend/_array.py @@ -0,0 +1,45 @@ +import dataclasses + +from ._common import _hold_ref, numpy_to_ranked_memref, ranked_memref_to_numpy +from ._levels import StorageFormat + + +class Array: + def __init__(self, *, storage, shape: tuple[int, ...]) -> None: + storage_rank = storage.get_storage_format().rank + if len(shape) != storage_rank: + raise ValueError(f"Mismatched rank, `{storage_rank=}`, `{shape=}`") + + self._storage = storage + self._shape = shape + + @property + def shape(self) -> tuple[int, ...]: + return self._shape + + @property + def ndim(self) -> int: + return len(self.shape) + + @property + def dtype(self): + return self._storage.get_storage_format().dtype + + def _get_storage_format(self) -> StorageFormat: + return self._storage.get_storage_format() + + def _get_mlir_type(self): + return self._get_storage_format().get_mlir_type(shape=self.shape) + + def _to_module_arg(self): + return self._storage.to_module_arg() + + def copy(self): + storage_format: StorageFormat = dataclasses.replace(self._get_storage_format(), owns_memory=False) + + fields = self._storage.get__fields_() + arrs = [ranked_memref_to_numpy(f).copy() for f in fields] + memrefs = [numpy_to_ranked_memref(arr) for arr in arrs] + arr = Array(storage=storage_format.get_ctypes_type()(*memrefs), shape=self.shape) + for carr in 
arrs: + _hold_ref(arr, carr) diff --git a/sparse/mlir_backend/_common.py b/sparse/mlir_backend/_common.py index 9d79c811..df3bd4cd 100644 --- a/sparse/mlir_backend/_common.py +++ b/sparse/mlir_backend/_common.py @@ -1,11 +1,9 @@ -import abc import ctypes import functools import weakref from dataclasses import dataclass import mlir.runtime as rt -from mlir import ir import numpy as np @@ -17,8 +15,12 @@ def fn_cache(f, maxsize: int | None = None): return functools.wraps(f)(functools.lru_cache(maxsize=maxsize)(f)) -@fn_cache def get_nd_memref_descr(rank: int, dtype: type[DType]) -> ctypes.Structure: + return _get_nd_memref_descr(int(rank), asdtype(dtype)) + + +@fn_cache +def _get_nd_memref_descr(rank: int, dtype: type[DType]) -> ctypes.Structure: return rt.make_nd_memref_descriptor(rank, dtype.to_ctype()) @@ -43,12 +45,6 @@ def free_memref(obj: ctypes.Structure) -> None: libc.free(ctypes.cast(obj.allocated, ctypes.c_void_p)) -class MlirType(abc.ABC): - @classmethod - @abc.abstractmethod - def get_mlir_type(cls) -> ir.Type: ... - - @dataclass class PackedArgumentTuple: contents: tuple @@ -67,13 +63,13 @@ def _hold_self_ref_in_ret(fn): @functools.wraps(fn) def wrapped(self, *a, **kw): ret = fn(self, *a, **kw) - _take_owneship(ret, self) + _hold_ref(ret, self) return ret return wrapped -def _take_owneship(owner, obj): +def _hold_ref(owner, obj): ptr = ctypes.py_object(obj) ctypes.pythonapi.Py_IncRef(ptr) diff --git a/sparse/mlir_backend/_constructors.py b/sparse/mlir_backend/_constructors.py deleted file mode 100644 index c9717cc5..00000000 --- a/sparse/mlir_backend/_constructors.py +++ /dev/null @@ -1,414 +0,0 @@ -import ctypes -from collections.abc import Iterable -from typing import Any - -from mlir import ir -from mlir.dialects import sparse_tensor - -import numpy as np -import scipy.sparse as sps - -from ._common import ( - PackedArgumentTuple, - _hold_self_ref_in_ret, - _take_owneship, - fn_cache, - free_memref, - get_nd_memref_descr, - numpy_to_ranked_memref, - ranked_memref_to_numpy, -) -from ._core import ctx -from ._dtypes import DType, asdtype - -########### -# Memrefs # -########### - - -########### -# Formats # -########### - - -@fn_cache -def get_sparse_vector_class( - values_dtype: type[DType], - index_dtype: type[DType], -) -> type[ctypes.Structure]: - class SparseVector(ctypes.Structure): - _fields_ = [ - ("indptr", get_nd_memref_descr(1, index_dtype)), - ("indices", get_nd_memref_descr(1, index_dtype)), - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - - @classmethod - def from_sps(cls, arrs: list[np.ndarray]) -> "SparseVector": - sv_instance = cls(*[numpy_to_ranked_memref(arr) for arr in arrs]) - for arr in arrs: - _take_owneship(sv_instance, arr) - return sv_instance - - def to_sps(self, shape: tuple[int, ...]) -> int: - return PackedArgumentTuple(tuple(ranked_memref_to_numpy(field) for field in self.get__fields_())) - - def to_module_arg(self) -> list: - return [ - ctypes.pointer(ctypes.pointer(self.indptr)), - ctypes.pointer(ctypes.pointer(self.indices)), - ctypes.pointer(ctypes.pointer(self.data)), - ] - - def get__fields_(self) -> list: - return [self.indptr, self.indices, self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - levels = (sparse_tensor.LevelFormat.compressed,) - 
ordering = ir.AffineMap.get_permutation([0]) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return SparseVector - - -@fn_cache -def get_csx_class( - values_dtype: type[DType], - index_dtype: type[DType], - order: str, -) -> type[ctypes.Structure]: - class Csx(ctypes.Structure): - _fields_ = [ - ("indptr", get_nd_memref_descr(1, index_dtype)), - ("indices", get_nd_memref_descr(1, index_dtype)), - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - _order = order - - @classmethod - def from_sps(cls, arr: sps.csr_array | sps.csc_array) -> "Csx": - indptr = numpy_to_ranked_memref(arr.indptr) - indices = numpy_to_ranked_memref(arr.indices) - data = numpy_to_ranked_memref(arr.data) - - csr_instance = cls(indptr=indptr, indices=indices, data=data) - _take_owneship(csr_instance, arr) - - return csr_instance - - def to_sps(self, shape: tuple[int, ...]) -> sps.csr_array | sps.csc_array: - pos = ranked_memref_to_numpy(self.indptr) - crd = ranked_memref_to_numpy(self.indices) - data = ranked_memref_to_numpy(self.data) - return get_csx_scipy_class(self._order)((data, crd, pos), shape=shape) - - def to_module_arg(self) -> list: - return [ - ctypes.pointer(ctypes.pointer(self.indptr)), - ctypes.pointer(ctypes.pointer(self.indices)), - ctypes.pointer(ctypes.pointer(self.data)), - ] - - def get__fields_(self) -> list: - return [self.indptr, self.indices, self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - levels = (sparse_tensor.LevelFormat.dense, sparse_tensor.LevelFormat.compressed) - ordering = ir.AffineMap.get_permutation(get_order_tuple(cls._order)) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return Csx - - -@fn_cache -def get_coo_class(values_dtype: type[DType], index_dtype: type[DType]) -> type[ctypes.Structure]: - class Coo(ctypes.Structure): - _fields_ = [ - ("pos", get_nd_memref_descr(1, index_dtype)), - ("coords", get_nd_memref_descr(2, index_dtype)), - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - - @classmethod - def from_sps(cls, arr: sps.coo_array | Iterable[np.ndarray]) -> "Coo": - if isinstance(arr, sps.coo_array): - if not arr.has_canonical_format: - raise Exception("COO must have canonical format") - np_pos = np.array([0, arr.size], dtype=index_dtype.np_dtype) - np_coords = np.stack(arr.coords, axis=1, dtype=index_dtype.np_dtype) - np_data = arr.data - else: - if len(arr) != 3: - raise Exception("COO must be comprised of three arrays") - np_pos, np_coords, np_data = arr - - pos = numpy_to_ranked_memref(np_pos) - coords = numpy_to_ranked_memref(np_coords) - data = numpy_to_ranked_memref(np_data) - coo_instance = cls(pos=pos, coords=coords, data=data) - _take_owneship(coo_instance, np_pos) - _take_owneship(coo_instance, np_coords) - _take_owneship(coo_instance, np_data) - - return coo_instance - - def to_sps(self, shape: tuple[int, ...]) -> sps.coo_array | list[np.ndarray]: - pos = ranked_memref_to_numpy(self.pos) - coords = ranked_memref_to_numpy(self.coords)[pos[0] : pos[1]] - data = 
ranked_memref_to_numpy(self.data) - return ( - sps.coo_array((data, coords.T), shape=shape) - if len(shape) == 2 - else PackedArgumentTuple((pos, coords, data)) - ) - - def to_module_arg(self) -> list: - return [ - ctypes.pointer(ctypes.pointer(self.pos)), - ctypes.pointer(ctypes.pointer(self.coords)), - ctypes.pointer(ctypes.pointer(self.data)), - ] - - def get__fields_(self) -> list: - return [self.pos, self.coords, self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - compressed_lvl = sparse_tensor.EncodingAttr.build_level_type( - sparse_tensor.LevelFormat.compressed, [sparse_tensor.LevelProperty.non_unique] - ) - mid_singleton_lvls = [ - sparse_tensor.EncodingAttr.build_level_type( - sparse_tensor.LevelFormat.singleton, [sparse_tensor.LevelProperty.non_unique] - ) - ] * (len(shape) - 2) - levels = (compressed_lvl, *mid_singleton_lvls, sparse_tensor.LevelFormat.singleton) - ordering = ir.AffineMap.get_permutation([*range(len(shape))]) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return Coo - - -@fn_cache -def get_csf_class( - values_dtype: type[DType], - index_dtype: type[DType], -) -> type[ctypes.Structure]: - class Csf(ctypes.Structure): - _fields_ = [ - ("indptr_1", get_nd_memref_descr(1, index_dtype)), - ("indices_1", get_nd_memref_descr(1, index_dtype)), - ("indptr_2", get_nd_memref_descr(1, index_dtype)), - ("indices_2", get_nd_memref_descr(1, index_dtype)), - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - - @classmethod - def from_sps(cls, arrs: list[np.ndarray]) -> "Csf": - csf_instance = cls(*[numpy_to_ranked_memref(arr) for arr in arrs]) - for arr in arrs: - _take_owneship(csf_instance, arr) - return csf_instance - - def to_sps(self, shape: tuple[int, ...]) -> list[np.ndarray]: - return PackedArgumentTuple(tuple(ranked_memref_to_numpy(field) for field in self.get__fields_())) - - def to_module_arg(self) -> list: - return [ctypes.pointer(ctypes.pointer(field)) for field in self.get__fields_()] - - def get__fields_(self) -> list: - return [self.indptr_1, self.indices_1, self.indptr_2, self.indices_2, self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - levels = ( - sparse_tensor.LevelFormat.dense, - sparse_tensor.LevelFormat.compressed, - sparse_tensor.LevelFormat.compressed, - ) - ordering = ir.AffineMap.get_permutation([0, 1, 2]) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return Csf - - -@fn_cache -def get_dense_class(values_dtype: type[DType], index_dtype: type[DType]) -> type[ctypes.Structure]: - class Dense(ctypes.Structure): - _fields_ = [ - ("data", get_nd_memref_descr(1, values_dtype)), - ] - dtype = values_dtype - _index_dtype = index_dtype - - @classmethod - def from_sps(cls, arr: np.ndarray) -> "Dense": - data = numpy_to_ranked_memref(arr.ravel()) - - dense_instance = 
cls(data=data) - _take_owneship(dense_instance, arr) - - return dense_instance - - def to_sps(self, shape: tuple[int, ...]) -> np.ndarray: - data = ranked_memref_to_numpy(self.data) - return data.reshape(shape) - - def to_module_arg(self) -> list: - return [ctypes.pointer(ctypes.pointer(self.data))] - - def get__fields_(self) -> list: - return [self.data] - - @classmethod - @fn_cache - def get_tensor_definition(cls, shape: tuple[int, ...]) -> ir.RankedTensorType: - with ir.Location.unknown(ctx): - values_dtype = cls.dtype.get_mlir_type() - index_dtype = cls._index_dtype.get_mlir_type() - index_width = getattr(index_dtype, "width", 0) - levels = (sparse_tensor.LevelFormat.dense,) * len(shape) - ordering = ir.AffineMap.get_permutation([*range(len(shape))]) - encoding = sparse_tensor.EncodingAttr.get(levels, ordering, ordering, index_width, index_width) - return ir.RankedTensorType.get(list(shape), values_dtype, encoding) - - return Dense - - -def _is_scipy_sparse_obj(x) -> bool: - return hasattr(x, "__module__") and x.__module__.startswith("scipy.sparse") - - -def _is_numpy_obj(x) -> bool: - return isinstance(x, np.ndarray) - - -def _is_mlir_obj(x) -> bool: - return isinstance(x, ctypes.Structure) - - -def get_order_tuple(order: str) -> tuple[int, int]: - if order in ("r", "c"): - return (0, 1) if order == "r" else (1, 0) - raise Exception(f"Invalid order: {order}") - - -def get_csx_scipy_class(order: str) -> type[sps.sparray]: - if order in ("r", "c"): - return sps.csr_array if order == "r" else sps.csc_array - raise Exception(f"Invalid order: {order}") - - -_constructor_class_dict = { - "csr": get_csx_class, - "csc": get_csx_class, - "csf": get_csf_class, - "coo": get_coo_class, - "sparse_vector": get_sparse_vector_class, - "dense": get_dense_class, -} - - -################ -# Tensor class # -################ - - -class Tensor: - def __init__( - self, - obj: Any, - shape: tuple[int, ...] 
| None = None, - dtype: type[DType] | None = None, - format: str | None = None, - ) -> None: - self.shape = shape if shape is not None else obj.shape - self.ndim = len(self.shape) - self._values_dtype = dtype if dtype is not None else asdtype(obj.dtype) - - if _is_scipy_sparse_obj(obj): - self._owns_memory = False - - if obj.format in ("csr", "csc"): - order = "r" if obj.format == "csr" else "c" - self._index_dtype = asdtype(obj.indptr.dtype) - self._format_class = get_csx_class(self._values_dtype, self._index_dtype, order) - self._obj = self._format_class.from_sps(obj) - elif obj.format == "coo": - self._index_dtype = asdtype(obj.coords[0].dtype) - self._format_class = get_coo_class(self._values_dtype, self._index_dtype) - self._obj = self._format_class.from_sps(obj) - else: - raise Exception(f"{obj.format} SciPy format not supported.") - - elif _is_numpy_obj(obj): - self._owns_memory = False - self._index_dtype = asdtype(np.intp) - self._format_class = get_dense_class(self._values_dtype, self._index_dtype) - self._obj = self._format_class.from_sps(obj) - - elif _is_mlir_obj(obj): - self._owns_memory = True - self._format_class = type(obj) - self._obj = obj - - elif format is not None: - if format in ["csf", "coo", "sparse_vector"]: - fn_format_class = _constructor_class_dict[format] - self._owns_memory = False - self._index_dtype = asdtype(np.intp) - self._format_class = fn_format_class(self._values_dtype, self._index_dtype) - self._obj = self._format_class.from_sps(obj) - - else: - raise Exception(f"Format {format} not supported.") - - else: - raise Exception(f"{type(obj)} not supported.") - - def __del__(self): - if self._owns_memory: - for field in self._obj.get__fields_(): - free_memref(field) - - @_hold_self_ref_in_ret - def to_scipy_sparse(self) -> sps.sparray | np.ndarray: - return self._obj.to_sps(self.shape) - - -def asarray(obj, shape=None, dtype=None, format=None) -> Tensor: - return Tensor(obj, shape, dtype, format) diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py new file mode 100644 index 00000000..9337327d --- /dev/null +++ b/sparse/mlir_backend/_conversions.py @@ -0,0 +1,180 @@ +import functools + +import numpy as np + +from ._array import Array +from ._common import _hold_ref, numpy_to_ranked_memref, ranked_memref_to_numpy +from ._levels import Level, LevelFormat, LevelProperties, StorageFormat, get_storage_format + +try: + import scipy.sparse as sps + + ScipySparseArray = sps.sparray | sps.spmatrix +except ImportError: + sps = None + ScipySparseArray = None + + +def _guard_scipy(f): + @functools.wraps(f) + def wrapped(*args, **kwargs): + if sps is None: + raise RuntimeError("Could not import `scipy.sparse`. Please install `scipy`.") + + return f(*args, **kwargs) + + return wrapped + + +def _from_numpy(arr: np.ndarray, copy: bool | None = None) -> Array: + shape = arr.shape + arr = np.asarray(arr, order="C", copy=copy).flatten() + levels = (Level(LevelFormat.Dense),) * len(shape) + dense_format = get_storage_format( + levels=levels, + order="C", + pos_width=64, + crd_width=64, + dtype=arr.dtype, + owns_memory=False, + ) + storage = dense_format.get_ctypes_type()(numpy_to_ranked_memref(arr)) + _hold_ref(storage, arr) + return Array(storage=storage, shape=shape) + + +def to_numpy(arr): + storage = arr._storage + storage_format: StorageFormat = storage.get_storage_format() + + if not all(LevelFormat.Dense == level.format for level in storage_format.levels): + raise TypeError(f"Cannot convert a non-dense array to NumPy. 
`{storage_format=}`") + + data = ranked_memref_to_numpy(arr._storage.values) + _hold_ref(data, storage) + arg_order = [0] * storage_format.storage_rank + for i, o in enumerate(storage_format.order): + arg_order[o] = i + arg_order = tuple(arg_order) + storage_shape = tuple(int(arr.shape[o]) for o in arg_order) + return data.reshape(storage_shape).transpose(arg_order) + + +@_guard_scipy +def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: + if not isinstance(arr, ScipySparseArray): + raise TypeError(f"`arr` is not a `scipy.sparse` array, `{type(arr)=}`.") + match arr.format: + case "csr" | "csc": + pos_width = arr.indptr.dtype.itemsize * 8 + crd_width = arr.indices.dtype.itemsize * 8 + csr_format = get_storage_format( + levels=( + Level(LevelFormat.Dense), + Level( + LevelFormat.Compressed, + LevelProperties(0) + if arr.has_canonical_format + else LevelProperties.NonUnique | LevelProperties.NonOrdered, + ), + ), + order=(0, 1) if "csr" in type(arr).__name__ else (1, 0), + pos_width=pos_width, + crd_width=crd_width, + dtype=arr.dtype, + owns_memory=False, + ) + + indptr_np = arr.indptr + indices_np = arr.indices + data_np = arr.data + + if copy: + indptr_np = indptr_np.copy() + indices_np = indices_np.copy() + data_np = data_np.copy() + + indptr = numpy_to_ranked_memref(indptr_np) + indices = numpy_to_ranked_memref(indices_np) + data = numpy_to_ranked_memref(data_np) + + storage = csr_format.get_ctypes_type()(indptr, indices, data) + _hold_ref(storage, indptr_np) + _hold_ref(storage, indices_np) + _hold_ref(storage, data_np) + return Array(storage=storage, shape=arr.shape) + case "coo": + if copy is not None and not copy: + raise RuntimeError(f"`scipy.sparse.{type(arr.__name__)}` cannot be zero-copy converted.") + coords_np = np.stack([arr.row, arr.col], axis=1) + pos_np = np.array([0, arr.nnz], dtype=np.int64) + pos_width = pos_np.dtype.itemsize * 8 + crd_width = coords_np.dtype.itemsize * 8 + data_np = arr.data.copy() + + level_props = LevelProperties.NonUnique + if not arr.has_canonical_format: + level_props |= LevelProperties.NonOrdered + + coo_format = get_storage_format( + levels=( + Level(LevelFormat.Compressed, level_props), + Level(LevelFormat.Singleton, level_props), + ), + order=(0, 1), + pos_width=pos_width, + crd_width=crd_width, + dtype=arr.dtype, + owns_memory=False, + ) + + pos = numpy_to_ranked_memref(pos_np) + crd = numpy_to_ranked_memref(coords_np) + data = numpy_to_ranked_memref(data_np) + + storage = coo_format.get_ctypes_type()(pos, crd, data) + _hold_ref(storage, pos_np) + _hold_ref(storage, coords_np) + _hold_ref(storage, data_np) + return Array(storage=storage, shape=arr.shape) + case _: + raise NotImplementedError(f"No conversion implemented for `scipy.sparse.{type(arr.__name__)}`.") + + +@_guard_scipy +def to_scipy(arr) -> ScipySparseArray: + storage = arr._storage + storage_format: StorageFormat = storage.get_storage_format() + + match storage_format.levels: + case (Level(LevelFormat.Dense, _), Level(LevelFormat.Compressed, _)): + data = ranked_memref_to_numpy(storage.values) + indices = ranked_memref_to_numpy(storage.indices_1) + indptr = ranked_memref_to_numpy(storage.pointers_to_1) + if storage_format.order == (0, 1): + sps_arr = sps.csr_array((data, indices, indptr), shape=arr.shape) + else: + sps_arr = sps.csc_array((data, indices, indptr), shape=arr.shape) + case (Level(LevelFormat.Compressed, _), Level(LevelFormat.Singleton, _)): + data = ranked_memref_to_numpy(storage.values) + coords = ranked_memref_to_numpy(storage.indices_1) + 
sps_arr = sps.coo_array((data, (coords[:, 0], coords[:, 1])), shape=arr.shape) + case _: + raise RuntimeError(f"No conversion implemented for `{storage_format=}`.") + + _hold_ref(sps_arr, storage) + return sps_arr + + +def asarray(arr, copy: bool | None = None) -> Array: + if sps is not None and isinstance(arr, ScipySparseArray): + return _from_scipy(arr, copy=copy) + if isinstance(arr, np.ndarray): + return _from_numpy(arr, copy=copy) + + if isinstance(arr, Array): + if copy: + arr = arr.copy() + return arr + + return _from_numpy(np.asarray(arr, copy=copy), copy=None) diff --git a/sparse/mlir_backend/_dtypes.py b/sparse/mlir_backend/_dtypes.py index 2ab41401..a854e609 100644 --- a/sparse/mlir_backend/_dtypes.py +++ b/sparse/mlir_backend/_dtypes.py @@ -1,3 +1,4 @@ +import abc import inspect import math import sys @@ -7,7 +8,11 @@ import numpy as np -from ._common import MlirType + +class MlirType(abc.ABC): + @classmethod + @abc.abstractmethod + def get_mlir_type(cls) -> ir.Type: ... def _get_pointer_width() -> int: diff --git a/sparse/mlir_backend/_levels.py b/sparse/mlir_backend/_levels.py index 5f43a2b3..8cb14b9d 100644 --- a/sparse/mlir_backend/_levels.py +++ b/sparse/mlir_backend/_levels.py @@ -5,7 +5,6 @@ import re import typing -import mlir.runtime as rt from mlir import ir from mlir.dialects import sparse_tensor @@ -13,11 +12,14 @@ from ._common import ( PackedArgumentTuple, - _take_owneship, + _hold_ref, fn_cache, + free_memref, + get_nd_memref_descr, numpy_to_ranked_memref, ranked_memref_to_numpy, ) +from ._core import ctx from ._dtypes import DType, asdtype _CAMEL_TO_SNAKE = [re.compile("(.)([A-Z][a-z]+)"), re.compile("([a-z0-9])([A-Z])")] @@ -30,11 +32,6 @@ def _camel_to_snake(name: str) -> str: return name.lower() -@fn_cache -def get_nd_memref_descr(rank: int, dtype: type[DType]) -> type: - return rt.make_nd_memref_descriptor(rank, dtype.to_ctype()) - - class LevelProperties(enum.Flag): NonOrdered = enum.auto() NonUnique = enum.auto() @@ -52,22 +49,23 @@ def build(self) -> sparse_tensor.LevelFormat: return getattr(sparse_tensor.LevelFormat, self.value) -@dataclasses.dataclass(eq=True, frozen=True, kw_only=True) +@dataclasses.dataclass(eq=True, frozen=True) class Level: format: LevelFormat properties: LevelProperties = LevelProperties(0) def build(self): - sparse_tensor.EncodingAttr.build_level_type(self.format.build(), self.properties.build()) + return sparse_tensor.EncodingAttr.build_level_type(self.format.build(), self.properties.build()) -@dataclasses.dataclass(kw_only=True) +@dataclasses.dataclass(eq=True, frozen=True, kw_only=True) class StorageFormat: levels: tuple[Level, ...] - order: typing.Literal["C", "F"] | tuple[int, ...] + order: tuple[int, ...] 
pos_width: int crd_width: int dtype: type[DType] + owns_memory: bool @property def storage_rank(self) -> int: @@ -78,41 +76,34 @@ def rank(self) -> int: return self.storage_rank def __post_init__(self): - rank = self.storage_rank - self.dtype = asdtype(self.dtype) - if self.order == "C": - self.order = tuple(range(rank)) - return - - if self.order == "F": - self.order = tuple(reversed(range(rank))) - return - - if sorted(self.order) != list(range(rank)): - raise ValueError(f"`sorted(self.order) != list(range(rank))`, {self.order=}, {rank=}.") - - self.order = tuple(self.order) + if sorted(self.order) != list(range(self.rank)): + raise ValueError(f"`sorted(self.order) != list(range(self.rank))`, `{self.order=}`, `{self.rank=}`.") @fn_cache def get_mlir_type(self, *, shape: tuple[int, ...]) -> ir.RankedTensorType: if len(shape) != self.rank: raise ValueError(f"`len(shape) != self.rank`, {shape=}, {self.rank=}") - mlir_levels = [level.build() for level in self.levels] - mlir_order = list(self.order) - mlir_reverse_order = [0] * self.rank - for i, r in enumerate(mlir_order): - mlir_reverse_order[r] = i - - dtype = self.dtype.get_mlir_type() - encoding = sparse_tensor.EncodingAttr.get( - mlir_levels, mlir_order, mlir_reverse_order, self.pos_width, self.crd_width - ) - return ir.RankedTensorType.get(list(shape), dtype, encoding) + with ir.Location.unknown(ctx): + mlir_levels = [level.build() for level in self.levels] + mlir_order = list(self.order) + mlir_reverse_order = [0] * self.rank + for i, r in enumerate(mlir_order): + mlir_reverse_order[r] = i + + dtype = self.dtype.get_mlir_type() + encoding = sparse_tensor.EncodingAttr.get( + mlir_levels, + ir.AffineMap.get_permutation(mlir_order), + ir.AffineMap.get_permutation(mlir_reverse_order), + self.pos_width, + self.crd_width, + ) + return ir.RankedTensorType.get(list(shape), dtype, encoding) @fn_cache def get_ctypes_type(self): - ptr_dtype = asdtype(getattr(np, f"uint{self.pos_width}")) - idx_dtype = asdtype(getattr(np, f"uint{self.crd_width}")) + ptr_dtype = asdtype(getattr(np, f"int{self.pos_width}")) + idx_dtype = asdtype(getattr(np, f"int{self.crd_width}")) def get_fields(): fields = [] @@ -126,19 +117,19 @@ def get_fields(): else: fields.append((f"indices_{compressed_counter}", get_nd_memref_descr(1, idx_dtype))) - fields.append(("values", get_nd_memref_descr(1, self.dtype.np_dtype))) + fields.append(("values", get_nd_memref_descr(1, self.dtype))) return fields storage_format = self - class Format(ctypes.Structure): + class Storage(ctypes.Structure): _fields_ = get_fields() def get_mlir_type(self, *, shape: tuple[int, ...]): return self.get_storage_format().get_mlir_type(shape=shape) def to_module_arg(self) -> list: - return [ctypes.pointer(ctypes.pointer(f) for f in self.get__fields_())] + return [ctypes.pointer(ctypes.pointer(f)) for f in self.get__fields_()] def get__fields_(self) -> list: return [getattr(self, field[0]) for field in self._fields_] @@ -150,16 +141,61 @@ def get_storage_format(self) -> StorageFormat: return storage_format @classmethod - def from_constituent_arrays(cls, arrs: list[np.ndarray]) -> "Format": - inst = cls(*(numpy_to_ranked_memref(arr) for arr in arrs)) + def from_constituent_arrays(cls, arrs: list[np.ndarray]) -> "Storage": + storage = cls(*(numpy_to_ranked_memref(arr) for arr in arrs)) for arr in arrs: - _take_owneship(inst, arr) - return inst + _hold_ref(storage, arr) + return storage + + if storage_format.owns_memory: + + def __del__(self) -> None: + for field in self.get__fields_(): + free_memref(field) 
+ + return Storage + + +def get_storage_format( + *, + levels: tuple[Level, ...], + order: typing.Literal["C", "F"] | tuple[int, ...], + pos_width: int, + crd_width: int, + dtype: type[DType], + owns_memory: bool, +) -> StorageFormat: + levels = tuple(levels) + if isinstance(order, str): + if order == "C": + order = tuple(range(len(levels))) + if order == "F": + order = tuple(reversed(range(len(levels)))) + return _get_storage_format( + levels=levels, + order=order, + pos_width=int(pos_width), + crd_width=int(crd_width), + dtype=asdtype(dtype), + owns_memory=bool(owns_memory), + ) - return Format - def __hash__(self): - return hash(id(self)) - - def __eq__(self, value): - return self is value +@fn_cache +def _get_storage_format( + *, + levels: tuple[Level, ...], + order: tuple[int, ...], + pos_width: int, + crd_width: int, + dtype: type[DType], + owns_memory: bool, +) -> StorageFormat: + return StorageFormat( + levels=levels, + order=order, + pos_width=pos_width, + crd_width=crd_width, + dtype=dtype, + owns_memory=owns_memory, + ) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 963bbd1c..8869333e 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -1,16 +1,15 @@ import ctypes +import dataclasses import mlir.execution_engine import mlir.passmanager from mlir import ir from mlir.dialects import arith, func, linalg, sparse_tensor, tensor -import numpy as np - +from ._array import Array from ._common import fn_cache -from ._constructors import Tensor, numpy_to_ranked_memref from ._core import CWD, DEBUG, MLIR_C_RUNNER_UTILS, ctx, pm -from ._dtypes import DType, FloatingDType, Index +from ._dtypes import DType, FloatingDType @fn_cache @@ -121,77 +120,23 @@ def broadcast_to(in_tensor): return mlir.execution_engine.ExecutionEngine(module, opt_level=2, shared_libs=[MLIR_C_RUNNER_UTILS]) -def add(x1: Tensor, x2: Tensor) -> Tensor: - ret_obj = x1._format_class() - out_tensor_type = x1._obj.get_tensor_definition(x1.shape) +def add(x1: Array, x2: Array) -> Array: + ret_storage_format = dataclasses.replace(x1._get_storage_format(), owns_memory=True) + ret_storage = ret_storage_format.get_ctypes_type()() + out_tensor_type = ret_storage_format.get_mlir_type(shape=x1.shape) # TODO: Decide what will be the output tensor_type add_module = get_add_module( - x1._obj.get_tensor_definition(x1.shape), - x2._obj.get_tensor_definition(x2.shape), + x1._get_mlir_type(), + x2._get_mlir_type(), out_tensor_type=out_tensor_type, - dtype=x1._values_dtype, + dtype=x1.dtype, rank=x1.ndim, ) add_module.invoke( "add", - ctypes.pointer(ctypes.pointer(ret_obj)), - *x1._obj.to_module_arg(), - *x2._obj.to_module_arg(), - ) - return Tensor(ret_obj, shape=out_tensor_type.shape) - - -def _infer_format_class(rank: int, values_dtype: type[DType], index_dtype: type[DType]) -> type[ctypes.Structure]: - from ._constructors import get_csf_class, get_csx_class, get_dense_class - - if rank == 1: - return get_dense_class(values_dtype, index_dtype) - if rank == 2: - return get_csx_class(values_dtype, index_dtype, order="r") - if rank == 3: - return get_csf_class(values_dtype, index_dtype) - raise Exception(f"Rank not supported to infer format: {rank}") - - -def reshape(x: Tensor, /, shape: tuple[int, ...]) -> Tensor: - x_tensor_type = x._obj.get_tensor_definition(x.shape) - if len(x.shape) == len(shape): - out_tensor_type = x._obj.get_tensor_definition(shape) - ret_obj = x._format_class() - else: - format_class = _infer_format_class(len(shape), x._values_dtype, x._index_dtype) - 
out_tensor_type = format_class.get_tensor_definition(shape) - ret_obj = format_class() - - with ir.Location.unknown(ctx): - shape_tensor_type = ir.RankedTensorType.get([len(shape)], Index.get_mlir_type()) - - reshape_module = get_reshape_module(x_tensor_type, shape_tensor_type, out_tensor_type) - - shape = np.array(shape) - reshape_module.invoke( - "reshape", - ctypes.pointer(ctypes.pointer(ret_obj)), - *x._obj.to_module_arg(), - ctypes.pointer(ctypes.pointer(numpy_to_ranked_memref(shape))), + ctypes.pointer(ctypes.pointer(ret_storage)), + *x1._to_module_arg(), + *x2._to_module_arg(), ) - - return Tensor(ret_obj, shape=out_tensor_type.shape) - - -def broadcast_to(x: Tensor, /, shape: tuple[int, ...], dimensions: list[int]) -> Tensor: - x_tensor_type = x._obj.get_tensor_definition(x.shape) - format_class = _infer_format_class(len(shape), x._values_dtype, x._index_dtype) - out_tensor_type = format_class.get_tensor_definition(shape) - ret_obj = format_class() - - broadcast_to_module = get_broadcast_to_module(x_tensor_type, out_tensor_type, tuple(dimensions)) - - broadcast_to_module.invoke( - "broadcast_to", - ctypes.pointer(ctypes.pointer(ret_obj)), - *x._obj.to_module_arg(), - ) - - return Tensor(ret_obj, shape=shape) + return Array(storage=ret_storage, shape=out_tensor_type.shape) diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 0fb2e4d2..605107c9 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -32,21 +32,15 @@ def assert_csx_equal( expected: sps.csr_array | sps.csc_array, actual: sps.csr_array | sps.csc_array, ) -> None: - np.testing.assert_array_equal(expected.todense(), actual.todense()) - # Broken due to https://github.com/scipy/scipy/issues/21442 - # desired.sort_indices() - # desired.sum_duplicates() - # desired.prune() + expected.eliminate_zeros() + expected.sum_duplicates() - # actual.sort_indices() - # actual.sum_duplicates() - # actual.prune() + actual.eliminate_zeros() + actual.sum_duplicates() - # np.testing.assert_array_equal(desired.todense(), actual.todense()) - - # np.testing.assert_array_equal(desired.indptr, actual.indptr) - # np.testing.assert_array_equal(desired.indices, actual.indices) - # np.testing.assert_array_equal(desired.data, actual.data) + np.testing.assert_array_equal(expected.indptr, actual.indptr) + np.testing.assert_array_equal(expected.indices, actual.indices) + np.testing.assert_array_equal(expected.data, actual.data) def generate_sampler(dtype: np.dtype, rng: np.random.Generator) -> typing.Callable[[tuple[int, ...]], np.ndarray]: @@ -87,9 +81,9 @@ def get_exampe_csf_arrays(dtype: np.dtype) -> tuple: @parametrize_dtypes @pytest.mark.parametrize("shape", [(100,), (10, 200), (5, 10, 20)]) def test_dense_format(dtype, shape): - data = np.arange(math.prod(shape), dtype=dtype) + data = np.arange(math.prod(shape), dtype=dtype).reshape(shape) tensor = sparse.asarray(data) - actual = tensor.to_scipy_sparse() + actual = sparse.to_numpy(tensor) np.testing.assert_equal(actual, data) @@ -110,19 +104,19 @@ def test_2d_constructors(rng, dtype): coo_tensor = sparse.asarray(coo) dense_2_tensor = sparse.asarray(np.arange(100, dtype=dtype).reshape((25, 4)) + 10) - csr_retured = csr_tensor.to_scipy_sparse() + csr_retured = sparse.to_scipy(csr_tensor) assert_csx_equal(csr_retured, csr) - csc_retured = csc_tensor.to_scipy_sparse() + csc_retured = sparse.to_scipy(csc_tensor) assert_csx_equal(csc_retured, csc) - dense_returned = dense_tensor.to_scipy_sparse() + 
dense_returned = sparse.to_numpy(dense_tensor) np.testing.assert_equal(dense_returned, dense) - coo_returned = coo_tensor.to_scipy_sparse() + coo_returned = sparse.to_scipy(coo_tensor) np.testing.assert_equal(coo_returned.todense(), coo.todense()) - dense_2_returned = dense_2_tensor.to_scipy_sparse() + dense_2_returned = sparse.to_numpy(dense_2_tensor) np.testing.assert_equal(dense_2_returned, np.arange(100, dtype=dtype).reshape((25, 4)) + 10) @@ -145,23 +139,23 @@ def test_add(rng, dtype): dense_tensor = sparse.asarray(dense) coo_tensor = sparse.asarray(coo) - actual = sparse.add(csr_tensor, csr_2_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csr_tensor, csr_2_tensor)) expected = csr + csr_2 assert_csx_equal(expected, actual) - actual = sparse.add(csc_tensor, csc_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csc_tensor, csc_tensor)) expected = csc + csc assert_csx_equal(expected, actual) - actual = sparse.add(csc_tensor, csr_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csc_tensor, csr_tensor)) expected = csc + csr assert_csx_equal(expected, actual) - actual = sparse.add(csr_tensor, dense_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csr_tensor, dense_tensor)) expected = sps.csr_matrix(csr + dense) assert_csx_equal(expected, actual) - actual = sparse.add(dense_tensor, csr_tensor).to_scipy_sparse() + actual = sparse.to_numpy(sparse.add(dense_tensor, csr_tensor)) expected = csr + dense assert isinstance(actual, np.ndarray) np.testing.assert_array_equal(actual, expected) @@ -172,9 +166,9 @@ def test_add(rng, dtype): # assert isinstance(actual, np.ndarray) # np.testing.assert_array_equal(actual, expected) - actual = sparse.add(csr_2_tensor, coo_tensor).to_scipy_sparse() + actual = sparse.to_scipy(sparse.add(csr_2_tensor, coo_tensor)) expected = csr_2 + coo - np.testing.assert_array_equal(actual.todense(), expected.todense()) + assert_csx_equal(expected, actual) # NOTE: https://discourse.llvm.org/t/passmanager-fails-on-simple-coo-addition-example/81247 # actual = sparse.add(d_tensor, d_tensor).to_scipy_sparse() @@ -246,127 +240,3 @@ def test_sparse_vector_format(dtype): dense_tensor = sparse.asarray(dense) res_tensor = sparse.add(dense_tensor, sv_tensor).to_scipy_sparse() np.testing.assert_array_equal(res_tensor, dense * 2) - - -@parametrize_dtypes -def test_reshape(rng, dtype): - DENSITY = 0.5 - sampler = generate_sampler(dtype, rng) - - # CSR, CSC, COO - for shape, new_shape in [ - ((100, 50), (25, 200)), - ((100, 50), (10, 500, 1)), - ((80, 1), (8, 10)), - ((80, 1), (80,)), - ]: - for format in ["csr", "csc", "coo"]: - if format == "coo": - # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135 - continue - if format == "csc": - # NOTE: Blocked by https://github.com/llvm/llvm-project/issues/109641 - continue - - arr = sps.random_array( - shape, density=DENSITY, format=format, dtype=dtype, random_state=rng, data_sampler=sampler - ) - arr.sum_duplicates() - tensor = sparse.asarray(arr) - - actual = sparse.reshape(tensor, shape=new_shape).to_scipy_sparse() - if isinstance(actual, sparse.PackedArgumentTuple): - continue # skip checking CSF output - if not isinstance(actual, np.ndarray): - actual = actual.todense() - expected = arr.todense().reshape(new_shape) - - np.testing.assert_array_equal(actual, expected) - - # CSF - csf_shape = (2, 2, 4) - for shape, new_shape, expected_arrs in [ - ( - csf_shape, - (4, 4, 1), - [ - np.array([0, 0, 3, 5, 7]), - np.array([0, 1, 3, 0, 3, 0, 1]), - np.array([0, 1, 2, 3, 4, 5, 
6, 7]), - np.array([0, 0, 0, 0, 0, 0, 0]), - np.array([1, 2, 3, 4, 5, 6, 7]), - ], - ), - ( - csf_shape, - (2, 1, 8), - [ - np.array([0, 1, 2]), - np.array([0, 0]), - np.array([0, 3, 7]), - np.array([4, 5, 7, 0, 3, 4, 5]), - np.array([1, 2, 3, 4, 5, 6, 7]), - ], - ), - ]: - csf = get_exampe_csf_arrays(dtype) - csf_tensor = sparse.asarray(csf, shape=shape, dtype=sparse.asdtype(dtype), format="csf") - - result = sparse.reshape(csf_tensor, shape=new_shape).to_scipy_sparse() - - for actual, expected in zip(result, expected_arrs, strict=False): - np.testing.assert_array_equal(actual, expected) - - # DENSE - # NOTE: dense reshape is probably broken in MLIR in 19.x branch - # dense = np.arange(math.prod(SHAPE), dtype=dtype).reshape(SHAPE) - - -@parametrize_dtypes -def test_broadcast_to(dtype): - # CSR, CSC, COO - for shape, new_shape, dimensions, input_arr, expected_arrs in [ - ( - (3, 4), - (2, 3, 4), - [0], - np.array([[0, 1, 0, 3], [0, 0, 4, 5], [6, 7, 0, 0]]), - [ - np.array([0, 3, 6]), - np.array([0, 1, 2, 0, 1, 2]), - np.array([0, 2, 4, 6, 8, 10, 12]), - np.array([1, 3, 2, 3, 0, 1, 1, 3, 2, 3, 0, 1]), - np.array([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0, 3.0, 4.0, 5.0, 6.0, 7.0]), - ], - ), - ( - (4, 2), - (4, 2, 2), - [1], - np.array([[0, 1], [0, 0], [2, 3], [4, 0]]), - [ - np.array([0, 2, 2, 4, 6]), - np.array([0, 1, 0, 1, 0, 1]), - np.array([0, 1, 2, 4, 6, 7, 8]), - np.array([1, 1, 0, 1, 0, 1, 0, 0]), - np.array([1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 4.0, 4.0]), - ], - ), - ]: - for fn_format in [sps.csr_array, sps.csc_array, sps.coo_array]: - arr = fn_format(input_arr, shape=shape, dtype=dtype) - arr.sum_duplicates() - tensor = sparse.asarray(arr) - result = sparse.broadcast_to(tensor, new_shape, dimensions=dimensions).to_scipy_sparse() - - for actual, expected in zip(result, expected_arrs, strict=False): - np.testing.assert_allclose(actual, expected) - - # DENSE - np_arr = np.array([0, 0, 2, 3, 0, 1]) - arr = np.asarray(np_arr, dtype=dtype) - tensor = sparse.asarray(arr) - result = sparse.broadcast_to(tensor, (3, 6), dimensions=[0]).to_scipy_sparse() - - assert result.format == "csr" - np.testing.assert_allclose(result.todense(), np.repeat(np_arr[np.newaxis], 3, axis=0)) From 93f8ef7e5dd07793757440820d8774bee5200247 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 08:01:04 +0200 Subject: [PATCH 03/15] Make certain methods private. 
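
The ctypes/MLIR plumbing on `Array`, `StorageFormat`, and the `DType`
hierarchy now carries a leading underscore, leaving only array metadata and
the conversion helpers public. A sketch of the intended split (method names
as renamed below; example values are illustrative):

    import numpy as np
    import sparse

    arr = sparse.asarray(np.eye(3))
    # Public surface: metadata and conversions.
    print(arr.shape, arr.ndim, arr.dtype)
    np.testing.assert_array_equal(sparse.to_numpy(arr), np.eye(3))

    # Private plumbing after this patch: Array._get_mlir_type(),
    # StorageFormat._get_mlir_type()/_get_ctypes_type(), and
    # DType._get_mlir_type().
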
--- sparse/mlir_backend/__init__.py | 18 ++++-------------- sparse/mlir_backend/_array.py | 4 ++-- sparse/mlir_backend/_conversions.py | 12 ++++++------ sparse/mlir_backend/_dtypes.py | 14 +++++++------- sparse/mlir_backend/_levels.py | 17 ++++++++++++----- sparse/mlir_backend/_ops.py | 6 +++--- 6 files changed, 34 insertions(+), 37 deletions(-) diff --git a/sparse/mlir_backend/__init__.py b/sparse/mlir_backend/__init__.py index de19598a..ca7ce6ba 100644 --- a/sparse/mlir_backend/__init__.py +++ b/sparse/mlir_backend/__init__.py @@ -9,18 +9,8 @@ from ._common import PackedArgumentTuple from ._conversions import asarray, to_numpy, to_scipy -from ._dtypes import ( - asdtype, -) -from ._ops import ( - add, -) +from ._dtypes import asdtype +from ._levels import StorageFormat +from ._ops import add -__all__ = [ - "add", - "asarray", - "asdtype", - "PackedArgumentTuple", - "to_numpy", - "to_scipy", -] +__all__ = ["add", "asarray", "asdtype", "to_numpy", "to_scipy", "PackedArgumentTuple", "StorageFormat"] diff --git a/sparse/mlir_backend/_array.py b/sparse/mlir_backend/_array.py index 0a7659b4..a20a2f55 100644 --- a/sparse/mlir_backend/_array.py +++ b/sparse/mlir_backend/_array.py @@ -29,7 +29,7 @@ def _get_storage_format(self) -> StorageFormat: return self._storage.get_storage_format() def _get_mlir_type(self): - return self._get_storage_format().get_mlir_type(shape=self.shape) + return self._get_storage_format()._get_mlir_type(shape=self.shape) def _to_module_arg(self): return self._storage.to_module_arg() @@ -40,6 +40,6 @@ def copy(self): fields = self._storage.get__fields_() arrs = [ranked_memref_to_numpy(f).copy() for f in fields] memrefs = [numpy_to_ranked_memref(arr) for arr in arrs] - arr = Array(storage=storage_format.get_ctypes_type()(*memrefs), shape=self.shape) + arr = Array(storage=storage_format._get_ctypes_type()(*memrefs), shape=self.shape) for carr in arrs: _hold_ref(arr, carr) diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py index 9337327d..8cf0a63c 100644 --- a/sparse/mlir_backend/_conversions.py +++ b/sparse/mlir_backend/_conversions.py @@ -38,7 +38,7 @@ def _from_numpy(arr: np.ndarray, copy: bool | None = None) -> Array: dtype=arr.dtype, owns_memory=False, ) - storage = dense_format.get_ctypes_type()(numpy_to_ranked_memref(arr)) + storage = dense_format._get_ctypes_type()(numpy_to_ranked_memref(arr)) _hold_ref(storage, arr) return Array(storage=storage, shape=shape) @@ -98,7 +98,7 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: indices = numpy_to_ranked_memref(indices_np) data = numpy_to_ranked_memref(data_np) - storage = csr_format.get_ctypes_type()(indptr, indices, data) + storage = csr_format._get_ctypes_type()(indptr, indices, data) _hold_ref(storage, indptr_np) _hold_ref(storage, indices_np) _hold_ref(storage, data_np) @@ -112,13 +112,13 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: crd_width = coords_np.dtype.itemsize * 8 data_np = arr.data.copy() - level_props = LevelProperties.NonUnique + level_props = LevelProperties(0) if not arr.has_canonical_format: - level_props |= LevelProperties.NonOrdered + level_props |= LevelProperties.NonOrdered | LevelProperties.NonUnique coo_format = get_storage_format( levels=( - Level(LevelFormat.Compressed, level_props), + Level(LevelFormat.Compressed, level_props | LevelProperties.NonUnique), Level(LevelFormat.Singleton, level_props), ), order=(0, 1), @@ -132,7 +132,7 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) 
-> Array: crd = numpy_to_ranked_memref(coords_np) data = numpy_to_ranked_memref(data_np) - storage = coo_format.get_ctypes_type()(pos, crd, data) + storage = coo_format._get_ctypes_type()(pos, crd, data) _hold_ref(storage, pos_np) _hold_ref(storage, coords_np) _hold_ref(storage, data_np) diff --git a/sparse/mlir_backend/_dtypes.py b/sparse/mlir_backend/_dtypes.py index a854e609..30af4475 100644 --- a/sparse/mlir_backend/_dtypes.py +++ b/sparse/mlir_backend/_dtypes.py @@ -12,7 +12,7 @@ class MlirType(abc.ABC): @classmethod @abc.abstractmethod - def get_mlir_type(cls) -> ir.Type: ... + def _get_mlir_type(cls) -> ir.Type: ... def _get_pointer_width() -> int: @@ -30,7 +30,7 @@ class SignedBW(SignedIntegerDType): bit_width = bw @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.IntegerType.get_signless(cls.bit_width) SignedBW.__name__ = f"Int{bw}" @@ -41,7 +41,7 @@ class UnsignedBW(UnsignedIntegerDType): bit_width = bw @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.IntegerType.get_signless(cls.bit_width) UnsignedBW.__name__ = f"UInt{bw}" @@ -68,7 +68,7 @@ class Float64(FloatingDType): bit_width = 64 @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.F64Type.get() @@ -77,7 +77,7 @@ class Float32(FloatingDType): bit_width = 32 @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.F32Type.get() @@ -86,7 +86,7 @@ class Float16(FloatingDType): bit_width = 16 @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.F16Type.get() @@ -106,7 +106,7 @@ class Index(DType): np_dtype = np.intp @classmethod - def get_mlir_type(cls): + def _get_mlir_type(cls): return ir.IndexType.get() diff --git a/sparse/mlir_backend/_levels.py b/sparse/mlir_backend/_levels.py index 8cb14b9d..129fdb30 100644 --- a/sparse/mlir_backend/_levels.py +++ b/sparse/mlir_backend/_levels.py @@ -80,7 +80,7 @@ def __post_init__(self): raise ValueError(f"`sorted(self.order) != list(range(self.rank))`, `{self.order=}`, `{self.rank=}`.") @fn_cache - def get_mlir_type(self, *, shape: tuple[int, ...]) -> ir.RankedTensorType: + def _get_mlir_type(self, *, shape: tuple[int, ...]) -> ir.RankedTensorType: if len(shape) != self.rank: raise ValueError(f"`len(shape) != self.rank`, {shape=}, {self.rank=}") with ir.Location.unknown(ctx): @@ -90,7 +90,7 @@ def get_mlir_type(self, *, shape: tuple[int, ...]) -> ir.RankedTensorType: for i, r in enumerate(mlir_order): mlir_reverse_order[r] = i - dtype = self.dtype.get_mlir_type() + dtype = self.dtype._get_mlir_type() encoding = sparse_tensor.EncodingAttr.get( mlir_levels, ir.AffineMap.get_permutation(mlir_order), @@ -101,7 +101,7 @@ def get_mlir_type(self, *, shape: tuple[int, ...]) -> ir.RankedTensorType: return ir.RankedTensorType.get(list(shape), dtype, encoding) @fn_cache - def get_ctypes_type(self): + def _get_ctypes_type(self): ptr_dtype = asdtype(getattr(np, f"int{self.pos_width}")) idx_dtype = asdtype(getattr(np, f"int{self.crd_width}")) @@ -125,8 +125,8 @@ def get_fields(): class Storage(ctypes.Structure): _fields_ = get_fields() - def get_mlir_type(self, *, shape: tuple[int, ...]): - return self.get_storage_format().get_mlir_type(shape=shape) + def _get_mlir_type(self, *, shape: tuple[int, ...]): + return self.get_storage_format()._get_mlir_type(shape=shape) def to_module_arg(self) -> list: return [ctypes.pointer(ctypes.pointer(f)) for f in self.get__fields_()] @@ -155,6 +155,13 @@ def __del__(self) -> None: return Storage + def from_constituent_arrays(self, arrs: 
list[np.ndarray], shape: tuple[int, ...]): + from ._array import Array + + storage = self._get_ctypes_type().from_constituent_arrays(arrs) + + return Array(storage=storage, shape=shape) + def get_storage_format( *, diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 8869333e..37a13c5c 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -24,7 +24,7 @@ def get_add_module( module = ir.Module.create() # TODO: add support for complex dialect/dtypes arith_op = arith.AddFOp if issubclass(dtype, FloatingDType) else arith.AddIOp - dtype = dtype.get_mlir_type() + dtype = dtype._get_mlir_type() ordering = ir.AffineMap.get_permutation(range(rank)) with ir.InsertionPoint(module.body): @@ -122,8 +122,8 @@ def broadcast_to(in_tensor): def add(x1: Array, x2: Array) -> Array: ret_storage_format = dataclasses.replace(x1._get_storage_format(), owns_memory=True) - ret_storage = ret_storage_format.get_ctypes_type()() - out_tensor_type = ret_storage_format.get_mlir_type(shape=x1.shape) + ret_storage = ret_storage_format._get_ctypes_type()() + out_tensor_type = ret_storage_format._get_mlir_type(shape=x1.shape) # TODO: Decide what will be the output tensor_type add_module = get_add_module( From e9fcedb144f7569b3da6e90935ef6e8463dd48d8 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 10:02:20 +0200 Subject: [PATCH 04/15] Adjust public API, tests, and format definition language. --- pixi.toml | 13 ++- sparse/mlir_backend/__init__.py | 9 +- sparse/mlir_backend/_array.py | 9 +- sparse/mlir_backend/_common.py | 15 ---- sparse/mlir_backend/_conversions.py | 9 +- sparse/mlir_backend/{_levels.py => levels.py} | 17 ++-- sparse/mlir_backend/tests/test_simple.py | 88 +++++++++++++------ 7 files changed, 98 insertions(+), 62 deletions(-) rename sparse/mlir_backend/{_levels.py => levels.py} (92%) diff --git a/pixi.toml b/pixi.toml index d54050dd..01360600 100644 --- a/pixi.toml +++ b/pixi.toml @@ -27,8 +27,8 @@ mkdocs-jupyter = "*" [feature.tests.tasks] test = "pytest --pyargs sparse -n auto" -test-mlir = { cmd = "pytest --pyargs sparse/mlir_backend -v", env = { SPARSE_BACKEND = "MLIR" } } -test-finch = { cmd = "pytest --pyargs sparse/tests -n auto -v", env = { SPARSE_BACKEND = "Finch", PYTHONFAULTHANDLER = "${HOME}/faulthandler.log" }, depends-on = ["precompile"] } +test-mlir = { cmd = "pytest --pyargs sparse/mlir_backend -v" } +test-finch = { cmd = "pytest --pyargs sparse/tests -n auto -v", depends-on = ["precompile"] } [feature.tests.dependencies] pytest = ">=3.5" @@ -51,10 +51,19 @@ precompile = "python -c 'import finch'" scipy = ">=0.19" finch-tensor = ">=0.1.31" +[feature.finch.activation.env] +SPARSE_BACKEND = "Finch" + +[feature.finch.target.osx-arm64.activation.env] +PYTHONFAULTHANDLER = "${HOME}/faulthandler.log" + [feature.mlir.dependencies] scipy = ">=0.19" mlir-python-bindings = "19.*" +[feature.mlir.activation.env] +SPARSE_BACKEND = "MLIR" + [environments] tests = ["tests", "extras"] docs = ["docs", "extras"] diff --git a/sparse/mlir_backend/__init__.py b/sparse/mlir_backend/__init__.py index ca7ce6ba..a81d1d04 100644 --- a/sparse/mlir_backend/__init__.py +++ b/sparse/mlir_backend/__init__.py @@ -1,5 +1,7 @@ try: import mlir # noqa: F401 + + del mlir except ModuleNotFoundError as e: raise ImportError( "MLIR Python bindings not installed. Run " @@ -7,10 +9,9 @@ "to enable MLIR backend." 
) from e -from ._common import PackedArgumentTuple -from ._conversions import asarray, to_numpy, to_scipy +from . import levels +from ._conversions import asarray, from_constituent_arrays, to_numpy, to_scipy from ._dtypes import asdtype -from ._levels import StorageFormat from ._ops import add -__all__ = ["add", "asarray", "asdtype", "to_numpy", "to_scipy", "PackedArgumentTuple", "StorageFormat"] +__all__ = ["add", "asarray", "asdtype", "to_numpy", "to_scipy", "levels", "from_constituent_arrays"] diff --git a/sparse/mlir_backend/_array.py b/sparse/mlir_backend/_array.py index a20a2f55..dde81dcb 100644 --- a/sparse/mlir_backend/_array.py +++ b/sparse/mlir_backend/_array.py @@ -1,7 +1,9 @@ import dataclasses +import numpy as np + from ._common import _hold_ref, numpy_to_ranked_memref, ranked_memref_to_numpy -from ._levels import StorageFormat +from .levels import StorageFormat class Array: @@ -43,3 +45,8 @@ def copy(self): arr = Array(storage=storage_format._get_ctypes_type()(*memrefs), shape=self.shape) for carr in arrs: _hold_ref(arr, carr) + + return arr + + def get_constituent_arrays(self) -> tuple[np.ndarray, ...]: + return self._storage.get_constituent_arrays() diff --git a/sparse/mlir_backend/_common.py b/sparse/mlir_backend/_common.py index df3bd4cd..d4147e08 100644 --- a/sparse/mlir_backend/_common.py +++ b/sparse/mlir_backend/_common.py @@ -1,7 +1,6 @@ import ctypes import functools import weakref -from dataclasses import dataclass import mlir.runtime as rt @@ -45,20 +44,6 @@ def free_memref(obj: ctypes.Structure) -> None: libc.free(ctypes.cast(obj.allocated, ctypes.c_void_p)) -@dataclass -class PackedArgumentTuple: - contents: tuple - - def __getitem__(self, index): - return self.contents[index] - - def __iter__(self): - yield from self.contents - - def __len__(self): - return len(self.contents) - - def _hold_self_ref_in_ret(fn): @functools.wraps(fn) def wrapped(self, *a, **kw): diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py index 8cf0a63c..fdae7815 100644 --- a/sparse/mlir_backend/_conversions.py +++ b/sparse/mlir_backend/_conversions.py @@ -1,10 +1,11 @@ +import dataclasses import functools import numpy as np from ._array import Array from ._common import _hold_ref, numpy_to_ranked_memref, ranked_memref_to_numpy -from ._levels import Level, LevelFormat, LevelProperties, StorageFormat, get_storage_format +from .levels import Level, LevelFormat, LevelProperties, StorageFormat, get_storage_format try: import scipy.sparse as sps @@ -178,3 +179,9 @@ def asarray(arr, copy: bool | None = None) -> Array: return arr return _from_numpy(np.asarray(arr, copy=copy), copy=None) + + +def from_constituent_arrays(*, format: StorageFormat, arrays: tuple[np.ndarray, ...], shape: tuple[int, ...]) -> Array: + storage_format: StorageFormat = dataclasses.replace(format, owns_memory=False) + storage = storage_format._get_ctypes_type().from_constituent_arrays(arrays) + return Array(storage=storage, shape=shape) diff --git a/sparse/mlir_backend/_levels.py b/sparse/mlir_backend/levels.py similarity index 92% rename from sparse/mlir_backend/_levels.py rename to sparse/mlir_backend/levels.py index 129fdb30..04e14cd5 100644 --- a/sparse/mlir_backend/_levels.py +++ b/sparse/mlir_backend/levels.py @@ -11,7 +11,6 @@ import numpy as np from ._common import ( - PackedArgumentTuple, _hold_ref, fn_cache, free_memref, @@ -24,6 +23,8 @@ _CAMEL_TO_SNAKE = [re.compile("(.)([A-Z][a-z]+)"), re.compile("([a-z0-9])([A-Z])")] +__all__ = ["LevelProperties", "LevelFormat", 
"StorageFormat", "Level", "get_storage_format"] + def _camel_to_snake(name: str) -> str: for exp in _CAMEL_TO_SNAKE: @@ -134,8 +135,11 @@ def to_module_arg(self) -> list: def get__fields_(self) -> list: return [getattr(self, field[0]) for field in self._fields_] - def to_constituent_arrays(self) -> PackedArgumentTuple: - return PackedArgumentTuple(tuple(ranked_memref_to_numpy(field) for field in self.get__fields_())) + def get_constituent_arrays(self) -> tuple[np.ndarray, ...]: + arrays = tuple(ranked_memref_to_numpy(field) for field in self.get__fields_()) + for arr in arrays: + _hold_ref(arr, self) + return arrays def get_storage_format(self) -> StorageFormat: return storage_format @@ -155,13 +159,6 @@ def __del__(self) -> None: return Storage - def from_constituent_arrays(self, arrs: list[np.ndarray], shape: tuple[int, ...]): - from ._array import Array - - storage = self._get_ctypes_type().from_constituent_arrays(arrs) - - return Array(storage=storage, shape=shape) - def get_storage_format( *, diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 605107c9..66869009 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -178,65 +178,95 @@ def test_add(rng, dtype): @parametrize_dtypes def test_csf_format(dtype): + format = sparse.levels.get_storage_format( + levels=( + sparse.levels.Level(sparse.levels.LevelFormat.Dense), + sparse.levels.Level(sparse.levels.LevelFormat.Compressed), + sparse.levels.Level(sparse.levels.LevelFormat.Compressed), + ), + order="C", + pos_width=64, + crd_width=64, + dtype=sparse.asdtype(dtype), + owns_memory=False, + ) + SHAPE = (2, 2, 4) pos_1, crd_1, pos_2, crd_2, data = get_exampe_csf_arrays(dtype) - csf = [pos_1, crd_1, pos_2, crd_2, data] + constituent_arrays = (pos_1, crd_1, pos_2, crd_2, data) - csf_tensor = sparse.asarray(csf, shape=SHAPE, dtype=sparse.asdtype(dtype), format="csf") - result = csf_tensor.to_scipy_sparse() - for actual, expected in zip(result, csf, strict=False): + csf_array = sparse.from_constituent_arrays(format=format, arrays=constituent_arrays, shape=SHAPE) + result_arrays = csf_array.get_constituent_arrays() + for actual, expected in zip(result_arrays, constituent_arrays, strict=True): np.testing.assert_array_equal(actual, expected) - res_tensor = sparse.add(csf_tensor, csf_tensor).to_scipy_sparse() - csf_2 = [pos_1, crd_1, pos_2, crd_2, data * 2] - for actual, expected in zip(res_tensor, csf_2, strict=False): + res_arrays = sparse.add(csf_array, csf_array).get_constituent_arrays() + expected_arrays = (pos_1, crd_1, pos_2, crd_2, data * 2) + for actual, expected in zip(res_arrays, expected_arrays, strict=True): np.testing.assert_array_equal(actual, expected) @parametrize_dtypes def test_coo_3d_format(dtype): + format = sparse.levels.get_storage_format( + levels=( + sparse.levels.Level(sparse.levels.LevelFormat.Compressed, sparse.levels.LevelProperties.NonUnique), + sparse.levels.Level(sparse.levels.LevelFormat.Singleton, sparse.levels.LevelProperties.NonUnique), + sparse.levels.Level(sparse.levels.LevelFormat.Singleton, sparse.levels.LevelProperties.NonUnique), + ), + order="C", + pos_width=64, + crd_width=64, + dtype=sparse.asdtype(dtype), + owns_memory=False, + ) + SHAPE = (2, 2, 4) pos = np.array([0, 7]) crd = np.array([[0, 1, 0, 0, 1, 1, 0], [1, 3, 1, 0, 0, 1, 0], [3, 1, 1, 0, 1, 1, 1]]) data = np.array([1, 2, 3, 4, 5, 6, 7], dtype=dtype) - coo = [pos, crd, data] + carrs = (pos, crd, data) - coo_tensor = sparse.asarray(coo, 
shape=SHAPE, dtype=sparse.asdtype(dtype), format="coo") - result = coo_tensor.to_scipy_sparse() - for actual, expected in zip(result, coo, strict=False): + coo_array = sparse.from_constituent_arrays(format=format, arrays=carrs, shape=SHAPE) + result = coo_array.get_constituent_arrays() + for actual, expected in zip(result, carrs, strict=True): np.testing.assert_array_equal(actual, expected) # NOTE: Blocked by https://github.com/llvm/llvm-project/pull/109135 - # res_tensor = sparse.add(coo_tensor, coo_tensor).to_scipy_sparse() - # coo_2 = [pos, crd, data * 2] - # for actual, expected in zip(res_tensor, coo_2, strict=False): + # res_arrays = sparse.add(coo_array, coo_array).get_constituent_arrays() + # res_expected = (pos, crd, data * 2) + # for actual, expected in zip(res_arrays, res_expected, strict=False): # np.testing.assert_array_equal(actual, expected) @parametrize_dtypes def test_sparse_vector_format(dtype): + format = sparse.levels.get_storage_format( + levels=(sparse.levels.Level(sparse.levels.LevelFormat.Compressed),), + order="C", + pos_width=64, + crd_width=64, + dtype=sparse.asdtype(dtype), + owns_memory=False, + ) + SHAPE = (10,) pos = np.array([0, 6]) crd = np.array([0, 1, 2, 6, 8, 9]) data = np.array([1, 2, 3, 4, 5, 6], dtype=dtype) - sparse_vector = [pos, crd, data] + carrs = (pos, crd, data) - sv_tensor = sparse.asarray( - sparse_vector, - shape=SHAPE, - dtype=sparse.asdtype(dtype), - format="sparse_vector", - ) - result = sv_tensor.to_scipy_sparse() - for actual, expected in zip(result, sparse_vector, strict=False): + sv_array = sparse.from_constituent_arrays(format=format, arrays=carrs, shape=SHAPE) + result = sv_array.get_constituent_arrays() + for actual, expected in zip(result, carrs, strict=True): np.testing.assert_array_equal(actual, expected) - res_tensor = sparse.add(sv_tensor, sv_tensor).to_scipy_sparse() - sparse_vector_2 = [pos, crd, data * 2] - for actual, expected in zip(res_tensor, sparse_vector_2, strict=False): + res_arrs = sparse.add(sv_array, sv_array).get_constituent_arrays() + sv2_expected = (pos, crd, data * 2) + for actual, expected in zip(res_arrs, sv2_expected, strict=True): np.testing.assert_array_equal(actual, expected) dense = np.array([1, 2, 3, 0, 0, 0, 4, 0, 5, 6], dtype=dtype) - dense_tensor = sparse.asarray(dense) - res_tensor = sparse.add(dense_tensor, sv_tensor).to_scipy_sparse() - np.testing.assert_array_equal(res_tensor, dense * 2) + dense_array = sparse.asarray(dense) + res = sparse.to_numpy(sparse.add(dense_array, sv_array)) + np.testing.assert_array_equal(res, dense * 2) From 0027e00a5b6c27bbacd188dde84aa4c8e02134b9 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 10:17:05 +0200 Subject: [PATCH 05/15] Try to fix `numpy<2`. 
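
NumPy 1.x rejects the `copy=` keyword that `np.asarray` only grew in
NumPy 2, so flatten first and copy manually when a copy was requested.
A minimal illustration of the incompatibility being worked around
(illustrative only; the `flat` name is not part of the codebase):

    import numpy as np

    arr = np.arange(4).reshape(2, 2)

    # Works on NumPy >= 2; raises TypeError on NumPy 1.x, where
    # `np.asarray` has no `copy` parameter.
    flat = np.asarray(arr, order="C", copy=None).flatten()
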
--- sparse/mlir_backend/_conversions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py index fdae7815..5b838edd 100644 --- a/sparse/mlir_backend/_conversions.py +++ b/sparse/mlir_backend/_conversions.py @@ -29,7 +29,9 @@ def wrapped(*args, **kwargs): def _from_numpy(arr: np.ndarray, copy: bool | None = None) -> Array: shape = arr.shape - arr = np.asarray(arr, order="C", copy=copy).flatten() + arr_flat = np.asarray(arr, order="C").flatten() + if copy and arr_flat.base is arr: + arr_flat = arr_flat.copy() levels = (Level(LevelFormat.Dense),) * len(shape) dense_format = get_storage_format( levels=levels, @@ -39,8 +41,8 @@ def _from_numpy(arr: np.ndarray, copy: bool | None = None) -> Array: dtype=arr.dtype, owns_memory=False, ) - storage = dense_format._get_ctypes_type()(numpy_to_ranked_memref(arr)) - _hold_ref(storage, arr) + storage = dense_format._get_ctypes_type()(numpy_to_ranked_memref(arr_flat)) + _hold_ref(storage, arr_flat) return Array(storage=storage, shape=shape) From 09316d5d935f304c6de81ab8cf79413d34b62b21 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 10:38:01 +0200 Subject: [PATCH 06/15] Simplify `_from_scipy` function. --- sparse/mlir_backend/_conversions.py | 50 +++++++++++------------------ 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py index 5b838edd..dab1e130 100644 --- a/sparse/mlir_backend/_conversions.py +++ b/sparse/mlir_backend/_conversions.py @@ -71,7 +71,7 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: case "csr" | "csc": pos_width = arr.indptr.dtype.itemsize * 8 crd_width = arr.indices.dtype.itemsize * 8 - csr_format = get_storage_format( + csx_format = get_storage_format( levels=( Level(LevelFormat.Dense), Level( @@ -81,39 +81,33 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: else LevelProperties.NonUnique | LevelProperties.NonOrdered, ), ), - order=(0, 1) if "csr" in type(arr).__name__ else (1, 0), + order=(0, 1) if arr.format == "csr" else (1, 0), pos_width=pos_width, crd_width=crd_width, dtype=arr.dtype, owns_memory=False, ) - indptr_np = arr.indptr - indices_np = arr.indices - data_np = arr.data + indptr = arr.indptr + indices = arr.indices + data = arr.data if copy: - indptr_np = indptr_np.copy() - indices_np = indices_np.copy() - data_np = data_np.copy() - - indptr = numpy_to_ranked_memref(indptr_np) - indices = numpy_to_ranked_memref(indices_np) - data = numpy_to_ranked_memref(data_np) - - storage = csr_format._get_ctypes_type()(indptr, indices, data) - _hold_ref(storage, indptr_np) - _hold_ref(storage, indices_np) - _hold_ref(storage, data_np) - return Array(storage=storage, shape=arr.shape) + indptr = indptr.copy() + indices = indices.copy() + data = data.copy() + + return from_constituent_arrays(format=csx_format, arrays=(indptr, indices, data), shape=arr.shape) case "coo": if copy is not None and not copy: raise RuntimeError(f"`scipy.sparse.{type(arr.__name__)}` cannot be zero-copy converted.") - coords_np = np.stack([arr.row, arr.col], axis=1) - pos_np = np.array([0, arr.nnz], dtype=np.int64) - pos_width = pos_np.dtype.itemsize * 8 - crd_width = coords_np.dtype.itemsize * 8 - data_np = arr.data.copy() + coords = np.stack([arr.row, arr.col], axis=1) + pos = np.array([0, arr.nnz], dtype=np.int64) + pos_width = pos.dtype.itemsize * 8 + 
crd_width = coords.dtype.itemsize * 8 + data = arr.data + if copy: + data = arr.data.copy() level_props = LevelProperties(0) if not arr.has_canonical_format: @@ -131,15 +125,7 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: owns_memory=False, ) - pos = numpy_to_ranked_memref(pos_np) - crd = numpy_to_ranked_memref(coords_np) - data = numpy_to_ranked_memref(data_np) - - storage = coo_format._get_ctypes_type()(pos, crd, data) - _hold_ref(storage, pos_np) - _hold_ref(storage, coords_np) - _hold_ref(storage, data_np) - return Array(storage=storage, shape=arr.shape) + return from_constituent_arrays(format=coo_format, arrays=(pos, coords, data), shape=arr.shape) case _: raise NotImplementedError(f"No conversion implemented for `scipy.sparse.{type(arr.__name__)}`.") From 62b71c846b6c205460d2b0405a371a79f05106bf Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 10:59:02 +0200 Subject: [PATCH 07/15] Simplify `Array.copy`. --- sparse/mlir_backend/_array.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/sparse/mlir_backend/_array.py b/sparse/mlir_backend/_array.py index dde81dcb..86323243 100644 --- a/sparse/mlir_backend/_array.py +++ b/sparse/mlir_backend/_array.py @@ -1,8 +1,5 @@ -import dataclasses - import numpy as np -from ._common import _hold_ref, numpy_to_ranked_memref, ranked_memref_to_numpy from .levels import StorageFormat @@ -37,16 +34,10 @@ def _to_module_arg(self): return self._storage.to_module_arg() def copy(self): - storage_format: StorageFormat = dataclasses.replace(self._get_storage_format(), owns_memory=False) - - fields = self._storage.get__fields_() - arrs = [ranked_memref_to_numpy(f).copy() for f in fields] - memrefs = [numpy_to_ranked_memref(arr) for arr in arrs] - arr = Array(storage=storage_format._get_ctypes_type()(*memrefs), shape=self.shape) - for carr in arrs: - _hold_ref(arr, carr) + from ._conversions import from_constituent_arrays - return arr + arrs = tuple(arr.copy() for arr in self.get_constituent_arrays()) + return from_constituent_arrays(format=self._get_storage_format(), arrays=arrs, shape=self.shape) def get_constituent_arrays(self) -> tuple[np.ndarray, ...]: return self._storage.get_constituent_arrays() From 261c7d89000b219a2d460a9c2dce38181b4c9bbe Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:06:23 +0200 Subject: [PATCH 08/15] Test copy. 
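
Pin down the copy semantics end-to-end: an array created with
`copy=True`, and a `.copy()` taken of a zero-copy view, must both be
unaffected by later mutation of the original buffer. A condensed sketch
of the contract under test (assumes the MLIR backend is selected via
`SPARSE_BACKEND=MLIR`; variable names here are illustrative):

    import numpy as np
    import sparse

    base = np.arange(25).reshape((5, 5))
    snapshot = base.copy()

    eager = sparse.asarray(base, copy=True)
    detached = sparse.asarray(base, copy=False).copy()
    base[2, 2] = 42  # mutate the shared buffer

    # Neither array may observe the mutation.
    np.testing.assert_array_equal(sparse.to_numpy(eager), snapshot)
    np.testing.assert_array_equal(sparse.to_numpy(detached), snapshot)
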
--- sparse/mlir_backend/_common.py | 10 ---------- sparse/mlir_backend/tests/test_simple.py | 12 ++++++++++++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/sparse/mlir_backend/_common.py b/sparse/mlir_backend/_common.py index d4147e08..eeeef8f8 100644 --- a/sparse/mlir_backend/_common.py +++ b/sparse/mlir_backend/_common.py @@ -44,16 +44,6 @@ def free_memref(obj: ctypes.Structure) -> None: libc.free(ctypes.cast(obj.allocated, ctypes.c_void_p)) -def _hold_self_ref_in_ret(fn): - @functools.wraps(fn) - def wrapped(self, *a, **kw): - ret = fn(self, *a, **kw) - _hold_ref(ret, self) - return ret - - return wrapped - - def _hold_ref(owner, obj): ptr = ctypes.py_object(obj) ctypes.pythonapi.Py_IncRef(ptr) diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 66869009..b300a709 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -270,3 +270,15 @@ def test_sparse_vector_format(dtype): dense_array = sparse.asarray(dense) res = sparse.to_numpy(sparse.add(dense_array, sv_array)) np.testing.assert_array_equal(res, dense * 2) + + +def test_copy(): + arr_np_orig = np.arange(25).reshape((5, 5)) + arr_np_copy = arr_np_orig.copy() + + arr_sp1 = sparse.asarray(arr_np_copy, copy=True) + arr_sp2 = sparse.asarray(arr_np_copy, copy=False).copy() + arr_np_copy[2, 2] = 42 + + np.testing.assert_array_equal(sparse.to_numpy(arr_sp1), arr_np_orig) + np.testing.assert_array_equal(sparse.to_numpy(arr_sp2), arr_np_orig) From 0d48a5ebbc07e2088e6fd84e54bb3c57783b472b Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:08:49 +0200 Subject: [PATCH 09/15] Remove unsupported/untested code. --- sparse/mlir_backend/levels.py | 3 --- sparse/mlir_backend/tests/test_simple.py | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py index 04e14cd5..fa2cafc5 100644 --- a/sparse/mlir_backend/levels.py +++ b/sparse/mlir_backend/levels.py @@ -126,9 +126,6 @@ def get_fields(): class Storage(ctypes.Structure): _fields_ = get_fields() - def _get_mlir_type(self, *, shape: tuple[int, ...]): - return self.get_storage_format()._get_mlir_type(shape=shape) - def to_module_arg(self) -> list: return [ctypes.pointer(ctypes.pointer(f)) for f in self.get__fields_()] diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index b300a709..86ddd0f2 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -278,7 +278,9 @@ def test_copy(): arr_sp1 = sparse.asarray(arr_np_copy, copy=True) arr_sp2 = sparse.asarray(arr_np_copy, copy=False).copy() + arr_sp3 = sparse.asarray(arr_np_copy, copy=False) arr_np_copy[2, 2] = 42 np.testing.assert_array_equal(sparse.to_numpy(arr_sp1), arr_np_orig) np.testing.assert_array_equal(sparse.to_numpy(arr_sp2), arr_np_orig) + np.testing.assert_array_equal(sparse.to_numpy(arr_sp3), arr_np_copy) From 1f7279711ad8c91222829eb60ba41acf6497e6b9 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:19:33 +0200 Subject: [PATCH 10/15] Fix bug with `copy=False`. 
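
`np.ndarray.flatten` always returns a copy, so the old code never
aliased the caller's buffer: `arr_flat.base is arr` could not be true,
and `copy=False` silently degraded to a copy. `reshape(-1)` on a
C-contiguous array yields a view instead, and non-contiguous inputs are
now rejected rather than copied behind the caller's back. A small
demonstration of the difference:

    import numpy as np

    arr = np.arange(4)

    # `flatten` copies unconditionally; the result never aliases `arr`.
    assert np.asarray(arr, order="C").flatten().base is not arr

    # `reshape(-1)` on a C-contiguous array is a view onto `arr`.
    assert np.ascontiguousarray(arr).reshape(-1).base is arr
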
--- sparse/mlir_backend/_conversions.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py
index dab1e130..9e8dba83 100644
--- a/sparse/mlir_backend/_conversions.py
+++ b/sparse/mlir_backend/_conversions.py
@@ -4,7 +4,7 @@
 import numpy as np

 from ._array import Array
-from ._common import _hold_ref, numpy_to_ranked_memref, ranked_memref_to_numpy
+from ._common import _hold_ref, ranked_memref_to_numpy
 from .levels import Level, LevelFormat, LevelProperties, StorageFormat, get_storage_format

 try:
@@ -28,11 +28,12 @@ def wrapped(*args, **kwargs):


 def _from_numpy(arr: np.ndarray, copy: bool | None = None) -> Array:
-    shape = arr.shape
-    arr_flat = np.asarray(arr, order="C").flatten()
-    if copy and arr_flat.base is arr:
-        arr_flat = arr_flat.copy()
-    levels = (Level(LevelFormat.Dense),) * len(shape)
+    if copy is not None and not copy and not arr.flags["C_CONTIGUOUS"]:
+        raise NotImplementedError("Can only convert C-contiguous arrays at the moment.")
+    if copy:
+        arr = arr.copy(order="C")
+    arr_flat = np.ascontiguousarray(arr).reshape(-1)
+    levels = (Level(LevelFormat.Dense),) * arr.ndim
     dense_format = get_storage_format(
         levels=levels,
         order="C",
@@ -41,9 +42,7 @@ def _from_numpy(arr: np.ndarray, copy: bool | None = None) -> Array:
         dtype=arr.dtype,
         owns_memory=False,
     )
-    storage = dense_format._get_ctypes_type()(numpy_to_ranked_memref(arr_flat))
-    _hold_ref(storage, arr_flat)
-    return Array(storage=storage, shape=shape)
+    return from_constituent_arrays(format=dense_format, arrays=(arr_flat,), shape=arr.shape)


 def to_numpy(arr):

From 63a5817f41630fcc115d3b6b739cf39bc9b8dbce Mon Sep 17 00:00:00 2001
From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com>
Date: Tue, 22 Oct 2024 11:34:08 +0200
Subject: [PATCH 11/15] Refactor memory ownership out of the public API.
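
`owns_memory` is no longer a field of the user-facing `StorageFormat`;
it becomes a keyword of the cached `_get_ctypes_type` factory, so the
owning and non-owning ctypes storage classes are built and memoized
independently. A toy model of that caching pattern (hypothetical names,
not the actual classes):

    import functools

    @functools.lru_cache(maxsize=None)
    def get_storage_class(*, owns_memory: bool = False) -> type:
        # One class per ownership mode; only the owning variant would
        # define a __del__ that frees the MLIR-allocated buffers.
        return type(f"Storage_owns_{owns_memory}", (), {})

    assert get_storage_class(owns_memory=True) is get_storage_class(owns_memory=True)
    assert get_storage_class(owns_memory=True) is not get_storage_class()
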
--- sparse/mlir_backend/_conversions.py | 7 +------ sparse/mlir_backend/_ops.py | 5 ++--- sparse/mlir_backend/levels.py | 9 ++------- sparse/mlir_backend/tests/test_simple.py | 3 --- 4 files changed, 5 insertions(+), 19 deletions(-) diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py index 9e8dba83..8fe897eb 100644 --- a/sparse/mlir_backend/_conversions.py +++ b/sparse/mlir_backend/_conversions.py @@ -1,4 +1,3 @@ -import dataclasses import functools import numpy as np @@ -40,7 +39,6 @@ def _from_numpy(arr: np.ndarray, copy: bool | None = None) -> Array: pos_width=64, crd_width=64, dtype=arr.dtype, - owns_memory=False, ) return from_constituent_arrays(format=dense_format, arrays=(arr_flat,), shape=arr.shape) @@ -84,7 +82,6 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: pos_width=pos_width, crd_width=crd_width, dtype=arr.dtype, - owns_memory=False, ) indptr = arr.indptr @@ -121,7 +118,6 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: pos_width=pos_width, crd_width=crd_width, dtype=arr.dtype, - owns_memory=False, ) return from_constituent_arrays(format=coo_format, arrays=(pos, coords, data), shape=arr.shape) @@ -169,6 +165,5 @@ def asarray(arr, copy: bool | None = None) -> Array: def from_constituent_arrays(*, format: StorageFormat, arrays: tuple[np.ndarray, ...], shape: tuple[int, ...]) -> Array: - storage_format: StorageFormat = dataclasses.replace(format, owns_memory=False) - storage = storage_format._get_ctypes_type().from_constituent_arrays(arrays) + storage = format._get_ctypes_type().from_constituent_arrays(arrays) return Array(storage=storage, shape=shape) diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 37a13c5c..3fd195a8 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -1,5 +1,4 @@ import ctypes -import dataclasses import mlir.execution_engine import mlir.passmanager @@ -121,8 +120,8 @@ def broadcast_to(in_tensor): def add(x1: Array, x2: Array) -> Array: - ret_storage_format = dataclasses.replace(x1._get_storage_format(), owns_memory=True) - ret_storage = ret_storage_format._get_ctypes_type()() + ret_storage_format = x1._get_storage_format() + ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)() out_tensor_type = ret_storage_format._get_mlir_type(shape=x1.shape) # TODO: Decide what will be the output tensor_type diff --git a/sparse/mlir_backend/levels.py b/sparse/mlir_backend/levels.py index fa2cafc5..91f0e9c9 100644 --- a/sparse/mlir_backend/levels.py +++ b/sparse/mlir_backend/levels.py @@ -66,7 +66,6 @@ class StorageFormat: pos_width: int crd_width: int dtype: type[DType] - owns_memory: bool @property def storage_rank(self) -> int: @@ -102,7 +101,7 @@ def _get_mlir_type(self, *, shape: tuple[int, ...]) -> ir.RankedTensorType: return ir.RankedTensorType.get(list(shape), dtype, encoding) @fn_cache - def _get_ctypes_type(self): + def _get_ctypes_type(self, *, owns_memory=False): ptr_dtype = asdtype(getattr(np, f"int{self.pos_width}")) idx_dtype = asdtype(getattr(np, f"int{self.crd_width}")) @@ -148,7 +147,7 @@ def from_constituent_arrays(cls, arrs: list[np.ndarray]) -> "Storage": _hold_ref(storage, arr) return storage - if storage_format.owns_memory: + if owns_memory: def __del__(self) -> None: for field in self.get__fields_(): @@ -164,7 +163,6 @@ def get_storage_format( pos_width: int, crd_width: int, dtype: type[DType], - owns_memory: bool, ) -> StorageFormat: levels = tuple(levels) if isinstance(order, str): @@ 
-178,7 +176,6 @@ def get_storage_format( pos_width=int(pos_width), crd_width=int(crd_width), dtype=asdtype(dtype), - owns_memory=bool(owns_memory), ) @@ -190,7 +187,6 @@ def _get_storage_format( pos_width: int, crd_width: int, dtype: type[DType], - owns_memory: bool, ) -> StorageFormat: return StorageFormat( levels=levels, @@ -198,5 +194,4 @@ def _get_storage_format( pos_width=pos_width, crd_width=crd_width, dtype=dtype, - owns_memory=owns_memory, ) diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 86ddd0f2..0adf1680 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -188,7 +188,6 @@ def test_csf_format(dtype): pos_width=64, crd_width=64, dtype=sparse.asdtype(dtype), - owns_memory=False, ) SHAPE = (2, 2, 4) @@ -218,7 +217,6 @@ def test_coo_3d_format(dtype): pos_width=64, crd_width=64, dtype=sparse.asdtype(dtype), - owns_memory=False, ) SHAPE = (2, 2, 4) @@ -247,7 +245,6 @@ def test_sparse_vector_format(dtype): pos_width=64, crd_width=64, dtype=sparse.asdtype(dtype), - owns_memory=False, ) SHAPE = (10,) From a5b692e3bf6d552dc299d94b7d7d571fbf46895a Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:39:59 +0200 Subject: [PATCH 12/15] Rename `Array._get_storage_format`. --- sparse/mlir_backend/_array.py | 7 ++++--- sparse/mlir_backend/_ops.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sparse/mlir_backend/_array.py b/sparse/mlir_backend/_array.py index 86323243..ca1dbd79 100644 --- a/sparse/mlir_backend/_array.py +++ b/sparse/mlir_backend/_array.py @@ -24,11 +24,12 @@ def ndim(self) -> int: def dtype(self): return self._storage.get_storage_format().dtype - def _get_storage_format(self) -> StorageFormat: + @property + def format(self) -> StorageFormat: return self._storage.get_storage_format() def _get_mlir_type(self): - return self._get_storage_format()._get_mlir_type(shape=self.shape) + return self.format._get_mlir_type(shape=self.shape) def _to_module_arg(self): return self._storage.to_module_arg() @@ -37,7 +38,7 @@ def copy(self): from ._conversions import from_constituent_arrays arrs = tuple(arr.copy() for arr in self.get_constituent_arrays()) - return from_constituent_arrays(format=self._get_storage_format(), arrays=arrs, shape=self.shape) + return from_constituent_arrays(format=self.format, arrays=arrs, shape=self.shape) def get_constituent_arrays(self) -> tuple[np.ndarray, ...]: return self._storage.get_constituent_arrays() diff --git a/sparse/mlir_backend/_ops.py b/sparse/mlir_backend/_ops.py index 3fd195a8..2fe9b975 100644 --- a/sparse/mlir_backend/_ops.py +++ b/sparse/mlir_backend/_ops.py @@ -120,7 +120,7 @@ def broadcast_to(in_tensor): def add(x1: Array, x2: Array) -> Array: - ret_storage_format = x1._get_storage_format() + ret_storage_format = x1.format ret_storage = ret_storage_format._get_ctypes_type(owns_memory=True)() out_tensor_type = ret_storage_format._get_mlir_type(shape=x1.shape) From 4ad635bb194b43c52f7d841a3f1a890f69e10263 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 12:16:25 +0200 Subject: [PATCH 13/15] Simplify and type hint `to_numpy` and `to_scipy`. 
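
With `get_constituent_arrays()` available, the converters no longer
reach into raw memref fields; `to_scipy` simply unpacks the constituent
arrays in storage order. A sketch of the resulting round trip (assumes
`SPARSE_BACKEND=MLIR` and SciPy >= 1.12 for `random_array`):

    import numpy as np
    import scipy.sparse as sps
    import sparse

    csr = sps.random_array((8, 8), density=0.3, format="csr")
    arr = sparse.asarray(csr)

    # (indptr, indices, data) now travel via get_constituent_arrays().
    np.testing.assert_array_equal(sparse.to_scipy(arr).toarray(), csr.toarray())
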
--- sparse/mlir_backend/_conversions.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py index 8fe897eb..8913c49e 100644 --- a/sparse/mlir_backend/_conversions.py +++ b/sparse/mlir_backend/_conversions.py @@ -43,15 +43,14 @@ def _from_numpy(arr: np.ndarray, copy: bool | None = None) -> Array: return from_constituent_arrays(format=dense_format, arrays=(arr_flat,), shape=arr.shape) -def to_numpy(arr): - storage = arr._storage - storage_format: StorageFormat = storage.get_storage_format() +def to_numpy(arr: Array) -> np.ndarray: + storage_format: StorageFormat = arr.format if not all(LevelFormat.Dense == level.format for level in storage_format.levels): raise TypeError(f"Cannot convert a non-dense array to NumPy. `{storage_format=}`") data = ranked_memref_to_numpy(arr._storage.values) - _hold_ref(data, storage) + _hold_ref(data, arr._storage) arg_order = [0] * storage_format.storage_rank for i, o in enumerate(storage_format.order): arg_order[o] = i @@ -126,27 +125,23 @@ def _from_scipy(arr: ScipySparseArray, copy: bool | None = None) -> Array: @_guard_scipy -def to_scipy(arr) -> ScipySparseArray: - storage = arr._storage - storage_format: StorageFormat = storage.get_storage_format() +def to_scipy(arr: Array) -> ScipySparseArray: + storage_format = arr.format match storage_format.levels: case (Level(LevelFormat.Dense, _), Level(LevelFormat.Compressed, _)): - data = ranked_memref_to_numpy(storage.values) - indices = ranked_memref_to_numpy(storage.indices_1) - indptr = ranked_memref_to_numpy(storage.pointers_to_1) + indptr, indices, data = arr.get_constituent_arrays() if storage_format.order == (0, 1): sps_arr = sps.csr_array((data, indices, indptr), shape=arr.shape) else: sps_arr = sps.csc_array((data, indices, indptr), shape=arr.shape) case (Level(LevelFormat.Compressed, _), Level(LevelFormat.Singleton, _)): - data = ranked_memref_to_numpy(storage.values) - coords = ranked_memref_to_numpy(storage.indices_1) + _, coords, data = arr.get_constituent_arrays() sps_arr = sps.coo_array((data, (coords[:, 0], coords[:, 1])), shape=arr.shape) case _: raise RuntimeError(f"No conversion implemented for `{storage_format=}`.") - _hold_ref(sps_arr, storage) + _hold_ref(sps_arr, arr._storage) return sps_arr From 018c567b9d3d120d5f1ebcc3e5a378afb6e53b43 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:09:06 +0200 Subject: [PATCH 14/15] Simplify and type hint `to_numpy` and `to_scipy` even more. --- sparse/mlir_backend/_conversions.py | 5 +---- sparse/mlir_backend/tests/test_simple.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/sparse/mlir_backend/_conversions.py b/sparse/mlir_backend/_conversions.py index 8913c49e..ce47aaec 100644 --- a/sparse/mlir_backend/_conversions.py +++ b/sparse/mlir_backend/_conversions.py @@ -3,7 +3,6 @@ import numpy as np from ._array import Array -from ._common import _hold_ref, ranked_memref_to_numpy from .levels import Level, LevelFormat, LevelProperties, StorageFormat, get_storage_format try: @@ -49,8 +48,7 @@ def to_numpy(arr: Array) -> np.ndarray: if not all(LevelFormat.Dense == level.format for level in storage_format.levels): raise TypeError(f"Cannot convert a non-dense array to NumPy. 
`{storage_format=}`") - data = ranked_memref_to_numpy(arr._storage.values) - _hold_ref(data, arr._storage) + (data,) = arr.get_constituent_arrays() arg_order = [0] * storage_format.storage_rank for i, o in enumerate(storage_format.order): arg_order[o] = i @@ -141,7 +139,6 @@ def to_scipy(arr: Array) -> ScipySparseArray: case _: raise RuntimeError(f"No conversion implemented for `{storage_format=}`.") - _hold_ref(sps_arr, arr._storage) return sps_arr diff --git a/sparse/mlir_backend/tests/test_simple.py b/sparse/mlir_backend/tests/test_simple.py index 0adf1680..2fd06b3e 100644 --- a/sparse/mlir_backend/tests/test_simple.py +++ b/sparse/mlir_backend/tests/test_simple.py @@ -32,6 +32,7 @@ def assert_csx_equal( expected: sps.csr_array | sps.csc_array, actual: sps.csr_array | sps.csc_array, ) -> None: + assert expected.format == actual.format expected.eliminate_zeros() expected.sum_duplicates() From f7162f8b12d0d0fb8e7e316249356f04ebe52c11 Mon Sep 17 00:00:00 2001 From: Hameer Abbasi <2190658+hameerabbasi@users.noreply.github.com> Date: Tue, 22 Oct 2024 15:10:04 +0200 Subject: [PATCH 15/15] Address review comments by @mtsokol. --- sparse/mlir_backend/_array.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sparse/mlir_backend/_array.py b/sparse/mlir_backend/_array.py index ca1dbd79..681833d8 100644 --- a/sparse/mlir_backend/_array.py +++ b/sparse/mlir_backend/_array.py @@ -1,5 +1,6 @@ import numpy as np +from ._dtypes import DType from .levels import StorageFormat @@ -21,7 +22,7 @@ def ndim(self) -> int: return len(self.shape) @property - def dtype(self): + def dtype(self) -> type[DType]: return self._storage.get_storage_format().dtype @property @@ -34,7 +35,7 @@ def _get_mlir_type(self): def _to_module_arg(self): return self._storage.to_module_arg() - def copy(self): + def copy(self) -> "Array": from ._conversions import from_constituent_arrays arrs = tuple(arr.copy() for arr in self.get_constituent_arrays())