Skip to content

Commit e63c28f

Browse files
committed
TestDataLoader: load_text and more powerful preprocess
1 parent 948f928 commit e63c28f

File tree

6 files changed

+184
-34
lines changed

6 files changed

+184
-34
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12+
- `openeo.testing.io.TestDataLoader`: unit test utility to compactly load (and optionally preprocess) test data (text/JSON/...)
13+
1214
### Changed
1315

1416
- `DataCube.apply_dimension()`: not explicitly specifying the `dimension` argument is deprecated and will trigger warnings ([#774](https://github.com/Open-EO/openeo-python-client/issues/774))

openeo/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.42.0a1"
1+
__version__ = "0.42.0a2"

openeo/testing/__init__.py

Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,36 +2,6 @@
22
Utilities for testing of openEO client workflows.
33
"""
44

5-
import json
6-
from pathlib import Path
7-
from typing import Callable, Optional, Union
85

9-
10-
class TestDataLoader:
    """
    Locate files under a test data root folder and read them as JSON,
    with an optional text preprocessing step before parsing.

    Typical usage is as a pytest fixture, e.g. defined in ``conftest.py``:

    .. code-block:: python

        @pytest.fixture
        def test_data() -> TestDataLoader:
            return TestDataLoader(root=Path(__file__).parent / "data")

    .. versionadded:: 0.30.0
    """

    def __init__(self, root: Union[str, Path]):
        # Normalize to a Path so that a plain string root works too.
        self.data_root = Path(root)

    def get_path(self, filename: Union[str, Path]) -> Path:
        """Build the path of a test data file: ``filename`` joined onto the data root"""
        return self.data_root.joinpath(filename)

    def load_json(self, filename: Union[str, Path], preprocess: Optional[Callable[[str], str]] = None) -> dict:
        """
        Read a test data file as UTF-8 text and parse it as JSON.

        :param filename: path of the file, relative to the data root
        :param preprocess: optional callable applied to the raw text
            before it is handed to the JSON parser (skipped when falsy)
        :return: the parsed JSON data
        """
        text = self.get_path(filename).read_text(encoding="utf8")
        return json.loads(preprocess(text) if preprocess else text)
6+
# Legacy import for backwards compatibility
7+
from openeo.testing.io import TestDataLoader

openeo/testing/io.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import json
2+
import re
3+
from pathlib import Path
4+
from typing import Callable, Optional, Union
5+
6+
7+
class TestDataLoader:
    """
    Test data helper: easily resolve paths to test data files,
    load them as bytes, text, JSON,
    optionally preprocess them, etc.

    It's intended to be used as a pytest fixture, e.g. from ``conftest.py``:

    .. code-block:: python

        @pytest.fixture
        def test_data() -> TestDataLoader:
            return TestDataLoader(root=Path(__file__).parent / "data")

    .. versionadded:: 0.30.0

    .. versionchanged:: 0.42.0
        Moved to ``openeo.testing.io``.
        Added ``load_bytes()`` and ``load_text()``.
        Improved ``preprocess``: can now be a replacement dict (with regex support).
    """

    def __init__(self, root: Union[str, Path]):
        """
        :param root: Root folder of the test data files
        """
        self.data_root = Path(root)

    def get_path(self, filename: Union[str, Path]) -> Path:
        """
        Get path to a test data file

        :param filename: Path to the file relative to the test data root
        :return: ``filename`` joined onto the test data root
        """
        return self.data_root / filename

    def load_bytes(self, filename: Union[str, Path]) -> bytes:
        """
        Load the raw binary content of a test data file

        :param filename: Path to the file relative to the test data root
        """
        return self.get_path(filename).read_bytes()

    def _get_preprocess(self, preprocess: Union[None, dict, Callable[[str], str]]) -> Callable[[str], str]:
        """
        Normalize preprocess argument to a callable

        :param preprocess: ``None`` (no-op), a replacement dict
            (``str`` or ``re.Pattern`` needles mapped to replacements),
            or a callable (returned as is)
        :return: Callable that takes a string and returns a string
        """
        if preprocess is None:
            return lambda x: x
        elif isinstance(preprocess, dict):

            def replace(text: str) -> str:
                # Replacements are applied one after the other, in dict order,
                # so a later needle can also match output of an earlier replacement.
                for key, value in preprocess.items():
                    if isinstance(key, re.Pattern):
                        text = key.sub(value, text)
                    elif isinstance(key, str):
                        text = text.replace(key, value)
                    else:
                        # Needle type is only checked when the replacement is applied.
                        raise ValueError(key)
                return text

            return replace
        else:
            return preprocess

    def load_text(
        self,
        filename: Union[str, Path],
        *,
        preprocess: Union[None, dict, Callable[[str], str]] = None,
        encoding: str = "utf8",
    ) -> str:
        # Raw docstring: otherwise "\1" would be interpreted as an octal escape (U+0001).
        r"""
        Load text file, optionally with some text based preprocessing

        :param filename: Path to the file relative to the test data root
        :param preprocess: Optional preprocessing to do on the text, given as

            - Callable that takes a string and returns a string
            - Dictionary mapping needles to replacements.
              Needle can be a simple string that will be replaced with the replacement value,
              or it can be a ``re.Pattern`` that will be used in ``re.sub()`` style
              (which supports group references, e.g. ``\1`` for first group in match)
        :param encoding: Encoding to use when reading the file
        :return: The (preprocessed) text content of the file
        :raises ValueError: if a key of a ``preprocess`` dict is neither ``str`` nor ``re.Pattern``
        """
        text = self.get_path(filename).read_text(encoding=encoding)
        text = self._get_preprocess(preprocess)(text)
        return text

    def load_json(
        self,
        filename: Union[str, Path],
        *,
        preprocess: Union[None, dict, Callable[[str], str]] = None,
    ) -> dict:
        # Raw docstring: otherwise "\1" would be interpreted as an octal escape (U+0001).
        r"""
        Load data from a JSON file, optionally with some text based preprocessing

        :param filename: Path to the file relative to the test data root
        :param preprocess: Optional preprocessing to do on the text, given as

            - Callable that takes a string and returns a string
            - Dictionary mapping needles to replacements.
              Needle can be a simple string that will be replaced with the replacement value,
              or it can be a ``re.Pattern`` that will be used in ``re.sub()`` style
              (which supports group references, e.g. ``\1`` for first group in match)
        :return: The data parsed from the (preprocessed) JSON text
        :raises ValueError: if a key of a ``preprocess`` dict is neither ``str`` nor ``re.Pattern``
        """
        # The file is always read as UTF-8; reading and preprocessing is shared with `load_text`.
        raw = self.load_text(filename, preprocess=preprocess, encoding="utf8")
        return json.loads(raw)

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
)
1010

1111

12-
from openeo.testing import TestDataLoader
12+
from openeo.testing.io import TestDataLoader
1313
from openeo.util import ensure_dir
1414

1515
pytest_plugins = "pytester"

tests/testing/test_io.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import json
2+
import re
3+
from pathlib import Path
4+
5+
import pytest
6+
7+
from openeo.testing.io import TestDataLoader
8+
9+
10+
class TestTestDataLoader:
    """Unit tests for ``TestDataLoader``: path resolution and loading of bytes, text and JSON."""

    def test_get_path(self, tmp_path):
        # `filename` can be given as a string as well as a `Path`
        expected = tmp_path / "hello" / "world.txt"
        data_loader = TestDataLoader(root=tmp_path)
        assert data_loader.get_path("hello/world.txt") == expected
        assert data_loader.get_path(Path("hello/world.txt")) == expected

    def test_load_bytes(self, tmp_path):
        content = b"Hello W\x00rld"
        target = tmp_path / "hello" / "world.txt"
        target.parent.mkdir(parents=True)
        target.write_bytes(content)

        data_loader = TestDataLoader(root=tmp_path)
        assert data_loader.load_bytes("hello/world.txt") == content

    @pytest.mark.parametrize(
        ("preprocess", "expected"),
        [
            # No preprocessing
            (None, "Hello, World!"),
            # Callable based preprocessing
            (lambda s: s.lower(), "hello, world!"),
            (lambda s: s.replace("World", "Earth"), "Hello, Earth!"),
            # Replacement dict with plain string needles
            ({"World": "Earth"}, "Hello, Earth!"),
            ({"Hello": "Greetings", "World": "Terra"}, "Greetings, Terra!"),
            # Replacement dict with regex needles (including group references)
            ({re.compile("l+"): "|_"}, "He|_o, Wor|_d!"),
            ({re.compile("([A-Z])"): r"\1\1\1"}, "HHHello, WWWorld!"),
        ],
    )
    def test_load_text(self, tmp_path, preprocess, expected):
        (tmp_path / "hello.txt").write_text("Hello, World!", encoding="utf8")

        data_loader = TestDataLoader(root=tmp_path)
        actual = data_loader.load_text("hello.txt", preprocess=preprocess)
        assert actual == expected

    @pytest.mark.parametrize(
        ("preprocess", "expected"),
        [
            # No preprocessing
            (None, {"salutation": "Hello", "target": "World"}),
            # Callable based preprocessing (applied to the raw JSON text, so keys are affected too)
            (lambda s: s.upper(), {"SALUTATION": "HELLO", "TARGET": "WORLD"}),
            (
                lambda s: s.replace("World", "Terra"),
                {"salutation": "Hello", "target": "Terra"},
            ),
            (
                lambda s: s.replace('"World"', '["Terra","Earth"]'),
                {"salutation": "Hello", "target": ["Terra", "Earth"]},
            ),
            # Replacement dict with plain string needles
            (
                {"World": "Earth", "salutation": "say", "target": "to"},
                {"say": "Hello", "to": "Earth"},
            ),
            (
                {"Hello": "Greetings", '"World"': '["Terra","Earth"]'},
                {"salutation": "Greetings", "target": ["Terra", "Earth"]},
            ),
            # Replacement dict with regex needle and group references
            (
                {re.compile("([aeoiu])"): r"\1\1\1"},
                {"saaaluuutaaatiiiooon": "Heeellooo", "taaargeeet": "Wooorld"},
            ),
        ],
    )
    def test_load_json(self, tmp_path, preprocess, expected):
        document = {"salutation": "Hello", "target": "World"}
        (tmp_path / "data.json").write_text(json.dumps(document), encoding="utf8")

        data_loader = TestDataLoader(root=tmp_path)
        actual = data_loader.load_json("data.json", preprocess=preprocess)
        assert actual == expected

0 commit comments

Comments
 (0)