
Commit 47b1e5b

ruff format and fixup extra/job_management
1 parent 4171004 commit 47b1e5b

File tree: 6 files changed, +35 −51 lines changed

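Most of the hunks below are mechanical formatting changes, consistent with running ruff's formatter over the touched packages. The exact invocation isn't recorded in the commit; something along the lines of `ruff format openeo/extra/job_management tests/extra/job_management` would produce this kind of diff.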

openeo/extra/job_management/__init__.py

Lines changed: 3 additions & 6 deletions
@@ -383,7 +383,6 @@ def start_job_thread(self, start_job: Callable[[], BatchJob], job_db: JobDatabas
         self._worker_pool = _JobManagerWorkerThreadPool()
 
         def run_loop():
-
             # TODO: support user-provided `stats`
             stats = collections.defaultdict(int)
 
@@ -726,7 +725,6 @@ def _process_threadworker_updates(
             job_db.persist(df_updates)
             stats["job_db persist"] = stats.get("job_db persist", 0) + 1
 
-
     def on_job_done(self, job: BatchJob, row):
         """
         Handles jobs that have finished. Can be overridden to provide custom behaviour.
@@ -862,10 +860,10 @@ def _track_statuses(self, job_db: JobDatabaseInterface, stats: Optional[dict] =
                     active.loc[i, "running_start_time"] = rfc3339.now_utc()
 
                 if self._cancel_running_job_after and new_status == "running":
-                    if (not active.loc[i, "running_start_time"] or pd.isna(active.loc[i, "running_start_time"])):
+                    if not active.loc[i, "running_start_time"] or pd.isna(active.loc[i, "running_start_time"]):
                         _log.warning(
                             f"Unknown 'running_start_time' for running job {job_id}. Using current time as an approximation."
-                            )
+                        )
                         stats["job started running"] += 1
                         active.loc[i, "running_start_time"] = rfc3339.now_utc()
 
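A note on the `pd.isna` guard in the hunk above: a plain truthiness check alone would miss the NaN case, because NaN is truthy in Python. A minimal standalone sketch (made-up values, not code from this repo) of why both checks are combined:

import pandas as pd

for value in [None, float("nan"), "2024-09-01T09:00:00Z"]:
    missing = not value or pd.isna(value)
    print(repr(value), "->", missing)

# None -> True                      (caught by `not value`)
# nan -> True                       (NaN is truthy, so only pd.isna catches it)
# '2024-09-01T09:00:00Z' -> False   (a real timestamp passes both checks)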
@@ -910,7 +908,6 @@ def ignore_connection_errors(context: Optional[str] = None, sleep: int = 5):
 
 
 class FullDataFrameJobDatabase(JobDatabaseInterface):
-
     def __init__(self):
         super().__init__()
         self._df = None
@@ -1272,7 +1269,7 @@ def start_job(self, row: pd.Series, connection: Connection, **_) -> BatchJob:
                     # Skip optional parameters without any fallback default value
                     continue
                 else:
-                    raise ValueError(f"Missing required parameter {param_name !r} for process {process_id!r}")
+                    raise ValueError(f"Missing required parameter {param_name!r} for process {process_id!r}")
 
             # Prepare some values/dtypes for JSON encoding
             if isinstance(value, numpy.integer):
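The `{param_name !r}` fix in the last hunk is purely cosmetic: Python permits whitespace before the conversion marker in an f-string, and `!r` simply applies repr(). A quick illustrative check (hypothetical value, not from the codebase):

param_name = "spatial_extent"
assert f"{param_name !r}" == f"{param_name!r}" == repr(param_name) == "'spatial_extent'"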

openeo/extra/job_management/stac_job_db.py

Lines changed: 1 addition & 3 deletions
@@ -188,9 +188,7 @@ def get_by_status(self, statuses: Iterable[str], max: Optional[int] = None) -> p
 
         if df.shape[0] == 0:
             # TODO: What if default columns are overwritten by the user?
-            df = self._normalize_df(
-                df
-            )  # Even for an empty dataframe the default columns are required
+            df = self._normalize_df(df)  # Even for an empty dataframe the default columns are required
         return df
 
     def get_by_indices(self, indices: Iterable[Union[int, str]]) -> pd.DataFrame:

tests/extra/job_management/test_job_management.py

Lines changed: 28 additions & 32 deletions
@@ -6,7 +6,6 @@
 import logging
 import re
 import threading
-import time
 from pathlib import Path
 from time import sleep
 from typing import Union
@@ -109,7 +108,6 @@ def execute(self) -> _TaskResult:
 
 
 class TestMultiBackendJobManager:
-
     @pytest.fixture
     def job_manager_root_dir(self, tmp_path):
         return tmp_path / "job_mgr_root"
@@ -582,7 +580,6 @@ def start_job(row, connection_provider, connection, **kwargs):
                 12 * 60 * 60,
                 "finished",
             ),
-
         ],
     )
     def test_automatic_cancel_of_too_long_running_jobs(
@@ -672,30 +669,28 @@ def test_status_logging(self, tmp_path, job_manager, job_manager_root_dir, sleep
         needle = re.compile(r"Job status histogram:.*'finished': 5.*Run stats:.*'job_queued_for_start': 5")
         assert needle.search(caplog.text)
 
-
-
     @pytest.mark.parametrize(
-            ["create_time", "start_time", "running_start_time", "end_time", "end_status", "cancel_after_seconds"],
-            [
-                # Scenario 1: Missing running_start_time (None)
-                (
-                    "2024-09-01T09:00:00Z",  # Job creation time
-                    "2024-09-01T09:00:00Z",  # Job start time (should be 1 hour after create_time)
-                    None,  # Missing running_start_time
-                    "2024-09-01T20:00:00Z",  # Job end time
-                    "finished",  # Job final status
-                    6 * 60 * 60,  # Cancel after 6 hours
-                ),
-                # Scenario 2: NaN running_start_time
-                (
-                    "2024-09-01T09:00:00Z",
-                    "2024-09-01T09:00:00Z",
-                    float("nan"),  # NaN running_start_time
-                    "2024-09-01T20:00:00Z",  # Job end time
-                    "finished",  # Job final status
-                    6 * 60 * 60,  # Cancel after 6 hours
-                ),
-            ]
+        ["create_time", "start_time", "running_start_time", "end_time", "end_status", "cancel_after_seconds"],
+        [
+            # Scenario 1: Missing running_start_time (None)
+            (
+                "2024-09-01T09:00:00Z",  # Job creation time
+                "2024-09-01T09:00:00Z",  # Job start time (should be 1 hour after create_time)
+                None,  # Missing running_start_time
+                "2024-09-01T20:00:00Z",  # Job end time
+                "finished",  # Job final status
+                6 * 60 * 60,  # Cancel after 6 hours
+            ),
+            # Scenario 2: NaN running_start_time
+            (
+                "2024-09-01T09:00:00Z",
+                "2024-09-01T09:00:00Z",
+                float("nan"),  # NaN running_start_time
+                "2024-09-01T20:00:00Z",  # Job end time
+                "finished",  # Job final status
+                6 * 60 * 60,  # Cancel after 6 hours
+            ),
+        ],
     )
     def test_ensure_running_start_time_is_datetime(
         self,
@@ -726,10 +721,12 @@ def get_status(job_id, current_status):
         job_manager.add_backend("foo", connection=dummy_backend_foo.connection)
 
         # Create a DataFrame representing the job database
-        df = pd.DataFrame({
-            "year": [2024],
-            "running_start_time": [running_start_time],  # Initial running_start_time
-        })
+        df = pd.DataFrame(
+            {
+                "year": [2024],
+                "running_start_time": [running_start_time],  # Initial running_start_time
+            }
+        )
 
         # Move the time machine to the job creation time
         time_machine.move_to(create_time)
@@ -871,6 +868,7 @@ def execute(self):
         assert any("Skipping invalid db_update" in msg for msg in caplog.messages)
         assert any("Skipping invalid stats_update" in msg for msg in caplog.messages)
 
+
 JOB_DB_DF_BASICS = pd.DataFrame(
     {
         "numbers": [3, 2, 1],
@@ -986,7 +984,6 @@ def test_count_by_status(self, tmp_path, db_class):
 
 
 class TestCsvJobDatabase:
-
     def test_repr(self, tmp_path):
         path = tmp_path / "db.csv"
         db = CsvJobDatabase(path)
@@ -1153,7 +1150,6 @@ def test_read_with_crs_column(self, tmp_path):
 
 
 class TestParquetJobDatabase:
-
     def test_repr(self, tmp_path):
         path = tmp_path / "db.pq"
         db = ParquetJobDatabase(path)

tests/extra/job_management/test_job_splitting.py

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
 
 # TODO: using fixtures for these simple objects is a bit overkill, makes the test harder to follow, and undermines opportunity to parameterize
 
+
 @pytest.fixture
 def mock_polygon_wgs():
     return shapely.geometry.box(0.0, 0.0, 1.0, 1.0)

tests/extra/job_management/test_stac_job_db.py

Lines changed: 1 addition & 8 deletions
@@ -105,7 +105,6 @@ def _pystac_item(
 
 class TestSTACAPIJobDatabase:
     def test_exists(self, job_db_exists, job_db_not_exists):
-
         assert job_db_exists.exists() == True
         assert job_db_not_exists.exists() == False
 
@@ -431,8 +430,6 @@ def post_bulk_items(request, context):
         job_db_exists.persist(bulk_dataframe)
         assert post_bulk_items_mock.called
 
-
-
     def test_persist_multiple_chunks(self, requests_mock, job_db_exists):
         rows = 12
         bulk_dataframe = pd.DataFrame(
@@ -572,11 +569,7 @@ def _post_collections_bulk_items(self, request, context):
     def _get_search(self, request, context):
         """Handler of `GET /search` requests."""
         collections = request.qs["collections"][0].split(",")
-        items = [
-            item
-            for cid in collections
-            for item in self.items.get(cid, {}).values()
-        ]
+        items = [item for cid in collections for item in self.items.get(cid, {}).values()]
         if "ids" in request.qs:
             [ids] = request.qs["ids"]
             ids = set(ids.split(","))
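For readers unpicking the flattened comprehension above: its clauses read left to right exactly like nested for loops. A small standalone sketch (the dict below is a made-up stand-in for the handler's `self.items`):

items_by_collection = {"c1": {"a": 1, "b": 2}, "c2": {"c": 3}}
collections = ["c1", "c2"]

flat = [item for cid in collections for item in items_by_collection.get(cid, {}).values()]

nested = []
for cid in collections:  # first clause = outer loop
    for item in items_by_collection.get(cid, {}).values():  # second clause = inner loop
        nested.append(item)

assert flat == nested == [1, 2, 3]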

tests/extra/job_management/test_thread_worker.py

Lines changed: 1 addition & 2 deletions
@@ -5,7 +5,6 @@
 from typing import Iterator
 
 import pytest
-import requests
 
 from openeo.extra.job_management._thread_worker import (
     Task,
@@ -69,7 +68,7 @@ def test_start_failure(self, dummy_backend, caplog):
         )
         assert job.status() == "error"
         assert caplog.messages == [
-            "Failed to start job 'job-000': OpenEoApiError('[500] Internal: No job starting " "for you, buddy')"
+            "Failed to start job 'job-000': OpenEoApiError('[500] Internal: No job starting for you, buddy')"
         ]
 
     @pytest.mark.parametrize("serializer", [repr, str])
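The string change above removes a leftover implicit concatenation (likely a remnant of an earlier line wrap). Adjacent string literals are merged at compile time, so the expected log message is unchanged; a minimal demonstration:

old = "No job starting " "for you, buddy"
new = "No job starting for you, buddy"
assert old == new  # implicit concatenation happens at compile time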
