Skip to content

Commit af55fd6

Browse files
committed
PR #736 various tweaks based on review notes
- split `Task` hierarchy for better separation of concerns - more tests for some basic classes - clean up unused fixtures from tests - DummyBackend: add setup_job_start_failure
1 parent 1b6f6a2 commit af55fd6

File tree

5 files changed

+146
-80
lines changed

5 files changed

+146
-80
lines changed

openeo/extra/job_management/_thread_worker.py

Lines changed: 47 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""
2+
Internal utilities to handle job management tasks through threads.
3+
"""
4+
15
import concurrent.futures
26
import logging
37
from abc import ABC, abstractmethod
@@ -9,7 +13,7 @@
913
_log = logging.getLogger(__name__)
1014

1115

12-
@dataclass
16+
@dataclass(frozen=True)
1317
class _TaskResult:
1418
"""
1519
Container for the result of a task execution.
@@ -32,92 +36,86 @@ class _TaskResult:
3236
stats_update: Dict[str, int] = field(default_factory=dict) # Optional
3337

3438

39+
@dataclass(frozen=True)
3540
class Task(ABC):
3641
"""
37-
Abstract base class for asynchronous tasks.
42+
Abstract base class for a unit of work associated with a job (identified by a job id)
43+
and to be processed by :py:class:`_JobManagerWorkerThreadPool`.
44+
45+
Because the work is intended to be executed in a thread/process pool,
46+
it is recommended to keep the state of the task object as simple/immutable as possible
47+
(e.g. just some string/number attributes) and avoid sharing complex objects and state.
3848
39-
A task encapsulates a unit of work, typically executed asynchronously,
40-
and returns a `_TaskResult` with job-related metadata and updates.
49+
The main API for subclasses to implement is the `execute` method
50+
which should return a :py:class:`_TaskResult` object
51+
with job-related metadata and updates.
4152
42-
Implementations must override the `execute` method to define the task logic.
53+
:param job_id:
54+
Identifier of the job to start on the backend.
4355
"""
4456

57+
# TODO: strictly speaking, a job id does not unambiguously identify a job when multiple backends are in play.
58+
job_id: str
59+
4560
@abstractmethod
4661
def execute(self) -> _TaskResult:
4762
"""Execute the task and return a raw result"""
4863
pass
4964

5065

51-
@dataclass
52-
class _JobStartTask(Task):
66+
@dataclass(frozen=True)
67+
class ConnectedTask(Task):
5368
"""
54-
Task for starting a backend job asynchronously.
69+
Base class for tasks that involve an (authenticated) connection to a backend.
5570
56-
Connects to an OpenEO backend using the provided URL and optional token,
57-
retrieves the specified job, and attempts to start it.
58-
59-
Usage example:
60-
61-
.. code-block:: python
62-
63-
task = _JobStartTask(
64-
job_id="1234",
65-
root_url="https://openeo.test",
66-
bearer_token="secret"
67-
)
68-
result = task.execute()
69-
70-
:param job_id:
71-
Identifier of the job to start on the backend.
71+
Backend is specified by a root URL,
72+
and (optional) authentication is done through an openEO-style bearer token.
7273
7374
:param root_url:
7475
The root URL of the OpenEO backend to connect to.
7576
7677
:param bearer_token:
7778
Optional Bearer token used for authentication.
7879
79-
:raises ValueError:
80-
If any of the input parameters are invalid (e.g., empty strings).
8180
"""
8281

83-
job_id: str
8482
root_url: str
8583
bearer_token: Optional[str]
8684

87-
def __post_init__(self) -> None:
88-
# Validation remains unchanged
89-
if not isinstance(self.root_url, str) or not self.root_url.strip():
90-
raise ValueError(f"root_url must be a non-empty string, got {self.root_url!r}")
91-
if self.bearer_token is not None and (not isinstance(self.bearer_token, str) or not self.bearer_token.strip()):
92-
raise ValueError(f"bearer_token must be a non-empty string or None, got {self.bearer_token!r}")
93-
if not isinstance(self.job_id, str) or not self.job_id.strip():
94-
raise ValueError(f"job_id must be a non-empty string, got {self.job_id!r}")
85+
def get_connection(self) -> openeo.Connection:
86+
connection = openeo.connect(self.root_url)
87+
if self.bearer_token:
88+
connection.authenticate_bearer_token(self.bearer_token)
89+
return connection
90+
91+
92+
class _JobStartTask(ConnectedTask):
93+
"""
94+
Task for starting an openEO batch job (the `POST /jobs/<job_id>/result` request).
95+
"""
9596

9697
def execute(self) -> _TaskResult:
9798
"""
98-
Executes the job start process using the OpenEO connection.
99-
100-
Authenticates if a bearer token is provided, retrieves the job by ID,
101-
and attempts to start it.
99+
Start job identified by `job_id` on the backend.
102100
103101
:returns:
104102
A `_TaskResult` with status and statistics metadata, indicating
105103
success or failure of the job start.
106104
"""
105+
# TODO: move main try-except block to base class?
107106
try:
108-
conn = openeo.connect(self.root_url)
109-
if self.bearer_token:
110-
conn.authenticate_bearer_token(self.bearer_token)
111-
job = conn.job(self.job_id)
107+
job = self.get_connection().job(self.job_id)
108+
# TODO: only start when status is "queued"?
112109
job.start()
113-
_log.info(f"Job {self.job_id} started successfully")
110+
_log.info(f"Job {self.job_id!r} started successfully")
114111
return _TaskResult(
115112
job_id=self.job_id,
116113
db_update={"status": "queued"},
117114
stats_update={"job start": 1},
118115
)
119116
except Exception as e:
120-
_log.error(f"Failed to start job {self.job_id}: {e}")
117+
_log.error(f"Failed to start job {self.job_id!r}: {e!r}")
118+
# TODO: more insights about the failure (e.g. the exception) are just logged, but lost from the result
121119
return _TaskResult(
122120
job_id=self.job_id, db_update={"status": "start_failed"}, stats_update={"start_job error": 1}
123121
)
@@ -175,13 +173,13 @@ def process_futures(self) -> List[_TaskResult]:
175173
if future in done:
176174
try:
177175
result = future.result()
178-
179176
except Exception as e:
180-
_log.exception(f"Error processing task: {e}")
177+
_log.exception(f"Failed to get result from future: {e}")
181178
result = _TaskResult(
182-
job_id=task.job_id, db_update={"status": "start_failed"}, stats_update={"start_job error": 1}
179+
job_id=task.job_id,
180+
db_update={"status": "future.result() failed"},
181+
stats_update={"future.result() error": 1},
183182
)
184-
185183
results.append(result)
186184
else:
187185
to_keep.append((future, task))

openeo/rest/_testing.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class DummyBackend:
4848
"next_result",
4949
"next_validation_errors",
5050
"_forced_job_status",
51+
"_fail_on_job_start",
5152
"job_status_updater",
5253
"job_id_generator",
5354
"extra_job_metadata_fields",
@@ -73,6 +74,7 @@ def __init__(
7374
self.next_validation_errors = []
7475
self.extra_job_metadata_fields = []
7576
self._forced_job_status: Dict[str, str] = {}
77+
self._fail_on_job_start = {}
7678

7779
# Job status update hook:
7880
# callable that is called on starting a job, and getting job metadata
@@ -221,24 +223,51 @@ def _get_job_id(self, request) -> str:
221223
assert job_id in self.batch_jobs
222224
return job_id
223225

226+
def _set_job_status(self, job_id: str, status: str):
227+
"""Forced override of job status (e.g. for "canceled" or "error")"""
228+
self.batch_jobs[job_id]["status"] = self._forced_job_status[job_id] = status
229+
224230
def _get_job_status(self, job_id: str, current_status: str) -> str:
225231
if job_id in self._forced_job_status:
226232
return self._forced_job_status[job_id]
227233
return self.job_status_updater(job_id=job_id, current_status=current_status)
228234

235+
def setup_job_start_failure(
236+
self,
237+
*,
238+
job_id: Union[str, None] = None,
239+
status_code: int = 500,
240+
response_body: Union[None, str, dict] = None,
241+
):
242+
"""
243+
Setup for failure when starting a job.
244+
:param job_id: job id to fail on, or None (wildcard) for all jobs
245+
"""
246+
if response_body is None:
247+
response_body = {"code": "Internal", "message": "No job starting for you, buddy"}
248+
if not isinstance(response_body, bytes):
249+
response_body = json.dumps(response_body).encode("utf-8")
250+
self._fail_on_job_start[job_id] = {"status_code": status_code, "response_body": response_body}
251+
229252
def _handle_post_job_results(self, request, context):
230253
"""Handler of `POST /job/{job_id}/results` (start batch job)."""
231254
job_id = self._get_job_id(request)
232255
assert self.batch_jobs[job_id]["status"] == "created"
233-
self.batch_jobs[job_id]["status"] = self._get_job_status(
234-
job_id=job_id, current_status=self.batch_jobs[job_id]["status"]
235-
)
236-
context.status_code = HTTP_202_ACCEPTED
256+
failure = self._fail_on_job_start.get(job_id) or self._fail_on_job_start.get(None)
257+
if not failure:
258+
self.batch_jobs[job_id]["status"] = self._get_job_status(
259+
job_id=job_id, current_status=self.batch_jobs[job_id]["status"]
260+
)
261+
context.status_code = HTTP_202_ACCEPTED
262+
else:
263+
self._set_job_status(job_id=job_id, status="error")
264+
context.status_code = failure["status_code"]
265+
return failure["response_body"]
237266

238267
def _handle_get_job(self, request, context):
239268
"""Handler of `GET /job/{job_id}` (get batch job status and metadata)."""
240269
job_id = self._get_job_id(request)
241-
# Allow updating status with `job_status_setter` once job got past status "created"
270+
# Allow updating status with `job_status_updater` once job got past status "created"
242271
if self.batch_jobs[job_id]["status"] != "created":
243272
self.batch_jobs[job_id]["status"] = self._get_job_status(
244273
job_id=job_id, current_status=self.batch_jobs[job_id]["status"]
@@ -269,8 +298,7 @@ def _handle_get_job_results(self, request, context):
269298
def _handle_delete_job_results(self, request, context):
270299
"""Handler of `DELETE /job/{job_id}/results` (cancel job)."""
271300
job_id = self._get_job_id(request)
272-
self.batch_jobs[job_id]["status"] = "canceled"
273-
self._forced_job_status[job_id] = "canceled"
301+
self._set_job_status(job_id=job_id, status="canceled")
274302
context.status_code = HTTP_204_NO_CONTENT
275303

276304
def _handle_get_job_result_asset(self, request, context):

tests/extra/job_management/test_job_management.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ class DummyTask(Task):
9494
"""
9595

9696
def __init__(self, job_id, db_update, stats_update, delay=0.0):
97-
self.job_id = job_id
97+
super().__init__(job_id=job_id)
9898
self._db_update = db_update or {}
9999
self._stats_update = stats_update or {}
100100
self._delay = delay

tests/extra/job_management/test_thread_worker.py

Lines changed: 53 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,65 @@
1+
import logging
12
import time
23

3-
import pandas as pd
44
import pytest
55
import requests
66

7-
# Import the refactored classes and helper functions from your codebase.
8-
# Adjust the import paths as needed.
97
from openeo.extra.job_management._thread_worker import (
108
_JobManagerWorkerThreadPool,
119
_JobStartTask,
10+
_TaskResult,
1211
)
12+
from openeo.rest._testing import DummyBackend
1313

14-
# --- Fixtures and Helpers ---
14+
15+
@pytest.fixture
16+
def dummy_backend(requests_mock) -> DummyBackend:
17+
dummy = DummyBackend.at_url("https://foo.test", requests_mock=requests_mock)
18+
dummy.setup_simple_job_status_flow(queued=3, running=5)
19+
return dummy
20+
21+
22+
class TestTaskResult:
23+
def test_default(self):
24+
result = _TaskResult(job_id="j-123")
25+
assert result.job_id == "j-123"
26+
assert result.db_update == {}
27+
assert result.stats_update == {}
28+
29+
30+
class TestJobStartTask:
31+
def test_start_success(self, dummy_backend, caplog):
32+
caplog.set_level(logging.WARNING)
33+
job = dummy_backend.connection.create_job(process_graph={})
34+
35+
task = _JobStartTask(job_id=job.job_id, root_url=dummy_backend.connection.root_url, bearer_token="h4ll0")
36+
result = task.execute()
37+
38+
assert result == _TaskResult(
39+
job_id="job-000",
40+
db_update={"status": "queued"},
41+
stats_update={"job start": 1},
42+
)
43+
assert job.status() == "queued"
44+
assert caplog.messages == []
45+
46+
def test_start_failure(self, dummy_backend, caplog):
47+
caplog.set_level(logging.WARNING)
48+
job = dummy_backend.connection.create_job(process_graph={})
49+
dummy_backend.setup_job_start_failure()
50+
51+
task = _JobStartTask(job_id=job.job_id, root_url=dummy_backend.connection.root_url, bearer_token="h4ll0")
52+
result = task.execute()
53+
54+
assert result == _TaskResult(
55+
job_id="job-000",
56+
db_update={"status": "start_failed"},
57+
stats_update={"start_job error": 1},
58+
)
59+
assert job.status() == "error"
60+
assert caplog.messages == [
61+
"Failed to start job 'job-000': OpenEoApiError('[500] Internal: No job starting " "for you, buddy')"
62+
]
1563

1664

1765
@pytest.fixture
@@ -22,23 +70,6 @@ def worker_pool():
2270
pool.shutdown()
2371

2472

25-
@pytest.fixture
26-
def sample_dataframe():
27-
"""Creates a pandas DataFrame for job tracking."""
28-
df = pd.DataFrame(
29-
[
30-
{"id": "job-123", "status": "queued_for_start", "other_field": "foo"},
31-
{"id": "job-456", "status": "queued_for_start", "other_field": "bar"},
32-
{"id": "job-789", "status": "other", "other_field": "baz"},
33-
]
34-
)
35-
return df
36-
37-
38-
@pytest.fixture
39-
def initial_stats():
40-
"""Returns a dictionary with initial stats counters."""
41-
return {"job start": 0, "job start failed": 0}
4273

4374

4475
@pytest.fixture
@@ -47,6 +78,7 @@ def successful_backend_mock(requests_mock):
4778
Returns a helper to set up a successful backend.
4879
Mocks a version check, job start, and job status check.
4980
"""
81+
# TODO: use DummyBackend here instead?
5082

5183
def _setup(root_url: str, job_id: str, status: str = "queued"):
5284
# Backend version check
@@ -67,7 +99,6 @@ def valid_task():
6799
return _JobStartTask(root_url="https://foo.test", bearer_token="test-token", job_id="test-job-123")
68100

69101

70-
import time
71102

72103

73104
def wait_for_results(worker_pool, timeout=3.0, interval=0.1):
@@ -85,7 +116,6 @@ def wait_for_results(worker_pool, timeout=3.0, interval=0.1):
85116
raise TimeoutError(f"Timed out after {timeout}s waiting for worker pool results.")
86117

87118

88-
# --- Tests for the Worker Thread Pool and Futures Postprocessing ---
89119

90120

91121
class TestJobManagerWorkerThreadPool:

tests/rest/test_testing.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import re
2+
13
import pytest
24

5+
from openeo.rest import OpenEoApiError
36
from openeo.rest._testing import DummyBackend
47

58

@@ -94,3 +97,10 @@ def test_setup_simple_job_status_flow_final_per_job(self, dummy_backend, con120)
9497
assert job0.status() == "finished"
9598
assert job1.status() == "error"
9699
assert job2.status() == "finished"
100+
101+
def test_setup_job_start_failure(self, dummy_backend):
102+
job = dummy_backend.connection.create_job(process_graph={})
103+
dummy_backend.setup_job_start_failure()
104+
with pytest.raises(OpenEoApiError, match=re.escape("[500] Internal: No job starting for you, buddy")):
105+
job.start()
106+
assert job.status() == "error"

0 commit comments

Comments
 (0)