Fix/tfm predict after train from ds (#2860)

dennisbader · web-flow · commit 67ea666b021c · 2025-07-31T16:28:07.000+02:00
* fix tfm predict methods after being trained with fit_from_dataset using static covariates

* update changelog

* fix failing test
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,6 +23,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 - Fixed a bug in `LightGBMModel` and `CatBoostModel` when using component-specific lags and categorical features, where certain lag scenarios could result in incorrect categorical feature declaration. [#2852](https://github.com/unit8co/darts/pull/2852) by [Dennis Bader](https://github.com/dennisbader).
 - Fixed a bug in `darts.utils.timeseries_generation.sine_timeseries()`, where the returned series ignored the specified `dtype`. [#2856](https://github.com/unit8co/darts/pull/2856) by [Dennis Bader](https://github.com/dennisbader).
 - Fixed a bug in `TimeSeries.__getitem__()`, where indexing with a list of integers of `length <= 2` resulted in an error. [#2857](https://github.com/unit8co/darts/pull/2857) by [Dennis Bader](https://github.com/dennisbader).
+- Fixed a bug in `TorchForecastingModel` which raised an error when calling any predict method after training the model with `fit_from_dataset()` on a dataset that uses static covariates. [#2860](https://github.com/unit8co/darts/pull/2860) by [Dennis Bader](https://github.com/dennisbader).
 - Removed `darts/tests` and `examples` from the Darts package distribution. These are only required for internal testing. [#2854](https://github.com/unit8co/darts/pull/2854) by [Dennis Bader](https://github.com/dennisbader).
 
 **Dependencies**
diff --git a/darts/models/forecasting/torch_forecasting_model.py b/darts/models/forecasting/torch_forecasting_model.py
@@ -737,8 +737,17 @@ def _update_covariates_use(self):
         _, past_cov, historic_future_cov, future_cov, static_cov, _ = self.train_sample
 
         self._uses_past_covariates = past_cov is not None
+        self._expect_past_covariates = (
+            self.uses_past_covariates and self.past_covariate_series is None
+        )
         self._uses_future_covariates = future_cov is not None
+        self._expect_future_covariates = (
+            self.uses_future_covariates and self.future_covariate_series is None
+        )
         self._uses_static_covariates = static_cov is not None
+        self._expect_static_covariates = (
+            self.uses_static_covariates and self.static_covariates is None
+        )
 
     def to_onnx(self, path: Optional[str] = None, **kwargs):
         """Export model to ONNX format for optimized inference, wrapping around PyTorch Lightning's
@@ -1204,6 +1213,10 @@ def _setup_for_train(
                 f" provided input/output dimensions = {sample_shapes}",
             )
 
+        # update the covariates usage based on the training sample (required if model training was called
+        # with `fit_from_dataset()`)
+        self._update_covariates_use()
+
         # loss must not reduce the output when using sample weight
         train_sample_weight = train_sample[-2]
         val_sample_weight = val_dataset[0][-2] if val_dataset is not None else None
@@ -1581,8 +1594,10 @@ def predict(
             if self.training_series is None:
                 raise_log(
                     ValueError(
-                        "Input `series` must be provided. This is the result either from fitting on multiple series, "
-                        "from not having fit the model yet, or from loading a model saved with `clean=True`."
+                        "Input `series` must be provided. This is the result either from "
+                        "fitting on multiple series, from fitting with `fit_from_dataset()`, "
+                        "from not having fit the model yet, or from loading a model saved with "
+                        "`clean=True`."
                     ),
                     logger,
                 )
diff --git a/darts/tests/models/forecasting/test_global_forecasting_models.py b/darts/tests/models/forecasting/test_global_forecasting_models.py
@@ -358,10 +358,18 @@ def test_save_load_model(self, tmpdir_fn, model):
         # The serie to predict need to be provided at prediction time
         with pytest.raises(ValueError) as err:
             loaded_model_clean_str.predict(self.forecasting_horizon)
-        assert str(err.value) == (
-            "Input `series` must be provided. This is the result either from fitting on multiple series, "
-            "from not having fit the model yet, or from loading a model saved with `clean=True`."
-        )
+        if isinstance(model, TorchForecastingModel):
+            assert str(err.value) == (
+                "Input `series` must be provided. This is the result either from fitting on multiple series, "
+                "from fitting with `fit_from_dataset()`, from not having fit the model yet, or from loading a "
+                "model saved with `clean=True`."
+            )
+        else:
+            assert str(err.value) == (
+                "Input `series` must be provided. This is the result either from fitting on multiple series, "
+                "from not having fit the model yet, or from loading a "
+                "model saved with `clean=True`."
+            )
 
         # When the serie to predict is provided, the prediction is the same
         assert model_prediction == loaded_model_clean_str.predict(
diff --git a/darts/tests/models/forecasting/test_torch_forecasting_model.py b/darts/tests/models/forecasting/test_torch_forecasting_model.py
@@ -15,6 +15,12 @@
 from darts.dataprocessing.transformers import BoxCox, Scaler
 from darts.metrics import mape
 from darts.tests.conftest import TORCH_AVAILABLE, tfm_kwargs, tfm_kwargs_dev
+from darts.utils.data.torch_datasets.inference_dataset import (
+    SequentialTorchInferenceDataset,
+)
+from darts.utils.data.torch_datasets.training_dataset import (
+    SequentialTorchTrainingDataset,
+)
 
 if not TORCH_AVAILABLE:
     pytest.skip(
@@ -232,7 +238,8 @@ def on_train_epoch_end(self, trainer, pl_module):
             no_train_model.predict(n=4)
         assert str(err.value) == (
             "Input `series` must be provided. This is the result either from fitting on multiple series, "
-            "from not having fit the model yet, or from loading a model saved with `clean=True`."
+            "from fitting with `fit_from_dataset()`, from not having fit the model yet, or from loading a "
+            "model saved with `clean=True`."
         )
 
         model_manual_save.fit(self.series, epochs=1)
@@ -284,7 +291,8 @@ def on_train_epoch_end(self, trainer, pl_module):
                 model_manual_save.predict(n=4)
             assert str(err.value) == (
                 "Input `series` must be provided. This is the result either from fitting on multiple series, "
-                "from not having fit the model yet, or from loading a model saved with `clean=True`."
+                "from fitting with `fit_from_dataset()`, from not having fit the model yet, or from loading a "
+                "model saved with `clean=True`."
             )
             # Predicting while giving the training series in args should yield same prediction
             assert model_manual_save.predict(
@@ -2374,6 +2382,173 @@ def test_fit_with_stride(self, stride):
             assert len(train_set) == len(val_set) == math.ceil(3 / stride)
             assert train_set.stride == val_set.stride == stride
 
+    def test_predict_after_fit_from_dataset(self):
+        """Test that the model can predict after `fit_from_dataset()` with past, future and static covariates."""
+        icl, ocl = kwargs["input_chunk_length"], kwargs["output_chunk_length"]  # `kwargs` is not local here
+        n = 1  # forecast horizon used by all predict calls below
+        series = [  # target sliced to `icl + ocl` steps, with one static covariate `sc`
+            self.series[: icl + ocl].with_static_covariates(pd.DataFrame({"sc": [0.0]}))
+        ]
+        pc = [self.series[: icl + ocl]]  # past covariates: same slice as the target
+        fc = [self.series[: icl + ocl + n]]  # future covariates: sliced `n` steps further
+
+        model = TiDEModel(**kwargs)
+
+        train_dataset = SequentialTorchTrainingDataset(
+            series=series,
+            past_covariates=pc,
+            future_covariates=fc,
+            input_chunk_length=icl,
+            output_chunk_length=ocl,
+            use_static_covariates=True,
+        )
+
+        # check training works, covariates are used, and all of them are expected at prediction time
+        model.fit_from_dataset(train_dataset=train_dataset)
+        assert model.uses_past_covariates
+        assert model.uses_future_covariates
+        assert model.uses_static_covariates
+        assert model._expect_past_covariates
+        assert model._expect_future_covariates
+        assert model._expect_static_covariates
+
+        with pytest.raises(ValueError) as exc:  # `series` is not passed
+            _ = model.predict(n=n)
+        assert str(exc.value).startswith("Input `series` must be provided.")
+
+        hfc_kwargs = {"forecast_horizon": n, "retrain": False, "overlap_end": True}
+        self.helper_predict_raise_on_missing_input(
+            model, "predict", series, pc, fc, n=n
+        )
+        self.helper_predict_raise_on_missing_input(
+            model, "historical_forecasts", series, pc, fc, **hfc_kwargs
+        )
+        self.helper_predict_from_ds_raise_on_missing_input(
+            model,
+            series,
+            pc,
+            fc,
+            n=n,
+            input_chunk_length=icl,
+            output_chunk_length=ocl,
+        )
+
+        # check that all predict methods give the same forecast when every input is provided
+        inference_dataset = SequentialTorchInferenceDataset(
+            series=series,
+            past_covariates=pc,
+            future_covariates=fc,
+            n=n,
+            input_chunk_length=icl,
+            output_chunk_length=ocl,
+            use_static_covariates=True,
+        )
+        pred1 = model.predict(
+            n=n, series=series, past_covariates=pc, future_covariates=fc
+        )[0]
+        pred2 = model.predict_from_dataset(n=n, dataset=inference_dataset)[0]
+        pred3 = model.historical_forecasts(
+            forecast_horizon=n,
+            series=series,
+            past_covariates=pc,
+            future_covariates=fc,
+            retrain=False,
+            overlap_end=True,
+        )[0]
+        # extract only the last historical forecast, which should be the same as the regular predictions
+        pred3 = pred3[-1]
+        assert pred1 == pred2
+        np.testing.assert_array_almost_equal(pred3.all_values(), pred1.all_values())
+        assert pred3.time_index.equals(pred1.time_index)
+        assert pred3.static_covariates.equals(series[0].static_covariates)
+
+    def helper_predict_raise_on_missing_input(
+        self, model, fn: str, series, pc, fc, **kwargs
+    ):
+        """Helper to check that method `fn` (`predict()` or `historical_forecasts()`) of a model trained with
+        `fit_from_dataset()` raises a `ValueError` when covariates are missing; `kwargs` are forwarded to `fn`."""
+        with pytest.raises(ValueError) as exc:  # no covariates passed
+            _ = getattr(model, fn)(series=series, **kwargs)
+        assert str(exc.value).startswith("The model was trained with past covariates.")
+        with pytest.raises(ValueError) as exc:  # future covariates missing
+            _ = getattr(model, fn)(series=series, past_covariates=pc, **kwargs)
+        assert str(exc.value).startswith(
+            "The model was trained with future covariates."
+        )
+        with pytest.raises(ValueError) as exc:  # past covariates missing
+            _ = getattr(model, fn)(series=series, future_covariates=fc, **kwargs)
+        assert str(exc.value).startswith("The model was trained with past covariates.")
+        with pytest.raises(ValueError) as exc:  # static covariates removed from `series`
+            _ = getattr(model, fn)(
+                series=[series[0].with_static_covariates(None)],
+                past_covariates=pc,
+                future_covariates=fc,
+                **kwargs,
+            )
+        assert str(exc.value).startswith(
+            "The model was trained with static covariates."
+        )
+
+    def helper_predict_from_ds_raise_on_missing_input(
+        self,
+        model,
+        series,
+        pc,
+        fc,
+        n,
+        **kwargs,
+    ):
+        """Helper to check that `predict_from_dataset()` of a model trained with `fit_from_dataset()` raises a
+        `ValueError` when the inference dataset lacks covariates; `kwargs` are forwarded to the dataset."""
+        inf_dataset = SequentialTorchInferenceDataset(  # no covariates passed
+            n=n,
+            series=series,
+            **kwargs,
+        )
+        with pytest.raises(ValueError) as exc:
+            _ = model.predict_from_dataset(n=n, dataset=inf_dataset)
+        assert str(exc.value).startswith(
+            "This model has been trained with `past_covariates`"
+        )
+
+        inf_dataset = SequentialTorchInferenceDataset(  # only past covariates passed
+            n=n,
+            series=series,
+            past_covariates=pc,
+            **kwargs,
+        )
+        with pytest.raises(ValueError) as exc:
+            _ = model.predict_from_dataset(n=n, dataset=inf_dataset)
+        assert str(exc.value).startswith(
+            "This model has been trained with `historic_future_covariates`"
+        )
+
+        inf_dataset = SequentialTorchInferenceDataset(  # only future covariates passed
+            n=n,
+            series=series,
+            future_covariates=fc,
+            **kwargs,
+        )
+        with pytest.raises(ValueError) as exc:
+            _ = model.predict_from_dataset(n=n, dataset=inf_dataset)
+        assert str(exc.value).startswith(
+            "This model has been trained with `past_covariates`"
+        )
+
+        inf_dataset = SequentialTorchInferenceDataset(  # static covariates disabled
+            n=n,
+            series=series,
+            past_covariates=pc,
+            future_covariates=fc,
+            use_static_covariates=False,
+            **kwargs,
+        )
+        with pytest.raises(ValueError) as exc:
+            _ = model.predict_from_dataset(n=n, dataset=inf_dataset)
+        assert str(exc.value).startswith(
+            "This model has been trained with `static_covariates`"
+        )
+
     def helper_equality_encoders(
         self, first_encoders: dict[str, Any], second_encoders: dict[str, Any]
     ):