
Commit b35d129

fix: deterministic expenv, feature: uniform distribution

1 parent b412212

File tree: 18 files changed, +457 −50 lines

experimental_env/analysis/analyze_summarizers/error_summarizer.py

Lines changed: 3 additions & 2 deletions

@@ -38,10 +38,11 @@ def calculate(self, results: list[ExperimentDescription]) -> tuple:

             errors.append(error)

+        if not errors:
+            return 0, 0, 0
+
         mean = np.sum(errors) / len(errors)
         standart_deviation = np.sqrt(np.sum([(x - mean) ** 2 for x in errors]) / len(errors))
-
-        errors.sort()
         median = errors[len(errors) // 2]

         return float(mean), float(standart_deviation), float(median)
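
The new guard simply short-circuits when every experiment result was filtered out, so the summary statistics no longer divide by zero. A minimal, self-contained sketch of the intended statistics (plain NumPy, not the project's summarizer class; names are illustrative):

import numpy as np

def summarize_errors(errors: list[float]) -> tuple[float, float, float]:
    """Return (mean, population standard deviation, upper median); zeros for an empty list."""
    if not errors:
        return 0.0, 0.0, 0.0
    mean = float(np.mean(errors))
    std = float(np.std(errors))                       # same formula as the diff: ddof = 0
    median = float(sorted(errors)[len(errors) // 2])  # upper median, as in the diff
    return mean, std, median

print(summarize_errors([]))               # (0.0, 0.0, 0.0)
print(summarize_errors([1.0, 2.0, 4.0]))  # (2.333..., 1.247..., 2.0)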

experimental_env/experiment/estimators.py

Lines changed: 2 additions & 2 deletions

@@ -50,7 +50,7 @@ def __init__(self, brkpointer: EM.ABreakpointer, dst_checker: EM.ADistributionChecker):

     @property
     def name(self):
-        return "MLE-EM"
+        return "EM"

     def _helper(self, problem: OrderedProblem):
         """
@@ -90,7 +90,7 @@ def __init__(self, brkpointer, dst_checker):

     @property
     def name(self):
-        return "LM-EM"
+        return "ELM"

     def _helper(self, problem: OrderedProblem):
         """

experimental_env/experiment/experiment_executors/abstract_executor.py

Lines changed: 4 additions & 1 deletion

@@ -1,5 +1,6 @@
 """A module that provides an abstract class for performing the 2nd stage of the experiment"""

+import random
 import warnings
 from abc import ABC, abstractmethod
 from pathlib import Path
@@ -20,7 +21,7 @@ class AExecutor(ABC):
     as well as the implementation of the execute method, to implement the 2nd stage of the experiment.
     """

-    def __init__(self, path: Path, cpu_count: int, seed):
+    def __init__(self, path: Path, cpu_count: int, seed: int):
        """
        Class constructor

@@ -31,6 +32,8 @@ def __init__(self, path: Path, cpu_count: int, seed):
         self._out_dir = path
         self._cpu_count = cpu_count
         self._seed = seed
+
+        random.seed(self._seed)
         np.random.seed(self._seed)

     @abstractmethod
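
Seeding both RNG sources in the executor constructor is what makes the second stage deterministic: mixture parameters are drawn via Python's random.uniform, while samples come from NumPy. A minimal sketch of the idea, using a hypothetical seed_everything helper rather than the project's AExecutor:

import random

import numpy as np

def seed_everything(seed: int) -> None:
    """Seed the two global RNGs the experiment environment relies on."""
    random.seed(seed)     # drives random.uniform in mixture parameter generation
    np.random.seed(seed)  # drives NumPy-based sample generation

seed_everything(42)
first = (random.uniform(0.1, 5.0), float(np.random.normal()))

seed_everything(42)
second = (random.uniform(0.1, 5.0), float(np.random.normal()))

assert first == second  # identical draws after re-seeding with the same value

This is presumably also why the seed argument disappears from the mixture generators in the changes below: re-seeding the global RNG inside every generator would reset the stream on each call instead of merely making the whole run reproducible.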

experimental_env/experiment/experiment_executors/random_executor.py

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@ def init_problems(self, ds_descriptions, models):
         return [
             Problem(
                 descr.samples,
-                RandomMixtureGenerator(self._seed).create_mixture(models),
+                RandomMixtureGenerator().create_mixture(models),
             )
             for i, descr in enumerate(ds_descriptions)
         ]

experimental_env/experiment/experiment_executors/standart_executor.py

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@ def init_problems(self, ds_descriptions, models):
         return [
             Problem(
                 descr.samples,
-                StandartMixtureGenerator(self._seed).create_mixture(models),
+                StandartMixtureGenerator().create_mixture(models),
             )
             for i, descr in enumerate(ds_descriptions)
         ]

experimental_env/mixture_generators/abstract_generator.py

Lines changed: 0 additions & 4 deletions

@@ -1,6 +1,5 @@
 """A module that provides an abstract class for generating a mixture."""

-import random
 from abc import ABC, abstractmethod

 from mpest import Distribution, MixtureDistribution
@@ -12,9 +11,6 @@ class AMixtureGenerator(ABC):
     An abstract class for generating mixtures.
     """

-    def __init__(self, seed: int = 42):
-        random.seed(seed)
-
     @abstractmethod
     def generate_priors(self, models: list[type[AModel]]) -> list[float | None]:
         """

experimental_env/mixture_generators/utils.py

Lines changed: 18 additions & 5 deletions

@@ -3,7 +3,8 @@
 from random import uniform

 from mpest import Distribution
-from mpest.models import AModel, ExponentialModel, GaussianModel
+from mpest.models import AModel, Beta, Cauchy, ExponentialModel, GaussianModel, Pareto
+from mpest.models.uniform import Uniform


 def generate_standart_params(models: list[type[AModel]]) -> list[Distribution]:
@@ -14,10 +15,14 @@ def generate_standart_params(models: list[type[AModel]]) -> list[Distribution]:
     for m in models:
         if m == ExponentialModel:
             params = [1.0]
-        elif m == GaussianModel:
+        elif m in (GaussianModel, Uniform, Cauchy):
             params = [0.0, 1.0]
-        else:
-            params = [1.0, 1.5]
+        elif m == Beta:
+            params = [1.0, 1.0]
+        elif m == Pareto:
+            params = [1.0, 2.0]
+        else:  # Weibull
+            params = [1.0, 1.0]

         dists.append(Distribution.from_params(m, params))

@@ -34,7 +39,15 @@ def generate_uniform_params(models: list[type[AModel]]) -> list[Distribution]:
             params = [uniform(0.1, 5.0)]
         elif m == GaussianModel:
             params = [uniform(-5.0, 5.0), uniform(0.1, 5.0)]
-        else:
+        elif m == Uniform:
+            params = list(sorted([uniform(-5.0, 5.0), uniform(-5.0, 5.0)]))
+        elif m == Cauchy:
+            params = [uniform(-5.0, 5.0), uniform(0.1, 5.0)]
+        elif m == Beta:
+            params = [uniform(0.1, 5.0), uniform(0.1, 5.0)]
+        elif m == Pareto:
+            params = [uniform(0.1, 5.0), uniform(1.0, 5.0)]
+        else:  # Weibull
             params = [uniform(0.1, 5.0), uniform(0.1, 5.0)]

         dists.append(Distribution.from_params(m, params))
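
For the new Uniform model, both endpoints are drawn from the same interval and then sorted, which guarantees a <= b. A standalone sketch of just that branch (the helper name is illustrative, not part of the package):

from random import uniform

def draw_uniform_bounds(low: float = -5.0, high: float = 5.0) -> list[float]:
    """Draw two endpoints on [low, high] and return them as [a, b] with a <= b."""
    return sorted([uniform(low, high), uniform(low, high)])

a, b = draw_uniform_bounds()
assert a <= b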

experimental_env/preparation/dataset_generator.py

Lines changed: 3 additions & 2 deletions

@@ -26,9 +26,11 @@ def __init__(self, seed: int = 42):
        """
        Setting seed for determined result.
        """
-        random.seed(seed)
         self._seed = seed

+        random.seed(self._seed)
+        np.random.seed(self._seed)
+
     def generate(
         self,
         samples_size: int,
@@ -59,7 +61,6 @@ class ConcreteDatasetGenerator:
     """

     def __init__(self, seed: int = 42):
-        np.random.seed(seed)
         self._dists: list[Distribution] = []
         self._priors: list[float | None] = []

mpest/em/methods/likelihood_method.py

Lines changed: 26 additions & 13 deletions

@@ -8,8 +8,7 @@
 from mpest.core.mixture_distribution import MixtureDistribution
 from mpest.core.problem import Problem, Result
 from mpest.em.methods.abstract_steps import AExpectation, AMaximization
-from mpest.exceptions import SampleError
-from mpest.models import AModel, AModelDifferentiable
+from mpest.models import AModel, AModelDifferentiable, Uniform
 from mpest.optimizers import AOptimizerJacobian, TOptimizer
 from mpest.utils import ResultWithError

@@ -31,16 +30,8 @@ def step(self, problem: Problem) -> EResult:
         samples = problem.samples
         mixture = problem.distributions
         p_xij = []
-        active_samples = []
         for x in samples:
-            p = np.array([d.model.pdf(x, d.params) for d in mixture])
-            if np.any(p):
-                p_xij.append(p)
-                active_samples.append(x)
-
-        if not active_samples:
-            error = SampleError("None of the elements in the sample is correct for this mixture")
-            return ResultWithError(mixture, error)
+            p_xij.append(np.array([d.model.pdf(x, d.params) for d in mixture]))

         # h[j, i] contains probability of X_i to be a part of distribution j
         m = len(p_xij)
@@ -56,7 +47,7 @@ def step(self, problem: Problem) -> EResult:

             h[:, i] = wp / swp

-        return active_samples, h, problem
+        return samples, h, problem


 # class ML(AExpectation[EResult]):
@@ -109,8 +100,30 @@ def step(self, e_result: EResult) -> Result:
         for j, ch in enumerate(h[:]):
             d = mixture[j]

+            if isinstance(d.model, Uniform):
+                threshold = 1e-2
+                curr_a, curr_b = d.params
+                relevant_indices = np.where(ch > threshold)[0]
+                if len(relevant_indices) == 0:
+                    new_params = d.params
+                else:
+                    relevant_samples = np.array(samples)[relevant_indices]
+
+                    new_a = np.min(relevant_samples)
+                    new_b = np.max(relevant_samples)
+
+                    new_params = np.array([new_a, new_b])
+
+                new_distributions.append(Distribution(d.model, new_params))
+                continue
+
             def log_likelihood(params, ch, model: AModel):
-                return -np.sum(ch * [model.lpdf(x, params) for x in samples])
+                Y = np.array([model.lpdf(x, params) for x in samples])
+                penalty = 1e20
+                weighted_neg_lpdf = [-c * y if y != -np.inf else penalty * c for y, c in zip(Y, ch)]
+                output = np.sum(weighted_neg_lpdf)
+
+                return output

             def jacobian(params, ch, model: AModelDifferentiable):
                 return -np.sum(
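
Two things happen in this maximization step. First, a Uniform component now gets a closed-form update instead of going through the numerical optimizer, likely because its likelihood is maximized on the boundary of the parameter space rather than at a stationary point: the new code takes the min and max of the samples whose responsibility for that component exceeds a small threshold, and keeps the previous parameters when no sample qualifies. Second, the weighted log-likelihood replaces -inf values of lpdf (samples outside a component's support) with a large finite penalty, so the optimizer always receives a finite objective. A self-contained sketch of the Uniform update under those assumptions (the function name is hypothetical, not the project's API):

import numpy as np

def uniform_m_step(samples, resp, old_params, threshold=1e-2):
    """Closed-form M-step for a Uniform(a, b) component.

    resp[i] is the responsibility of this component for samples[i].
    Falls back to the previous (a, b) when no sample is responsible enough.
    """
    relevant = np.where(resp > threshold)[0]
    if len(relevant) == 0:
        return np.asarray(old_params)
    chosen = np.asarray(samples)[relevant]
    return np.array([chosen.min(), chosen.max()])

x = np.array([0.2, 0.5, 0.9, 3.0])
resp = np.array([0.9, 0.8, 0.7, 0.0])        # the last point belongs to another component
print(uniform_m_step(x, resp, [0.0, 1.0]))   # -> [0.2 0.9]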

mpest/models/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -7,6 +7,7 @@
 )
 from mpest.models.exponential import ExponentialModel
 from mpest.models.gaussian import GaussianModel
+from mpest.models.uniform import Uniform
 from mpest.models.weibull import WeibullModelExp
 from mpest.models.cauchy import Cauchy
 from mpest.models.pareto import Pareto
@@ -19,4 +20,5 @@
     Cauchy().name: Cauchy,
     Pareto().name: Pareto,
     Beta().name: Beta,
+    Uniform().name: Uniform,
 }
