[BUG] sporadic failures in GLMRegressor #594

@fkiraly

Description

GLMRegressor sporadically fails to fit the underlying model. To reproduce, run check_estimator on GLMRegressor repeatedly until a failure occurs. Observed in CI on most python/OS combinations.
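A repro loop along these lines should surface it eventually; note the `skpro.regression.linear` and `skpro.utils` import paths and the `raise_exceptions` flag are assumptions based on the sktime-style `check_estimator` interface, and may need adjusting for the installed skpro version:

```python
# hypothetical repro sketch -- import locations assumed, not verified
from skpro.regression.linear import GLMRegressor
from skpro.utils import check_estimator

# the failure is sporadic (it depends on the random train subset drawn by the
# test fixtures), so repeat the full check suite until one run raises
for _ in range(50):
    # family="Gamma" matches the estimator shown in the traceback below;
    # raise_exceptions assumed to behave as in sktime's check_estimator
    check_estimator(GLMRegressor(family="Gamma"), raise_exceptions=True)
```

Example traceback from one such failure: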

skpro/regression/base/_base.py:114: in fit
    return self._fit(X_inner, y_inner)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
        C          = None
        X          =          age       sex       bmi  ...        s4        s5        s6
27 -0.023677 -0.044642  0.059541  ... -0.039493 -0....002592  0.019907 -0.017646
34  0.016281 -0.044642 -0.063330  ... -0.039493 -0.059471 -0.067351

[37 rows x 10 columns]
        X_inner    =          age       sex       bmi  ...        s4        s5        s6
27 -0.023677 -0.044642  0.059541  ... -0.039493 -0....002592  0.019907 -0.017646
34  0.016281 -0.044642 -0.063330  ... -0.039493 -0.059471 -0.067351

[37 rows x 10 columns]
        capa_surv  = False
        check_ret  = {'X_inner':          age       sex       bmi  ...        s4        s5        s6
27 -0.023677 -0.044642  0.059541  ... ...5.0, 'y_metadata': {'feature_names': ['target'], 'mtype': 'pd_DataFrame_Table', 'n_instances': 37, 'scitype': 'Table'}}
        self       = GLMRegressor(family='Gamma')
        y          =     target
27    85.0
36   265.0
33    87.0
25   202.0
5     97.0
10   101.0
        cov_type   = 'nonrobust'
        criterion  = [inf, 30.324771564896093, 10832.587976420831, 730.0612040100709, 580.3587970851831, 342.99670686827557, ...]
        dev        = 30.324771564896093
        endog      = array([ 85., 265.,  87., 202.,  97., 101., 341.,  75., 166., 110., 100.,
        75.,  63.,  68., 190., 185., 171., 17...., 142., 206.,
       137., 310.,  59., 129., 184., 102., 131., 135.,  90.,  68.,  92.,
       142., 245., 151.,  65.])
        history    = {'deviance': [inf, 30.324771564896093, 10832.587976420831, 730.0612040100709, 580.3587970851831, 342.99670686827557, ....    6.93199706e-01, -7.02561251e-01, -9.33801128e-02,  6.28021680e-04,
       -4.01062417e-01, -1.16555137e-01]), ...]}
        iteration  = 21
        kwargs     = {}
        lin_pred   = array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])
        maxiter    = 100
        mu         = array([inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf,
       inf, inf, inf, inf, inf, inf, inf, inf, inf, inf, inf])
        rtol       = 0.0
        scale      = None
        self       = <statsmodels.genmod.generalized_linear_model.GLM object at 0x30824ded0>
        start_params = array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
        tol        = 1e-08
        tol_criterion = 'deviance'
        use_t      = None
        wls_method = 'lstsq'
        wls_mod    = <statsmodels.regression._tools._MinimalWLS object at 0x306427090>
        wls_results = {'params': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'fittedvalues': array([0., 0., 0., 0., 0., 0., 0., 0., 0.,...9,
       -1.75291345e+243]), 'model': <statsmodels.regression._tools._MinimalWLS object at 0x306427090>, 'scale': 0.0}
        wlsendog   = array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])
        wlsexog    = array([[-0.02367725, -0.04464164,  0.05954058, -0.04009893, -0.04284755,
        -0.04358892,  0.01182372, -0.03949338...4164, -0.06332999, -0.05731319, -0.05798303,
        -0.04891244,  0.00814208, -0.03949338, -0.05947118, -0.06735141]])
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <statsmodels.regression._tools._MinimalWLS object at 0x306427ed0>
endog = array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])
exog = array([[-0.02367725, -0.04464164,  0.05954058, -0.04009893, -0.04284755,
        -0.04358892,  0.01182372, -0.03949338...4164, -0.06332999, -0.05731319, -0.05798303,
        -0.04891244,  0.00814208, -0.03949338, -0.05947118, -0.06735141]])
weights = array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan])
check_endog = True, check_weights = True

    def __init__(self, endog, exog, weights=1.0, check_endog=False,
                 check_weights=False):
        self.endog = endog
        self.exog = exog
        self.weights = weights
        w_half = np.sqrt(weights)
        if check_weights:
            if not np.all(np.isfinite(w_half)):
>               raise ValueError(self.msg.format('weights'))
E               ValueError: NaN, inf or invalid value detected in weights, estimation infeasible.
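The locals above point at statsmodels' IRLS loop rather than skpro itself: with `family='Gamma'` (default inverse power link) the iterations diverge (`wls_results` params on the order of 1e243 at iteration 21), `mu` overflows to `inf`, the IRLS weights become NaN, and `_MinimalWLS` rejects them. A minimal sketch of the same mechanism directly in statsmodels, assuming a small diabetes subsample like the one in the traceback and no added intercept column; which subsamples actually trigger the divergence varies, so the fit may also succeed:

```python
import numpy as np
import statsmodels.api as sm
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True, as_frame=True)

# small random subsample, mirroring the 37-row fixture seen in the traceback;
# the seed is arbitrary and may or may not reproduce the divergence
rng = np.random.default_rng(0)
idx = rng.choice(len(y), size=37, replace=False)
X_sub, y_sub = X.iloc[idx], y.iloc[idx]

# Gamma family with its default inverse power link; no constant column,
# matching the 10-column exog in the traceback
model = sm.GLM(y_sub, X_sub, family=sm.families.Gamma())

try:
    model.fit()  # IRLS; can diverge and raise for some subsamples
except ValueError as err:
    print(err)  # "NaN, inf or invalid value detected in weights, estimation infeasible."
```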
