Skip to content

Commit 4904808

Browse files
authored
[Minor] Split prep_or_copy_df into copy and check_multiple_series_id (#1647)
* separate copy from check multiple ID
* fix remaining references
* clean up
* add copy to tests
* fix tests
* update tests
* fixes
* finish fixes
1 parent fe309be commit 4904808

File tree

10 files changed

+125
-85
lines changed

10 files changed

+125
-85
lines changed

docs/source/how-to-guides/feature-guides/global_local_trend.ipynb

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,28 +1263,7 @@
12631263
"cell_type": "code",
12641264
"execution_count": null,
12651265
"metadata": {},
1266-
"outputs": [
1267-
{
1268-
"ename": "ValueError",
1269-
"evalue": "Invalid frequency: NaT",
1270-
"output_type": "error",
1271-
"traceback": [
1272-
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1273-
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
1274-
"Input \u001b[0;32mIn [27]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m future \u001b[38;5;241m=\u001b[39m m\u001b[38;5;241m.\u001b[39mmake_future_dataframe(df_test)\n\u001b[0;32m----> 2\u001b[0m forecast \u001b[38;5;241m=\u001b[39m \u001b[43mm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfuture\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m metrics \u001b[38;5;241m=\u001b[39m m\u001b[38;5;241m.\u001b[39mtest(df_test)\n\u001b[1;32m 4\u001b[0m forecast_trend \u001b[38;5;241m=\u001b[39m m\u001b[38;5;241m.\u001b[39mpredict_trend(df_test)\n",
1275-
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/forecaster.py:831\u001b[0m, in \u001b[0;36mNeuralProphet.predict\u001b[0;34m(self, df, decompose, raw)\u001b[0m\n\u001b[1;32m 829\u001b[0m df, received_ID_col, received_single_time_series, _ \u001b[38;5;241m=\u001b[39m df_utils\u001b[38;5;241m.\u001b[39mprep_or_copy_df(df)\n\u001b[1;32m 830\u001b[0m \u001b[38;5;66;03m# to get all forecasteable values with df given, maybe extend into future:\u001b[39;00m\n\u001b[0;32m--> 831\u001b[0m df, periods_added \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_extend_df\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 832\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepare_dataframe_to_predict(df)\n\u001b[1;32m 833\u001b[0m \u001b[38;5;66;03m# normalize\u001b[39;00m\n",
1276-
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/forecaster.py:2773\u001b[0m, in \u001b[0;36mNeuralProphet._maybe_extend_df\u001b[0;34m(self, df)\u001b[0m\n\u001b[1;32m 2771\u001b[0m extended_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame()\n\u001b[1;32m 2772\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m df_name, df_i \u001b[38;5;129;01min\u001b[39;00m df\u001b[38;5;241m.\u001b[39mgroupby(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mID\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 2773\u001b[0m _ \u001b[38;5;241m=\u001b[39m \u001b[43mdf_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minfer_frequency\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_i\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_lags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_lags\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_freq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2774\u001b[0m \u001b[38;5;66;03m# to get all forecasteable values with df given, maybe extend into future:\u001b[39;00m\n\u001b[1;32m 2775\u001b[0m periods_add[df_name] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_maybe_extend_periods(df_i)\n",
1277-
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/df_utils.py:1324\u001b[0m, in \u001b[0;36minfer_frequency\u001b[0;34m(df, freq, n_lags, min_freq_percentage)\u001b[0m\n\u001b[1;32m 1322\u001b[0m freq_df \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m()\n\u001b[1;32m 1323\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m df_name, df_i \u001b[38;5;129;01min\u001b[39;00m df\u001b[38;5;241m.\u001b[39mgroupby(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mID\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 1324\u001b[0m freq_df\u001b[38;5;241m.\u001b[39mappend(\u001b[43m_infer_frequency\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf_i\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmin_freq_percentage\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mset\u001b[39m(freq_df)) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m n_lags \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 1326\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1327\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOne or more dataframes present different major frequencies, please make sure all dataframes present the same major frequency for auto-regression\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1328\u001b[0m )\n",
1278-
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/df_utils.py:1252\u001b[0m, in \u001b[0;36m_infer_frequency\u001b[0;34m(df, freq, min_freq_percentage)\u001b[0m\n\u001b[1;32m 1250\u001b[0m dominant_freq_percentage \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mmax() \u001b[38;5;241m/\u001b[39m \u001b[38;5;28mlen\u001b[39m(df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mds\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 1251\u001b[0m num_freq \u001b[38;5;241m=\u001b[39m frequencies[np\u001b[38;5;241m.\u001b[39margmax(distribution)] \u001b[38;5;66;03m# get value of most common diff\u001b[39;00m\n\u001b[0;32m-> 1252\u001b[0m inferred_freq \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_num_to_str_freq\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_freq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mds\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43miloc\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1254\u001b[0m log\u001b[38;5;241m.\u001b[39minfo(\n\u001b[1;32m 1255\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMajor frequency \u001b[39m\u001b[38;5;132;01m{\u001b[39;00minferred_freq\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m corresponds to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnp\u001b[38;5;241m.\u001b[39mround(dominant_freq_percentage \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m100\u001b[39m, \u001b[38;5;241m3\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m% of the data.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1256\u001b[0m )\n\u001b[1;32m 1257\u001b[0m ideal_freq_exists \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m dominant_freq_percentage 
\u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m min_freq_percentage \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n",
1279-
"File \u001b[0;32m~/Desktop/code/neural_prophet/neuralprophet/df_utils.py:1159\u001b[0m, in \u001b[0;36mconvert_num_to_str_freq\u001b[0;34m(freq_num, initial_time_stamp)\u001b[0m\n\u001b[1;32m 1144\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconvert_num_to_str_freq\u001b[39m(freq_num, initial_time_stamp):\n\u001b[1;32m 1145\u001b[0m \u001b[38;5;124;03m\"\"\"Convert numeric frequencies into frequency tags\u001b[39;00m\n\u001b[1;32m 1146\u001b[0m \n\u001b[1;32m 1147\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1157\u001b[0m \u001b[38;5;124;03m frequency tag\u001b[39;00m\n\u001b[1;32m 1158\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1159\u001b[0m aux_ts \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate_range\u001b[49m\u001b[43m(\u001b[49m\u001b[43minitial_time_stamp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_timedelta\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfreq_num\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1160\u001b[0m freq_str \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39minfer_freq(aux_ts)\n\u001b[1;32m 1161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m freq_str\n",
1280-
"File \u001b[0;32m~/Desktop/code/neural_prophet/env/lib/python3.8/site-packages/pandas/core/indexes/datetimes.py:1070\u001b[0m, in \u001b[0;36mdate_range\u001b[0;34m(start, end, periods, freq, tz, normalize, name, closed, inclusive, **kwargs)\u001b[0m\n\u001b[1;32m 1067\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m freq \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m com\u001b[38;5;241m.\u001b[39many_none(periods, start, end):\n\u001b[1;32m 1068\u001b[0m freq \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1070\u001b[0m dtarr \u001b[38;5;241m=\u001b[39m \u001b[43mDatetimeArray\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_generate_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1071\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1072\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1073\u001b[0m \u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1074\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1075\u001b[0m \u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1076\u001b[0m \u001b[43m \u001b[49m\u001b[43mnormalize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnormalize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1077\u001b[0m \u001b[43m \u001b[49m\u001b[43minclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1078\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1079\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1080\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DatetimeIndex\u001b[38;5;241m.\u001b[39m_simple_new(dtarr, name\u001b[38;5;241m=\u001b[39mname)\n",
1281-
"File \u001b[0;32m~/Desktop/code/neural_prophet/env/lib/python3.8/site-packages/pandas/core/arrays/datetimes.py:409\u001b[0m, in \u001b[0;36mDatetimeArray._generate_range\u001b[0;34m(cls, start, end, periods, freq, tz, normalize, ambiguous, nonexistent, inclusive)\u001b[0m\n\u001b[1;32m 404\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m com\u001b[38;5;241m.\u001b[39mcount_not_none(start, end, periods, freq) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m3\u001b[39m:\n\u001b[1;32m 405\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 406\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOf the four parameters: start, end, periods, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mand freq, exactly three must be specified\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 408\u001b[0m )\n\u001b[0;32m--> 409\u001b[0m freq \u001b[38;5;241m=\u001b[39m \u001b[43mto_offset\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 411\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m start \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 412\u001b[0m start \u001b[38;5;241m=\u001b[39m Timestamp(start)\n",
1282-
"File \u001b[0;32mpandas/_libs/tslibs/offsets.pyx:3580\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.offsets.to_offset\u001b[0;34m()\u001b[0m\n",
1283-
"File \u001b[0;32mpandas/_libs/tslibs/offsets.pyx:3682\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.offsets.to_offset\u001b[0;34m()\u001b[0m\n",
1284-
"\u001b[0;31mValueError\u001b[0m: Invalid frequency: NaT"
1285-
]
1286-
}
1287-
],
1266+
"outputs": [],
12881267
"source": [
12891268
"future = m.make_future_dataframe(df_test)\n",
12901269
"forecast = m.predict(future)\n",

neuralprophet/data/process.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,8 @@ def _check_dataframe(
399399
"Dataframe has less than n_forecasts + n_lags rows. "
400400
"Forecasting not possible. Please either use a larger dataset, or adjust the model parameters."
401401
)
402-
df, _, _, _ = df_utils.prep_or_copy_df(df)
402+
# df = df.copy(deep=True)
403+
# df, _, _, _ = df_utils.check_multiple_series_id(df)
403404
df, regressors_to_remove, lag_regressors_to_remove = df_utils.check_dataframe(
404405
df=df,
405406
check_y=check_y,
@@ -474,7 +475,8 @@ def _handle_missing_data(
474475
The pre-processed DataFrame, including imputed missing data, if applicable.
475476
476477
"""
477-
df, _, _, _ = df_utils.prep_or_copy_df(df)
478+
# df = df.copy(deep=True)
479+
# df, _, _, _ = df_utils.check_multiple_series_id(df)
478480

479481
if n_lags == 0 and not predicting:
480482
# drop rows with NaNs in y and count them

neuralprophet/data/transform.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ def _normalize(df: pd.DataFrame, config_normalization: Normalization) -> pd.Data
2424
-------
2525
df: pd.DataFrame, normalized
2626
"""
27-
df, _, _, _ = df_utils.prep_or_copy_df(df)
27+
# df = df.copy(deep=True)
28+
# df, _, _, _ = df_utils.check_multiple_series_id(df)
2829
df_norm = pd.DataFrame()
2930
for df_name, df_i in df.groupby("ID"):
3031
data_params = config_normalization.get_data_params(df_name)

neuralprophet/df_utils.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class ShiftScale:
2222
scale: float = 1.0
2323

2424

25-
def prep_or_copy_df(df: pd.DataFrame) -> tuple[pd.DataFrame, bool, bool, list[str]]:
25+
def check_multiple_series_id(df: pd.DataFrame) -> tuple[pd.DataFrame, bool, bool, list[str]]:
2626
"""Copy df if it contains the ID column. Creates ID column with '__df__' if it is a df with a single time series.
2727
Parameters
2828
----------
@@ -42,26 +42,23 @@ def prep_or_copy_df(df: pd.DataFrame) -> tuple[pd.DataFrame, bool, bool, list[st
4242
if not isinstance(df, pd.DataFrame):
4343
raise ValueError("Provided DataFrame (df) must be of pd.DataFrame type.")
4444

45-
# Create a copy of the dataframe
46-
df_copy = df.copy(deep=True)
47-
48-
df_has_id_column = "ID" in df_copy.columns
45+
df_has_id_column = "ID" in df.columns
4946

5047
# If there is no ID column, then add one with a single value
5148
if not df_has_id_column:
5249
log.debug("Provided DataFrame (df) contains a single time series.")
53-
df_copy["ID"] = "__df__"
54-
return df_copy, df_has_id_column, True, ["__df__"]
50+
df["ID"] = "__df__"
51+
return df, df_has_id_column, True, ["__df__"]
5552

5653
# Create a list of unique ID values
57-
unique_id_values = list(df_copy["ID"].unique())
54+
unique_id_values = list(df["ID"].unique())
5855
# Check if there is only one unique ID value
5956
df_has_single_time_series = len(unique_id_values) == 1
57+
num_time_series_id = len(unique_id_values)
6058

61-
single_or_multiple_message = "a single" if df_has_single_time_series else "multiple"
62-
log.debug(f"Provided DataFrame (df) has an ID column and contains {single_or_multiple_message} time series.")
59+
log.debug(f"Provided DataFrame (df) has an ID column and contains {num_time_series_id} time series.")
6360

64-
return df_copy, df_has_id_column, df_has_single_time_series, unique_id_values
61+
return df, df_has_id_column, df_has_single_time_series, unique_id_values
6562

6663

6764
def return_df_in_original_format(df, received_ID_col=False, received_single_time_series=True):
@@ -285,7 +282,8 @@ def init_data_params(
285282
ShiftScale entries containing ``shift`` and ``scale`` parameters for each column
286283
"""
287284
# Compute Global data params
288-
df, _, _, _ = prep_or_copy_df(df)
285+
# df = df.copy(deep=True)
286+
# df, _, _, _ = check_multiple_series_id(df)
289287
df_merged = df.copy(deep=True).drop("ID", axis=1)
290288
global_data_params = data_params_definition(
291289
df_merged, normalize, config_lagged_regressors, config_regressors, config_events, config_seasonality
@@ -382,6 +380,8 @@ def normalize(df, data_params):
382380
"""
383381
df = df.copy(deep=True)
384382
for name in df.columns:
383+
if name == "ID":
384+
continue
385385
if name not in data_params.keys():
386386
raise ValueError(f"Unexpected column {name} in data")
387387
new_name = name
@@ -428,7 +428,8 @@ def check_dataframe(
428428
pd.DataFrame or dict
429429
checked dataframe
430430
"""
431-
df, _, _, _ = prep_or_copy_df(df)
431+
# df = df.copy(deep=True)
432+
# df, _, _, _ = check_multiple_series_id(df)
432433
if df.groupby("ID").size().min() < 1:
433434
raise ValueError("Dataframe has no rows.")
434435
if "ds" not in df:
@@ -642,7 +643,9 @@ def _crossvalidation_with_time_threshold(df, n_lags, n_forecasts, k, fold_pct, f
642643
min_train = total_samples - samples_fold - (k - 1) * (samples_fold - samples_overlap)
643644
assert min_train >= samples_fold
644645
folds = []
645-
df_fold, _, _, _ = prep_or_copy_df(df)
646+
df_fold = df
647+
# df_fold = df.copy(deep=True)
648+
# df_fold, _, _, _ = check_multiple_series_id(df_fold)
646649
for i in range(k, 0, -1):
647650
threshold_time_stamp = find_time_threshold(df_fold, n_lags, n_forecasts, samples_fold, inputs_overbleed=True)
648651
df_train, df_val = split_considering_timestamp(
@@ -704,7 +707,8 @@ def crossvalidation_split_df(
704707
705708
validation data
706709
"""
707-
df, _, _, _ = prep_or_copy_df(df)
710+
# df = df.copy(deep=True)
711+
df, _, _, _ = check_multiple_series_id(df)
708712
folds = []
709713
if len(df["ID"].unique()) == 1:
710714
for df_name, df_i in df.groupby("ID"):
@@ -764,7 +768,8 @@ def double_crossvalidation_split_df(df, n_lags, n_forecasts, k, valid_pct, test_
764768
tuple of k tuples [(folds_val, folds_test), …]
765769
elements same as :meth:`crossvalidation_split_df` returns
766770
"""
767-
df, _, _, _ = prep_or_copy_df(df)
771+
# df = df.copy(deep=True)
772+
# df, _, _, _ = check_multiple_series_id(df)
768773
if len(df["ID"].unique()) > 1:
769774
raise NotImplementedError("double_crossvalidation_split_df not implemented for df with many time series")
770775
fold_pct_test = float(test_pct) / k
@@ -885,7 +890,8 @@ def split_df(
885890
pd.DataFrame, dict
886891
validation data
887892
"""
888-
df, _, _, _ = prep_or_copy_df(df)
893+
# df = df.copy(deep=True)
894+
# df, _, _, _ = check_multiple_series_id(df)
889895
df_train = pd.DataFrame()
890896
df_val = pd.DataFrame()
891897
if local_split:
@@ -1367,7 +1373,8 @@ def infer_frequency(df, freq, n_lags, min_freq_percentage=0.7):
13671373
Valid frequency tag according to major frequency.
13681374
13691375
"""
1370-
df, _, _, _ = prep_or_copy_df(df)
1376+
# df = df.copy(deep=True)
1377+
# df, _, _, _ = check_multiple_series_id(df)
13711378
freq_df = list()
13721379
for df_name, df_i in df.groupby("ID"):
13731380
freq_df.append(_infer_frequency(df_i, freq, min_freq_percentage))
@@ -1410,8 +1417,8 @@ def create_dict_for_events_or_regressors(
14101417
if other_df is None:
14111418
# if other_df is None, create dictionary with None for each ID
14121419
return {df_name: None for df_name in df_names}
1413-
1414-
other_df, received_ID_col, _, _ = prep_or_copy_df(other_df)
1420+
other_df = other_df.copy(deep=True)
1421+
other_df, received_ID_col, _, _ = check_multiple_series_id(other_df)
14151422
# if other_df does not contain ID, create dictionary with original ID with the same other_df for each ID
14161423
if not received_ID_col:
14171424
other_df = other_df.drop("ID", axis=1)

0 commit comments

Comments (0)