Skip to content

Commit 19d8b16

Browse files
committed
Update docs to indicate IDs can be int or str; refactor on_off_status
1 parent 4bb48c5 commit 19d8b16

File tree

2 files changed, with 55 additions (+55) and 29 deletions (-29).

2 files changed, with 55 additions (+55) and 29 deletions (-29).

caar/histsummary.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -462,6 +462,12 @@ def _get_column_of_data_label(df, label):
462462
return i
463463

464464

465+
def _sliced_by_id_or_ids_and_time_index(df, id_or_ids, start, end):
466+
sliced_by_id = _slice_by_single_index(df, id_index=id_or_ids)
467+
sliced_by_dt = _slice_by_single_index(sliced_by_id, time_index=(start, end))
468+
return sliced_by_dt
469+
470+
465471
def squared_avg_daily_data_points_per_id(df):
466472
""" Returns DataFrame grouped by the primary id (ThermostatId or
467473
LocationId) and by day. The value column has the count of data points

caar/timeseries.py

Lines changed: 49 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22
import datetime as dt
33
import numpy as np
44
import pandas as pd
5-
from caar.histsummary import location_id_of_thermo, \
6-
_get_time_level_of_df_multiindex, _get_time_column_of_data, \
5+
from caar.histsummary import location_id_of_thermo, _get_time_column_of_data, \
6+
_get_time_level_of_df_multiindex, _sliced_by_id_or_ids_and_time_index, \
77
_get_column_of_data_label
88

99
from future import standard_library
@@ -68,7 +68,7 @@ def on_off_status(df, id, start, end, freq='1min'):
6868
Args:
6969
df (pandas DataFrame): The DataFrame should contain cycles data, and should have been created by the **history** module.
7070
71-
id (int): Thermostat ID.
71+
id (int or str): Thermostat ID.
7272
7373
start (datetime.datetime): Starting datetime.
7474
@@ -84,47 +84,67 @@ def on_off_status(df, id, start, end, freq='1min'):
8484
dtype=[('times', 'datetime64[m]'),
8585
('on', 'int8')])
8686
status_in_intervals['times'] = dt_index.to_pydatetime()
87-
idx = pd.IndexSlice
88-
# End should already be late enough that additional Timedelta of 1 unit of
89-
# frequency is not needed
90-
records = df.loc[idx[id, :, start:end], :]
91-
# Start times of ON cycles
92-
time_index = _get_time_level_of_df_multiindex(df)
93-
raw_record_starts = pd.DatetimeIndex(records
94-
.index
95-
.get_level_values(time_index))
96-
starts_by_freq = (raw_record_starts
97-
.snap(freq=freq)
98-
.tolist())
99-
time_column = _get_time_column_of_data(df)
100-
raw_record_ends = pd.DatetimeIndex(records.iloc[:, time_column]
101-
.tolist())
102-
record_ends_by_freq = (raw_record_ends
103-
.snap(freq=freq)
104-
.tolist())
105-
# Populate array
106-
starts = _integer_index_based_on_freq(starts_by_freq, start, freq)
107-
ends = _integer_index_based_on_freq(record_ends_by_freq, start, freq)
108-
109-
for i in range(len(records)):
87+
# Start and end times of ON cycles
88+
kwargs = {'id_or_ids': id, 'start': start, 'end': end, 'freq': freq}
89+
starts = _df_select_time_index_values(df, **kwargs)
90+
ends = _df_select_time_data_values(df, **kwargs)
91+
for i in range(len(starts)):
11092
status_in_intervals[starts[i]:ends[i] + 1]['on'] = 1
11193
return status_in_intervals
11294

11395

114-
def _integer_index_based_on_freq(datetimes, reference, frequency):
115-
time_deltas = pd.Series(datetimes) - reference
96+
def _integer_index_dec(func):
97+
def wrapper(arg, **kwargs):
98+
decorated = func(arg, **kwargs)
99+
start, freq = (kwargs.get(k) for k in ['start', 'freq'])
100+
decorated = _integer_index_based_on_freq(decorated, zero_index=start, frequency=freq)
101+
return decorated
102+
return wrapper
103+
104+
105+
def _integer_index_based_on_freq(datetimes, zero_index, frequency):
106+
time_deltas = pd.Series(datetimes) - zero_index
116107
freq = _timedelta_from_string(frequency)
117108
indexes = np.array(time_deltas/freq).astype(np.int)
118109
return indexes
119110

120111

112+
@_integer_index_dec
113+
def _df_select_time_index_values(df, id_or_ids=None, start=None, end=None, freq=None):
114+
sliced = _sliced_by_id_or_ids_and_time_index(df, id_or_ids, start, end)
115+
times_by_freq = (_df_time_index(sliced)
116+
.snap(freq=freq)
117+
.tolist())
118+
return times_by_freq
119+
120+
121+
@_integer_index_dec
122+
def _df_select_time_data_values(df, id_or_ids=None, start=None, end=None, freq=None):
123+
sliced = _sliced_by_id_or_ids_and_time_index(df, id_or_ids, start, end)
124+
time_column = _get_time_column_of_data(df)
125+
raw_record_ends = pd.DatetimeIndex(sliced.iloc[:, time_column]
126+
.tolist())
127+
ends_by_freq = (raw_record_ends
128+
.snap(freq=freq)
129+
.tolist())
130+
return ends_by_freq
131+
132+
133+
def _df_time_index(df):
134+
time_index = _get_time_level_of_df_multiindex(df)
135+
raw_record_times = pd.DatetimeIndex(df
136+
.index
137+
.get_level_values(time_index))
138+
return raw_record_times
139+
140+
121141
def temps_arr_by_freq(df, id, start, end, cols=None, freq='1min', actuals_only=False):
122142
"""Returns NumPy array containing timestamps ('times') and temperatures at the specified frequency. If *actuals_only* is True, only the observed temperatures will be returned in an array. Otherwise, by default, intervals without observations are filled with zeros.
123143
124144
Args:
125145
df (pandas DataFrame): DataFrame with temperatures from **history** module.
126146
127-
id (int): Thermostat ID.
147+
id (int or str): Thermostat ID or Location ID.
128148
129149
start (datetime.datetime): First interval to include in output array.
130150

0 commit comments

Comments
 (0)