Skip to content

Commit a044636

Browse files
authored
Merge pull request #106 from andrewm4894/make-list-of-models
Make list of models
2 parents 90b83df + 886369f commit a044636

File tree

4 files changed

+59
-31
lines changed

4 files changed

+59
-31
lines changed

anomstack/jobs/score.py

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ def noop():
5858

5959
metric_batch = spec["metric_batch"]
6060
model_path = spec["model_path"]
61-
model_tag = spec["model_config"].get("model_tag", "")
61+
model_configs = spec["model_configs"]
62+
model_combination_method = spec.get("model_combination_method", "mean")
6263
table_key = spec["table_key"]
6364
db = spec["db"]
6465
preprocess_params = spec["preprocess_params"]
@@ -110,17 +111,6 @@ def score(df) -> pd.DataFrame:
110111
logger.debug(f"preprocess {metric_name} in {metric_batch} score job.")
111112
logger.debug(f"df_metric:\n{df_metric.head()}")
112113

113-
# try load model and catch google.api_core.exceptions.NotFound
114-
try:
115-
model = load_model(metric_name, model_path, metric_batch, model_tag)
116-
except NotFound as e:
117-
logger.warning(e)
118-
logger.warning(
119-
f"model not found for {metric_name} in "
120-
f"{metric_batch} score job."
121-
)
122-
continue
123-
124114
X = preprocess(df_metric, **preprocess_params)
125115

126116
if len(X) == 0:
@@ -131,11 +121,40 @@ def score(df) -> pd.DataFrame:
131121

132122
logger.debug(f"X:\n{X.head()}")
133123

134-
scores = model.predict_proba(X)
124+
scores = {}
125+
for model_config in model_configs:
126+
127+
model_tag = model_config.get("model_tag", "")
128+
129+
try:
130+
model = load_model(
131+
metric_name, model_path, metric_batch, model_tag
132+
)
133+
scores_tmp = model.predict_proba(X)
134+
scores_tmp = scores_tmp[:, 1] # probability of anomaly
135+
scores[f'{metric_name}_{model_tag}'] = scores_tmp
136+
except NotFound as e:
137+
logger.warning(e)
138+
logger.warning(
139+
f"model not found for {metric_name} in "
140+
f"{metric_batch} score job."
141+
)
142+
continue
143+
144+
if model_combination_method == "mean":
145+
scores = pd.DataFrame(scores).mean(axis=1).values
146+
elif model_combination_method == "max":
147+
scores = pd.DataFrame(scores).max(axis=1).values
148+
elif model_combination_method == "min":
149+
scores = pd.DataFrame(scores).min(axis=1).values
150+
else:
151+
raise ValueError(
152+
f"model_combination_method {model_combination_method} not supported."
153+
)
135154

136155
# create initial df_score
137156
df_score = pd.DataFrame(
138-
data=scores[:, 1], # probability of anomaly
157+
data=scores,
139158
index=X.index,
140159
columns=["metric_value"],
141160
).round(3)

anomstack/jobs/train.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,7 @@ def noop():
5757
db = spec["db"]
5858
model_path = spec["model_path"]
5959
preprocess_params = spec["preprocess_params"]
60-
model_name = spec["model_config"]["model_name"]
61-
model_tag = spec["model_config"].get("model_tag", "")
62-
model_params = spec["model_config"]["model_params"]
60+
model_configs = spec["model_configs"]
6361

6462
@job(
6563
name=f"{metric_batch}_train",
@@ -130,10 +128,18 @@ def train(df) -> List[Tuple[str, BaseDetector, str]]:
130128
f"len(X)={len(X)}"
131129
)
132130
)
133-
model = train_model(
134-
X, metric_name, model_name, model_params, model_tag
135-
)
136-
models.append((metric_name, model, model_tag))
131+
for model_config in model_configs:
132+
model_name = model_config["model_name"]
133+
model_params = model_config["model_params"]
134+
model_tag = model_config.get("model_tag", "")
135+
model = train_model(
136+
X,
137+
metric_name,
138+
model_name,
139+
model_params,
140+
model_tag
141+
)
142+
models.append((metric_name, model, model_tag))
137143
else:
138144
logger.info(
139145
f"no data for {metric_name} in {metric_batch} train job."

metrics/defaults/defaults.yaml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@ model_path: "local://./models" # path to where models are to be stored.
55
# model_path: "gs://your-bucket/models" # gcs path to where models are to be stored.
66
# model_path: "s3://your-bucket/models" # s3 path to where models are to be stored.
77
# model configs to pass to PyOD, model_params are passed to the model constructor.
8-
model_config:
9-
model_name: 'PCA'
10-
model_tag: 'pca_default'
11-
model_params:
12-
contamination: 0.01
13-
# metric_tags is a map of metric key value tags to metric names
8+
model_configs:
9+
- model_name: 'PCA'
10+
model_tag: 'pca_default'
11+
model_params:
12+
contamination: 0.01
13+
- model_name: 'KNN'
14+
model_tag: 'knn_default'
15+
model_params:
16+
contamination: 0.01
17+
model_combination_method: 'mean' # method to combine model scores, 'mean', 'min' or 'max'.
1418
# metric_tags:
1519
# metric_name:
1620
# key1: value1
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
metric_batch: "netdata"
2-
db: "bigquery"
3-
table_key: "andrewm4894.metrics.metrics"
4-
model_path: "gs://andrewm4894-tmp/models"
2+
db: "sqlite"
3+
table_key: "metrics"
54
ingest_cron_schedule: "*/10 * * * *"
65
train_cron_schedule: "*/60 * * * *"
76
score_cron_schedule: "*/15 * * * *"
@@ -11,6 +10,6 @@ llmalert_cron_schedule: "*/20 * * * *"
1110
plot_cron_schedule: "*/25 * * * *"
1211
alert_always: False
1312
disable_llmalert: False
14-
alert_methods: "email"
13+
alert_methods: "email,slack"
1514
ingest_fn: >
1615
{% include "./examples/netdata/netdata.py" %}

0 commit comments

Comments (0)