
Commit 823bf02

Merge pull request #120 from andrewm4894/simplify-llm

Simplify llm

2 parents 70b9d81 + 907dd7c

File tree

7 files changed: +133 -34 lines changed


anomstack/alerts/asciiart.py

Lines changed: 12 additions & 6 deletions

@@ -528,15 +528,21 @@ def make_alert_message(
     metric_timestamp_to = (
         df_alert_metric["metric_timestamp"].max().strftime("%Y-%m-%d %H:%M")
     )
-    labels = (
-        np.where(df_alert_metric["metric_alert"] == 1, anomaly_symbol, normal_symbol)
-        + (df_alert_metric[score_col].round(2) * 100).astype("int").astype("str")
-        + "% "
-    )
-    data = zip(labels, x)
     graph_title = f"{metric_name} ({metric_timestamp_from} to {metric_timestamp_to})"
     message = ""
     if ascii_graph:
+        labels = (
+            np.where(
+                df_alert_metric["metric_alert"] == 1,
+                anomaly_symbol,
+                normal_symbol
+            )
+            + (df_alert_metric[score_col].round(2) * 100)
+            .astype("int")
+            .astype("str")
+            + "% "
+        )
+        data = zip(labels, x)
         graph = Pyasciigraph(
             titlebar=" ", graphsymbol=graph_symbol, float_format=alert_float_format
         ).graph(graph_title, data)
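For orientation, here is a minimal standalone sketch of the label logic that now lives only inside the if ascii_graph: branch. The symbol values and the toy DataFrame are made up for illustration; only the expression itself comes from the diff above.

import numpy as np
import pandas as pd

anomaly_symbol = "*"   # hypothetical placeholder symbol
normal_symbol = " "    # hypothetical placeholder symbol
score_col = "metric_score"

# toy data standing in for df_alert_metric
df_alert_metric = pd.DataFrame({
    "metric_alert": [0, 0, 1],
    "metric_score": [0.25, 0.5, 0.9],
})

# same expression as the added block: prefix each point with the
# anomaly/normal symbol and its score rendered as a percentage
labels = (
    np.where(df_alert_metric["metric_alert"] == 1, anomaly_symbol, normal_symbol)
    + (df_alert_metric[score_col].round(2) * 100).astype("int").astype("str")
    + "% "
)
print(list(labels))  # [' 25% ', ' 50% ', '*90% ']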

anomstack/jobs/llmalert.py

Lines changed: 9 additions & 8 deletions

@@ -58,7 +58,7 @@ def noop():
     alert_methods = spec["alert_methods"]
     llmalert_recent_n = spec["llmalert_recent_n"]
     llmalert_smooth_n = spec["llmalert_smooth_n"]
-    llmalert_metric_rounding = spec.get("llmalert_metric_rounding", 4)
+    llmalert_metric_rounding = spec.get("llmalert_metric_rounding", -1)

     @job(
         name=f"{metric_batch}_llmalert_job",
@@ -77,7 +77,7 @@ def get_llmalert_data() -> pd.DataFrame:
            pd.DataFrame: A pandas DataFrame containing the data for the LLM Alert.
        """

-        df = read_sql(render("plot_sql", spec), db)
+        df = read_sql(render("llmalert_sql", spec), db)

        return df

@@ -105,11 +105,6 @@ def llmalert(context, df: pd.DataFrame) -> None:
            .sort_values(by="metric_timestamp", ascending=True)
            .reset_index(drop=True)
        )
-        df_metric["metric_alert"] = df_metric["metric_alert"].fillna(0)
-        df_metric["metric_score"] = df_metric["metric_score"].fillna(0)
-        df_metric["metric_score_smooth"] = df_metric[
-            "metric_score_smooth"
-        ].fillna(0)
        df_metric = df_metric.dropna()
        df_metric["metric_timestamp"] = pd.to_datetime(
            df_metric["metric_timestamp"]
@@ -130,8 +125,13 @@ def llmalert(context, df: pd.DataFrame) -> None:
        df_prompt = (
            df_metric[["metric_timestamp", "metric_value", "metric_recency"]]
            .dropna()
-            .round(llmalert_metric_rounding)
        )
+        df_prompt["metric_timestamp"] = df_metric[
+            "metric_timestamp"
+        ].dt.strftime("%Y-%m-%d %H:%M:%S")
+        df_prompt = df_prompt.set_index("metric_timestamp")
+        if llmalert_metric_rounding >= 0:
+            df_prompt = df_prompt.round(llmalert_metric_rounding)

        # logger.debug(f"df_prompt: \n{df_prompt}")

@@ -172,6 +172,7 @@ def llmalert(context, df: pd.DataFrame) -> None:
                "metric_timestamp": metric_timestamp_max,
                "alert_type": "llm",
            },
+            score_col="metric_score"
        )

    llmalert(get_llmalert_data())
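A quick sketch of what the new rounding guard does (the DataFrame below is illustrative; the conditional mirrors the added lines): with the new default of -1, metric values reach the prompt unrounded, and rounding only kicks in when llmalert_metric_rounding is set to a non-negative value.

import pandas as pd

df_prompt = pd.DataFrame({"metric_value": [1.23456789, 2.3456789]})  # toy data

llmalert_metric_rounding = -1        # new default from defaults.yaml: no rounding
if llmalert_metric_rounding >= 0:    # only round when explicitly configured
    df_prompt = df_prompt.round(llmalert_metric_rounding)
print(df_prompt["metric_value"].tolist())  # [1.23456789, 2.3456789] -- unrounded

llmalert_metric_rounding = 2         # e.g. an explicit per-batch override
if llmalert_metric_rounding >= 0:
    df_prompt = df_prompt.round(llmalert_metric_rounding)
print(df_prompt["metric_value"].tolist())  # [1.23, 2.35]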

metrics/defaults/defaults.yaml

Lines changed: 6 additions & 2 deletions

@@ -61,8 +61,9 @@ change_detect_last_n: 1 # number of last n observations to detect changes on.
 # llmalert params
 ############################################
 llmalert_recent_n: 5 # only llmalert on recent n so as to avoid continually alerting.
-llmalert_smooth_n: 3 # smooth metric value prior to sending to llm.
-llmalert_metric_rounding: 4 # round metric values to this number of decimal places.
+llmalert_smooth_n: 0 # smooth metric value prior to sending to llm.
+llmalert_metric_rounding: -1 # round metric values to this number of decimal places.
+llmalert_metric_timestamp_max_days_ago: 1 # don't alert on metrics older than this.

 ############################################
 # schedules
@@ -100,6 +101,9 @@ change_sql: >
 # default templated plot sql
 plot_sql: >
   {% include "./defaults/sql/plot.sql" %}
+# default templated llmalert sql
+llmalert_sql: >
+  {% include "./defaults/sql/llmalert.sql" %}
 # default templated dashboard sql
 dashboard_sql: >
   {% include "./defaults/sql/dashboard.sql" %}
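For context, a small sketch of how these defaults surface in the llmalert job changed above. The toy spec dict is assumed to stand in for the merged YAML config; the lookups themselves are taken from the diff.

# spec is assumed to be the merged metric-batch config built from defaults.yaml
spec = {
    "llmalert_recent_n": 5,
    "llmalert_smooth_n": 0,
    # "llmalert_metric_rounding" deliberately left out to show the fallback
}

llmalert_recent_n = spec["llmalert_recent_n"]
llmalert_smooth_n = spec["llmalert_smooth_n"]
llmalert_metric_rounding = spec.get("llmalert_metric_rounding", -1)  # falls back to -1 (no rounding)

print(llmalert_recent_n, llmalert_smooth_n, llmalert_metric_rounding)  # 5 0 -1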

metrics/defaults/python/prompt.py

Lines changed: 7 additions & 14 deletions

@@ -10,26 +10,19 @@ def make_prompt(df, llmalert_recent_n) -> str:
        str: A prompt for the user to check if there is an anomaly in the time series data.
    """

-    from tabulate import tabulate
-
-    text_representation = tabulate(
-        df.reset_index(), headers="keys", tablefmt="pipe", showindex=False
-    )
+    text_representation = df.to_markdown()

     prompt = f"""
-    You are a seasoned time series expert who has worked with time series data for many years and are very acomplished at spotting and explaining anomalies in time series data.
-
-    Can you help me check if there is an anomaly in this time series data for this metric?
+    Can you help me check if there is an anomaly in the below time series data?

-    I am solely interested in looking at the last {llmalert_recent_n} observations (when metric_recency=recent) and if it looks like the more recent data may be anomalous or if it looks not all that much different from the rest of the data (metric_recency=baseline).
+    I am solely interested in looking at the last {llmalert_recent_n} observations (when metric_recency=recent) and if it looks like the more recent data may be anomalous in comparison to rest of the data (when metric_recency=baseline).

     Here are some questions to think about:

     - Is there anything unusual about the last {llmalert_recent_n} recent values of the metric in the df DataFrame?
     - Are there any anomalies or outliers in the recent {llmalert_recent_n} observations of metric in df?
     - Can you identify any patterns or trends in the recent {llmalert_recent_n} values of the metric in df that could be indicative of an anomaly?
     - How does the distribution of the recent {llmalert_recent_n} values of the metric in df compare to the distribution of the entire dataset?
-    - Are there any changes in the mean, median, or standard deviation of the metric in the recent {llmalert_recent_n} observations that could be indicative of an anomaly?
     - Is there a sudden increase or decrease in the metric in the recent {llmalert_recent_n} observations?
     - Is there a change in the slope of the metric trend line in the recent {llmalert_recent_n} observations?
     - Are there any spikes or dips in the metric in the recent {llmalert_recent_n} observations?
@@ -44,10 +37,6 @@ def make_prompt(df, llmalert_recent_n) -> str:
     - Focus only on how the most recent {llmalert_recent_n} observations and if they look anomalous or not in reference to the earlier baseline data.
     - The data comes from a pandas dataframe.

-    Here is the data (ordered in ascending order, so from oldest to newest (top to bottom)):
-
-    {text_representation}
-
     I need a yes or no answer as to if you think the recent data looks anomalous or not.

     Please also provide a description on why the metric looks anomalous if you think it does.
@@ -57,6 +46,10 @@ def make_prompt(df, llmalert_recent_n) -> str:
     Please think step by step and provide a description, along with evidence, of your thought process as you go through the data.

     Think globally too like a human would if they were eyeballing the data.
+
+    Here is the data (ordered in ascending order, so from oldest to newest (top to bottom)):
+
+    {text_representation}
     """

     return prompt
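A minimal sketch of the swap above (toy DataFrame, column names borrowed from the llmalert job): df.to_markdown() produces the same pipe-style table the explicit tabulate(...) call did. Note that to_markdown() still relies on tabulate under the hood, which stays available as a dagster dependency per requirements.txt below.

import pandas as pd

# toy frame shaped like df_prompt: timestamp index, value and recency columns
df = pd.DataFrame(
    {"metric_value": [0.1, 0.9], "metric_recency": ["baseline", "recent"]},
    index=pd.Index(["2024-01-01 00:00:00", "2024-01-01 01:00:00"], name="metric_timestamp"),
)

text_representation = df.to_markdown()  # pipe-style markdown table, index included
print(text_representation)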

metrics/defaults/sql/llmalert.sql

Lines changed: 98 additions & 0 deletions

@@ -0,0 +1,98 @@
+/*
+Template for generating the input data for the llmalert job.
+*/
+
+with
+
+metric_value_data as
+(
+select distinct
+    metric_timestamp,
+    metric_batch,
+    metric_name,
+    avg(metric_value) AS metric_value
+from
+    {{ table_key }}
+where
+    metric_batch = '{{ metric_batch }}'
+    and
+    metric_type = 'metric'
+    and
+    date(metric_timestamp) >= date('now', '-{{ llmalert_metric_timestamp_max_days_ago }} day')
+group by metric_timestamp, metric_batch, metric_name
+),
+
+metric_score_data as
+(
+select distinct
+    metric_timestamp,
+    metric_batch,
+    metric_name,
+    avg(metric_value) AS metric_score
+from
+    {{ table_key }}
+where
+    metric_batch = '{{ metric_batch }}'
+    and
+    metric_type = 'score'
+    and
+    date(metric_timestamp) >= date('now', '-{{ llmalert_metric_timestamp_max_days_ago }} day')
+group by metric_timestamp, metric_batch, metric_name
+),
+
+metric_alert_data as
+(
+select distinct
+    metric_timestamp,
+    metric_batch,
+    metric_name,
+    avg(metric_value) AS metric_alert
+from
+    {{ table_key }}
+where
+    metric_batch = '{{ metric_batch }}'
+    and
+    metric_type = 'alert'
+    and
+    date(metric_timestamp) >= date('now', '-{{ llmalert_metric_timestamp_max_days_ago }} day')
+group by metric_timestamp, metric_batch, metric_name
+),
+
+metric_value_recency_ranked as
+(
+select
+    metric_timestamp,
+    metric_batch,
+    metric_name,
+    metric_value,
+    row_number() over (partition by metric_name order by metric_timestamp desc) as metric_value_recency_rank
+from
+    metric_value_data
+)
+
+select
+    m.metric_timestamp,
+    m.metric_batch,
+    m.metric_name,
+    m.metric_value,
+    ifnull(s.metric_score,0) as metric_score,
+    ifnull(a.metric_alert,0) as metric_alert
+from
+    metric_value_recency_ranked m
+left join
+    metric_score_data s
+on
+    m.metric_timestamp = s.metric_timestamp
+    and
+    m.metric_batch = s.metric_batch
+    and
+    m.metric_name = s.metric_name
+left join
+    metric_alert_data a
+on
+    m.metric_timestamp = a.metric_timestamp
+    and
+    m.metric_batch = a.metric_batch
+    and
+    m.metric_name = a.metric_name
+;
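As a rough illustration (this is plain Jinja2, not the repo's render() helper, and the parameter values are made up), the placeholders in the template above are substituted before read_sql() runs the query:

from jinja2 import Template

# a trimmed-down stand-in for the template above, just to show the substitution
sql_template = """
select *
from {{ table_key }}
where metric_batch = '{{ metric_batch }}'
and date(metric_timestamp) >= date('now', '-{{ llmalert_metric_timestamp_max_days_ago }} day')
"""

rendered = Template(sql_template).render(
    table_key="metrics",                       # assumed example table name
    metric_batch="example_batch",              # assumed example batch name
    llmalert_metric_timestamp_max_days_ago=1,  # new default from defaults.yaml
)
print(rendered)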

requirements.compile

Lines changed: 0 additions & 1 deletion

@@ -25,4 +25,3 @@ slack_sdk
 snowflake-connector-python[pandas]
 sqlglot
 streamlit
-tabulate

requirements.txt

Lines changed: 1 addition & 3 deletions

@@ -506,9 +506,7 @@ streamlit==1.38.0
 structlog==24.4.0
     # via dagster
 tabulate==0.9.0
-    # via
-    #   -r requirements.compile
-    #   dagster
+    # via dagster
 tenacity==8.5.0
     # via
     #   plotly
