Merge pull request #111 from andrewm4894/make-python-random

andrewm4894 · web-flow · commit 3ffee451a02a · 2024-11-02T00:14:39.000+01:00
Make python random
diff --git a/README.md b/README.md
@@ -87,7 +87,7 @@ Supported storage for your trained models:
     </tbody>
 </table>
 
-Supported ways to receive alerts:
+Supported ways to receive [alerts](#alerts):
 
 <table>
     <thead>
diff --git a/anomstack/jobs/plot.py b/anomstack/jobs/plot.py
@@ -14,6 +14,7 @@
     MetadataValue,
     ScheduleDefinition,
     asset,
+    get_dagster_logger,
     job,
     op,
 )
@@ -39,6 +40,8 @@ def build_plot_job(spec: dict) -> JobDefinition:
         JobDefinition: The plot job definition.
     """
 
+    logger = get_dagster_logger()
+
     if spec.get("disable_plot"):
 
         @job(
@@ -95,14 +98,20 @@ def make_plot(context, df: pd.DataFrame) -> None:
                 df (pd.DataFrame): The plot data.
             """
 
-            fig = make_batch_plot(df)
+            if len(df) > 0:
+
+                fig = make_batch_plot(df)
+
+                buffer = BytesIO()
+                fig.savefig(buffer, format="png")
+                image_data = base64.b64encode(buffer.getvalue())
+                md_content = f"![img](data:image/png;base64,{image_data.decode()})"
+
+                context.add_output_metadata({"plot": MetadataValue.md(md_content)})
 
-            buffer = BytesIO()
-            fig.savefig(buffer, format="png")
-            image_data = base64.b64encode(buffer.getvalue())
-            md_content = f"![img](data:image/png;base64,{image_data.decode()})"
+            else:
 
-            context.add_output_metadata({"plot": MetadataValue.md(md_content)})
+                logger.info("no data to plot")
 
         make_plot(get_plot_data())
 
diff --git a/metrics/examples/README.md b/metrics/examples/README.md
@@ -1,3 +1,113 @@
 # Examples
 
 Some example metric batch sub folders. For example you might have one per source or subject, or whatever makes most sense to you really.
+
+- [bigquery](bigquery/): Example of a BigQuery metric batch.
+- [eirgrid](eirgrid/): Example of a metric batch that uses the `ingest_fn` parameter to define a custom Python `ingest()` function that returns a pandas DataFrame.
+- [example_jinja](example_jinja/): Example of a metric batch that uses Jinja templating.
+- [example_simple](example_simple/): Example of a simple metric batch.
+- [example_sql_file](example_sql_file/): Example of a metric batch that uses a SQL file.
+- [freq](freq/): Example of a metric batch that uses the `freq` parameter.
+- [gsod](gsod/): Example of a metric batch that uses GSOD data from BigQuery.
+- [gtrends](gtrends/): Example of a metric batch that uses Google Trends data from BigQuery.
+- [hackernews](hackernews/): Example of a metric batch that uses the Hacker News API.
+- [netdata](netdata/): Example of a metric batch that uses the Netdata API.
+- [netdata_httpcheck](netdata_httpcheck/): Example of a metric batch that uses the Netdata API to check the status of a website.
+- [python](python/): Example of a metric batch that uses the `ingest_fn` parameter to define a custom Python `ingest()` function that returns a pandas DataFrame.
+- [s3](s3/): Example of a metric batch that uses S3.
+- [sales](sales/): Example of a metric batch that uses a SQL file.
+- [snowflake](snowflake/): Example of a metric batch that uses Snowflake.
+- [tomtom](tomtom/): Example of a metric batch that uses the TomTom API.
+- [users](users/): Example of a metric batch that uses a SQL file.
+- [weather](weather/): Example of a metric batch that uses Open Meteo data.
+- [weather_forecast](weather_forecast/): Example of a metric batch that uses weather forecast data from Snowflake.
+- [yfinance](yfinance/): Example of a metric batch that uses the Yahoo Finance API.
+
+
+```
+.
+├── README.md
+├── bigquery
+│   ├── README.md
+│   └── bigquery_example_simple
+│       ├── README.md
+│       └── bigquery_example_simple.yaml
+├── eirgrid
+│   ├── README.md
+│   ├── eirgrid.py
+│   └── eirgrid.yaml
+├── example_jinja
+│   ├── README.md
+│   └── example_jinja.yaml
+├── example_simple
+│   ├── README.md
+│   └── example_simple.yaml
+├── example_sql_file
+│   ├── README.md
+│   ├── example_sql_file.sql
+│   └── example_sql_file.yaml
+├── freq
+│   ├── README.md
+│   └── freq.yaml
+├── gsod
+│   ├── README.md
+│   ├── gsod.sql
+│   └── gsod.yaml
+├── gtrends
+│   ├── README.md
+│   ├── gtrends.sql
+│   └── gtrends.yaml
+├── hackernews
+│   ├── README.md
+│   ├── hn_top_stories_scores.py
+│   └── hn_top_stories_scores.yaml
+├── netdata
+│   ├── README.md
+│   ├── netdata.py
+│   └── netdata.yaml
+├── netdata_httpcheck
+│   ├── netdata_httpcheck.py
+│   └── netdata_httpcheck.yaml
+├── python
+│   ├── README.md
+│   └── python_ingest_simple
+│       ├── README.md
+│       ├── ingest.py
+│       └── python_ingest_simple.yaml
+├── s3
+│   ├── README.md
+│   └── s3_example_simple
+│       ├── README.md
+│       └── s3_example_simple.yaml
+├── sales
+│   ├── README.md
+│   ├── sales.sql
+│   └── sales.yaml
+├── snowflake
+│   ├── README.md
+│   └── snowflake_example_simple
+│       ├── README.md
+│       └── snowflake_example_simple.yaml
+├── tomtom
+│   ├── README.md
+│   ├── tomtom.py
+│   └── tomtom.yaml
+├── users
+│   ├── README.md
+│   ├── users.sql
+│   └── users.yaml
+├── weather
+│   ├── README.md
+│   ├── ingest_weather.py
+│   └── weather.yaml
+├── weather_forecast
+│   ├── README.md
+│   ├── weather_forecast.sql
+│   └── weather_forecast.yaml
+└── yfinance
+    ├── README.md
+    ├── yfinance.py
+    └── yfinance.yaml
+
+25 directories, 58 files
+```
diff --git a/metrics/examples/python/README.md b/metrics/examples/python/README.md
@@ -1,3 +1,3 @@
 # Python Examples
 
-Examples using a custom python `ingest_fn` paramater to just use python to create an `ingest()` function that returns a pandas df.
+Examples using a custom python `ingest_fn` parameter to just use python to create an `ingest()` function that returns a pandas df.
diff --git a/metrics/examples/python/python_ingest_simple/ingest.py b/metrics/examples/python/python_ingest_simple/ingest.py
@@ -1,21 +1,52 @@
 def ingest():
+    """
+    Generate random metrics data with occasional anomalies (spikes, drops,
+    or plateaus).
+    """
+
     import random
     import time
 
     import pandas as pd
 
-    # generate random metrics
-    metric1_value = random.uniform(0, 10)
-    metric2_value = random.uniform(0, 10)
+    # Define the names of the metrics to generate
+    metrics = ["metric1", "metric2", "metric3", "metric4", "metric5"]
+    metric_values = []
 
-    # get current timestamp
+    # Get current timestamp
     current_timestamp = int(time.time())
 
-    # make df
+    # Different anomaly types
+    anomaly_types = ["spike", "drop", "plateau"]
+
+    # Generate random metrics and introduce occasional anomalies (1% chance)
+    anomaly_chance = random.random()
+    anomaly_type = (random.choice(anomaly_types)
+                    if anomaly_chance <= 0.01 else None)
+
+    for metric in metrics:
+        if anomaly_type == "spike":
+            # Generate a spike (e.g., a value significantly higher than normal)
+            metric_value = random.uniform(15, 30)
+        elif anomaly_type == "drop":
+            # Generate a drop (e.g., a negative or significantly low value)
+            metric_value = random.uniform(-10, -1)
+        elif anomaly_type == "plateau":
+            # Generate the same value for all metrics (e.g., a plateau)
+            plateau_value = random.uniform(5, 6)
+            metric_values = [plateau_value] * len(metrics)
+            break  # Exit the loop early since all metrics have the same value
+        else:
+            # Generate a normal value
+            metric_value = random.uniform(0, 10)
+
+        metric_values.append(metric_value)
+
+    # Create a DataFrame with the metric data
     data = {
-        "metric_name": ["metric1", "metric2"],
-        "metric_value": [metric1_value, metric2_value],
-        "metric_timestamp": [current_timestamp, current_timestamp],
+        "metric_name": metrics,
+        "metric_value": metric_values,
+        "metric_timestamp": [current_timestamp] * len(metrics),
     }
     df = pd.DataFrame(data)
 
diff --git a/tests/test_main.py b/tests/test_main.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from anomstack.main import ingest_jobs, ingest_schedules, jobs, schedules, sensors
+from anomstack.main import ingest_jobs, ingest_schedules, jobs, schedules
 
 logger = logging.getLogger(__name__)
 

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`# Python Examples`
`2`	`2`
`3`		-Examples using a custom python `ingest_fn` paramater to just use python to create an `ingest()` function that returns a pandas df.
	`3`	+Examples using a custom python `ingest_fn` parameter to just use python to create an `ingest()` function that returns a pandas df.