diff --git a/.example.env b/.example.env index e6ce33a1..b427fd18 100644 --- a/.example.env +++ b/.example.env @@ -95,7 +95,6 @@ ANOMSTACK_LLM_PLATFORM=openai # some dagster env vars DAGSTER_LOG_LEVEL=DEBUG DAGSTER_CONCURRENCY=4 -DAGSTER_HOME=/opt/dagster/dagster_home # max runtime for a job in dagster # https://docs.dagster.io/deployment/run-monitoring#general-run-timeouts diff --git a/Makefile b/Makefile index e8e9417b..c3392b59 100644 --- a/Makefile +++ b/Makefile @@ -6,9 +6,9 @@ SHELL=/bin/bash .PHONY: local locald kill-locald ps-locald dev -# start dagster locally +# start dagster locally (simple - just set DAGSTER_HOME directly) local: - dagster dev -f anomstack/main.py + DAGSTER_HOME=$$(pwd)/dagster_home dagster dev -f anomstack/main.py # start dagster locally as a daemon with no log file locald: @@ -124,10 +124,6 @@ docker-restart-dashboard: docker-restart-code: docker compose restart anomstack_code -# stop all containers -docker-stop: - docker compose down - # alias for docker-stop docker-down: docker compose down @@ -217,8 +213,16 @@ requirements-install: # run the PostHog example ingest function posthog-example: - python scripts/posthog_example.py + python scripts/posthog_example.py # kill any dagster runs exceeding configured timeout kill-long-runs: - python scripts/kill_long_running_tasks.py + python scripts/kill_long_running_tasks.py + +# run docker in dev mode with correct environment +docker-dev-env: + docker compose -f docker-compose.yaml -f docker-compose.dev.yaml up -d + +# stop docker containers +docker-stop: + docker compose -f docker-compose.yaml -f docker-compose.dev.yaml down diff --git a/dagster.yaml b/dagster.yaml index a0f10701..eed0c679 100644 --- a/dagster.yaml +++ b/dagster.yaml @@ -34,17 +34,13 @@ retention: failure: 7 success: 7 -#schedules: -# use_threads: true -# num_workers: 8 +schedules: + use_threads: true + num_workers: 8 -#sensors: -# use_threads: true -# num_workers: 4 +sensors: + use_threads: true + num_workers: 4 telemetry: enabled: true - -# kill sensor configuration -kill_sensor: - kill_after_minutes: 60 diff --git a/dagster_docker.yaml b/dagster_docker.yaml index 3841b66f..5039e23f 100644 --- a/dagster_docker.yaml +++ b/dagster_docker.yaml @@ -60,9 +60,13 @@ run_monitoring: start_timeout_seconds: 600 cancel_timeout_seconds: 300 -# kill sensor configuration -kill_sensor: - kill_after_minutes: 60 +schedules: + use_threads: true + num_workers: 8 + +sensors: + use_threads: true + num_workers: 4 schedule_storage: module: dagster_postgres.schedule_storage diff --git a/dagster_home/dagster.yaml b/dagster_home/dagster.yaml new file mode 100644 index 00000000..e69de29b diff --git a/docker-compose.yaml b/docker-compose.yaml index a498dec8..2f103f40 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -44,6 +44,7 @@ services: - ./tmp:/opt/dagster/app/tmp - anomstack_metrics_duckdb:/metrics_db/duckdb - ./dagster_home:/opt/dagster/dagster_home + - ./dagster_docker.yaml:/opt/dagster/dagster_home/dagster.yaml env_file: - .env environment: @@ -52,7 +53,7 @@ services: DAGSTER_POSTGRES_DB: "${ANOMSTACK_POSTGRES_DB:-postgres_db}" DAGSTER_CURRENT_IMAGE: "andrewm4894/anomstack_code:latest" ANOMSTACK_DUCKDB_PATH: "/metrics_db/duckdb/anomstack.db" - DAGSTER_HOME: "${DAGSTER_HOME:-/opt/dagster/dagster_home}" + DAGSTER_HOME: "/opt/dagster/dagster_home" networks: - anomstack_network healthcheck: @@ -95,13 +96,14 @@ services: DAGSTER_POSTGRES_USER: "${ANOMSTACK_POSTGRES_USER:-postgres_user}" DAGSTER_POSTGRES_PASSWORD: "${ANOMSTACK_POSTGRES_PASSWORD:-postgres_password}" DAGSTER_POSTGRES_DB: "${ANOMSTACK_POSTGRES_DB:-postgres_db}" - DAGSTER_HOME: "${DAGSTER_HOME:-/opt/dagster/dagster_home}" + DAGSTER_HOME: "/opt/dagster/dagster_home" volumes: # Make docker client accessible so we can terminate containers from the webserver - /var/run/docker.sock:/var/run/docker.sock - /tmp/io_manager_storage:/tmp/io_manager_storage - ./tmp:/opt/dagster/app/tmp - anomstack_metrics_duckdb:/metrics_db/duckdb - ./dagster_home:/opt/dagster/dagster_home + - ./dagster_docker.yaml:/opt/dagster/dagster_home/dagster.yaml networks: - anomstack_network depends_on: @@ -137,13 +139,14 @@ services: DAGSTER_POSTGRES_USER: "${ANOMSTACK_POSTGRES_USER:-postgres_user}" DAGSTER_POSTGRES_PASSWORD: "${ANOMSTACK_POSTGRES_PASSWORD:-postgres_password}" DAGSTER_POSTGRES_DB: "${ANOMSTACK_POSTGRES_DB:-postgres_db}" - DAGSTER_HOME: "${DAGSTER_HOME:-/opt/dagster/dagster_home}" + DAGSTER_HOME: "/opt/dagster/dagster_home" volumes: # Make docker client accessible so we can launch containers using host docker - /var/run/docker.sock:/var/run/docker.sock - /tmp/io_manager_storage:/tmp/io_manager_storage - ./tmp:/opt/dagster/app/tmp - anomstack_metrics_duckdb:/metrics_db/duckdb - ./dagster_home:/opt/dagster/dagster_home + - ./dagster_docker.yaml:/opt/dagster/dagster_home/dagster.yaml networks: - anomstack_network depends_on: diff --git a/scripts/kill_long_running_tasks.py b/scripts/kill_long_running_tasks.py index d9a88d29..296045fb 100644 --- a/scripts/kill_long_running_tasks.py +++ b/scripts/kill_long_running_tasks.py @@ -1,4 +1,5 @@ import os +import sys from pathlib import Path from datetime import datetime, timedelta, timezone @@ -17,8 +18,14 @@ os.environ["ANOMSTACK_DAGSTER_LOCAL_COMPUTE_LOG_MANAGER_DIRECTORY"] = "tmp" os.environ["ANOMSTACK_DAGSTER_LOCAL_ARTIFACT_STORAGE_DIR"] = "tmp" -# Cutoff for long-running (1 hour ago) -cutoff_time = datetime.now(timezone.utc) - timedelta(hours=1) +# Add the parent directory to sys.path to import the sensor module +sys.path.append(str(script_dir.parent)) +from anomstack.sensors.timeout import get_kill_after_minutes + +# Use the same configurable timeout as the sensor +kill_after_minutes = get_kill_after_minutes() +cutoff_time = datetime.now(timezone.utc) - timedelta(minutes=kill_after_minutes) +print(f"Using {kill_after_minutes} minute timeout") instance = DagsterInstance.get() running_runs = instance.get_runs(filters=RunsFilter(statuses=[DagsterRunStatus.STARTED]))