4 changes: 4 additions & 0 deletions .gitignore

@@ -3,6 +3,10 @@ __pycache__/
 *.py[codz]
 *$py.class
 
+# Data
+data/*
+!data/README.md
+
 # C extensions
 *.so
 
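A note on the new rules: `data/*` ignores the directory's contents while `!data/README.md` re-includes the README. The negation only works because the rule ignores the contents rather than the directory itself; a bare `data/` pattern would make the `!` line ineffective, since git does not descend into ignored directories. As a quick sanity check (assuming a hypothetical untracked `data/sample.csv`), `git check-ignore -v` reports which rule matches:

```
$ git check-ignore -v data/sample.csv data/README.md
.gitignore:7:data/*	data/sample.csv
```

Only the ignored path is reported, so `data/README.md` stays trackable.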
44 changes: 31 additions & 13 deletions apps/run-agent/common_benchmark.py

@@ -5,6 +5,7 @@
 import asyncio
 import json
 import os
+import signal
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from enum import StrEnum
@@ -197,7 +198,7 @@ async def run_single_task(self, task: BenchmarkTask) -> BenchmarkResult:
                     output_formatter=self.output_formatter,
                     ground_truth=task.ground_truth,
                     log_path=self.output_dir
-                    / f"{task.task_id}_attempt_{attempt}",
+                    / f"{task.task_id}_attempt_{attempt}.json",
                 )
 
                 attempt_result["model_response"] = response if response else ""
@@ -544,11 +545,11 @@ def prepare_task_description(
         path = Path(task.file_path)
         # check if task.file_path is a relative path
         if path.is_absolute():
-            return task.task_question, str(path.resolve())
+            return task.task_question, str(path)
 
         # Build complete file path: data directory + relative path
         full_file_path = Path(self.data_dir) / path
-        return task.task_question, str(full_file_path.resolve())
+        return task.task_question, str(full_file_path)
 
 
 async def entrypoint(cfg: DictConfig) -> float:
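The two changed return statements drop `Path.resolve()`. For context, `resolve()` anchors a relative path to the host's current working directory and expands symlinks, so the returned string stops being portable, for example when it is later consumed in an environment with a different filesystem layout. A minimal illustration, independent of this repo (the paths in the comments assume the script runs from a hypothetical `/home/me/repo`):

```python
# Sketch of the behavioral difference made by dropping Path.resolve().
from pathlib import Path

p = Path("data/2023/task.pdf")
print(str(p))            # data/2023/task.pdf  (kept exactly as given)
print(str(p.resolve()))  # /home/me/repo/data/2023/task.pdf  (host-specific)
```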
@@ -631,14 +632,31 @@ def filter_func(x: BenchmarkTask) -> bool:
     return accuracy
 
 
-def main(*args):
+def signal_handler(signum, frame):
+    """Force exit signal handler"""
+    print(f"\n⚠️ Received interrupt signal {signum}, forcing immediate exit...")
+    print("Program will terminate all operations immediately")
+    os._exit(1)  # Force immediate exit
+
+
+@hydra.main(version_base=None, config_path=config_path(), config_name=config_name())
+def main(cfg: DictConfig) -> None:
+    """Main entry point using Hydra decorator - automatically creates .hydra directory"""
+    # Register signal handlers for immediate response to Ctrl+C
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
     dotenv.load_dotenv()
-    with hydra.initialize_config_dir(config_dir=config_path(), version_base=None):
-        cfg = hydra.compose(config_name=config_name(), overrides=list(args))
-        _ = bootstrap_logger()
-        # Default to disable tracing, and don't set key
-        set_tracing_disabled(True)
-        set_tracing_export_api_key("fake-key")
-        # Suppress trace provider warnings
-        bootstrap_silent_trace_provider()
-        asyncio.run(entrypoint(cfg))
+    _ = bootstrap_logger()
+    # Default to disable tracing, and don't set key
+    set_tracing_disabled(True)
+    set_tracing_export_api_key("fake-key")
+    # Suppress trace provider warnings
+    bootstrap_silent_trace_provider()
+
+    print("✅ Signal handler registered, press Ctrl+C to exit immediately")
+    asyncio.run(entrypoint(cfg))
 
 
 if __name__ == "__main__":
     main()
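The new `signal_handler` uses `os._exit(1)` rather than `sys.exit()` because `sys.exit` raises `SystemExit`, which a broad `except` clause inside a running task can swallow, whereas `os._exit` terminates the process unconditionally (skipping `atexit` hooks and pending `finally` blocks). A self-contained sketch of the same pattern, with no miroflow dependencies:

```python
# Standalone sketch of the Ctrl+C pattern added above: a SIGINT/SIGTERM
# handler that calls os._exit so the process dies even while asyncio
# tasks are still running.
import asyncio
import os
import signal


def force_exit(signum, frame):
    # os._exit bypasses cleanup handlers and cannot be caught as an
    # exception, unlike sys.exit (which raises SystemExit).
    print(f"received signal {signum}, exiting now")
    os._exit(1)


async def busy() -> None:
    while True:
        await asyncio.sleep(1)


if __name__ == "__main__":
    signal.signal(signal.SIGINT, force_exit)
    signal.signal(signal.SIGTERM, force_exit)
    asyncio.run(busy())  # Ctrl+C now terminates immediately
```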
45 changes: 34 additions & 11 deletions apps/run-agent/main.py

@@ -7,7 +7,6 @@
 
 import calculate_average_score
 import calculate_score_from_log
-import common_benchmark
 import eval_answer_from_log
 import trace_single_task
 
@@ -22,13 +21,37 @@ def print_config(*args):
 
 if __name__ == "__main__":
     install(suppress=[fire, hydra], show_locals=True)
-    fire.Fire(
-        {
-            "print-config": print_config,
-            "trace": trace_single_task.main,
-            "common-benchmark": common_benchmark.main,
-            "eval-answer": eval_answer_from_log.main,
-            "avg-score": calculate_average_score.main,
-            "score-from-log": calculate_score_from_log.main,
-        }
-    )
+    import sys
+
+    if len(sys.argv) < 2:
+        print("Available commands:")
+        print("  print-config     - Print configuration")
+        print("  trace            - Run single task trace")
+        print("  common-benchmark - Run benchmark evaluation")
+        print("  eval-answer      - Evaluate answers from log")
+        print("  avg-score        - Calculate average score")
+        print("  score-from-log   - Calculate score from log")
+        print("\nExample: python main.py common-benchmark")
+        sys.exit(1)
+
+    command = sys.argv[1]
+    args = sys.argv[2:]
+
+    if command == "print-config":
+        print_config(*args)
+    elif command == "trace":
+        trace_single_task.main(*args)
+    elif command == "common-benchmark":
+        # For common-benchmark, call it directly - it will use @hydra.main
+        import subprocess
+
+        subprocess.run(["python", "common_benchmark.py"] + args)
+    elif command == "eval-answer":
+        eval_answer_from_log.main(*args)
+    elif command == "avg-score":
+        calculate_average_score.main(*args)
+    elif command == "score-from-log":
+        calculate_score_from_log.main(*args)
+    else:
+        print(f"Unknown command: {command}")
+        sys.exit(1)
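The `common-benchmark` branch shells out rather than calling the function in-process because `@hydra.main` wants to own `sys.argv` for its override parsing. One caveat of the diff as written: `subprocess.run(["python", "common_benchmark.py"] + args)` assumes the working directory is `apps/run-agent` and that `python` on `PATH` is the right interpreter. A hedged, more robust variant (not part of the PR):

```python
# Sketch only: resolve the script relative to main.py and reuse the
# current interpreter, so the dispatch also works from other working
# directories and inside virtualenvs.
import subprocess
import sys
from pathlib import Path


def run_common_benchmark(args: list[str]) -> int:
    script = Path(__file__).parent / "common_benchmark.py"
    return subprocess.run([sys.executable, str(script), *args]).returncode
```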
@@ -13,7 +13,7 @@
 
 # DEFAULT TEMPLATE ID
 # see README.md on how to build this
-DEFAULT_TEMPLATE_ID = "1av7fdjfvcparqo8efq6"
+DEFAULT_TEMPLATE_ID = "all_pip_apt_pkg"
 
 # DEFAULT CONFS
 DEFAULT_TIMEOUT = 1200  # seconds
2 changes: 1 addition & 1 deletion libs/miroflow/src/miroflow/logging/task_tracer.py

@@ -122,7 +122,7 @@ def save(self):
             if not self.log_path.exists():
                 self.log_path.parent.mkdir(exist_ok=True, parents=True)
             with open(self.log_path, mode="w") as dest:
-                dest.write(self.model_dump_json())
+                dest.write(self.model_dump_json(indent=2))
         except Exception as e:
             logger.error(e, stack_info=True, exc_info=True)
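For context, `model_dump_json(indent=2)` is pydantic v2's pretty-printing knob, analogous to `json.dumps(..., indent=2)`; the one-line change makes the saved task logs human-readable at the cost of slightly larger files. A quick illustration:

```python
# Tiny demo of the indent argument on pydantic v2 models.
from pydantic import BaseModel


class Step(BaseModel):
    name: str
    ok: bool


print(Step(name="load", ok=True).model_dump_json(indent=2))
# {
#   "name": "load",
#   "ok": true
# }
```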
8 changes: 6 additions & 2 deletions libs/miroflow/src/miroflow/prebuilt/config/config.yaml

@@ -6,7 +6,7 @@ defaults:
   - pricing: _default
   # disable hydra logging
   # see https://github.com/facebookresearch/hydra/issues/2902#issuecomment-2147121325
-  # - override hydra/hydra_logging: disabled
+  - override hydra/hydra_logging: disabled
   - override hydra/job_logging: none
   - _self_ # Allow defining variables at the top of this file
@@ -34,4 +34,8 @@ env:
 
 # Can define some top-level or default parameters here
 project_name: "miroflow"
-output_dir: logs/${benchmark.name}/${llm.provider}_${llm.model_name}
+output_dir: logs/${benchmark.name}/${llm.provider}_${llm.model_name}
+
+hydra:
+  run:
+    dir: ${output_dir}
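With `hydra.run.dir` pointed at `${output_dir}`, Hydra writes its run artifacts (including the `.hydra/` config snapshot) into the same per-benchmark log directory instead of the default `outputs/<date>/<time>`. A small sketch of how the interpolation resolves, using OmegaConf directly with made-up benchmark and llm values:

```python
# Sketch with hypothetical values; only the interpolation mechanics
# mirror config.yaml above.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "benchmark": {"name": "gaia"},
        "llm": {"provider": "anthropic", "model_name": "claude"},
        "output_dir": "logs/${benchmark.name}/${llm.provider}_${llm.model_name}",
        "hydra": {"run": {"dir": "${output_dir}"}},
    }
)
print(cfg.hydra.run.dir)  # -> logs/gaia/anthropic_claude
```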