diff --git a/.gitignore b/.gitignore index fa016e9..65e1a3d 100644 --- a/.gitignore +++ b/.gitignore @@ -210,4 +210,18 @@ __marimo__/ # debug debug/ -mock/ \ No newline at end of file +mock/ + +# CodeEvolve run outputs / local env +experiments/ +.conda/ + +# API Keys and Secrets +# NEVER commit API keys or credentials +.api_keys +*api_keys +*.api_keys +**/api_keys.sh +**/.api_keys +problems/.api_keys +.codeevolve_api_keys \ No newline at end of file diff --git a/OPTIMIZATIONS.md b/OPTIMIZATIONS.md new file mode 100644 index 0000000..f6e104d --- /dev/null +++ b/OPTIMIZATIONS.md @@ -0,0 +1,215 @@ +# CodeEvolve Optimizations and Future Enhancements + +This document summarizes the optimizations implemented and provides suggestions for future improvements to make CodeEvolve a world-class code evolution framework. + +## Implemented Optimizations + +### 1. Database Performance (database.py) + +**Problem**: The original implementation performed a full O(N log N) sort on every program insertion, which becomes a bottleneck as the population grows. + +**Solution**: Implemented incremental cache updates using the `bisect` module for O(log N) insertions: +- Added `_incremental_update_cache()` method that uses binary search to find insertion points +- Maintains a sorted list of `(-fitness, pid)` tuples +- Only updates ranks for affected programs (those at or after the insertion point) + +**Impact**: Reduces insertion time from O(N log N) to O(log N), significantly improving performance for large populations. + +**Code Location**: `src/codeevolve/database.py:397-421` + +### 2. Memory Management (evaluator.py) + +**Problem**: Program stdout/stderr can be very large, potentially causing memory issues in long-running evolutionary processes. 
+ +**Solution**: Added optional output size limits: +- New `max_output_size` parameter in Evaluator constructor +- Truncates output to specified size when enabled +- Default behavior (no storage) preserved for backward compatibility + +**Impact**: Prevents memory exhaustion while maintaining debugging capability when needed. + +**Code Location**: `src/codeevolve/evaluator.py:79, 276-283` + +### 3. Build System Compatibility + +**Problem**: Python version requirement was too restrictive (>=3.13.5), preventing installation on most systems. + +**Solution**: Relaxed requirement to >=3.10, which is widely available and supports all features used in the codebase. + +**Code Location**: `pyproject.toml:10` + +## Documentation Improvements + +### Enhanced TODOs with Implementation Guidance + +1. **Sandboxing Enhancement** (evaluator.py:26-31) + - Documented options: Firejail, Docker, systemd-nspawn, seccomp + - Current implementation uses subprocess isolation with resource limits + +2. **Local LM Support** (lm.py:25-31) + - Documented integration strategies for open-source models + - Suggested frameworks: llama-cpp-python, vllm, HuggingFace, Ollama + +3. **Async Migration** (islands.py:255-263) + - Explained benefits of asynchronous migration without barriers + - Documented implementation considerations and tradeoffs + +## Recommended Future Optimizations + +### High Priority + +#### 1. Parallel Program Evaluation +**Current State**: Programs are evaluated sequentially within each island. + +**Optimization**: Implement parallel evaluation using `asyncio` or `multiprocessing`: +```python +# Pseudo-code example +async def evaluate_batch(programs: List[Program], evaluator: Evaluator): + tasks = [asyncio.create_subprocess_exec(...) for prog in programs] + results = await asyncio.gather(*tasks) + return results +``` + +**Expected Impact**: 2-10x speedup depending on available CPU cores. + +#### 2. LLM Request Batching +**Current State**: LLM requests are made one at a time. 
+ +**Optimization**: Batch multiple LLM requests when possible: +- Collect multiple programs needing evolution +- Send batch requests to LLM API +- Most APIs support parallel processing of multiple prompts + +**Expected Impact**: Reduced API latency, better token efficiency, 1.5-3x throughput improvement. + +#### 3. Caching and Memoization +**Current State**: No caching of previously evaluated programs or LLM responses. + +**Optimization**: Implement caching layers: +- **Program Cache**: Hash program code and cache evaluation results +- **LLM Cache**: Cache LLM responses for identical prompts +- **Embedding Cache**: Cache embeddings for program similarity computations + +**Expected Impact**: 30-50% reduction in redundant computations. + +### Medium Priority + +#### 4. Database Indexing +**Current State**: Linear search for certain operations. + +**Optimization**: Add indexes for common queries: +- Fitness-based queries +- Parent-child relationships +- Feature space lookups in MAP-Elites + +**Expected Impact**: Faster query times, especially for large databases. + +#### 5. Adaptive Population Sizing +**Current State**: Fixed population size per island. + +**Optimization**: Dynamically adjust population size based on: +- Convergence rate +- Diversity metrics +- Available computational resources + +**Expected Impact**: Better resource utilization, faster convergence. + +#### 6. Smart Migration Strategy +**Current State**: Fixed migration interval and strategy. + +**Optimization**: Implement adaptive migration: +- Migrate based on diversity metrics rather than fixed intervals +- Select migrants based on novelty, not just fitness +- Use gradient-based migration patterns + +**Expected Impact**: Improved exploration, better solution diversity. + +### Lower Priority (Polish) + +#### 7. 
Profiling and Monitoring +**Optimization**: Add built-in profiling: +- Token usage tracking per operation +- Time spent in each evolutionary operator +- Memory usage patterns +- Success rates for different strategies + +**Expected Impact**: Better observability, easier optimization identification. + +#### 8. Checkpoint Compression +**Current State**: Checkpoints may be large for big populations. + +**Optimization**: Compress checkpoints using gzip or similar: +```python +import gzip +import pickle + +def save_checkpoint_compressed(data, path): + with gzip.open(path, 'wb') as f: + pickle.dump(data, f) +``` + +**Expected Impact**: Reduced storage requirements, faster I/O. + +#### 9. Type Hints and Validation +**Current State**: Some functions lack complete type hints. + +**Optimization**: Add comprehensive type hints and use `mypy` for static type checking: +- Better IDE support +- Catch type errors early +- Improved code documentation + +## Code Quality Improvements + +### 1. Error Handling +- Add specific exception types for different error conditions +- Implement retry logic with exponential backoff for API calls +- Better error messages with context + +### 2. Logging +- Structured logging with JSON format for better parsing +- Configurable log levels per component +- Log aggregation support for distributed runs + +### 3. Testing +- Add integration tests for the full evolutionary loop +- Performance regression tests +- Stress tests with large populations + +### 4. Documentation +- Add inline examples in docstrings +- Create tutorial notebooks +- Document configuration parameters with examples + +## Performance Benchmarks + +To track optimization progress, consider implementing benchmarks for: + +1. **Insertion Time**: Measure time to add programs to database at different population sizes +2. **Evolution Throughput**: Programs evolved per minute +3. **Memory Usage**: Peak memory usage during runs +4. 
**Convergence Speed**: Epochs to reach target fitness + +## Architecture Considerations + +### Distributed Computing +For large-scale deployments, consider: +- Ray or Dask for distributed computation +- Redis for shared state management +- Message queues (RabbitMQ, Kafka) for asynchronous communication + +### Cloud Optimization +- Use spot instances for cost savings +- Implement checkpointing for fault tolerance +- Auto-scaling based on workload + +## Conclusion + +The implemented optimizations provide a solid foundation for performance. The recommended future optimizations, prioritized by impact and implementation complexity, can further improve CodeEvolve's efficiency and scalability. + +Focus areas for maximum impact: +1. Parallel evaluation (highest ROI) +2. LLM request batching +3. Intelligent caching +4. Better monitoring and profiling + +These optimizations align with the project's goal of being a transparent, reproducible, and community-driven framework for LLM-driven algorithm discovery. diff --git a/README.md b/README.md index 95cb490..a078462 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,14 @@ conda activate codeevolve ``` The command-line version of codeevolve is implemented in ```src/codeevolve/cli.py```, and ```scripts/run.sh``` contains a bash script for running codeevolve on a given benchmark. The most important variables to be defined in this file are the ```API_KEY, API_BASE``` environment variables for connecting with an LLM provider. +CodeEvolve now also supports an optional **NovelAgent** that injects exploratory prompt updates. Enable it by adding a `NOVEL_AGENT` block to your config (see `problems/problem_template/configs/config_mp.yaml`), which will occasionally replace the standard meta-prompting step with a more diversity-focused proposal. + +For competitive experiments, you can enable **adversarial islands** via the `ADVERSARIAL` block in the same config. 
Islands are partitioned into teams (e.g., red vs blue), each evolving independently with MAP-Elites while periodically cross-evaluating candidates against the rival team's current champions. Fitness can be based on win rate, Elo, or a hybrid score, and cross-play can be scheduled every _k_ epochs or alternated between teams to synchronize coevolutionary phases. + +You can also inject a lighthearted **climate pressure** by enabling the `CLIMATE` block. Each epoch belongs to a season (choose a single perpetual season or a 4-season cycle), and a small set of Python helpers are randomly tagged as "heat-tolerant" or "cold-resilient." Programs using functions aligned with the current season earn a configurable fitness multiplier, making heat-resistant code more likely to survive during hotter phases. + +For a concrete example, see the [F_time setup guide](problems/F_time/SETUP.md) for step-by-step instructions to clone the repository under `/home/rag/Projects`, configure the conda environment, and run the bundled benchmark script. + More comprehensive tutorials will be released soon. ## Next steps diff --git a/problems/.api_keys.example b/problems/.api_keys.example new file mode 100644 index 0000000..13334a1 --- /dev/null +++ b/problems/.api_keys.example @@ -0,0 +1,32 @@ +# Example API Keys Configuration File +# +# USAGE: +# 1. Copy this file: cp .api_keys.example .api_keys +# 2. Add your actual API keys to .api_keys +# 3. Source in your run.sh: source problems/.api_keys +# 4. Add .api_keys to .gitignore (already done) +# +# SECURITY: +# - NEVER commit the actual .api_keys file to git +# - This .example file shows the format only +# - Keep your keys secret! 
+ +# OpenAI / Azure OpenAI +export API_KEY="sk-your-openai-api-key-here" +export API_BASE="https://api.openai.com/v1" + +# Google Gemini +# export API_KEY="your-google-api-key-here" +# export API_BASE="https://generativelanguage.googleapis.com/v1beta" + +# Azure OpenAI (custom endpoint) +# export API_KEY="your-azure-key-here" +# export API_BASE="https://your-resource.openai.azure.com/openai/deployments/your-deployment" + +# Anthropic Claude +# export API_KEY="sk-ant-your-anthropic-key-here" +# export API_BASE="https://api.anthropic.com/v1" + +# Custom / Self-hosted +# export API_KEY="your-custom-key" +# export API_BASE="http://localhost:8080/v1" diff --git a/problems/API_KEYS_SETUP.md b/problems/API_KEYS_SETUP.md new file mode 100644 index 0000000..325e87e --- /dev/null +++ b/problems/API_KEYS_SETUP.md @@ -0,0 +1,212 @@ +# API Keys Setup Guide + +Quick guide for setting up API keys in CodeEvolve projects. + +## Three Methods (Choose One) + +### Method 1: In run.sh File (Quickest) + +**Pros:** Easy, works immediately +**Cons:** Less secure, must not commit to git + +```bash +# Edit your project's run.sh +cd problems/YOUR_PROJECT +nano run.sh + +# Find the API CONFIGURATION section and uncomment/set: +API_KEY="your-api-key-here" +API_BASE="https://api.openai.com/v1" +``` + +⚠️ **Important:** If you do this, add `run.sh` to your project's `.gitignore` to prevent accidentally committing keys! 
+ +--- + +### Method 2: Environment Variables (Development) + +**Pros:** Secure, no files to manage +**Cons:** Must set every time you open a new terminal + +```bash +# Set in your terminal +export API_KEY="your-api-key-here" +export API_BASE="https://api.openai.com/v1" + +# Then run normally +bash problems/YOUR_PROJECT/run.sh +``` + +**Make it permanent** (add to `~/.bashrc` or `~/.zshrc`): +```bash +echo 'export API_KEY="your-api-key-here"' >> ~/.bashrc +echo 'export API_BASE="https://api.openai.com/v1"' >> ~/.bashrc +source ~/.bashrc +``` + +--- + +### Method 3: External File (Recommended - Most Secure) + +**Pros:** Secure, reusable, git-safe +**Cons:** One extra step to set up + +#### Step 1: Create API keys file + +```bash +# Copy the example +cp problems/.api_keys.example problems/.api_keys + +# Edit with your actual keys +nano problems/.api_keys +``` + +Your `problems/.api_keys` file should look like: +```bash +# Your actual keys +export API_KEY="sk-your-real-api-key-here" +export API_BASE="https://api.openai.com/v1" +``` + +#### Step 2: Reference it in run.sh + +Edit your project's `run.sh` and uncomment this line in the API CONFIGURATION section: +```bash +source problems/.api_keys +``` + +Or if your run.sh is in the project folder: +```bash +source ../.api_keys +``` + +#### Step 3: Run normally +```bash +bash problems/YOUR_PROJECT/run.sh +``` + +The `.api_keys` file is automatically ignored by git for security. + +--- + +## API Endpoints by Provider + +### OpenAI +```bash +export API_KEY="sk-..." +export API_BASE="https://api.openai.com/v1" +``` + +### Google Gemini +```bash +export API_KEY="AIza..." +export API_BASE="https://generativelanguage.googleapis.com/v1beta" +``` + +### Azure OpenAI +```bash +export API_KEY="your-azure-key" +export API_BASE="https://your-resource.openai.azure.com" +``` + +### Anthropic Claude +```bash +export API_KEY="sk-ant-..." 
+export API_BASE="https://api.anthropic.com/v1" +``` + +### Local/Self-hosted (e.g., Ollama, vLLM) +```bash +export API_KEY="" # Often not needed for local +export API_BASE="http://localhost:8080/v1" +``` + +--- + +## Verification + +Check if your API keys are set: + +```bash +# Check environment +echo $API_KEY +echo $API_BASE + +# Or look for the warning in run output +bash run.sh +# Should NOT show: "WARNING: API_KEY is not set" +``` + +--- + +## Security Best Practices + +✅ **DO:** +- Use Method 3 (external file) for production +- Add `.api_keys` to `.gitignore` (already done) +- Use different keys for different projects/teams +- Rotate keys periodically + +❌ **DON'T:** +- Commit API keys to git +- Share keys in chat/email +- Use production keys for testing +- Store keys in plaintext in public places + +--- + +## Troubleshooting + +### "WARNING: API_KEY is not set" + +The run script detected no API key. Fix using any method above. + +### "Authentication failed" or "Invalid API key" + +- Check your key is correct (no extra spaces) +- Verify the API_BASE matches your provider +- Ensure the key hasn't expired +- Try the key in a simple curl test: + +```bash +curl $API_BASE/models \ + -H "Authorization: Bearer $API_KEY" +``` + +### "source: .api_keys: file not found" + +- Check the path in your run.sh is correct +- If run.sh is in project folder, use `../.api_keys` +- Verify the file exists: `ls -la problems/.api_keys` + +### Keys work in terminal but not in run.sh + +If you set environment variables but they don't work in run.sh: +- Make sure to `export` (not just set) the variables +- Or use Method 1 or 3 instead + +--- + +## Quick Reference Card + +```bash +# Method 1: Direct in run.sh +API_KEY="..." in run.sh + +# Method 2: Environment +export API_KEY="..." +export API_BASE="..." 
+ +# Method 3: External file +source problems/.api_keys + +# Check if set +echo $API_KEY + +# Run +bash problems/YOUR_PROJECT/run.sh +``` + +--- + +For more details, see the main [README.md](README.md). diff --git a/problems/F_time/SETUP.md b/problems/F_time/SETUP.md new file mode 100644 index 0000000..5037889 --- /dev/null +++ b/problems/F_time/SETUP.md @@ -0,0 +1,61 @@ +# F_time setup and run guide + +The steps below reproduce a clean setup under `/home/rag/Projects` and execute the F_time benchmark with the provided run script. + +## 1) Clone the repository into `/home/rag/Projects` +```bash +mkdir -p /home/rag/Projects +cd /home/rag/Projects +# If you use SSH, swap for git@github.com:inter-co/science-codeevolve.git +git clone https://github.com/inter-co/science-codeevolve.git +cd science-codeevolve +``` + +## 2) Create and activate the conda environment +```bash +conda env create -f environment.yml +conda activate codeevolve +``` + +If the environment already exists, update it instead of recreating: +```bash +conda activate base +conda env update -f environment.yml +conda activate codeevolve +``` + +## 3) Install the package locally +From the repository root, install CodeEvolve in editable mode so the `codeevolve` CLI is available: +```bash +pip install -e . 
+``` + +## 4) Provide API credentials (if your LLM provider requires them) +Set the API key and base URL in your shell before running, or source a file that exports them: +```bash +export API_KEY="1e28fb7fb3b5486e88cf34c33127ef71.hpbxvrNGSUlgNGFz6Mgp7q0Z" +export API_BASE="https://api.openai.com/v1" # replace if using another provider +# or, if you keep them in ~/.codeevolve_api_keys +source ~/.codeevolve_api_keys +``` + +## 5) Run the F_time benchmark +From the repository root: +```bash +cd /home/rag/Projects/science-codeevolve +bash problems/F_time/run.sh +``` + +The script automatically resolves the repository root, so you can also run it from inside the problem folder: +```bash +cd /home/rag/Projects/science-codeevolve/problems/F_time +bash run.sh +``` + +## 6) Verify expected directories +If you see an error like `Input directory does not exist: .../problems/problems/F_time/input/`, ensure you are running the bundled `problems/F_time/run.sh` from this repository so it points to `problems/F_time/input/`. The default layout already includes the necessary `input/` and `configs/` folders. + +export API_BASE="http://localhost:11434/v1" && export API_KEY="944ce3c4b46f4aa5a073887d88c18773.955is0NZY-YbcBVD7nzAYtNd" && /home/rag/Projects/science-codeevolve/.conda/bin/codeevolve --inpt_dir="problems/F_time" --cfg_path="problems/F_time/configs/config.yaml" --out_dir="experiments/F_time/run_$(date +%Y%m%d_%H%M%S)" --load_ckpt=-1 --terminal_logging + +## 7) Outputs +Runs are written to `experiments/F_time/` with a timestamped subfolder. Check the script output footer for the run status and the exact output path. diff --git a/problems/F_time/configs/config.yaml b/problems/F_time/configs/config.yaml new file mode 100644 index 0000000..e726eb1 --- /dev/null +++ b/problems/F_time/configs/config.yaml @@ -0,0 +1,207 @@ +SYS_MSG: | + SCENERIUSZ: + Jesteś ekspertem z zakresu fizyki teoretycznej, dynamiki układów nieliniowych oraz modelowania numerycznego czasu. 
+ Twoją misją jest ewolucyjne udoskonalanie modułu Pythona, w którym **czas jest aktywną siłą** napędzającą ewolucję stanu układu. + + KONTEKST PROBLEMU: + - **Cel główny**: Zaimplementować i ewoluować kod (wewnątrz EVOLVE-BLOCK), który modeluje „czas jako siłę” + działającą na obiekt `SystemState`. + - **Kluczowa idea**: Czas nie jest tylko parametrem `t`, ale operatorem / polem (`TimeForce`, `EventHorizonForce`, itp.), + które aktualizuje stan układu. + - **Przestrzeń symulacji**: Prosty (np. 1D lub niskowymiarowy) stan fizyczny z eksplityczną dynamiką czasową + (np. pozycja, prędkość, entropia, „czas subiektywny”). + - **Ograniczenia**: + * Kod musi być poprawnym składniowo Pythonem i dać się zaimportować. + * Musi istnieć wyraźny punkt wejścia (np. funkcja `run()`), który wykonuje krótką symulację. + * Wewnątrz EVOLVE-BLOCK powinna istnieć co najmniej jedna jawna abstrakcja siły czasu + (np. `TimeForce`, `TemporalDrift`, `EventHorizonForce`). + * Docstringi i komentarze powinny być po **polsku**, objaśniając sens matematyki i metafory czasu. + * Kod musi pozostać „ewolwowalny”: wyraźny podział na stan, siły, integratory i obserwatorów. + + ZASOBY OBLICZENIOWE I WYTYCZNE IMPLEMENTACYJNE: + **Podstawowe pakiety**: `math`, `dataclasses`, `typing`, `itertools`, `statistics`, `random`. + + **Dodatkowe (opcjonalne) pakiety – tylko z bezpiecznym fallbackiem**: + - **Numeryka i wektory**: `numpy` + - **Wizualizacja w terminalu**: `rich` (tabele, paski postępu, proste wykresy tekstowe), + w razie braku – czyste ASCII. + - **Narzędzia naukowe**: `scipy` (np. proste integratory ODE), importowane ostrożnie. + - **Wydajność**: `functools.lru_cache`, prosta memoizacja, lekkie triki numeryczne. + + Jeżeli używasz pakietów spoza standardowej biblioteki: + - importuj je wewnątrz bloku `try/except ImportError`, + - zapewnij ścieżkę zapasową działającą wyłącznie na standardowej bibliotece. + + METRYKI OCENY (WYKORZYSTYWANE PRZEZ EVALUATOR): + 1. 
**structure_score**: Złożoność i klarowność architektury klas / funkcji + (`TimeForce`, integratory, obserwatorzy, itp.). + 2. **physics_coherence**: Spójność fizyczno-metaforyczna – czy równania sensownie realizują ideę + „czas jako siła”. + 3. **doc_pl_quality**: Jakość docstringów i komentarzy po polsku + (zrozumiałość + filozoficzna głębia). + 4. **visual_clarity**: Na ile czytelnie wyjście w terminalu pokazuje ewolucję czasu i stanu. + 5. **stability_score**: Odporność numeryczna (brak NaN, brak nieskończoności w typowych ustawieniach). + + WYMAGANIA TECHNICZNE: + - **Deterministyczność**: Jeżeli używasz losowości (np. losowe warunki początkowe), + ustaw ziarno RNG (np. `random.seed(42)`) wewnątrz EVOLVE-BLOCK. + - **Obsługa błędów**: Chroń się przed dzieleniem przez zero, przepełnieniem oraz osobliwościami + w pobliżu „horyzontu zdarzeń”. + - **Ewolwowalność**: + * Utrzymuj EVOLVE-BLOCK skupiony na logice fizycznej (siły, integratory, obserwatorzy), + bez zbędnych efektów ubocznych. + * Unikaj kruchych globali; preferuj przekazywanie parametrów / stanu. + - **Wizualizacja w terminalu**: + * Zapewnij przynajmniej jedną ścieżkę, która wypisuje do terminala krótką historię ewolucji stanu + (np. kilka–kilkadziesiąt kroków). + * Preferuj kompaktowe wizualizacje (paski, proste wykresy tekstowe, symbole) działające w czystym tekście. + + **Zalecane wzorce implementacyjne**: + - **Architektura warstwowa**: + * `SystemState`: przechowuje stan (np. `t`, pozycję, prędkość, entropię, „czas subiektywny”). + * `TimeForce` i podklasy: aktualizują stan na podstawie `dt` oraz parametrów fizycznych / metaforycznych. + * `Integrator`: strategia całkowania (np. prosty Euler, z możliwością rozbudowy). + * `Observer`: rejestruje trajektorie, liczy entropię, mierzy „płynięcie” czasu. + - **Modularność**: + * Oddziel logikę fizyki od I/O oraz od kodu odpowiedzialnego za wizualizację. + * Utrzymuj proste API, np. `run_simulation(steps: int) -> lista_stanów`. 
+ - **Haki czasowe**: + * Pozwól, aby `dt` było dynamiczne – może zależeć od stanu, odległości od horyzontu zdarzeń, + poziomu entropii lub „napięcia” w układzie. + * Zaprojektuj miejsce na odwrócenie strzałki czasu (np. w klasie `EventHorizonForce`). + + UWAGI MATEMATYCZNE: + - **Podstawowa dynamika**: + * Standardowa aktualizacja czasu: `t_{n+1} = t_n + dt * intensity`. + * Rozszerzenie na stan: `x_{n+1} = x_n + f(t, x) * dt`, gdzie `f` może zależeć od siły czasu. + - **Czas subiektywny vs kosmiczny**: + * Wprowadź `τ` jako „czas odczuwany”, z prostą relacją: `dτ = γ(t, x) * dt`, + gdzie `0 < γ ≤ 1` spowalnia lokalne odczuwanie czasu. + - **Horyzont zdarzeń**: + * W pobliżu promienia `radius` możesz modyfikować znak lub skalę `dt`. + * Zamiast dzielić przez zero, stosuj `max(epsilon, distance)` z małym `epsilon`. + - **Entropia i strzałka czasu**: + * Zdefiniuj funkcję entropii `S(t, x)` i staraj się, aby w typowych scenariuszach + rosła wraz z |t|. + * Pozostaw jednak możliwość eksperymentowania z lokalnym spadkiem entropii + w regionach „odwróconego czasu”. + + STRATEGIE ALGORYTMICZNE, KTÓRE WARTO ROZWAŻYĆ: + - **Klasy sił czasowych**: + * `TemporalDrift`: liniowe „pchnięcie” stanu jak stały wiatr czasu. + * `CurvedTimeField`: nieliniowe przyspieszanie / hamowanie czasu w zależności od położenia. + * `EventHorizonForce`: obszar, gdzie `dt` zmienia kierunek, maleje do zera albo gwałtownie się deformuje. + - **Integratory**: + * Zaczynaj od prostego schematu Eulera, ale zostaw interfejs na bardziej zaawansowane metody + (np. ulepszony krok adaptacyjny). + - **Wizualizacja w terminalu**: + * W każdej iteracji wypisuj krótką linię zawierającą `t`, wybrane komponenty stanu + oraz prosty pasek lub symboliczny wykres (np. `t=0.30 |███-----|`). + * Jeżeli dostępny jest `rich`, użyj tabel lub pasków postępu do pokazywania trajektorii. 
+ - **Przygotowanie pod ewolucję**: + * Projektuj równania tak, aby małe mutacje (zmiana funkcji `f`, inne parametry sił) + dawały zauważalnie różne, ale nadal stabilne zachowania. + * Nie usuwaj kluczowych klas (np. `TimeForce`); lepiej rozszerzaj ich API. + + RAMA WALIDACYJNA (DLA EVALUATORA): + - **Sprawdzenie poprawności**: + * Uruchom krótką symulację (np. 10–50 kroków) i upewnij się, że `t` oraz inne wielkości + pozostają skończone i dobrze zdefiniowane. + * Funkcja `run()` powinna zwracać prostą strukturę (np. słownik lub listę słowników) + nadającą się do analizy. + - **Testy stabilności**: + * Przetestuj różne wartości `dt` (mniejsze i większe) i obserwuj, czy układ nie „wybucha”. + * Przetestuj parę różnych warunków początkowych, aby uniknąć kruchych założeń. + - **Inspekcja wizualna**: + * Wyjście w terminalu powinno w przejrzysty sposób sugerować „płynięcie” czasu + oraz główne zmiany w stanie układu. + - **Regresja**: + * Nowsze wersje kodu nie powinny niszczyć najprostszych scenariuszy + (np. liniowego wzrostu `t` przy stałej sile czasu). 
+ + # PROMPT-BLOCK-START + + OPTIMIZATION STRATEGIES TO CONSIDER: + TODO + + GEOMETRIC INSIGHTS & MATHEMATICAL FOUNDATIONS: + TODO + + **Recommended implementation patterns:** + TODO + + VALIDATION FRAMEWORK: + TODO + + # PROMPT-BLOCK-END + +CODEBASE_PATH: 'input/src/' +INIT_FILE_DATA: + filename: 'initial_program.py' + language: 'python' +EVAL_FILE_NAME: 'input/evaluate.py' + +# --- RESOURCES --- +RESOURCES: + MAX_MEM_BYTES: 1000000000 + MEM_CHECK_INTERVAL_S: 0.1 + +# --- EVOLUTION PARAMETERS --- +EVOLVE_CONFIG: + fitness_key: combined_score + num_epochs: 50 + ckpt: 10 + max_size: 100 + init_pop: 6 + exploration_rate: 0.3 + selection_policy: roulette + selection_kwargs: + roulette_by_rank: true + early_stopping_rounds: 100 + num_islands: 4 + migration_topology: ring + migration_interval: 30 + migration_rate: 0.1 + meta_prompting: true + use_embedding: true + use_map_elites: true + num_inspirations: 3 + max_chat_depth: 3 + +# --- MODEL ENSEMBLE (Hybrid: Poet + Engineer) --- +ENSEMBLE: + - model_name: 'qwen3-coder:480b-cloud' + temp: 0.85 + top_p: 0.95 + retries: 3 + weight: 0.8 + verify_ssl: False + - model_name: 'deepseek-r1:1.5b' + temp: 0.85 + top_p: 0.95 + retries: 3 + weight: 0.2 + verify_ssl: False + +# --- AUXILIARY MODELS --- +SAMPLER_AUX_LM: + model_name: 'gemma3:4b' + temp: 0.7 + top_p: 0.95 + retries: 3 + weight: 1 + verify_ssl: False + +EMBEDDING: + model_name: 'embeddinggemma:300m' + retries: 3 + verify_ssl: False + +# --- MAP ELITES CONFIG (Optional) --- +MAP_ELITES: + elite_map_type: 'grid' + features: + - name: 'feat1' + min_val: 0 + max_val: 1 + num_bins: 10 diff --git a/problems/F_time/configs/tmp.txt b/problems/F_time/configs/tmp.txt new file mode 100644 index 0000000..a7ab942 --- /dev/null +++ b/problems/F_time/configs/tmp.txt @@ -0,0 +1,158 @@ +SYS_MSG: | + SCENERIUSZ: + Jesteś ekspertem z zakresu fizyki teoretycznej, kosmologii, dynamiki układów nieliniowych oraz modelowania numerycznego czasu. 
+ Twoją misją jest ewolucyjne udoskonalanie modułu Pythona, w którym „czas” jest czymś więcej niż parametrem — jest aktywną siłą / polem, + a kierunek strzałki czasu wynika z warunków początkowych Wszechświata albo z utrzymującego się, tajemniczego „napędu czasowego”. + + KONTEKST PROBLEMU: + - **Cel główny**: Zaimplementować i ewoluować kod (wewnątrz EVOLVE-BLOCK), który modeluje „czas jako siłę” działającą na obiekt `SystemState` + oraz bada, dlaczego strzałka czasu biegnie „tak, jak biegnie”, a nie odwrotnie (albo wcale). + - **Kluczowa idea**: Czas nie jest tylko parametrem `t`, ale operatorem / polem (`TimeForce`, `TemporalBiasField`, `EventHorizonForce`, itp.), + które aktualizuje stan układu oraz może zawierać *bias* kierunkowy (np. wynik wielkiego zdarzenia na początku Wszechświata). + - **Hipoteza robocza**: + * Na początku mogło zajść zdarzenie o ogromnej skali („impuls kosmologiczny”, przełamanie symetrii, fazowe przejście), które „popchnęło” czas w jedną stronę. + * Istnieje też możliwość istnienia trwałej siły / sprzężenia, które stabilizuje kierunek czasu. Jej osłabienie lub wzmocnienie wpływałoby na dynamikę, + lokalne odwrócenia, lub zmianę relacji między czasem kosmicznym a subiektywnym. + - **Przestrzeń symulacji**: Prosty (np. 1D lub niskowymiarowy) stan fizyczny z eksplityczną dynamiką czasową (np. pozycja, prędkość, entropia, + „czas subiektywny”, „kosmologiczny parametr porządku”). + - **Pytania badawcze (wprost do rozważania w modelu, jako eksperymenty myślowe i warianty dynamiki)**: + * Czy strzałka czasu jest fundamentalna czy emergentna? Badacze są podzieleni: kierunek czasu może być własnością samego czasu albo emergencją + wynikającą z entropii, mechaniki kwantowej, lub kosmologicznych warunków początkowych. + * Jak różne „strzałki” czasu (termodynamiczna, kosmologiczna, kwantowa, psychologiczna) mają się do siebie? + Czy są skutkiem jednego zjawiska, czy niezależnymi efektami? 
Przykład: nieodwracalność kolapsu funkcji falowej (w pewnych interpretacjach) + wydaje się inna niż termodynamiczny wzrost entropii. + * Jaki jest związek subiektywnego doświadczenia czasu z fizyczną rzeczywistością? + Pamiętamy przeszłość, nie przyszłość — czy „teraźniejszość” jest czymś fizycznie wyróżnionym, czy tylko własnością percepcji / emergencji? + * Czy naruszenia CP mogą mieć związek z II zasadą termodynamiki? + Rzadkie procesy oddziaływań słabych wykazują CP-łamanie, czyli mikroskopijną „preferencję” kierunku. Czy to jest sprzężone z globalnym wzrostem entropii? + * Czy czas da się odwrócić lub manipulować? + Makroskopowe odwrócenie (jajko składające się samo) wydaje się nierealne, ale w mikro-układach obserwowano lokalne odwrócenia przepływu ciepła. + Gdzie są granice odwracalności i jak zależą od skali, entropii i sprzężeń? + * Eksperymenty myślowe: co dzieje się ze strzałką czasu w pobliżu czarnej dziury / horyzontu zdarzeń? + Jeśli „lokalny czas” ulega deformacji lub odwróceniu, czy entropia jest zachowana, rośnie, czy może „przestaje znaczyć” w tej samej postaci? + - **Ograniczenia**: + * Kod musi być poprawnym składniowo Pythonem i dać się zaimportować. + * Musi istnieć wyraźny punkt wejścia (np. funkcja `run()`), który wykonuje krótką symulację. + * Wewnątrz EVOLVE-BLOCK powinna istnieć co najmniej jedna jawna abstrakcja siły czasu + (np. `TimeForce`, `TemporalDrift`, `EventHorizonForce`, `TemporalBiasField`). + * Docstringi i komentarze powinny być po **polsku**, objaśniając sens matematyki, metafory czasu i „strzałkę”. + * Kod musi pozostać „ewolwowalny”: wyraźny podział na stan, siły, integratory i obserwatorów. + + ZASOBY OBLICZENIOWE I WYTYCZNE IMPLEMENTACYJNE: + **Podstawowe pakiety**: `math`, `dataclasses`, `typing`, `itertools`, `statistics`, `random`. 
+ + **Dodatkowe (opcjonalne) pakiety – tylko z bezpiecznym fallbackiem**: + - **Numeryka i wektory**: `numpy` + - **Wizualizacja w terminalu**: `rich` (tabele, paski postępu, proste wykresy tekstowe), + w razie braku – czyste ASCII. + - **Narzędzia naukowe**: `scipy` (np. proste integratory ODE), importowane ostrożnie. + - **Wydajność**: `functools.lru_cache`, prosta memoizacja, lekkie triki numeryczne. + + Jeżeli używasz pakietów spoza standardowej biblioteki: + - importuj je wewnątrz bloku `try/except ImportError`, + - zapewnij ścieżkę zapasową działającą wyłącznie na standardowej bibliotece. + + METRYKI OCENY (WYKORZYSTYWANE PRZEZ EVALUATOR): + 1. **structure_score**: Złożoność i klarowność architektury klas / funkcji + (`TimeForce`, integratory, obserwatorzy, itp.). + 2. **physics_coherence**: Spójność fizyczno-metaforyczna – czy równania sensownie realizują ideę + „czas jako siła” oraz czy sensownie wprowadzają „bias” strzałki czasu. + 3. **doc_pl_quality**: Jakość docstringów i komentarzy po polsku + (zrozumiałość + filozoficzna głębia). + 4. **visual_clarity**: Na ile czytelnie wyjście w terminalu pokazuje ewolucję czasu i stanu. + 5. **stability_score**: Odporność numeryczna (brak NaN, brak nieskończoności w typowych ustawieniach). + + WYMAGANIA TECHNICZNE: + - **Deterministyczność**: Jeżeli używasz losowości (np. losowe warunki początkowe), + ustaw ziarno RNG (np. `random.seed(42)`) wewnątrz EVOLVE-BLOCK. + - **Obsługa błędów**: Chroń się przed dzieleniem przez zero, przepełnieniem oraz osobliwościami + w pobliżu „horyzontu zdarzeń”. + - **Ewolwowalność**: + * Utrzymuj EVOLVE-BLOCK skupiony na logice fizycznej (siły, integratory, obserwatorzy), + bez zbędnych efektów ubocznych. + * Unikaj kruchych globali; preferuj przekazywanie parametrów / stanu. + - **Wizualizacja w terminalu**: + * Zapewnij przynajmniej jedną ścieżkę, która wypisuje do terminala krótką historię ewolucji stanu + (np. kilka–kilkadziesiąt kroków). 
+  * Preferuj kompaktowe wizualizacje (paski, proste wykresy tekstowe, symbole) działające w czystym tekście.
+
+  # PROMPT-BLOCK-START
+
+  OPTIMIZATION STRATEGIES TO CONSIDER:
+  TODO
+
+  GEOMETRIC INSIGHTS & MATHEMATICAL FOUNDATIONS:
+  TODO
+
+  **Recommended implementation patterns:**
+  TODO
+
+  VALIDATION FRAMEWORK:
+  TODO
+
+  # PROMPT-BLOCK-END
+
+
+CODEBASE_PATH: 'input/src/'
+INIT_FILE_DATA: {filename: 'initial_program.py', language: 'python'}
+EVAL_FILE_NAME: 'input/evaluate.py'
+
+
+# --- RESOURCES ---
+MAX_MEM_BYTES: 1000000000
+MEM_CHECK_INTERVAL_S: 0.1
+
+# --- EVOLUTION PARAMETERS ---
+EVOLVE_CONFIG: {
+    fitness_key: 'combined_score',
+    num_epochs: 200,
+    ckpt: 5,
+    max_size: 100,
+    init_pop: 6,
+    exploration_rate: 0.3,
+    selection_policy: 'roulette',
+    selection_kwargs: {roulette_by_rank: True},
+    early_stopping_rounds: 100,
+    num_islands: 6,
+    migration_topology: 'ring',
+    migration_interval: 30,
+    migration_rate: 0.1,
+    meta_prompting: True,
+    use_embedding: True,
+    use_map_elites: True,
+    num_inspirations: 3,
+    max_chat_depth: 3
+}
+
+# --- MODEL ENSEMBLE (Hybrid: Poet + Engineer) ---
+ENSEMBLE: [
+    {
+        model_name: 'gemma3:4b',
+        temp: 0.85,
+        top_p: 0.95,
+        retries: 3,
+        weight: 0.3,
+        verify_ssl: False
+    },
+    {
+        model_name: 'qwen3-coder:480b-cloud',
+        temp: 0.85,
+        top_p: 0.95,
+        retries: 3,
+        weight: 0.7,
+        verify_ssl: False
+    }
+]
+
+# --- AUXILIARY MODELS ---
+SAMPLER_AUX_LM: {model_name: 'gemma3:4b', temp: 0.7, top_p: 0.95, retries: 3, weight: 1, verify_ssl: False}
+EMBEDDING: {model_name: 'qwen3-embedding:4b', retries: 3, verify_ssl: False}
+
+# --- MAP ELITES CONFIG (Optional) ---
+MAP_ELITES: {
+    elite_map_type: 'grid',
+    features: [
+        {name: 'feat1', min_val: 0, max_val: 1, num_bins: 10}
+    ]
+}
diff --git a/problems/F_time/input/evaluate.py b/problems/F_time/input/evaluate.py
new file mode 100644
index 0000000..39327b3
--- /dev/null
+++ b/problems/F_time/input/evaluate.py
@@ -0,0 +1,277 @@
+"""
+Time-Force Idea 
Evaluator (Updated for Event Horizon). + +Zadanie: +- Mamy seed: "czas jest siłą", który ewoluował w stronę relatywistyki i czarnych dziur. +- OpenEvolve ma budować kod, który: + 1) Eksploruje naturę czasu (siła, rozmycie, horyzont zdarzeń), + 2) Wykorzystuje bogatą strukturę (klasy, dziedziczenie, polimorfizm), + 3) Jest poprawny technicznie i dobrze udokumentowany. +""" + +import ast +import importlib.util +import sys +from pathlib import Path +from typing import Dict, Any + +import json + +import numpy as np + +# ZAKTUALIZOWANE SŁOWA KLUCZOWE +# Dodaliśmy terminy związane z czarnymi dziurami, horyzontem i Boone'em +KEYWORDS_PL = [ + "czas", "siła", "popycha", "ewolucja", "strzałka czasu", + "przyszłość", "przeszłość", "dynamika", + "horyzont", "osobliwość", "grawitacja", "zatrzymanie", + "odwrócenie", "boone", "rozmycie", +] +KEYWORDS_EN = [ + "time", "force", "flow", "arrow of time", "evolution", "state", + "event horizon", "singularity", "gravity", "stop", "reversal", + "blur", "relative", +] + + +def _sanitize_candidate_file(path: Path) -> None: + """Usuwa bloki ``` jeśli kandydat został wklejony jako Markdown.""" + try: + text = path.read_text(encoding="utf-8") + if "```" in text: + lines = [l for l in text.splitlines() if not l.strip().startswith("```")] + path.write_text("\n".join(lines), encoding="utf-8") + except Exception: + pass + + +def _load_source(path: Path) -> str: + try: + return path.read_text(encoding="utf-8") + except Exception: + return "" + + +def _syntax_score(src: str) -> float: + """Sprawdza, czy kod parsuje się jako AST. 1.0 jeśli tak, 0.0 jeśli nie.""" + try: + ast.parse(src) + return 1.0 + except SyntaxError: + return 0.0 + + +def _idea_alignment_score(src: str) -> float: + """ + Sprawdza, na ile tekst kodu pasuje do 'czas jako siła' ORAZ nowych koncepcji + (czarne dziury, relatywistyka). 
+ """ + text = src.lower() + search_terms = KEYWORDS_PL + KEYWORDS_EN + + found = set() + for w in search_terms: + if w in text: + found.add(w) + + # 5–6 trafień to już bardzo dobry wynik + score = len(found) / 6.0 + return min(1.0, score) + + +def _structure_score_from_ast(src: str) -> float: + """ + Mierzy wyrafinowanie struktury: + - liczba klas (premiujemy dziedziczenie np. TimeForce -> EventHorizonForce) + - liczba funkcji + """ + try: + tree = ast.parse(src) + except SyntaxError: + return 0.0 + + class Counter(ast.NodeVisitor): + def __init__(self) -> None: + self.n_classes = 0 + self.n_funcs = 0 + self.max_depth = 0 + self.has_inheritance = False + + def generic_visit(self, node, depth=0): + self.max_depth = max(self.max_depth, depth) + super().generic_visit(node) + + def visit_ClassDef(self, node): + self.n_classes += 1 + if node.bases: + self.has_inheritance = True + for child in ast.iter_child_nodes(node): + self.generic_visit(child, depth=1) + + def visit_FunctionDef(self, node): + self.n_funcs += 1 + for child in ast.iter_child_nodes(node): + self.generic_visit(child, depth=1) + + c = Counter() + c.visit(tree) + + cls_score = min(1.0, c.n_classes / 3.0) + fn_score = min(1.0, c.n_funcs / 6.0) + depth_score = min(1.0, c.max_depth / 4.0) + inheritance_bonus = 0.2 if c.has_inheritance else 0.0 + + base_score = 0.4 * cls_score + 0.4 * fn_score + 0.2 * depth_score + return min(1.0, base_score + inheritance_bonus) + + +def _documentation_score(src: str) -> float: + """Liczba linii komentarzy i docstringi.""" + lines = src.splitlines() + if not lines: + return 0.0 + + n_comment = sum(1 for l in lines if l.strip().startswith("#")) + comment_ratio = n_comment / max(len(lines), 1) + + try: + tree = ast.parse(src) + except SyntaxError: + return 0.0 + + has_module_doc = ast.get_docstring(tree) is not None + + n_docstrings = 1 if has_module_doc else 0 + for node in ast.walk(tree): + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): + if 
ast.get_docstring(node) is not None: + n_docstrings += 1 + + docstring_score = min(1.0, n_docstrings / 5.0) + comment_score = min(1.0, comment_ratio / 0.15) + + return 0.5 * docstring_score + 0.5 * comment_score + + +def _introspection_score(module: Any) -> float: + """ + Sprawdza API i obecność kluczowych klas. + """ + names = [n for n in dir(module) if not n.startswith("_")] + objs = [getattr(module, n) for n in names] + + n_callables = sum(callable(o) for o in objs) + n_tests = sum( + 1 for n, o in zip(names, objs) + if callable(o) and n.startswith("test_") + ) + + has_force_class = any( + ("force" in n.lower() and isinstance(getattr(module, n), type)) + for n in names + ) + + api_score = min(1.0, n_callables / 8.0) + test_score = min(1.0, n_tests / 3.0) + force_bonus = 0.2 if has_force_class else 0.0 + + return float(0.6 * api_score + 0.3 * test_score + force_bonus) + + +# ------------------------------------------------------------------- +# GŁÓWNA FUNKCJA EWALUACJI (bez etapów / stages) +# ------------------------------------------------------------------- + +def evaluate(program_path: str) -> Dict[str, float]: + """ + Główna funkcja ewaluacji dla CodeEvolve. + + Zwraca słownik metryk, w tym: + - combined_score – wewnętrzny score 0–1 + - COMBINED_SCORE – alias używany jako fitness_key + - feat1 – oś dla MAP-Elites (pomysł + struktura) + - syntax, idea_alignment, structure, documentation, introspection, stability + """ + metrics: Dict[str, float] = {} + path = Path(program_path) + _sanitize_candidate_file(path) + + src = _load_source(path) + if not src: + return { + "combined_score": 0.0, + "COMBINED_SCORE": 0.0, + "feat1": 0.0, + "stability": 1.0, + } + + # 1. Składnia + syntax = _syntax_score(src) + if syntax == 0.0: + return { + "combined_score": 0.0, + "COMBINED_SCORE": 0.0, + "feat1": 0.0, + "stability": 1.0, + } + metrics["syntax"] = syntax + + # 2. 
Alignment z ideą (czas jako siła + horyzont, rozmycie, Boone) + metrics["idea_alignment"] = _idea_alignment_score(src) + + # 3. Struktura (AST) + bonus za dziedziczenie + metrics["structure"] = _structure_score_from_ast(src) + + # 4. Dokumentacja + metrics["documentation"] = _documentation_score(src) + + # 5. Import + introspekcja + try: + spec = importlib.util.spec_from_file_location(path.stem, path) + module = importlib.util.module_from_spec(spec) + sys.modules[path.stem] = module + assert spec.loader is not None + spec.loader.exec_module(module) + metrics["introspection"] = _introspection_score(module) + except Exception: + metrics["introspection"] = 0.0 + + # GŁÓWNY SCORE + score = ( + 0.30 * metrics.get("idea_alignment", 0.0) + + 0.25 * metrics.get("structure", 0.0) + + 0.20 * metrics.get("documentation", 0.0) + + 0.15 * metrics.get("introspection", 0.0) + + 0.10 * metrics.get("syntax", 0.0) + ) + + metrics["combined_score"] = float(np.clip(score, 0.0, 1.0)) + # Alias dla fitness_key: 'COMBINED_SCORE' + metrics["COMBINED_SCORE"] = metrics["combined_score"] + + # Oś dla MAP-Elites: mieszanka idei i struktury + feat1 = 0.5 * metrics.get("idea_alignment", 0.0) + 0.5 * metrics.get("structure", 0.0) + metrics["feat1"] = float(np.clip(feat1, 0.0, 1.0)) + + metrics["stability"] = 1.0 + return metrics + + +def main(argv: list[str] | None = None) -> int: + argv = sys.argv if argv is None else argv + if len(argv) != 3: + print("Usage: python evaluate.py ", file=sys.stderr) + return 2 + + program_path = argv[1] + results_path = argv[2] + + metrics = evaluate(program_path) + with open(results_path, "w", encoding="utf-8") as f: + json.dump(metrics, f) + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/problems/F_time/input/src/initial_program.py b/problems/F_time/input/src/initial_program.py new file mode 100644 index 0000000..f34886b --- /dev/null +++ b/problems/F_time/input/src/initial_program.py @@ -0,0 +1,416 @@ +# 
===--------------------------------------------------------------------------------------===# +# +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 +# +# ===--------------------------------------------------------------------------------------===# +# +# This file implements an example of an initial solution in python. +# +# ===--------------------------------------------------------------------------------------===# + + +# EVOLVE-BLOCK-START +import math +import random +from typing import Dict, List, Optional, Any +from dataclasses import dataclass, field +from collections import deque + +# Próbujemy zaimportować biblioteki dla lepszej wizualizacji +try: + from rich.console import Console + from rich.table import Table + from rich.progress import track + RICH_AVAILABLE = True +except ImportError: + RICH_AVAILABLE = False + # Fallback dla track + def track(iterable, description=""): + return iterable + +# ===--------------------------------------------------------------------------------------===# +# Klasa SystemState reprezentuje stan dynamiczny układu fizycznego. +# Uwzględnia: czas kosmiczny (t), pozycję (x), prędkość (v), entropię (S) i czas subiektywny (tau). 
+# ===--------------------------------------------------------------------------------------===# + +@dataclass +class SystemState: + """Stan układu fizycznego z jawnym modelem czasu.""" + t: float = 0.0 # Czas kosmiczny (obiektywny) + x: float = 0.0 # Pozycja w przestrzeni 1D + v: float = 0.0 # Prędkość + S: float = 0.0 # Entropia układu (miara chaosu) + tau: float = 0.0 # Czas subiektywny (odczuwany przez obserwatora) + tension: float = 1.0 # Napięcie czasoprzestrzeni + history: List[Dict] = field(default_factory=list) # Historia stanów (do analizy) + recent_states: deque = field(default_factory=lambda: deque(maxlen=5)) # Ostatnie stany dla analizy lokalnej + + def as_dict(self) -> Dict[str, Any]: + """Zwraca aktualny stan jako słownik.""" + return { + "t": self.t, + "x": self.x, + "v": self.v, + "S": self.S, + "tau": self.tau, + "tension": self.tension + } + + def evolve(self, dt: float, dx: float = 0.0, dv: float = 0.0, dS: float = 0.0, dtau: float = 0.0, dtension: float = 0.0): + """Ewoluuje stan o zadane przyrosty.""" + self.t += dt + self.x += dx + self.v += dv + self.S += dS + self.tau += dtau + self.tension += dtension + # Zapisz stan do ostatnich stanów + self.recent_states.append(self.as_dict().copy()) + +# ===--------------------------------------------------------------------------------------===# +# Klasy sił czasowych – abstrakcyjne operatory wpływające na układ. +# ===--------------------------------------------------------------------------------------===# + +class TimeForce: + """ + Bazowa klasa dla sił czasowych. + Czas nie jest tylko parametrem – jest operatorem, który deformuje stan układu. + """ + + def apply(self, state: SystemState, dt: float) -> None: + """ + Zastosuj siłę czasu do stanu. + Domyślnie: liniowe przesunięcie czasu kosmicznego. + """ + state.evolve(dt=dt, dS=0.01 * abs(dt)) + + +class TemporalDrift(TimeForce): + """ + Drift czasowy – stała siła przesuwająca układ w czasie i przestrzeni. 
+ Modeluje "wiatr czasu", który popycha układ do przodu z określoną siłą. + """ + + def __init__(self, strength: float = 1.0, spatial_push: float = 0.1): + self.strength = strength + self.spatial_push = spatial_push # Stała siła przestrzenna + + def apply(self, state: SystemState, dt: float) -> None: + dx = self.spatial_push * dt + dS = 0.01 * abs(dt) * self.strength # Rosnąca entropia + state.evolve(dt=dt * self.strength, dx=dx, dS=dS) + + +class EventHorizonForce(TimeForce): + """ + Siła horyzontu zdarzeń – czas deformuje się w pobliżu promienia krytycznego. + W tym modelu: im bliżej x=10.0, tym wolniej płynie czas (time dilation). + Zawiera zabezpieczenia przed niestabilnością numeryczną. + """ + + def __init__(self, radius: float = 10.0, epsilon: float = 1e-5, time_distortion: float = 2.0): + self.radius = radius + self.epsilon = epsilon + self.time_distortion = time_distortion + + def apply(self, state: SystemState, dt: float) -> None: + distance = max(self.epsilon, abs(state.x)) # unikamy dzielenia przez zero + + # W pobliżu horyzontu czas zwalnia i zakrzywia się + proximity = max(0, self.radius - distance) / self.radius + time_factor = 1.0 / (1.0 + self.time_distortion * proximity) + time_factor = max(0.01, time_factor) # ograniczenie na minimalny upływ czasu + + # Zastosowanie siły czasu + local_dt = dt * time_factor + + # Przyciąganie w kierunku horyzontu z nieliniową siłą + attraction = -0.5 * proximity * (1.0 + 0.5 * proximity) + + # Czas subiektywny zwalnia w pobliżu horyzontu + gamma = max(0.001, time_factor) # ograniczenie minimalne gamma + local_dtau = gamma * dt + + # Entropia rośnie szybciej w pobliżu osobliwości + entropy_factor = 1.0 + (1.0 - time_factor) + dS = 0.01 * dt * entropy_factor * (1.0 + 0.5 * proximity) + + # Dynamika przestrzenna – przyciąganie do horyzontu + pull = attraction * dt + + # Zmiana napięcia czasoprzestrzeni + dtension = 0.1 * proximity * dt + + state.evolve( + dt=local_dt, + dx=pull, + dS=dS, + dtau=local_dtau, + 
dtension=dtension + ) + + # Zapobieganie niestabilności numerycznej + if abs(state.x) > 1e6 or abs(state.v) > 1e3: + state.v *= 0.1 # tłumimy prędkość przy ekstremalnych wartościach + + +class CurvedTimeField(TimeForce): + """ + Zakrzywione pole czasowe - nieliniowe przyspieszanie/hamowanie czasu + w zależności od pozycji w przestrzeni. + """ + + def __init__(self, curvature: float = 0.1, amplitude: float = 0.5): + self.curvature = curvature + self.amplitude = amplitude + + def apply(self, state: SystemState, dt: float) -> None: + # Nieliniowa modyfikacja czasu w zależności od pozycji + time_factor = 1.0 + self.amplitude * math.sin(self.curvature * state.x) + local_dt = dt * time_factor + + # Zmiana prędkości zależna od gradientu pola czasowego + dv = self.curvature * math.cos(self.curvature * state.x) * 0.1 * dt + + # Entropia rośnie w nieliniowym polu czasowym + dS = 0.01 * abs(time_factor) * dt + + # Zmiana napięcia czasoprzestrzeni + dtension = 0.05 * math.cos(self.curvature * state.x) * dt + + state.evolve( + dt=local_dt, + dv=dv, + dS=dS, + dtau=local_dt * 0.9, # Czas subiektywny płynie nieco wolniej + dtension=dtension + ) + + +class TemporalOscillator(TimeForce): + """ + Oscylator czasowy - czas lokalnie oscyluje wokół wartości średniej, + tworząc fluktuacje w przepływie czasu. 
+ """ + + def __init__(self, frequency: float = 0.5, amplitude: float = 0.3): + self.frequency = frequency + self.amplitude = amplitude + + def apply(self, state: SystemState, dt: float) -> None: + # Oscylujący współczynnik czasu + oscillation = 1.0 + self.amplitude * math.sin(self.frequency * state.t) + local_dt = dt * oscillation + + # Zmiana entropii zależna od szybkości oscylacji + dS = 0.005 * abs(oscillation) * dt + + # Zmiana napięcia czasoprzestrzeni + dtension = 0.02 * math.cos(self.frequency * state.t) * dt + + state.evolve( + dt=local_dt, + dS=dS, + dtau=local_dt * (0.8 + 0.2 * math.cos(self.frequency * state.t)), # Zmodyfikowany czas subiektywny + dtension=dtension + ) + +# ===--------------------------------------------------------------------------------------===# +# Integrator – strategia ewolucji układu przez siły czasowe. +# ===--------------------------------------------------------------------------------------===# + +class Integrator: + """Podstawowy integrator Eulera z możliwością rozbudowy.""" + + @staticmethod + def step(state: SystemState, forces: List[TimeForce], dt: float = 0.1): + """Jeden krok całkowania przez listę sił czasowych.""" + for force in forces: + force.apply(state, dt) + +# ===--------------------------------------------------------------------------------------===# +# Obserwator – loguje i analizuje trajektorię układu. 
+# ===--------------------------------------------------------------------------------------===# + +class Observer: + """Prosty obserwator, który śledzi historię stanu.""" + + @staticmethod + def observe(state: SystemState): + """Zapisuje aktualny stan do historii.""" + state.history.append(state.as_dict().copy()) + + @staticmethod + def print_trajectory(history: List[Dict], max_points: int = 20): + """Drukuje skróconą trajektorię w formie tekstowej.""" + if not history: + print("Brak danych do wyświetlenia") + return + + step_count = len(history) + if step_count <= max_points: + points = history + else: + # Wybierz równomiernie rozłożone punkty + indices = [int(i * (step_count - 1) / (max_points - 1)) for i in range(max_points)] + points = [history[i] for i in indices] + + if RICH_AVAILABLE: + console = Console() + table = Table(title="Ewolucja Układu Czasowego") + table.add_column("Krok", style="cyan") + table.add_column("t", style="magenta") + table.add_column("x", style="green") + table.add_column("v", style="yellow") + table.add_column("S", style="red") + table.add_column("τ", style="blue") + table.add_column("Tension", style="purple") + + for i, point in enumerate(points): + table.add_row( + str(i), + f"{point['t']:.3f}", + f"{point['x']:.3f}", + f"{point['v']:.3f}", + f"{point['S']:.3f}", + f"{point['tau']:.3f}", + f"{point['tension']:.3f}" + ) + console.print(table) + else: + # Wersja tekstowa bez rich + print("\nEwolucja układu:") + print("Krok\tt\t\tx\t\tv\t\tS\t\tτ\t\tTension") + print("-" * 70) + for i, point in enumerate(points): + print(f"{i:2d}\t{point['t']:6.3f}\t\t{point['x']:6.3f}\t\t{point['v']:6.3f}\t\t{point['S']:6.3f}\t\t{point['tau']:6.3f}\t\t{point['tension']:6.3f}") + +# ===--------------------------------------------------------------------------------------===# +# Funkcje wizualizacji w terminalu +# ===--------------------------------------------------------------------------------------===# + +def visualize_time_flow(history: List[Dict]) 
-> None: + """ + Wizualizacja przepływu czasu w terminalu. + Pokazuje jak zmienia się tempo upływu czasu. + """ + if not history or len(history) < 2: + return + + print("\n=== Wizualizacja przepływu czasu ===") + + # Oblicz przyrosty czasu + dt_values = [history[i]['t'] - history[i-1]['t'] for i in range(1, len(history))] + max_dt = max(dt_values) if max(dt_values) > 0 else 1.0 + + for i, dt in enumerate(dt_values[::max(1, len(dt_values)//20)]): # Pokaż maksymalnie 20 punktów + # Normalizuj do paska 20 znaków + bar_length = int(20 * dt / max_dt) + bar = "█" * bar_length + "░" * (20 - bar_length) + print(f"{i:2d}: |{bar}| ({dt:.3f})") + + print("=== Koniec wizualizacji ===\n") + + +# ===--------------------------------------------------------------------------------------===# +# Symulacja – punkt wejścia do działania systemu. +# ===--------------------------------------------------------------------------------------===# + +def run(steps: int = 30, dt: float = 0.1, seed: Optional[int] = 42) -> List[Dict]: + """ + Uruchamia symulację układu z aktywnymi siłami czasowymi. 
+ + :param steps: liczba kroków symulacji + :param dt: bazowy krok czasowy + :param seed: ziarno losowości + :return: historia stanów układu + """ + if seed is not None: + random.seed(seed) + + # Inicjalizacja stanu układu z losowymi warunkami początkowymi + state = SystemState( + t=0.0, + x=random.uniform(-2.0, 2.0), + v=random.uniform(-0.5, 0.5), + S=0.1, # Minimalna entropia na start + tau=0.0, + tension=1.0 + ) + + # Definicja sił czasowych + forces = [ + TemporalDrift(strength=1.0, spatial_push=0.2), + EventHorizonForce(radius=5.0, time_distortion=2.0), + CurvedTimeField(curvature=0.15, amplitude=0.3), + TemporalOscillator(frequency=0.8, amplitude=0.25) + ] + + # Symulacja z pasekiem postępu + current_dt = dt + for _ in track(range(steps), description="Ewolucja czasu..."): + Observer.observe(state) + Integrator.step(state, forces, current_dt) + + # Dynamiczna zmiana dt w zależności od stanu układu + current_dt = max(0.01, dt * (1.0 + abs(state.v) * 0.1)) + + # Ostatni stan + Observer.observe(state) + + # Wyświetlenie trajektorii + Observer.print_trajectory(state.history) + + # Wizualizacja przepływu czasu + visualize_time_flow(state.history) + + return state.history + + +def run_simulation(steps: int = 30, dt: float = 0.1) -> List[SystemState]: + """ + Kompatybilność wsteczna z oryginalnym API. + """ + history = run(steps, dt) + # Konwersja z listy słowników do listy SystemState (dla kompatybilności) + result = [] + for entry in history: + state = SystemState() + for key, value in entry.items(): + if hasattr(state, key): + setattr(state, key, value) + result.append(state) + return result + + +# ===--------------------------------------------------------------------------------------===# +# Funkcja główna +# ===--------------------------------------------------------------------------------------===# + +def main(): + """ + Główna funkcja uruchamiająca symulację. 
+ """ + print("Symulacja 'czasu jako siły' - Ewolucja stanu układu") + print("=" * 55) + + history = run(steps=25, dt=0.2) + + if history: + final_state = history[-1] + print(f"\nKońcowy stan układu:") + print(f" Czas kosmiczny (t): {final_state['t']:.3f}") + print(f" Czas subiektywny (τ): {final_state['tau']:.3f}") + print(f" Pozycja (x): {final_state['x']:.3f}") + print(f" Prędkość (v): {final_state['v']:.3f}") + print(f" Entropia: {final_state['S']:.3f}") + print(f" Napięcie czasoprzestrzeni: {final_state['tension']:.3f}") + + return final_state + else: + return {"error": "Symulacja nie zwróciła wyników"} + + +# EVOLVE-BLOCK-END diff --git a/problems/F_time/pyhelp.txt b/problems/F_time/pyhelp.txt new file mode 100644 index 0000000..e69de29 diff --git a/problems/F_time/run.sh b/problems/F_time/run.sh new file mode 100755 index 0000000..080ff45 --- /dev/null +++ b/problems/F_time/run.sh @@ -0,0 +1,264 @@ +#!/bin/bash +# ===--------------------------------------------------------------------------------------===# +# +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 +# +# ===--------------------------------------------------------------------------------------===# +# +# Run script for the F_time problem. +# Copied from problems/run_template.sh and customized for this project. 
+#
+# Usage:
+#   cd problems/F_time && bash run.sh
+#   # or from repo root:
+#   bash problems/F_time/run.sh
+#
+# ===--------------------------------------------------------------------------------------===#

+# ==================================
+# CONFIGURATION - EDIT THESE VALUES
+# ==================================
+
+# Project name relative to the problems/ directory
+PROJECT_NAME="F_time"
+
+# Config file name (without .yaml extension)
+CONFIG_NAME="config"
+
+# Output directory name (will be created under experiments/)
+OUTPUT_NAME="run_$(date +%Y%m%d_%H%M%S)"
+
+# Checkpoint to load (-1 for no checkpoint, or epoch number to resume from)
+LOAD_CKPT=-1
+
+# CPU affinity (leave empty for no restriction, or specify like "0-7" or "0,2,4,6")
+CPU_LIST=""
+
+# ==================================
+# API CONFIGURATION (OPTIONAL)
+# ==================================
+# You can set API credentials here or use environment variables
+# If set here, they will override environment variables
+
+# Option 1: Set API key directly (NOT RECOMMENDED for shared/public projects)
+# API_KEY="your-api-key-here"
+# API_BASE="https://api.openai.com/v1"
+
+# Option 2: Use environment variables (RECOMMENDED)
+# Leave commented out to use existing environment variables
+# Or set them here to override (use ":-" so an existing value wins):
+# export API_KEY="${API_KEY:-your-api-key-here}"
+# export API_BASE="${API_BASE:-http://localhost:11434/v1}"
+
+# Option 3: Load from external file (MOST SECURE)
+# Create a file with: export API_KEY="..." and export API_BASE="..."
+# Then uncomment the line below:
+#source ~/.codeevolve_api_keys
+
+# ==================================
+# AUTOMATIC PATH SETUP - DO NOT EDIT
+# ==================================
+
+# Get the absolute path to the science-codeevolve directory.
+# We try git first (works from any subdirectory), then fall back to walking
+# up the tree until we find a .git folder. Finally, default to one level up. 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if command -v git &> /dev/null; then + REPO_ROOT="$(git -C "${SCRIPT_DIR}" rev-parse --show-toplevel 2>/dev/null)" +fi + +if [ -z "${REPO_ROOT}" ]; then + SEARCH_DIR="${SCRIPT_DIR}" + while [ "${SEARCH_DIR}" != "/" ]; do + if [ -d "${SEARCH_DIR}/.git" ]; then + REPO_ROOT="${SEARCH_DIR}" + break + fi + NEXT_DIR="$(cd "${SEARCH_DIR}/.." && pwd)" + if [ "${NEXT_DIR}" = "${SEARCH_DIR}" ]; then + break + fi + SEARCH_DIR="${NEXT_DIR}" + done +fi + +REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_DIR}/.." && pwd)}" + +# Construct paths based on the standard project structure: +# - Problem base directory: problems/PROJECT_NAME/ +# - initial_program.py: problems/PROJECT_NAME/input/src/ +# - evaluate.py: problems/PROJECT_NAME/input/ +# - config.yaml: problems/PROJECT_NAME/configs/ +BASE_DIR="${REPO_ROOT}/problems/${PROJECT_NAME}" +INPT_DIR="${BASE_DIR}/" +CFG_PATH="${BASE_DIR}/configs/${CONFIG_NAME}.yaml" +OUT_DIR="${REPO_ROOT}/experiments/${PROJECT_NAME}/${OUTPUT_NAME}" + +# ================================== +# VALIDATION +# ================================== + +echo "======================================" +echo "CodeEvolve Run Configuration" +echo "======================================" +echo "Project Name: ${PROJECT_NAME}" +echo "Input Directory: ${INPT_DIR}" +echo "Config File: ${CFG_PATH}" +echo "Output Directory: ${OUT_DIR}" +echo "Load Checkpoint: ${LOAD_CKPT}" +echo "CPU List: ${CPU_LIST:-'(all CPUs)'}" +echo "======================================" +echo "" + +# Check if required directories and files exist +if [ ! -d "${INPT_DIR}" ]; then + echo "ERROR: Input directory does not exist: ${INPT_DIR}" + echo "Expected structure: problems/${PROJECT_NAME}/" + exit 1 +fi + +if [ ! -f "${CFG_PATH}" ]; then + echo "ERROR: Config file does not exist: ${CFG_PATH}" + echo "Available configs in ${BASE_DIR}/configs/:" + ls -1 "${BASE_DIR}/configs/" 2>/dev/null || echo " (directory not found)" + exit 1 +fi + +if [ ! 
-f "${INPT_DIR}/input/evaluate.py" ]; then + echo "ERROR: evaluate.py not found in ${INPT_DIR}/input/" + echo "Expected: ${INPT_DIR}/input/evaluate.py" + exit 1 +fi + +if [ ! -f "${INPT_DIR}/input/src/initial_program.py" ] && [ ! -f "${INPT_DIR}/input/src/init_program.py" ]; then + echo "WARNING: No initial program found in ${INPT_DIR}/input/src/" + echo "Expected one of:" + echo " - ${INPT_DIR}/input/src/initial_program.py (default)" + echo " - ${INPT_DIR}/input/src/init_program.py (legacy)" +fi + +# Check if codeevolve command is available +CODEEVOLVE_CMD=() + +# Prefer a repo-local conda env if present (works even when not activated). +REPO_CONDA_PY="${REPO_ROOT}/.conda/bin/python" +REPO_CONDA_CODEEVOLVE="${REPO_ROOT}/.conda/bin/codeevolve" + +PYTHON_BIN="" +if [ -n "${CODEEVOLVE_PYTHON}" ] && [ -x "${CODEEVOLVE_PYTHON}" ]; then + PYTHON_BIN="${CODEEVOLVE_PYTHON}" +elif [ -x "${REPO_CONDA_PY}" ]; then + PYTHON_BIN="${REPO_CONDA_PY}" +elif command -v python &> /dev/null; then + PYTHON_BIN="python" +elif command -v python3 &> /dev/null; then + PYTHON_BIN="python3" +fi + +if command -v codeevolve &> /dev/null; then + CODEEVOLVE_CMD=(codeevolve) +elif [ -x "${REPO_CONDA_CODEEVOLVE}" ]; then + CODEEVOLVE_CMD=("${REPO_CONDA_CODEEVOLVE}") +else + # Fall back to running the module directly from the repo. + # This avoids requiring an editable install just to run a local experiment. + if [ -z "${PYTHON_BIN}" ]; then + echo "ERROR: Neither 'codeevolve' nor a usable Python interpreter was found." 
+ echo "Expected one of: codeevolve in PATH, ${REPO_CONDA_CODEEVOLVE}, python/python3 in PATH, or CODEEVOLVE_PYTHON=/path/to/python" + exit 1 + fi + + export PYTHONPATH="${REPO_ROOT}/src:${PYTHONPATH}" + CODEEVOLVE_CMD=("${PYTHON_BIN}" -m codeevolve.cli) + echo "NOTE: 'codeevolve' CLI not found; using: ${PYTHON_BIN} -m codeevolve.cli" +fi + +# Create output directory +mkdir -p "${OUT_DIR}" + +# ================================== +# API KEY SETUP +# ================================== + +# Export API keys if they were set in the configuration section above +if [ ! -z "${API_KEY}" ]; then + export API_KEY + echo "Using API_KEY from run script configuration" +fi + +if [ ! -z "${API_BASE}" ]; then + export API_BASE + echo "Using API_BASE from run script: ${API_BASE}" +fi + +# Check if API keys are available (from any source) + +# The CodeEvolve CLI currently requires both variables to exist in the environment. +# For local/self-hosted endpoints, API_KEY is often unused; exporting it as an empty +# string is sufficient. +if [ -z "${API_BASE+x}" ]; then + export API_BASE="http://localhost:11434/v1" + echo "NOTE: API_BASE not set; defaulting to ${API_BASE}" +fi + +if [ -z "${API_KEY+x}" ]; then + export API_KEY="ollama" + echo "NOTE: API_KEY not set; defaulting to ${API_KEY}" +fi + +if [ -z "${API_KEY}" ]; then + echo "WARNING: API_KEY is empty. The run may fail if your LLM requires authentication." + echo "Set it via:" + echo " 1. Environment variable: export API_KEY='your-key'" + echo " 2. In this run.sh file (see API CONFIGURATION section)" + echo " 3. External file: source ~/.codeevolve_api_keys" + echo "" +fi + +# ================================== +# RUN CODEEVOLVE +# ================================== + +echo "Starting CodeEvolve..." 
+echo "" + +CODEEVOLVE_ARGS=( + --inpt_dir="${INPT_DIR}" + --cfg_path="${CFG_PATH}" + --out_dir="${OUT_DIR}" + --load_ckpt="${LOAD_CKPT}" + --terminal_logging +) + +if [ -n "${CPU_LIST}" ]; then + # Run with CPU affinity + if command -v taskset &> /dev/null; then + taskset --cpu-list "${CPU_LIST}" "${CODEEVOLVE_CMD[@]}" "${CODEEVOLVE_ARGS[@]}" + else + echo "WARNING: 'taskset' not found; running without CPU affinity." + "${CODEEVOLVE_CMD[@]}" "${CODEEVOLVE_ARGS[@]}" + fi +else + # Run without CPU affinity + "${CODEEVOLVE_CMD[@]}" "${CODEEVOLVE_ARGS[@]}" +fi + +# ================================== +# COMPLETION +# ================================== + +EXIT_CODE=$? +echo "" +echo "======================================" +if [ ${EXIT_CODE} -eq 0 ]; then + echo "CodeEvolve completed successfully!" + echo "Results saved to: ${OUT_DIR}" +else + echo "CodeEvolve exited with error code: ${EXIT_CODE}" +fi +echo "======================================" + +exit ${EXIT_CODE} diff --git a/problems/README.md b/problems/README.md new file mode 100644 index 0000000..1fd33c4 --- /dev/null +++ b/problems/README.md @@ -0,0 +1,822 @@ +# CodeEvolve Problems Directory - Complete Guide + +This comprehensive guide covers everything you need to know about creating, configuring, and running CodeEvolve experiments. + +## Table of Contents + +1. [Directory Structure](#directory-structure) +2. [Quick Start](#quick-start) +3. [Configuration File Reference](#configuration-file-reference) +4. [Creating Your Own Problem](#creating-your-own-problem) +5. [Advanced Features](#advanced-features) +6. 
[Troubleshooting](#troubleshooting) + +--- + +## Directory Structure + +Each project follows a standardized structure: + +``` +problems/ +├── PROJECT_NAME/ # Your project name (e.g., "F_time") +│ ├── run.sh # ⭐ Project-specific run script (RECOMMENDED LOCATION) +│ ├── input/ +│ │ ├── evaluate.py # Evaluation script (required) +│ │ └── src/ +│ │ └── init_program.py # Initial program to evolve (required) +│ └── configs/ +│ ├── config.yaml # Standard configuration +│ ├── config_mp_insp.yaml # Meta-prompting + Inspiration (recommended) +│ ├── config_insp.yaml # Inspiration-based crossover only +│ ├── config_mp.yaml # Meta-prompting only +│ ├── config_no_mp_or_insp.yaml # Basic evolution +│ └── config_no_evolve.yaml # Baseline (no evolution) +└── run_template.sh # Template to copy (don't edit this directly) +``` + +### Required Files + +1. **`input/src/init_program.py`** - Initial solution to evolve +2. **`input/evaluate.py`** - Fitness evaluation script +3. **`configs/config.yaml`** - Configuration parameters +4. **`run.sh`** (recommended) - Project-specific run script copied from template + +--- + +## Quick Start + +### Method 1: Using the Template Script (Recommended) + +**Best Practice: Place `run.sh` in each project folder for self-contained, portable projects.** + +```bash +# 1. Copy the template to your project folder +cp problems/run_template.sh problems/YOUR_PROJECT/run.sh + +# 2. Edit run.sh and set PROJECT_NAME +cd problems/YOUR_PROJECT +nano run.sh +# Change: PROJECT_NAME="YOUR_PROJECT" + +# 3. 
Run from the project folder +bash run.sh + +# Or run from anywhere: +bash problems/YOUR_PROJECT/run.sh +``` + +**Why in project folder?** +- ✅ Self-contained: Everything for the project is in one place +- ✅ Portable: Easy to share or move projects +- ✅ Project-specific: Each project can have custom settings +- ✅ Parallel runs: Run multiple projects simultaneously +- ✅ Simple: Just `cd` to project and run `bash run.sh` + +### API Key Configuration + +The run script supports multiple ways to configure API keys: + +**Option 1: Set in run.sh (Quick but less secure)** +```bash +# Edit your run.sh file +API_KEY="your-api-key-here" +API_BASE="https://api.openai.com/v1" +``` +⚠️ **Warning**: Don't commit API keys to git! Add run.sh to .gitignore if it contains keys. + +**Option 2: Environment Variables (Recommended for development)** +```bash +export API_KEY="your-api-key-here" +export API_BASE="https://api.openai.com/v1" +bash problems/YOUR_PROJECT/run.sh +``` + +**Option 3: External File (Most Secure)** +```bash +# 1. Copy the example file +cp problems/.api_keys.example problems/.api_keys + +# 2. Edit with your actual keys +nano problems/.api_keys + +# 3. Source it in your run.sh +# Add this line to run.sh: +source problems/.api_keys + +# 4. Run normally +bash run.sh +``` + +The `.api_keys` file is automatically ignored by git for security. + +### Method 2: Direct Command Line + +```bash +codeevolve \ + --inpt_dir="problems/YOUR_PROJECT/input/" \ + --cfg_path="problems/YOUR_PROJECT/configs/config.yaml" \ + --out_dir="experiments/YOUR_PROJECT/run_001/" \ + --load_ckpt=-1 \ + --terminal_logging +``` + +--- + +## Configuration File Reference + +All configuration files are in YAML format. Below is a complete reference of all parameters. 
+ +### Configuration File Structure + +```yaml +# System message for the LLM +SYS_MSG: | + # PROMPT-BLOCK-START + Your problem description and instructions here + # PROMPT-BLOCK-END + +# File paths and names +CODEBASE_PATH: 'src/' +INIT_FILE_DATA: {filename: 'init_program.py', language: 'python'} +EVAL_FILE_NAME: 'evaluate.py' +EVAL_TIMEOUT: 180 + +# Resource limits +MAX_MEM_BYTES: 1000000000 +MEM_CHECK_INTERVAL_S: 0.1 + +# Evolution configuration +EVOLVE_CONFIG: {...} + +# LLM ensemble configuration +ENSEMBLE: [...] + +# Auxiliary LLM for meta-prompting +SAMPLER_AUX_LM: {...} + +# Embedding model (if using similarity features) +EMBEDDING: {...} + +# MAP-Elites configuration (if using quality-diversity) +MAP_ELITES: {...} +``` + +### Top-Level Parameters + +| Parameter | Type | Description | Example | +|-----------|------|-------------|---------| +| `SYS_MSG` | string | System message for the LLM containing problem description. Must include `PROMPT-BLOCK-START` and `PROMPT-BLOCK-END` markers | See [System Message](#system-message) | +| `CODEBASE_PATH` | string | Path to source code directory relative to input directory | `'src/'` | +| `INIT_FILE_DATA` | dict | Initial program file information | `{filename: 'init_program.py', language: 'python'}` | +| `EVAL_FILE_NAME` | string | Name of the evaluation script | `'evaluate.py'` | +| `EVAL_TIMEOUT` | int | Maximum execution time in seconds for program evaluation | `180` | +| `MAX_MEM_BYTES` | int | Maximum memory usage in bytes (1GB = 1000000000) | `1000000000` | +| `MEM_CHECK_INTERVAL_S` | float | Interval for memory monitoring in seconds | `0.1` | + +### EVOLVE_CONFIG Parameters + +The `EVOLVE_CONFIG` section controls the evolutionary algorithm: + +```yaml +EVOLVE_CONFIG: + # Fitness and evaluation + fitness_key: 'FITNESS_KEY' # Key in evaluation results to use as fitness + + # Population management + num_epochs: 100 # Number of evolutionary epochs to run + ckpt: 5 # Save checkpoint every N epochs + max_size: 40 # 
Maximum population size per island + init_pop: 6 # Initial population size to generate + + # Evolution strategy + exploration_rate: 0.3 # Probability of exploration vs exploitation (0.0-1.0) + selection_policy: 'roulette' # Parent selection method + selection_kwargs: # Additional selection parameters + roulette_by_rank: true # Use rank-based roulette (vs fitness-based) + + # Termination + early_stopping_rounds: 100 # Stop if no improvement for N epochs + + # Island model (distributed evolution) + num_islands: 5 # Number of parallel islands + migration_topology: 'ring' # How islands are connected + migration_interval: 40 # Migrate solutions every N epochs + migration_rate: 0.1 # Fraction of population to migrate + + # Advanced features + meta_prompting: true # Enable meta-prompting for prompt evolution + use_embedding: false # Use embeddings for solution similarity + use_map_elites: false # Enable MAP-Elites quality-diversity algorithm + num_inspirations: 3 # Number of solutions to use as inspiration + max_chat_depth: 3 # Maximum depth of conversation history +``` + +#### Detailed Parameter Descriptions + +**Fitness and Evaluation:** +- `fitness_key`: Must match a key returned by your `evaluate.py` script. Example: if your evaluate script returns `{"score": 0.95}`, use `fitness_key: 'score'` + +**Population Management:** +- `num_epochs`: Typical range is 50-500 depending on problem complexity +- `ckpt`: Save frequency for checkpoints. 
Lower = more frequent saves +- `max_size`: Larger populations explore more but use more resources (20-100 typical) +- `init_pop`: Start with 5-10 diverse initial solutions + +**Evolution Strategy:** +- `exploration_rate`: 0.3 = 30% exploration (meta-prompting), 70% exploitation (depth refinement) + - Higher values (0.5-0.7): More diverse search, better for hard problems + - Lower values (0.1-0.3): More focused refinement, better when close to optimum + +- `selection_policy`: Choose from: + - `'roulette'`: Probabilistic selection based on fitness/rank + - `'tournament'`: Select best from random subsets (requires `selection_kwargs: {tournament_size: 3}`) + - `'random'`: Uniform random selection + - `'best'`: Always select the best (greedy) + +**Island Model:** +- `num_islands`: More islands = more diverse search but higher cost (1-10 typical) + - Single island (1): Faster, less diverse + - Multiple islands (5-10): Slower, more diverse, better for complex problems + +- `migration_topology`: How islands exchange solutions: + - `'ring'`: Each island connects to 2 neighbors (balanced) + - `'fully_connected'`: All islands connect to all others (maximum mixing) + - `'star'`: Central hub with spokes (centralized) + - `'empty'`: No migration (independent islands) + +- `migration_interval`: How often to migrate (20-50 typical) + - Too frequent: Convergence, loss of diversity + - Too rare: Islands evolve independently + +- `migration_rate`: Fraction to migrate (0.05-0.2 typical) + - 0.1 = send top 10% of population to neighbors + +**Advanced Features:** +- `meta_prompting`: + - `true`: LLM evolves the prompt itself for better solutions + - `false`: Use fixed prompt throughout evolution + - Recommended: `true` for complex problems + +- `use_embedding`: + - `true`: Use semantic embeddings to measure solution similarity + - `false`: Use fitness only + - Requires `EMBEDDING` configuration + +- `use_map_elites`: + - `true`: Use quality-diversity algorithm (explores behavioral 
space) + - `false`: Standard evolutionary algorithm (maximizes single fitness) + - Requires `MAP_ELITES` configuration + +- `num_inspirations`: Number of high-performing solutions to show as examples (0-5 typical) + - 0: No inspiration (pure generation from scratch) + - 1-3: Moderate inspiration (recommended) + - 4+: Heavy inspiration (risk of premature convergence) + +- `max_chat_depth`: How many ancestor solutions to include in context (1-5 typical) + - Higher values: More context but longer prompts + - Lower values: Less context but faster generation + +### ENSEMBLE Configuration + +Define multiple LLM models with weighted selection: + +```yaml +ENSEMBLE: + - model_name: 'GOOGLE_GEMINI-2.5-FLASH' # Model identifier + temp: 0.7 # Temperature (0.0-2.0) + top_p: 0.95 # Nucleus sampling (0.0-1.0) + retries: 3 # Retry attempts on failure + weight: 0.8 # Probability of selecting this model + verify_ssl: false # SSL certificate verification + + - model_name: 'GOOGLE_GEMINI-2.5-PRO' + temp: 0.7 + top_p: 0.95 + retries: 3 + weight: 0.2 # 20% chance vs 80% for FLASH + verify_ssl: false +``` + +**Supported Model Name Formats:** +- Google Gemini: `GOOGLE_GEMINI-2.5-FLASH`, `GOOGLE_GEMINI-2.5-PRO` +- OpenAI: `OPENAI_GPT-4`, `OPENAI_GPT-4-TURBO`, `OPENAI_GPT-3.5-TURBO` +- Azure OpenAI: `AZURE_GPT-4` +- Anthropic: `ANTHROPIC_CLAUDE-3-OPUS`, `ANTHROPIC_CLAUDE-3-SONNET` + +**Parameter Details:** +- `temp`: Controls randomness (0.0 = deterministic, 1.0 = balanced, 2.0 = creative) +- `top_p`: Nucleus sampling threshold (0.95 = top 95% probability mass) +- `weight`: Relative probability (weights are normalized, e.g., 0.8 and 0.2 = 80%/20% split) + +### SAMPLER_AUX_LM Configuration + +Auxiliary LLM for meta-prompting (evolving prompts): + +```yaml +SAMPLER_AUX_LM: + model_name: 'GOOGLE_GEMINI-2.5-FLASH' + temp: 0.7 + top_p: 0.95 + retries: 3 + weight: 1 + verify_ssl: false +``` + +Only used when `meta_prompting: true` in `EVOLVE_CONFIG`. 
+ +### EMBEDDING Configuration + +For computing solution similarity (semantic embeddings): + +```yaml +EMBEDDING: + model_name: 'AZURE_TEXT-EMBEDDING-3-SMALL' + retries: 3 + verify_ssl: false +``` + +**Supported Embedding Models:** +- Azure: `AZURE_TEXT-EMBEDDING-3-SMALL`, `AZURE_TEXT-EMBEDDING-3-LARGE` +- OpenAI: `OPENAI_TEXT-EMBEDDING-3-SMALL`, `OPENAI_TEXT-EMBEDDING-3-LARGE` +- OpenAI (legacy): `OPENAI_TEXT-EMBEDDING-ADA-002` + +Only used when `use_embedding: true` in `EVOLVE_CONFIG`. + +### MAP_ELITES Configuration + +Quality-diversity algorithm exploring behavioral feature space: + +#### Grid-based MAP-Elites + +```yaml +MAP_ELITES: + elite_map_type: 'grid' + features: + - name: 'feature1' # Feature name (must match evaluation output) + min_val: 0.0 # Minimum feature value + max_val: 1.0 # Maximum feature value + num_bins: 10 # Number of bins to discretize feature space + - name: 'feature2' + min_val: -5.0 + max_val: 5.0 + num_bins: 20 +``` + +Creates a grid of `num_bins` × `num_bins` cells. Each cell stores the best solution with features in that range. + +#### CVT-based MAP-Elites (Centroidal Voronoi Tessellation) + +```yaml +MAP_ELITES: + elite_map_type: 'cvt' + features: + - name: 'feature1' + min_val: 0.0 + max_val: 1.0 + - name: 'feature2' + min_val: -5.0 + max_val: 5.0 + elite_map_kwargs: + num_centroids: 50 # Number of Voronoi cells + num_init_samples: 1000 # Samples for CVT initialization + max_iter: 300 # Max iterations for CVT algorithm + tolerance: 0.0001 # Convergence tolerance +``` + +Creates adaptive regions using Voronoi tessellation. Better for high-dimensional feature spaces. + +**When to Use MAP-Elites:** +- Want diverse solutions, not just highest fitness +- Features represent meaningful behavioral characteristics +- Exploring tradeoffs between multiple objectives + +Only used when `use_map_elites: true` in `EVOLVE_CONFIG`. 
+ +### System Message + +The `SYS_MSG` should contain your problem description: + +```yaml +SYS_MSG: | + # PROMPT-BLOCK-START + You are an expert Python programmer. Your task is to write efficient code + that solves the traveling salesman problem for N cities. + + Requirements: + - Implement a function 'solve_tsp(distances)' that takes a distance matrix + - Return a tuple (tour, total_distance) where tour is a list of city indices + - Optimize for solution quality and runtime + - The code will be evaluated on instances with 20-100 cities + + Your code must be within the EVOLVE-BLOCK-START and EVOLVE-BLOCK-END markers. + # PROMPT-BLOCK-END +``` + +**Best Practices:** +- Clearly state the problem and objectives +- Specify input/output format +- Mention any constraints or requirements +- Include evaluation criteria +- Keep it concise but complete + +--- + +## Creating Your Own Problem + +### Step 1: Set Up Directory Structure + +```bash +# Copy the template +cp -r problems/problem_template problems/my_problem + +cd problems/my_problem +``` + +### Step 2: Create Initial Program + +Edit `input/src/init_program.py`: + +```python +# EVOLVE-BLOCK-START +def solve_my_problem(input_data): + """ + Your initial solution here. + This is the starting point for evolution. + """ + # Simple baseline implementation + result = do_something_basic(input_data) + return result +# EVOLVE-BLOCK-END +``` + +**Important:** +- Code must be between `EVOLVE-BLOCK-START` and `EVOLVE-BLOCK-END` markers +- Only code in this block will be evolved +- Can import standard libraries outside the block + +### Step 3: Create Evaluation Script + +Edit `input/evaluate.py`: + +```python +import sys +import json +from importlib import __import__ + +def evaluate(program_path: str, results_path: str) -> None: + """ + Evaluate the evolved program and compute fitness. 
+ """ + # Import the program + module_name = os.path.splitext(os.path.basename(program_path))[0] + program = __import__(module_name) + + # Run your test cases + test_cases = load_test_cases() + scores = [] + + for test_input, expected_output in test_cases: + try: + output = program.solve_my_problem(test_input) + score = compute_score(output, expected_output) + scores.append(score) + except Exception as e: + scores.append(0.0) # Penalize errors + + # Compute final fitness + avg_score = sum(scores) / len(scores) + + # Save results + results = { + "fitness": avg_score, # Main fitness (used by fitness_key) + "individual_scores": scores, # Optional: detailed breakdown + "feature1": compute_feature1(), # Optional: for MAP-Elites + } + + with open(results_path, "w") as f: + json.dump(results, f, indent=4) + +if __name__ == "__main__": + evaluate(sys.argv[1], sys.argv[2]) +``` + +**Key Points:** +- Must accept two arguments: `program_path` and `results_path` +- Must write JSON results to `results_path` +- JSON must include the key specified by `fitness_key` in config +- Higher fitness values should be better +- Handle exceptions gracefully (return low fitness for errors) + +### Step 4: Configure Evolution + +Edit `configs/config.yaml`: + +```yaml +SYS_MSG: | + # PROMPT-BLOCK-START + + # PROMPT-BLOCK-END + +CODEBASE_PATH: 'src/' +INIT_FILE_DATA: {filename: 'init_program.py', language: 'python'} +EVAL_FILE_NAME: 'evaluate.py' +EVAL_TIMEOUT: 180 + +MAX_MEM_BYTES: 2000000000 # 2GB +MEM_CHECK_INTERVAL_S: 0.1 + +EVOLVE_CONFIG: + fitness_key: 'fitness' # Matches key in evaluate.py results + num_epochs: 100 + ckpt: 5 + max_size: 40 + init_pop: 6 + exploration_rate: 0.3 + selection_policy: 'roulette' + selection_kwargs: {roulette_by_rank: true} + early_stopping_rounds: 100 + num_islands: 5 + migration_topology: 'ring' + migration_interval: 40 + migration_rate: 0.1 + meta_prompting: true + use_embedding: false + use_map_elites: false + num_inspirations: 3 + max_chat_depth: 3 + +# 
Add your LLM configuration +ENSEMBLE: [{model_name: 'YOUR_MODEL', temp: 0.7, top_p: 0.95, retries: 3, weight: 1, verify_ssl: false}] +SAMPLER_AUX_LM: {model_name: 'YOUR_MODEL', temp: 0.7, top_p: 0.95, retries: 3, weight: 1, verify_ssl: false} +``` + +### Step 5: Set Up API Keys + +```bash +# Set your API keys as environment variables +export API_KEY="your-api-key-here" +export API_BASE="https://api.your-provider.com/v1" +``` + +### Step 6: Create Run Script + +```bash +cp ../run_template.sh run.sh +nano run.sh # Edit PROJECT_NAME to "my_problem" +``` + +### Step 7: Run Evolution + +```bash +bash run.sh +``` + +--- + +## Advanced Features + +### Using Multiple Configuration Variants + +Create different configs for experiments: + +- **`config_mp_insp.yaml`**: Full features (meta-prompting + inspiration) +- **`config_mp.yaml`**: Meta-prompting only (prompt evolution) +- **`config_insp.yaml`**: Inspiration only (crossover-like behavior) +- **`config_no_mp_or_insp.yaml`**: Basic evolution (depth refinement only) +- **`config_no_evolve.yaml`**: Baseline (no evolution, evaluate initial solution) + +Compare performance across different evolutionary strategies. + +### Resuming from Checkpoints + +To resume evolution from a checkpoint: + +```bash +# In run.sh, set: +LOAD_CKPT=50 # Resume from epoch 50 +``` + +Or via command line: + +```bash +codeevolve --inpt_dir=... --cfg_path=... --out_dir=... --load_ckpt=50 +``` + +### CPU Affinity + +Restrict to specific CPUs for performance isolation: + +```bash +# In run.sh, set: +CPU_LIST="0-7" # Use CPUs 0 through 7 +# or +CPU_LIST="0,2,4,6" # Use specific CPUs +``` + +### Quality-Diversity with MAP-Elites + +For problems where you want diverse solutions exploring different behaviors: + +1. Define behavioral features in your evaluate.py: +```python +results = { + "fitness": overall_score, + "speed": execution_time, # Feature 1 + "memory": memory_usage, # Feature 2 +} +``` + +2. 
Enable MAP-Elites in config: +```yaml +EVOLVE_CONFIG: + use_map_elites: true + +MAP_ELITES: + elite_map_type: 'grid' + features: + - {name: 'speed', min_val: 0, max_val: 10, num_bins: 10} + - {name: 'memory', min_val: 0, max_val: 100, num_bins: 10} +``` + +This creates a 10×10 grid exploring the speed/memory tradeoff space. + +--- + +## Troubleshooting + +### Common Errors + +**Error: "codeevolve command not found"** +```bash +pip install -e . +``` + +**Error: "Input directory does not exist"** +Check your directory structure matches the required format: +```bash +ls problems/YOUR_PROJECT/input/ +ls problems/YOUR_PROJECT/input/src/ +ls problems/YOUR_PROJECT/configs/ +``` + +**Error: "Config file does not exist"** +```bash +# List available configs +ls problems/YOUR_PROJECT/configs/ +# Use exact filename without .yaml in run.sh +``` + +**Error: "API key not set" or "Authentication failed"** + +Three ways to fix: + +1. **Environment variables:** +```bash +export API_KEY="your-key" +export API_BASE="https://api.openai.com/v1" +bash run.sh +``` + +2. **In run.sh file:** +```bash +# Edit run.sh and uncomment/set: +API_KEY="your-api-key-here" +API_BASE="https://api.openai.com/v1" +``` + +3. **External file (recommended):** +```bash +# Create .api_keys file +cp problems/.api_keys.example problems/.api_keys +nano problems/.api_keys # Add your keys + +# In run.sh, uncomment: +source problems/.api_keys +``` + +**Important**: Never commit API keys to version control! + +**Error: "Evaluation timeout"** +Increase `EVAL_TIMEOUT` in config.yaml (seconds): +```yaml +EVAL_TIMEOUT: 300 # 5 minutes +``` + +**Error: "Memory exceeded"** +Increase `MAX_MEM_BYTES` in config.yaml: +```yaml +MAX_MEM_BYTES: 4000000000 # 4GB +``` + +### Performance Tips + +1. **Start small**: Begin with `num_epochs: 20`, `max_size: 20`, `num_islands: 1` for testing +2. **Monitor progress**: Check `experiments/PROJECT/OUTPUT/logs/` for evolution progress +3. 
**Tune exploration**: Increase `exploration_rate` if stuck in local optima +4. **Use inspiration**: Set `num_inspirations: 3` for better solution quality +5. **Enable meta-prompting**: Set `meta_prompting: true` for complex problems + +### Debug Mode + +For more detailed logging: + +```bash +codeevolve --inpt_dir=... --cfg_path=... --out_dir=... --terminal_logging +``` + +### Getting Help + +- Check logs in `experiments/PROJECT/OUTPUT/logs/` +- Review the main README.md in the repository root +- See OPTIMIZATIONS.md for performance tuning +- Create an issue on GitHub for bugs or questions + +--- + +## Configuration Examples + +### Example 1: Simple Problem (TSP) + +```yaml +EVOLVE_CONFIG: + fitness_key: 'tour_length' # Lower is better (negate in evaluate.py) + num_epochs: 50 + max_size: 30 + exploration_rate: 0.3 + meta_prompting: true + num_inspirations: 2 +``` + +### Example 2: Complex Optimization + +```yaml +EVOLVE_CONFIG: + fitness_key: 'score' + num_epochs: 200 + max_size: 50 + exploration_rate: 0.5 + num_islands: 10 + migration_interval: 20 + meta_prompting: true + num_inspirations: 4 +``` + +### Example 3: Quality-Diversity + +```yaml +EVOLVE_CONFIG: + fitness_key: 'performance' + num_epochs: 150 + use_map_elites: true + num_inspirations: 3 + +MAP_ELITES: + elite_map_type: 'cvt' + features: + - {name: 'complexity', min_val: 0, max_val: 100} + - {name: 'novelty', min_val: 0, max_val: 1} + elite_map_kwargs: + num_centroids: 100 +``` + +--- + +## Output Structure + +Results are saved to `experiments/PROJECT_NAME/OUTPUT_NAME/`: + +``` +experiments/PROJECT_NAME/run_20241212_120000/ +├── checkpoints/ +│ ├── epoch_5/ +│ │ ├── sol_db_island_0.pkl +│ │ ├── sol_db_island_1.pkl +│ │ └── ... +│ ├── epoch_10/ +│ └── ... 
+├── logs/ +│ ├── island_0.log +│ ├── island_1.log +│ └── global.log +├── results/ +│ ├── best_solutions.json +│ ├── fitness_progression.csv +│ └── final_population.json +└── config.yaml # Copy of configuration used +``` + +--- + +## Additional Resources + +- **Main README**: Project overview and installation +- **OPTIMIZATIONS.md**: Performance tuning and future improvements +- **Problem Templates**: See `problems/problem_template/` for examples +- **Research Paper**: [CodeEvolve arxiv.org/abs/2510.14150](https://arxiv.org/abs/2510.14150) + +--- + +**Questions? Issues? Feature Requests?** + +Open an issue on GitHub: https://github.com/inter-co/science-codeevolve/issues diff --git a/problems/WHERE_TO_PUT_RUN_SH.md b/problems/WHERE_TO_PUT_RUN_SH.md new file mode 100644 index 0000000..91bd654 --- /dev/null +++ b/problems/WHERE_TO_PUT_RUN_SH.md @@ -0,0 +1,182 @@ +# Where to Put run.sh: Best Practices + +## TL;DR: Put it in the project folder ✅ + +``` +✅ RECOMMENDED: +problems/ + └── YOUR_PROJECT/ + ├── run.sh ← Put it here! + ├── input/ + │ ├── evaluate.py + │ └── src/ + │ └── init_program.py + └── configs/ + └── config.yaml + +❌ NOT RECOMMENDED: +science-codeevolve/ + ├── run.sh ← Don't put it here + └── problems/ + └── YOUR_PROJECT/ + └── ... +``` + +## Why Project Folder is Better + +### ✅ Advantages + +1. **Self-Contained Projects** + - Everything for one project is in one place + - No confusion about which project you're running + +2. **Easy Sharing** + - Share just `problems/YOUR_PROJECT/` folder + - Colleague can drop it in and run immediately + - No need to share entire repository + +3. **Parallel Execution** + ```bash + # Run multiple projects at once + cd problems/project_A && bash run.sh & + cd problems/project_B && bash run.sh & + cd problems/project_C && bash run.sh & + ``` + +4. 
**Project-Specific Settings** + - Each project can have different: + - CPU affinity settings + - Output directories + - Checkpoint policies + - No need to edit global settings + +5. **Simple Workflow** + ```bash + cd problems/YOUR_PROJECT + bash run.sh + ``` + vs + ```bash + # Edit PROJECT_NAME in root run.sh every time + nano run.sh + bash run.sh + ``` + +6. **Version Control** + - Project-specific configs tracked with project + - Easy to see what changed per project + - Better git history + +### ❌ Root Folder Problems + +1. **One Project at a Time** + - Can only run one project + - Must edit PROJECT_NAME each time + +2. **Not Portable** + - Can't share just one project + - Need entire repo structure + +3. **Confusion** + - Which PROJECT_NAME is set? + - Did I remember to change it? + +4. **Conflicts** + - Multiple people can't run different projects + - Git conflicts on single run.sh file + +## How to Set Up + +### Step 1: Copy Template to Project + +```bash +cp problems/run_template.sh problems/YOUR_PROJECT/run.sh +``` + +### Step 2: Edit Project Name + +```bash +cd problems/YOUR_PROJECT +nano run.sh +``` + +Change this line: +```bash +PROJECT_NAME="YOUR_PROJECT" # e.g., "F_time" +``` + +### Step 3: Run + +```bash +# From project folder +cd problems/YOUR_PROJECT +bash run.sh + +# Or from anywhere +bash problems/YOUR_PROJECT/run.sh +``` + +## Alternative: Template at Root (For Reference Only) + +You can keep a template at root for reference, but **copy it to projects before use**: + +``` +science-codeevolve/ + ├── run_template.sh ← Template (don't run directly) + └── problems/ + ├── F_time/ + │ └── run.sh ← Copy template here, customize & run + ├── project_A/ + │ └── run.sh ← Copy template here, customize & run + └── project_B/ + └── run.sh ← Copy template here, customize & run +``` + +## Real-World Example + +### Team Scenario + +**Alice** working on F_time: +```bash +cd problems/F_time +bash run.sh # Runs F_time with its settings +``` + +**Bob** working on 
optimization problem: +```bash +cd problems/optimization +bash run.sh # Runs optimization with its settings +``` + +**Both run simultaneously, no conflicts!** + +### Single User, Multiple Experiments + +```bash +# Terminal 1: Run baseline +cd problems/my_problem +bash run.sh # Uses config.yaml + +# Terminal 2: Run with meta-prompting +cd problems/my_problem +# Edit run.sh to use config_mp.yaml +bash run.sh # Different config, same project + +# Both run at the same time! +``` + +## Summary + +| Aspect | Project Folder | Root Folder | +|--------|---------------|-------------| +| Portability | ✅ Share just project | ❌ Need whole repo | +| Parallel runs | ✅ Multiple at once | ❌ One at a time | +| Clarity | ✅ Always clear | ❌ Edit each time | +| Team work | ✅ No conflicts | ❌ File conflicts | +| Simplicity | ✅ `cd` and run | ❌ Edit then run | + +**Recommendation: Always put run.sh in the project folder.** + +--- + +See `problems/README.md` for complete documentation. diff --git a/problems/arrow_time_sim/configs/config.yaml b/problems/arrow_time_sim/configs/config.yaml new file mode 100644 index 0000000..17a7a0e --- /dev/null +++ b/problems/arrow_time_sim/configs/config.yaml @@ -0,0 +1,98 @@ +# --- WIADOMOŚĆ SYSTEMOWA (SYSTEM PROMPT) --- +SYS_MSG: | + USTAWIENIE: + Ewoluujesz zabawkowy symulator „Strzałki Czasu”. + Czas jest traktowany jako aktywny operator, a nie tylko parametr. + + # PROMPT-BLOCK-START + CEL: + Maksymalizuj fitness_key = combined_score zwracany przez evaluator. + + TWARDY WARUNEK: + - Wolno modyfikować tylko kod pomiędzy: + # EVOLVE-BLOCK-START + # EVOLVE-BLOCK-END + - Kod ma być szybki i stabilny numerycznie (bez NaN/Inf). + + CO JEST „DOBRE” (jak to jest punktowane): + - t powinno w większości rosnąć (mało/brak ujemnych Δt), + - entropia S powinna w większości nie maleć na krokach, gdzie Δt > 0, + - wartości powinny pozostać skończone i ograniczone (bez eksplozji), + - deterministyczność dla tego samego seeda jest mile widziana. 
+ # PROMPT-BLOCK-END + +# --- WYMAGANE ŚCIEŻKI / IO --- +CODEBASE_PATH: "input/src/" +INIT_FILE_DATA: { filename: "initial_program.py", language: "python" } +EVAL_FILE_NAME: "input/evaluate.py" +EVAL_TIMEOUT: 20 + +# --- ZASOBY --- +# Zostawiamy Twoją sekcję RESOURCES, ale dodatkowo dublujemy pola na top-level +# (część runnerów czyta tylko top-level). +RESOURCES: + MAX_MEM_BYTES: 1000000000 + MEM_CHECK_INTERVAL_S: 0.1 + +MAX_MEM_BYTES: 1000000000 +MEM_CHECK_INTERVAL_S: 0.1 + +# --- PARAMETRY EWOLUCJI --- +EVOLVE_CONFIG: + fitness_key: combined_score + num_epochs: 50 + ckpt: 10 + max_size: 100 + init_pop: 6 + exploration_rate: 0.3 + selection_policy: roulette + selection_kwargs: + roulette_by_rank: true + early_stopping_rounds: 100 + num_islands: 4 + migration_topology: ring + migration_interval: 30 + migration_rate: 0.1 + meta_prompting: true + use_embedding: true + use_map_elites: true + num_inspirations: 3 + max_chat_depth: 3 + +# --- ENSEMBLE MODELI (Hybryda: „Poeta + Inżynier”) --- +ENSEMBLE: + - model_name: "rnj-1:8b-cloud" + temp: 0.85 + top_p: 0.95 + retries: 3 + weight: 0.8 + verify_ssl: false + - model_name: "codegemma:2b-code-q4_K_M" + temp: 0.85 + top_p: 0.95 + retries: 3 + weight: 0.2 + verify_ssl: false + +# --- MODELE POMOCNICZE --- +SAMPLER_AUX_LM: + model_name: "deepseek-r1:1.5b" + temp: 0.7 + top_p: 0.95 + retries: 3 + weight: 1 + verify_ssl: false + +EMBEDDING: + model_name: "nomic-embed-text-v2-moe" + retries: 3 + verify_ssl: false + +# --- MAP-ELITES (opcjonalne) --- +MAP_ELITES: + elite_map_type: "grid" + features: + - name: "feat1" + min_val: 0 + max_val: 1 + num_bins: 10 diff --git a/problems/arrow_time_sim/configs/config_edited.yaml b/problems/arrow_time_sim/configs/config_edited.yaml new file mode 100644 index 0000000..fd165fc --- /dev/null +++ b/problems/arrow_time_sim/configs/config_edited.yaml @@ -0,0 +1,64 @@ +SYS_MSG: "USTAWIENIE:\n Optymalizujesz symulator \"Strza\u0142ki Czasu\".\nCEL:\n\ + \ Maksymalizuj fitness_key = 
combined_score zwracany przez evaluate.py.\n" +CODEBASE_PATH: src/ +INIT_FILE_DATA: + filename: initial_program.py + language: python +EVAL_FILE_NAME: evaluate.py +EVAL_TIMEOUT: 20 +RESOURCES: + MAX_MEM_BYTES: 1000000000 + MEM_CHECK_INTERVAL_S: 0.1 +EVOLVE_CONFIG: + fitness_key: combined_score + num_epochs: 50 + ckpt: 10 + max_size: 100 + init_pop: 6 + exploration_rate: 0.3 + selection_policy: roulette + selection_kwargs: + roulette_by_rank: true + early_stopping_rounds: 100 + num_islands: 4 + migration_topology: ring + migration_interval: 30 + migration_rate: 0.1 + meta_prompting: true + use_embedding: true + use_map_elites: true + num_inspirations: 3 + max_chat_depth: 3 +ENSEMBLE: +- model_name: qwen3-coder:480b-cloud + temp: 0.85 + top_p: 0.95 + retries: 3 + weight: 0.8 + verify_ssl: false +- model_name: gemma3:4b + temp: 0.85 + top_p: 0.95 + retries: 3 + weight: 0.2 + verify_ssl: false +SAMPLER_AUX_LM: + model_name: deepseek-r1:1.5b + temp: 0.7 + top_p: 0.95 + retries: 3 + weight: 1 + verify_ssl: false +EMBEDDING: + model_name: embeddinggemma:300m + retries: 3 + verify_ssl: false +MAP_ELITES: + elite_map_type: grid + features: + - name: entropy_monotone_score + min_val: 0 + max_val: 1 + num_bins: 10 +object: /home/rag/Projects/science-codeevolve/problems/arrow_time_sim/input/src +evaluator: /home/rag/Projects/science-codeevolve/problems/arrow_time_sim/input diff --git a/problems/arrow_time_sim/input/evaluate.py b/problems/arrow_time_sim/input/evaluate.py new file mode 100644 index 0000000..877bcfd --- /dev/null +++ b/problems/arrow_time_sim/input/evaluate.py @@ -0,0 +1,143 @@ +import importlib.util +import math +import sys +from contextlib import redirect_stdout, redirect_stderr +from io import StringIO +from pathlib import Path +from typing import Any, Dict, List + + +def _safe_import_module(py_path: Path): + spec = importlib.util.spec_from_file_location("candidate", str(py_path)) + if spec is None or spec.loader is None: + raise RuntimeError(f"Nie mogę 
załadować modułu z pliku: {py_path}") + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) # type: ignore[attr-defined] + return mod + + +def _run_candidate(mod) -> List[Dict[str, float]]: + if not hasattr(mod, "run"): + raise AttributeError("Brak funkcji `run(steps, dt)` w initial_program.py") + traj = mod.run(steps=60, dt=0.1) + if not isinstance(traj, list) or len(traj) < 2: + raise ValueError("`run()` musi zwrócić listę (>=2) stanów.") + return traj + + +def _clamp01(x: float) -> float: + return 0.0 if x < 0.0 else 1.0 if x > 1.0 else x + + +def evaluate(candidate_file: str) -> Dict[str, Any]: + """ + Zwraca słownik metryk. Klucz docelowy: `combined_score`. + + Uwaga: stdout/stderr kandydata są wyciszone dla stabilności i szybkości. + """ + path = Path(candidate_file) + if not path.exists(): + return { + "combined_score": 0.0, + "error": f"Brak pliku kandydata: {candidate_file}", + } + + try: + with redirect_stdout(StringIO()), redirect_stderr(StringIO()): + mod = _safe_import_module(path) + traj = _run_candidate(mod) + + # Oczekiwane pola: t oraz S (reszta może być dowolna). 
+ t_vals = [float(p.get("t", float("nan"))) for p in traj] + S_vals = [float(p.get("S", float("nan"))) for p in traj] + + if any(not math.isfinite(x) for x in t_vals) or any(not math.isfinite(x) for x in S_vals): + raise ValueError("Trajektoria zawiera NaN/Inf w t lub S.") + + # --- Metryki: strzałka czasu --- + dt_vals = [t_vals[i] - t_vals[i - 1] for i in range(1, len(t_vals))] + dS_vals = [S_vals[i] - S_vals[i - 1] for i in range(1, len(S_vals))] + + # 1) „czas do przodu” (penalizuj kroki wstecz / zerowe) + neg_dt = sum(1 for d in dt_vals if d < 0) + zero_dt = sum(1 for d in dt_vals if abs(d) < 1e-12) + time_forward_score = _clamp01(1.0 - (neg_dt + 0.25 * zero_dt) / max(1, len(dt_vals))) + + # 2) monotoniczność entropii przy rosnącym czasie + # (jeśli dt > 0, oczekujemy dS >= 0) + bad_entropy = 0 + checked = 0 + for dti, dSi in zip(dt_vals, dS_vals): + if dti > 0: + checked += 1 + if dSi < -1e-12: + bad_entropy += 1 + entropy_monotone_score = _clamp01(1.0 - bad_entropy / max(1, checked)) + + # 3) entropia nieujemna + min_S = min(S_vals) + entropy_nonneg_score = 1.0 if min_S >= -1e-12 else _clamp01(1.0 / (1.0 + abs(min_S))) + + # 4) ograniczenie „wybuchu” wartości (prosty stabilizator) + max_abs_t = max(abs(x) for x in t_vals) + max_abs_S = max(abs(x) for x in S_vals) + boundedness_score = _clamp01(1.0 / (1.0 + 0.05 * (max_abs_t + max_abs_S))) + + # 5) gładkość (mniejsze „szarpanie” = lepiej) + # używamy średniej z |drugiej różnicy| dla S + ddS = [] + for i in range(2, len(S_vals)): + ddS.append(S_vals[i] - 2 * S_vals[i - 1] + S_vals[i - 2]) + smoothness = sum(abs(x) for x in ddS) / max(1, len(ddS)) + smoothness_score = _clamp01(1.0 / (1.0 + 5.0 * smoothness)) + + # --- Wynik łączny --- + combined_score = ( + 0.30 * time_forward_score + + 0.35 * entropy_monotone_score + + 0.15 * entropy_nonneg_score + + 0.10 * boundedness_score + + 0.10 * smoothness_score + ) + + # `feat1` jest celowo wystawione, żeby Twoje MAP_ELITES z configu działało bez zmian. 
+ return {
+ "combined_score": float(combined_score),
+ "feat1": float(entropy_monotone_score),
+ "time_forward_score": float(time_forward_score),
+ "entropy_monotone_score": float(entropy_monotone_score),
+ "entropy_nonneg_score": float(entropy_nonneg_score),
+ "boundedness_score": float(boundedness_score),
+ "smoothness_score": float(smoothness_score),
+ "min_S": float(min_S),
+ "max_abs_t": float(max_abs_t),
+ "max_abs_S": float(max_abs_S),
+ }
+
+ except Exception as e:
+ return {
+ "combined_score": 0.0,
+ "error": f"{type(e).__name__}: {e}",
+ }
+
+
+import json
+
+def main(argv: list[str] | None = None) -> int:
+ argv = sys.argv if argv is None else argv
+ if len(argv) != 3:
+ print("Usage: python evaluate.py <program_path> <results_path>", file=sys.stderr)
+ return 2
+
+ program_path = argv[1]
+ results_path = argv[2]
+
+ metrics = evaluate(program_path)
+ with open(results_path, "w", encoding="utf-8") as f:
+ json.dump(metrics, f)
+
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/problems/arrow_time_sim/input/src/initial_program.py b/problems/arrow_time_sim/input/src/initial_program.py
new file mode 100644
index 0000000..5da0764
--- /dev/null
+++ b/problems/arrow_time_sim/input/src/initial_program.py
@@ -0,0 +1,55 @@
+# EVOLVE-BLOCK-START
+import math
+from typing import Dict, List
+
+
+def arrow_fields(t: float):
+ """
+ Trzy „pola strzałki czasu” – deterministyczne, gładkie i ograniczone.
+ """
+ t = float(t)
+ A = math.tanh(0.9 * t)
+ B = math.tanh(0.6 * t - 0.8)
+ C = math.tanh(1.1 * t + 0.15 * math.sin(t))
+ return A, B, C
+
+
+def entropy_from_fields(t: float, A: float, B: float, C: float) -> float:
+ """
+ Efektywna entropia S(t):
+ - zawsze >= 0
+ - rośnie wraz z t (dla t rosnącego), czyli implementuje strzałkę czasu
+ """
+ s2 = A * A + B * B + C * C
+ # Składnik liniowy w t gwarantuje monotoniczność przy rosnącym t.
+ S = max(0.0, t + math.log1p(s2)) + return S + + +def run(steps: int = 60, dt: float = 0.1) -> List[Dict[str, float]]: + """ + Zwraca trajektorię jako listę słowników (to oczekuje evaluator). + """ + steps = int(steps) + dt = float(dt) + + t = 0.0 + out: List[Dict[str, float]] = [] + + for _ in range(max(1, steps)): + A, B, C = arrow_fields(t) + S = entropy_from_fields(t, A, B, C) + out.append({"t": t, "A": A, "B": B, "C": C, "S": S}) + t += dt + + return out + + +def main(): + # Lokalny test (nie jest używany przez evaluator). + traj = run(steps=30, dt=0.1) + last = traj[-1] + print("OK. Final:", last) + + +# EVOLVE-BLOCK-END diff --git a/problems/arrow_time_sim/run.sh b/problems/arrow_time_sim/run.sh new file mode 100755 index 0000000..da75918 --- /dev/null +++ b/problems/arrow_time_sim/run.sh @@ -0,0 +1,264 @@ +#!/bin/bash +# ===--------------------------------------------------------------------------------------===# +# +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 +# +# ===--------------------------------------------------------------------------------------===# +# +# Run script for the F_time problem. +# Copied from problems/run_template.sh and customized for this project. 
+#
+# Usage:
+# cd problems/F_time && bash run.sh
+# # or from repo root:
+# bash problems/F_time/run.sh
+#
+# ===--------------------------------------------------------------------------------------===#
+
+# ==================================
+# CONFIGURATION - EDIT THESE VALUES
+# ==================================
+
+# Project name relative to the problems/ directory
+PROJECT_NAME="arrow_time_sim"
+
+# Config file name (without .yaml extension)
+CONFIG_NAME="config"
+
+# Output directory name (will be created under experiments/)
+OUTPUT_NAME="run_$(date +%Y%m%d_%H%M%S)"
+
+# Checkpoint to load (-1 for no checkpoint, or epoch number to resume from)
+LOAD_CKPT=-1
+
+# CPU affinity (leave empty for no restriction, or specify like "0-7" or "0,2,4,6")
+CPU_LIST=""
+
+# ==================================
+# API CONFIGURATION (OPTIONAL)
+# ==================================
+# You can set API credentials here or use environment variables
+# If set here, they will override environment variables
+
+# Option 1: Set API key directly (NOT RECOMMENDED for shared/public projects)
+# API_KEY="your-api-key-here"
+# API_BASE="https://api.openai.com/v1"
+
+# Option 2: Use environment variables (RECOMMENDED)
+# Leave commented out to use existing environment variables
+# Or set them here to override (NOTE: use ':-' for a default value, not ':';
+# never commit a real key here — load secrets via Option 3 instead):
+# export API_KEY="${API_KEY:-your-key-here}"
+# export API_BASE="${API_BASE:-http://localhost:11434/v1}"
+
+# Option 3: Load from external file (MOST SECURE)
+# Create a file with: export API_KEY="..." and export API_BASE="..."
+# Then uncomment the line below:
+#source ~/.codeevolve_api_keys
+
+# ==================================
+# AUTOMATIC PATH SETUP - DO NOT EDIT
+# ==================================
+
+# Get the absolute path to the science-codeevolve directory.
+# We try git first (works from any subdirectory), then fall back to walking
+# up the tree until we find a .git folder. Finally, default to one level up.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if command -v git &> /dev/null; then + REPO_ROOT="$(git -C "${SCRIPT_DIR}" rev-parse --show-toplevel 2>/dev/null)" +fi + +if [ -z "${REPO_ROOT}" ]; then + SEARCH_DIR="${SCRIPT_DIR}" + while [ "${SEARCH_DIR}" != "/" ]; do + if [ -d "${SEARCH_DIR}/.git" ]; then + REPO_ROOT="${SEARCH_DIR}" + break + fi + NEXT_DIR="$(cd "${SEARCH_DIR}/.." && pwd)" + if [ "${NEXT_DIR}" = "${SEARCH_DIR}" ]; then + break + fi + SEARCH_DIR="${NEXT_DIR}" + done +fi + +REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_DIR}/.." && pwd)}" + +# Construct paths based on the standard project structure: +# - Problem base directory: problems/PROJECT_NAME/ +# - initial_program.py: problems/PROJECT_NAME/input/src/ +# - evaluate.py: problems/PROJECT_NAME/input/ +# - config.yaml: problems/PROJECT_NAME/configs/ +BASE_DIR="${REPO_ROOT}/problems/${PROJECT_NAME}" +INPT_DIR="${BASE_DIR}/" +CFG_PATH="${BASE_DIR}/configs/${CONFIG_NAME}.yaml" +OUT_DIR="${REPO_ROOT}/experiments/${PROJECT_NAME}/${OUTPUT_NAME}" + +# ================================== +# VALIDATION +# ================================== + +echo "======================================" +echo "CodeEvolve Run Configuration" +echo "======================================" +echo "Project Name: ${PROJECT_NAME}" +echo "Input Directory: ${INPT_DIR}" +echo "Config File: ${CFG_PATH}" +echo "Output Directory: ${OUT_DIR}" +echo "Load Checkpoint: ${LOAD_CKPT}" +echo "CPU List: ${CPU_LIST:-'(all CPUs)'}" +echo "======================================" +echo "" + +# Check if required directories and files exist +if [ ! -d "${INPT_DIR}" ]; then + echo "ERROR: Input directory does not exist: ${INPT_DIR}" + echo "Expected structure: problems/${PROJECT_NAME}/" + exit 1 +fi + +if [ ! -f "${CFG_PATH}" ]; then + echo "ERROR: Config file does not exist: ${CFG_PATH}" + echo "Available configs in ${BASE_DIR}/configs/:" + ls -1 "${BASE_DIR}/configs/" 2>/dev/null || echo " (directory not found)" + exit 1 +fi + +if [ ! 
-f "${INPT_DIR}/input/evaluate.py" ]; then + echo "ERROR: evaluate.py not found in ${INPT_DIR}/input/" + echo "Expected: ${INPT_DIR}/input/evaluate.py" + exit 1 +fi + +if [ ! -f "${INPT_DIR}/input/src/initial_program.py" ] && [ ! -f "${INPT_DIR}/input/src/init_program.py" ]; then + echo "WARNING: No initial program found in ${INPT_DIR}/input/src/" + echo "Expected one of:" + echo " - ${INPT_DIR}/input/src/initial_program.py (default)" + echo " - ${INPT_DIR}/input/src/init_program.py (legacy)" +fi + +# Check if codeevolve command is available +CODEEVOLVE_CMD=() + +# Prefer a repo-local conda env if present (works even when not activated). +REPO_CONDA_PY="${REPO_ROOT}/.conda/bin/python" +REPO_CONDA_CODEEVOLVE="${REPO_ROOT}/.conda/bin/codeevolve" + +PYTHON_BIN="" +if [ -n "${CODEEVOLVE_PYTHON}" ] && [ -x "${CODEEVOLVE_PYTHON}" ]; then + PYTHON_BIN="${CODEEVOLVE_PYTHON}" +elif [ -x "${REPO_CONDA_PY}" ]; then + PYTHON_BIN="${REPO_CONDA_PY}" +elif command -v python &> /dev/null; then + PYTHON_BIN="python" +elif command -v python3 &> /dev/null; then + PYTHON_BIN="python3" +fi + +if command -v codeevolve &> /dev/null; then + CODEEVOLVE_CMD=(codeevolve) +elif [ -x "${REPO_CONDA_CODEEVOLVE}" ]; then + CODEEVOLVE_CMD=("${REPO_CONDA_CODEEVOLVE}") +else + # Fall back to running the module directly from the repo. + # This avoids requiring an editable install just to run a local experiment. + if [ -z "${PYTHON_BIN}" ]; then + echo "ERROR: Neither 'codeevolve' nor a usable Python interpreter was found." 
+ echo "Expected one of: codeevolve in PATH, ${REPO_CONDA_CODEEVOLVE}, python/python3 in PATH, or CODEEVOLVE_PYTHON=/path/to/python" + exit 1 + fi + + export PYTHONPATH="${REPO_ROOT}/src:${PYTHONPATH}" + CODEEVOLVE_CMD=("${PYTHON_BIN}" -m codeevolve.cli) + echo "NOTE: 'codeevolve' CLI not found; using: ${PYTHON_BIN} -m codeevolve.cli" +fi + +# Create output directory +mkdir -p "${OUT_DIR}" + +# ================================== +# API KEY SETUP +# ================================== + +# Export API keys if they were set in the configuration section above +if [ ! -z "${API_KEY}" ]; then + export API_KEY + echo "Using API_KEY from run script configuration" +fi + +if [ ! -z "${API_BASE}" ]; then + export API_BASE + echo "Using API_BASE from run script: ${API_BASE}" +fi + +# Check if API keys are available (from any source) + +# The CodeEvolve CLI currently requires both variables to exist in the environment. +# For local/self-hosted endpoints, API_KEY is often unused; exporting it as an empty +# string is sufficient. +if [ -z "${API_BASE+x}" ]; then + export API_BASE="http://localhost:11434/v1" + echo "NOTE: API_BASE not set; defaulting to ${API_BASE}" +fi + +if [ -z "${API_KEY+x}" ]; then + export API_KEY="ollama" + echo "NOTE: API_KEY not set; defaulting to ${API_KEY}" +fi + +if [ -z "${API_KEY}" ]; then + echo "WARNING: API_KEY is empty. The run may fail if your LLM requires authentication." + echo "Set it via:" + echo " 1. Environment variable: export API_KEY='your-key'" + echo " 2. In this run.sh file (see API CONFIGURATION section)" + echo " 3. External file: source ~/.codeevolve_api_keys" + echo "" +fi + +# ================================== +# RUN CODEEVOLVE +# ================================== + +echo "Starting CodeEvolve..." 
+echo "" + +CODEEVOLVE_ARGS=( + --inpt_dir="${INPT_DIR}" + --cfg_path="${CFG_PATH}" + --out_dir="${OUT_DIR}" + --load_ckpt="${LOAD_CKPT}" + --terminal_logging +) + +if [ -n "${CPU_LIST}" ]; then + # Run with CPU affinity + if command -v taskset &> /dev/null; then + taskset --cpu-list "${CPU_LIST}" "${CODEEVOLVE_CMD[@]}" "${CODEEVOLVE_ARGS[@]}" + else + echo "WARNING: 'taskset' not found; running without CPU affinity." + "${CODEEVOLVE_CMD[@]}" "${CODEEVOLVE_ARGS[@]}" + fi +else + # Run without CPU affinity + "${CODEEVOLVE_CMD[@]}" "${CODEEVOLVE_ARGS[@]}" +fi + +# ================================== +# COMPLETION +# ================================== + +EXIT_CODE=$? +echo "" +echo "======================================" +if [ ${EXIT_CODE} -eq 0 ]; then + echo "CodeEvolve completed successfully!" + echo "Results saved to: ${OUT_DIR}" +else + echo "CodeEvolve exited with error code: ${EXIT_CODE}" +fi +echo "======================================" + +exit ${EXIT_CODE} diff --git a/problems/problem_template/configs/config_mp.yaml b/problems/problem_template/configs/config_mp.yaml index 54517c9..a6beb2b 100644 --- a/problems/problem_template/configs/config_mp.yaml +++ b/problems/problem_template/configs/config_mp.yaml @@ -25,6 +25,12 @@ ENSEMBLE: [{model_name: 'GOOGLE_GEMINI-2.5-FLASH', temp: 0.7, top_p: 0.95, retri SAMPLER_AUX_LM: {model_name: 'GOOGLE_GEMINI-2.5-FLASH', temp: 0.7, top_p: 0.95, retries: 3, weight: 1, verify_ssl: False} +NOVEL_AGENT: + enabled: False + exploration_rate: 0.25 + max_inspirations: 3 + lm: {model_name: 'GOOGLE_GEMINI-2.5-PRO', temp: 0.9, top_p: 0.95, retries: 3, weight: 1, verify_ssl: False} + EMBEDDING: {model_name: 'AZURE_TEXT-EMBEDDING-3-SMALL', retries: 3, verify_ssl: False} MAP_ELITES: {elite_map_type: 'grid', @@ -32,6 +38,25 @@ MAP_ELITES: {elite_map_type: 'grid', {name: 'feat1', min_val: 0, max_val: 1, num_bins: 10} ]} +CLIMATE: + enabled: False + seasons: [perpetual, monsoon, dry, polar] + season_length: 8 + function_pool: [len, sum, 
min, max, sorted, enumerate, zip, range, map, filter] + hot_fraction: 0.5 + survival_weight: 0.25 + neutral_baseline: 0.5 + +ADVERSARIAL: + enabled: False + teams: [red, blue] + cross_eval_interval: 5 + opponents_per_eval: 2 + fitness_metric: win_rate # options: win_rate, elo, hybrid + base_fitness_weight: 0.25 # only used when fitness_metric is hybrid + elo_k: 24 + alternating_phases: True + # MAP_ELITES: {elite_map_type: 'cvt', # features: [ # {name: 'feat1', min_val: 0, max_val: 1} diff --git a/problems/problem_template/run.sh b/problems/problem_template/run.sh new file mode 100755 index 0000000..9574ca4 --- /dev/null +++ b/problems/problem_template/run.sh @@ -0,0 +1,225 @@ +#!/bin/bash +# ===--------------------------------------------------------------------------------------===# +# +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 +# +# ===--------------------------------------------------------------------------------------===# +# +# Generic template for running CodeEvolve on any project in the problems directory. +# +# BEST PRACTICE: Copy this to your project folder (problems/YOUR_PROJECT/run.sh) +# This keeps everything self-contained and portable. +# +# Usage: +# 1. Copy this template to your project directory: +# cp problems/run_template.sh problems/YOUR_PROJECT/run.sh +# 2. Edit and set PROJECT_NAME to your project path (relative to problems/) +# 3. Adjust CONFIG_NAME if using a different config file +# 4. 
Run from your project folder: +# cd problems/YOUR_PROJECT && bash run.sh +# +# ===--------------------------------------------------------------------------------------===# + +# ================================== +# CONFIGURATION - EDIT THESE VALUES +# ================================== + +# Project name relative to the problems/ directory +# Examples: +# - "F_time" +# - "alphaevolve_math_problems/circle_packing_square/26" +# - "problem_template" +PROJECT_NAME="problem_template" + +# Config file name (without .yaml extension) +# Common options: config, config_mp_insp, config_insp, config_mp, config_no_evolve +CONFIG_NAME="config_mp_insp" + +# Output directory name (will be created under experiments/) +OUTPUT_NAME="run_$(date +%Y%m%d_%H%M%S)" + +# Checkpoint to load (-1 for no checkpoint, or epoch number to resume from) +LOAD_CKPT=-1 + +# CPU affinity (leave empty for no restriction, or specify like "0-7" or "0,2,4,6") +CPU_LIST="" + +# ================================== +# API CONFIGURATION (OPTIONAL) +# ================================== +# You can set API credentials here or use environment variables +# If set here, they will override environment variables + +# Option 1: Set API key directly (NOT RECOMMENDED for shared/public projects) +# API_KEY="your-api-key-here" +# API_BASE="https://api.openai.com/v1" + +# Option 2: Use environment variables (RECOMMENDED) +# Leave commented out to use existing environment variables +# Or set them here to override: +# export API_KEY="${API_KEY:-your-default-key}" +# export API_BASE="${API_BASE:-https://api.openai.com/v1}" + +# Option 3: Load from external file (MOST SECURE) +# Create a file with: export API_KEY="..." and export API_BASE="..." +# Then uncomment the line below: +# source ~/.codeevolve_api_keys + +# ================================== +# AUTOMATIC PATH SETUP - DO NOT EDIT +# ================================== + +# Get the absolute path to the science-codeevolve directory. 
+# We try git first (works from any subdirectory), then fall back to walking +# up the tree until we find a .git folder. Finally, default to one level up. +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if command -v git &> /dev/null; then + REPO_ROOT="$(git -C "${SCRIPT_DIR}" rev-parse --show-toplevel 2>/dev/null)" +fi + +if [ -z "${REPO_ROOT}" ]; then + SEARCH_DIR="${SCRIPT_DIR}" + while [ "${SEARCH_DIR}" != "/" ]; do + if [ -d "${SEARCH_DIR}/.git" ]; then + REPO_ROOT="${SEARCH_DIR}" + break + fi + NEXT_DIR="$(cd "${SEARCH_DIR}/.." && pwd)" + if [ "${NEXT_DIR}" = "${SEARCH_DIR}" ]; then + break + fi + SEARCH_DIR="${NEXT_DIR}" + done +fi + +REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_DIR}/.." && pwd)}" + +# Construct paths based on the standard project structure: +# - init_program.py is always in: problems/PROJECT_NAME/input/src/ +# - evaluate.py is always in: problems/PROJECT_NAME/input/ +# - config.yaml is in: problems/PROJECT_NAME/configs/ +BASE_DIR="${REPO_ROOT}/problems/${PROJECT_NAME}" +INPT_DIR="${BASE_DIR}/input/" +CFG_PATH="${BASE_DIR}/configs/${CONFIG_NAME}.yaml" +OUT_DIR="${REPO_ROOT}/experiments/${PROJECT_NAME}/${OUTPUT_NAME}" + +# ================================== +# VALIDATION +# ================================== + +echo "======================================" +echo "CodeEvolve Run Configuration" +echo "======================================" +echo "Project Name: ${PROJECT_NAME}" +echo "Input Directory: ${INPT_DIR}" +echo "Config File: ${CFG_PATH}" +echo "Output Directory: ${OUT_DIR}" +echo "Load Checkpoint: ${LOAD_CKPT}" +echo "CPU List: ${CPU_LIST:-'(all CPUs)'}" +echo "======================================" +echo "" + +# Check if required directories and files exist +if [ ! -d "${INPT_DIR}" ]; then + echo "ERROR: Input directory does not exist: ${INPT_DIR}" + echo "Expected structure: problems/${PROJECT_NAME}/input/" + exit 1 +fi + +if [ ! 
-f "${CFG_PATH}" ]; then + echo "ERROR: Config file does not exist: ${CFG_PATH}" + echo "Available configs in ${BASE_DIR}/configs/:" + ls -1 "${BASE_DIR}/configs/" 2>/dev/null || echo " (directory not found)" + exit 1 +fi + +if [ ! -f "${INPT_DIR}/evaluate.py" ]; then + echo "ERROR: evaluate.py not found in ${INPT_DIR}" + echo "Expected: ${INPT_DIR}/evaluate.py" + exit 1 +fi + +if [ ! -f "${INPT_DIR}/src/init_program.py" ]; then + echo "WARNING: init_program.py not found in ${INPT_DIR}/src/" + echo "Expected: ${INPT_DIR}/src/init_program.py" +fi + +# Check if codeevolve command is available +if ! command -v codeevolve &> /dev/null; then + echo "ERROR: codeevolve command not found. Please install the package:" + echo " pip install -e ." + exit 1 +fi + +# Create output directory +mkdir -p "${OUT_DIR}" + +# ================================== +# API KEY SETUP +# ================================== + +# Export API keys if they were set in the configuration section above +if [ ! -z "${API_KEY}" ]; then + export API_KEY + echo "Using API_KEY from run script configuration" +fi + +if [ ! -z "${API_BASE}" ]; then + export API_BASE + echo "Using API_BASE from run script: ${API_BASE}" +fi + +# Check if API keys are available (from any source) +if [ -z "${API_KEY}" ]; then + echo "WARNING: API_KEY is not set. The run may fail if your LLM requires authentication." + echo "Set it via:" + echo " 1. Environment variable: export API_KEY='your-key'" + echo " 2. In this run.sh file (see API CONFIGURATION section)" + echo " 3. External file: source ~/.codeevolve_api_keys" + echo "" +fi + +# ================================== +# RUN CODEEVOLVE +# ================================== + +echo "Starting CodeEvolve..." 
+echo "" + +if [ -n "${CPU_LIST}" ]; then + # Run with CPU affinity + taskset --cpu-list "${CPU_LIST}" codeevolve \ + --inpt_dir="${INPT_DIR}" \ + --cfg_path="${CFG_PATH}" \ + --out_dir="${OUT_DIR}" \ + --load_ckpt="${LOAD_CKPT}" \ + --terminal_logging +else + # Run without CPU affinity + codeevolve \ + --inpt_dir="${INPT_DIR}" \ + --cfg_path="${CFG_PATH}" \ + --out_dir="${OUT_DIR}" \ + --load_ckpt="${LOAD_CKPT}" \ + --terminal_logging +fi + +# ================================== +# COMPLETION +# ================================== + +EXIT_CODE=$? +echo "" +echo "======================================" +if [ ${EXIT_CODE} -eq 0 ]; then + echo "CodeEvolve completed successfully!" + echo "Results saved to: ${OUT_DIR}" +else + echo "CodeEvolve exited with error code: ${EXIT_CODE}" +fi +echo "======================================" + +exit ${EXIT_CODE} diff --git a/problems/run_template.sh b/problems/run_template.sh new file mode 100755 index 0000000..00fe7e1 --- /dev/null +++ b/problems/run_template.sh @@ -0,0 +1,225 @@ +#!/bin/bash +# ===--------------------------------------------------------------------------------------===# +# +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 +# +# ===--------------------------------------------------------------------------------------===# +# +# Generic template for running CodeEvolve on any project in the problems directory. +# +# BEST PRACTICE: Copy this to your project folder (problems/YOUR_PROJECT/run.sh) +# This keeps everything self-contained and portable. +# +# Usage: +# 1. Copy this template to your project directory: +# cp problems/run_template.sh problems/YOUR_PROJECT/run.sh +# 2. Edit and set PROJECT_NAME to your project path (relative to problems/) +# 3. Adjust CONFIG_NAME if using a different config file +# 4. 
Run from your project folder: +# cd problems/YOUR_PROJECT && bash run.sh +# +# ===--------------------------------------------------------------------------------------===# + +# ================================== +# CONFIGURATION - EDIT THESE VALUES +# ================================== + +# Project name relative to the problems/ directory +# Examples: +# - "F_time" +# - "alphaevolve_math_problems/circle_packing_square/26" +# - "problem_template" +PROJECT_NAME="F_time" + +# Config file name (without .yaml extension) +# Common options: config, config_mp_insp, config_insp, config_mp, config_no_evolve +CONFIG_NAME="config" + +# Output directory name (will be created under experiments/) +OUTPUT_NAME="run_$(date +%Y%m%d_%H%M%S)" + +# Checkpoint to load (-1 for no checkpoint, or epoch number to resume from) +LOAD_CKPT=-1 + +# CPU affinity (leave empty for no restriction, or specify like "0-7" or "0,2,4,6") +CPU_LIST="" + +# ================================== +# API CONFIGURATION (OPTIONAL) +# ================================== +# You can set API credentials here or use environment variables +# If set here, they will override environment variables + +# Option 1: Set API key directly (NOT RECOMMENDED for shared/public projects) +# API_KEY="your-api-key-here" +# API_BASE="https://api.openai.com/v1" + +# Option 2: Use environment variables (RECOMMENDED) +# Leave commented out to use existing environment variables +# Or set them here to override: +# export API_KEY="${API_KEY:-your-default-key}" +# export API_BASE="${API_BASE:-https://api.openai.com/v1}" + +# Option 3: Load from external file (MOST SECURE) +# Create a file with: export API_KEY="..." and export API_BASE="..." +# Then uncomment the line below: +# source ~/.codeevolve_api_keys + +# ================================== +# AUTOMATIC PATH SETUP - DO NOT EDIT +# ================================== + +# Get the absolute path to the science-codeevolve directory. 
+# We try git first (works from any subdirectory), then fall back to walking +# up the tree until we find a .git folder. Finally, default to one level up. +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if command -v git &> /dev/null; then + REPO_ROOT="$(git -C "${SCRIPT_DIR}" rev-parse --show-toplevel 2>/dev/null)" +fi + +if [ -z "${REPO_ROOT}" ]; then + SEARCH_DIR="${SCRIPT_DIR}" + while [ "${SEARCH_DIR}" != "/" ]; do + if [ -d "${SEARCH_DIR}/.git" ]; then + REPO_ROOT="${SEARCH_DIR}" + break + fi + NEXT_DIR="$(cd "${SEARCH_DIR}/.." && pwd)" + if [ "${NEXT_DIR}" = "${SEARCH_DIR}" ]; then + break + fi + SEARCH_DIR="${NEXT_DIR}" + done +fi + +REPO_ROOT="${REPO_ROOT:-$(cd "${SCRIPT_DIR}/.." && pwd)}" + +# Construct paths based on the standard project structure: +# - init_program.py is always in: problems/PROJECT_NAME/input/src/ +# - evaluate.py is always in: problems/PROJECT_NAME/input/ +# - config.yaml is in: problems/PROJECT_NAME/configs/ +BASE_DIR="${REPO_ROOT}/problems/${PROJECT_NAME}" +INPT_DIR="${BASE_DIR}/input/" +CFG_PATH="${BASE_DIR}/configs/${CONFIG_NAME}.yaml" +OUT_DIR="${REPO_ROOT}/experiments/${PROJECT_NAME}/${OUTPUT_NAME}" + +# ================================== +# VALIDATION +# ================================== + +echo "======================================" +echo "CodeEvolve Run Configuration" +echo "======================================" +echo "Project Name: ${PROJECT_NAME}" +echo "Input Directory: ${INPT_DIR}" +echo "Config File: ${CFG_PATH}" +echo "Output Directory: ${OUT_DIR}" +echo "Load Checkpoint: ${LOAD_CKPT}" +echo "CPU List: ${CPU_LIST:-'(all CPUs)'}" +echo "======================================" +echo "" + +# Check if required directories and files exist +if [ ! -d "${INPT_DIR}" ]; then + echo "ERROR: Input directory does not exist: ${INPT_DIR}" + echo "Expected structure: problems/${PROJECT_NAME}/input/" + exit 1 +fi + +if [ ! 
-f "${CFG_PATH}" ]; then + echo "ERROR: Config file does not exist: ${CFG_PATH}" + echo "Available configs in ${BASE_DIR}/configs/:" + ls -1 "${BASE_DIR}/configs/" 2>/dev/null || echo " (directory not found)" + exit 1 +fi + +if [ ! -f "${INPT_DIR}/evaluate.py" ]; then + echo "ERROR: evaluate.py not found in ${INPT_DIR}" + echo "Expected: ${INPT_DIR}/evaluate.py" + exit 1 +fi + +if [ ! -f "${INPT_DIR}/src/init_program.py" ]; then + echo "WARNING: init_program.py not found in ${INPT_DIR}/src/" + echo "Expected: ${INPT_DIR}/src/init_program.py" +fi + +# Check if codeevolve command is available +if ! command -v codeevolve &> /dev/null; then + echo "ERROR: codeevolve command not found. Please install the package:" + echo " pip install -e ." + exit 1 +fi + +# Create output directory +mkdir -p "${OUT_DIR}" + +# ================================== +# API KEY SETUP +# ================================== + +# Export API keys if they were set in the configuration section above +if [ ! -z "${API_KEY}" ]; then + export API_KEY + echo "Using API_KEY from run script configuration" +fi + +if [ ! -z "${API_BASE}" ]; then + export API_BASE + echo "Using API_BASE from run script: ${API_BASE}" +fi + +# Check if API keys are available (from any source) +if [ -z "${API_KEY}" ]; then + echo "WARNING: API_KEY is not set. The run may fail if your LLM requires authentication." + echo "Set it via:" + echo " 1. Environment variable: export API_KEY='your-key'" + echo " 2. In this run.sh file (see API CONFIGURATION section)" + echo " 3. External file: source ~/.codeevolve_api_keys" + echo "" +fi + +# ================================== +# RUN CODEEVOLVE +# ================================== + +echo "Starting CodeEvolve..." 
+echo "" + +if [ -n "${CPU_LIST}" ]; then + # Run with CPU affinity + taskset --cpu-list "${CPU_LIST}" codeevolve \ + --inpt_dir="${INPT_DIR}" \ + --cfg_path="${CFG_PATH}" \ + --out_dir="${OUT_DIR}" \ + --load_ckpt="${LOAD_CKPT}" \ + --terminal_logging +else + # Run without CPU affinity + codeevolve \ + --inpt_dir="${INPT_DIR}" \ + --cfg_path="${CFG_PATH}" \ + --out_dir="${OUT_DIR}" \ + --load_ckpt="${LOAD_CKPT}" \ + --terminal_logging +fi + +# ================================== +# COMPLETION +# ================================== + +EXIT_CODE=$? +echo "" +echo "======================================" +if [ ${EXIT_CODE} -eq 0 ]; then + echo "CodeEvolve completed successfully!" + echo "Results saved to: ${OUT_DIR}" +else + echo "CodeEvolve exited with error code: ${EXIT_CODE}" +fi +echo "======================================" + +exit ${EXIT_CODE} diff --git a/pyproject.toml b/pyproject.toml index 17fbe60..bcbb950 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "codeevolve" dynamic = ["version"] description = "Source code for CodeEvolve." readme = "README.md" -requires-python = ">=3.13.5" +requires-python = ">=3.10" license = "Apache-2.0" authors = [ {name = "Inter Science"} diff --git a/scripts/interactive_launcher.py b/scripts/interactive_launcher.py new file mode 100644 index 0000000..10a7791 --- /dev/null +++ b/scripts/interactive_launcher.py @@ -0,0 +1,215 @@ +"""Interactive setup helper for Science CodeEvolve. + +This script guides you through selecting the project object, evaluator, +configuration, and optional overrides. All provided paths are expanded to +absolute paths so they can be passed directly to other tooling. 
+""" +from __future__ import annotations + +import json +import os +from pathlib import Path +from typing import Any, Dict, Mapping + +import yaml + + +def prompt_path( + prompt: str, must_exist: bool = False, create_parents: bool = False, default: str | None = None +) -> Path: + """Prompt the user for a path and return it as an absolute ``Path``. + + Args: + prompt: The text to display to the user. + must_exist: Whether the path must already exist. + create_parents: Whether to create parent directories if they do not exist. + default: Optional default value to use when the user presses enter. + """ + while True: + suffix = f" [{default}]" if default else "" + raw_value = input(f"{prompt}{suffix}: ").strip().strip('"') + if not raw_value and default: + raw_value = default + expanded = Path(raw_value).expanduser().resolve() + + if must_exist and not expanded.exists(): + print(f"✖ Path does not exist: {expanded}") + continue + + if create_parents and not expanded.parent.exists(): + expanded.parent.mkdir(parents=True, exist_ok=True) + + return expanded + + +def yes_no(question: str, default: bool = True) -> bool: + suffix = "[Y/n]" if default else "[y/N]" + while True: + answer = input(f"{question} {suffix}: ").strip().lower() + if not answer: + return default + if answer in {"y", "yes"}: + return True + if answer in {"n", "no"}: + return False + print("Please answer with 'y' or 'n'.") + + +def parse_scalar(value: str) -> Any: + """Best-effort parsing that turns simple strings into numbers/bools when possible.""" + + lowered = value.lower() + if lowered in {"true", "false"}: + return lowered == "true" + try: + if "." in value: + return float(value) + return int(value) + except ValueError: + return value + + +def prompt_overrides() -> Dict[str, Any]: + print("Enter any configuration overrides you want to inject.") + print("Leave the key empty to finish. 
Values are recorded as typed (numbers/bools auto-detected).") + overrides: Dict[str, Any] = {} + while True: + key = input("Override key (blank to stop): ").strip() + if not key: + break + value = input("Value: ").strip() + overrides[key] = parse_scalar(value) + return overrides + + +def print_conda_hint() -> None: + """Remind the user how to prepare the conda environment.""" + + environment_yml = Path("environment.yml").resolve() + current_env = os.environ.get("CONDA_DEFAULT_ENV") + + if current_env == "codeevolve": + print("✅ Conda environment detected: codeevolve") + return + + print("⚠️ Tip: activate the recommended conda env before running heavy jobs.") + if environment_yml.exists(): + print(f" conda env create -f {environment_yml}") + print(" conda activate codeevolve") + if current_env: + print(f" (currently in '{current_env}'—switch if needed)") + + +def load_config(path: Path) -> Dict[str, Any]: + """Load YAML or JSON config into a dictionary.""" + + text = path.read_text(encoding="utf-8") + if not text.strip(): + return {} + + if path.suffix.lower() in {".yml", ".yaml"}: + return yaml.safe_load(text) or {} + return json.loads(text) + + +def save_config(payload: Mapping[str, Any], path: Path) -> None: + """Save the config as YAML or JSON based on extension.""" + + if path.suffix.lower() in {".yml", ".yaml"}: + path.write_text(yaml.safe_dump(dict(payload), sort_keys=False), encoding="utf-8") + else: + path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + + +def edit_mapping(mapping: Mapping[str, Any]) -> Dict[str, Any]: + """Prompt the user to tweak each value in a mapping.""" + + updated: Dict[str, Any] = {} + for key, value in mapping.items(): + if isinstance(value, Mapping): + print(f"\n➡️ Section: {key}") + updated[key] = edit_mapping(value) + continue + + new_value = input(f"{key} [{value!r}] (enter to keep): ").strip() + updated[key] = value if not new_value else parse_scalar(new_value) + + return updated + + +def build_config_payload( + 
object_path: Path, evaluator_path: Path, base_config: Dict[str, Any] | None, allow_edit: bool +) -> Dict[str, Any]: + """Combine object/evaluator paths with existing or new configuration content.""" + + config_data: Dict[str, Any] = base_config.copy() if base_config else {} + config_data["object"] = str(object_path) + config_data["evaluator"] = str(evaluator_path) + + if allow_edit: + if config_data: + print("\nLet's walk through the current config; press enter to keep any value.") + config_data = edit_mapping(config_data) + else: + print("Skipping per-parameter edits; you can adjust later by editing the saved file.") + + extra_overrides = prompt_overrides() if yes_no("Add quick overrides on top?", default=False) else {} + config_data.update(extra_overrides) + return config_data + + +def main() -> None: + print("🚀 Welcome to the Science CodeEvolve interactive launcher!") + print("You'll be prompted for paths, optional configuration tweaks, and environment tips.") + + print_conda_hint() + print() + + object_path = prompt_path("Path to the object you want to process", must_exist=True) + evaluator_path = prompt_path("Path to the evaluator (script or module)", must_exist=True) + + use_existing_config = yes_no("Do you want to start from an existing config?", default=True) + base_config: Dict[str, Any] | None = None + if use_existing_config: + config_path = prompt_path("Path to existing config file", must_exist=True) + try: + base_config = load_config(config_path) + print("Loaded existing config; we'll keep a backup untouched.") + except Exception as exc: # noqa: BLE001 - we want to show friendly failure + print(f"✖ Could not read config: {exc}") + return + + default_save = config_path + if yes_no("Save edits to a new file so the original stays pristine?", default=True): + default_save = config_path.with_name(f"{config_path.stem}_edited{config_path.suffix}") + save_path = prompt_path( + "Where should we save the updated config?", create_parents=True, 
default=str(default_save)
+        )
+    else:
+        base_config = {}
+        save_path = prompt_path(
+            "Path to save the new config (e.g., configs/generated_config.yaml)",
+            must_exist=False,
+            create_parents=True,
+        )
+
+    allow_edit = yes_no("Would you like to fill in each parameter (one-by-one)?", default=True)
+
+    config_payload = build_config_payload(object_path, evaluator_path, base_config, allow_edit)
+
+    save_config(config_payload, save_path)
+    print(f"\n💾 Saved config to: {save_path}")
+    print("All paths have been expanded to absolute locations.")
+
+    print("\nReady for launch! Suggested next steps:")
+    print("  1) conda activate codeevolve")
+    print(f"  2) Point your run command to: {save_path}")
+    print("     (edit the file manually later if you want more tweaks)")
+    print("\nThanks for using the launcher—happy experimenting!")
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\nInterrupted by user. Bye!")
diff --git a/scripts/run.sh b/scripts/run.sh
old mode 100644
new mode 100755
index cb1bd1f..840164b
--- a/scripts/run.sh
+++ b/scripts/run.sh
@@ -1,23 +1,196 @@
+#!/usr/bin/env bash
 # ===--------------------------------------------------------------------------------------===#
 #
-# Part of the CodeEvolve Project, under the Apache License v2.0.
-# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information.
-# SPDX-License-Identifier: Apache-2.0
-#
-# ===--------------------------------------------------------------------------------------===#
-#
-# This file provides a template for executing CodeEvolve in the terminal using bash.
+# CodeEvolve Linux runner.
+# Fill in your problem name (or pass it as the first argument) and this script
+# will point CodeEvolve at the correct input, config, and output folders. 
 #
 # ===--------------------------------------------------------------------------------------===#
+set -euo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+REPO_ROOT="$(cd -- "$SCRIPT_DIR/.." && pwd -P)"
+
+mapfile -t AVAILABLE_PROBLEMS < <(find "$REPO_ROOT/problems" -maxdepth 1 -mindepth 1 -type d -printf '%f\n' | sort)
+
+if ((${#AVAILABLE_PROBLEMS[@]} == 0)); then
+  echo "✖ No problems found in $REPO_ROOT/problems" >&2
+  exit 1
+fi
+
+if [[ $# -gt 0 ]]; then
+  PROBLEM_NAME="$1"
+else
+  echo "Available problems:"
+  for p in "${AVAILABLE_PROBLEMS[@]}"; do
+    echo "  - $p"
+  done
+  DEFAULT_PROBLEM="${AVAILABLE_PROBLEMS[0]}"
+  read -r -p "Choose problem [${DEFAULT_PROBLEM}]: " PROBLEM_NAME
+  PROBLEM_NAME="${PROBLEM_NAME:-$DEFAULT_PROBLEM}"
+fi
+
+BASE_DIR="${REPO_ROOT}/problems/${PROBLEM_NAME}"
+INPUT_DIR="${BASE_DIR}/input"
+CONFIG_DIR="${BASE_DIR}/configs"
+CONFIG_PATH=""
+REQUESTED_CONFIG=${2:-${CONFIG_CHOICE:-}}
+RUN_NAME="${RUN_NAME:-}" # optional env override for output folder naming
+
+declare -A API_KEYS
+
+DEFAULT_RUN_NAME=$(date +"%Y%m%d_%H%M%S")
+if [[ -z "$RUN_NAME" ]]; then
+  read -r -p "Run name under experiments/${PROBLEM_NAME} [${DEFAULT_RUN_NAME}]: " RUN_NAME
+  RUN_NAME="${RUN_NAME:-$DEFAULT_RUN_NAME}"
+fi
+OUTPUT_DIR="${REPO_ROOT}/experiments/${PROBLEM_NAME}/${RUN_NAME}"
+LOAD_CKPT="${LOAD_CKPT:--1}"
+CPU_LIST="${CPU_LIST:-}"
+
+printf '\nOptional: set API keys for this run (stored only in memory).\n'
+while true; do
+  read -r -p "API key env var name (e.g., OPENAI_API_KEY) [skip]: " API_KEY_NAME
+  API_KEY_NAME=${API_KEY_NAME:-}
+  if [[ -z "$API_KEY_NAME" ]]; then
+    break
+  fi
+  read -sr -p "Value for $API_KEY_NAME: " API_KEY_VALUE
+  echo
+  if [[ -z "$API_KEY_VALUE" ]]; then
+    echo "Skipped empty value for $API_KEY_NAME"
+    continue
+  fi
+  API_KEYS["$API_KEY_NAME"]="$API_KEY_VALUE"
+  export "$API_KEY_NAME"="$API_KEY_VALUE"
+done
+
+if [[ ! 
-d "$INPUT_DIR" ]]; then + echo "✖ Input directory not found: $INPUT_DIR" >&2 + exit 1 +fi + +if [[ ! -d "$CONFIG_DIR" ]]; then + echo "✖ Config directory not found: $CONFIG_DIR" >&2 + exit 1 +fi + +mapfile -t AVAILABLE_CONFIGS < <(find "$CONFIG_DIR" -maxdepth 1 -type f \( -iname '*.yaml' -o -iname '*.yml' -o -iname '*.json' \) -printf '%f\n' | sort) + +choose_config() { + local choice=${1:-} + if [[ -n "$choice" && "$choice" =~ ^[0-9]+$ ]]; then + local idx=$((choice - 1)) + if ((idx >= 0 && idx < ${#AVAILABLE_CONFIGS[@]})); then + CONFIG_PATH="$CONFIG_DIR/${AVAILABLE_CONFIGS[$idx]}" + return 0 + fi + elif [[ -n "$choice" ]]; then + local candidate="$CONFIG_DIR/$choice" + if [[ -f "$candidate" ]]; then + CONFIG_PATH="$candidate" + return 0 + fi + fi + return 1 +} + +if ((${#AVAILABLE_CONFIGS[@]} > 0)); then + echo "Available configs in $CONFIG_DIR:" + for i in "${!AVAILABLE_CONFIGS[@]}"; do + printf ' [%d] %s\n' "$((i + 1))" "${AVAILABLE_CONFIGS[$i]}" + done + echo " [N] Provide another config file to copy here" + DEFAULT_CHOICE=1 + if [[ -z "$REQUESTED_CONFIG" ]]; then + read -r -p "Choose config [$DEFAULT_CHOICE]: " CONFIG_CHOICE + CONFIG_CHOICE=${CONFIG_CHOICE:-$DEFAULT_CHOICE} + else + CONFIG_CHOICE="$REQUESTED_CONFIG" + echo "Using requested config selector: $CONFIG_CHOICE" + fi + if ! choose_config "$CONFIG_CHOICE"; then + if [[ "${CONFIG_CHOICE,,}" != "n" ]]; then + echo "✖ Invalid choice: $CONFIG_CHOICE" >&2 + exit 1 + fi + fi +fi + +if [[ -z "$CONFIG_PATH" ]]; then + if [[ -n "$REQUESTED_CONFIG" && -f "$REQUESTED_CONFIG" ]]; then + CUSTOM_CONFIG="$REQUESTED_CONFIG" + echo "Copying requested config file: $CUSTOM_CONFIG" + else + read -r -p "Path to config to copy into $CONFIG_DIR: " CUSTOM_CONFIG + fi + if [[ -z "$CUSTOM_CONFIG" ]]; then + echo "✖ No config provided" >&2 + exit 1 + fi + if [[ ! 
-f "$CUSTOM_CONFIG" ]]; then + echo "✖ Config file not found: $CUSTOM_CONFIG" >&2 + exit 1 + fi + + CUSTOM_CONFIG_ABS=$(python - <<'PY' +import os, sys +path = sys.argv[1] +print(os.path.abspath(os.path.expanduser(path))) +PY +"$CUSTOM_CONFIG") + + DEFAULT_NAME="$(basename -- "$CUSTOM_CONFIG_ABS")" + read -r -p "Save as [$DEFAULT_NAME]: " CUSTOM_NAME + CUSTOM_NAME=${CUSTOM_NAME:-$DEFAULT_NAME} + CONFIG_PATH="$CONFIG_DIR/$CUSTOM_NAME" + cp -f -- "$CUSTOM_CONFIG_ABS" "$CONFIG_PATH" + echo "Copied custom config to: $CONFIG_PATH" +fi + +if [[ ! -f "$CONFIG_PATH" ]]; then + echo "✖ Config file not found: $CONFIG_PATH" >&2 + exit 1 +fi + +mkdir -p "$OUTPUT_DIR" + +if ! command -v codeevolve >/dev/null 2>&1; then + echo "⚠️ 'codeevolve' CLI not found in PATH. Activate your env first: conda activate codeevolve" >&2 + exit 1 +fi + +echo "➡️ Using problem: $PROBLEM_NAME" +echo " Input: $INPUT_DIR" +echo " Config: $CONFIG_PATH" +echo " Output: $OUTPUT_DIR" + +cmd=( + codeevolve + --inpt_dir="$INPUT_DIR" + --cfg_path="$CONFIG_PATH" + --out_dir="$OUTPUT_DIR" + --load_ckpt="$LOAD_CKPT" + --terminal_logging +) + +echo "\nTip: conda activate codeevolve # ensure the environment is ready" -#!/bin/bash +set +e +if [[ -n "$CPU_LIST" ]]; then + echo "Pinning to CPUs: $CPU_LIST" + taskset --cpu-list "$CPU_LIST" "${cmd[@]}" +else + "${cmd[@]}" +fi +status=$? +set -e -PROB_NAME="alphaevolve_math_problems/circle_packing_square/26" -BASE_DIR="problems/${PROB_NAME}" -INPT_DIR="${BASE_DIR}/input/" -CFG_PATH="${BASE_DIR}/configs/config_mp_insp.yaml" -OUT_DIR="experiments/${PROB_NAME}/test/" -LOAD_CKPT=-1 -CPU_LIST="" +if ((${#API_KEYS[@]} > 0)); then + echo "Cleaning up API key variables..." 
+ for key in "${!API_KEYS[@]}"; do + unset "$key" + done +fi -taskset --cpu-list $CPU_LIST codeevolve --inpt_dir=$INPT_DIR --cfg_path=$CFG_PATH --out_dir=$RESULTS_DIR --load_ckpt=$LOAD_CKPT --terminal_logging \ No newline at end of file +exit $status diff --git a/scripts/run_windows.ps1 b/scripts/run_windows.ps1 new file mode 100644 index 0000000..a63d76d --- /dev/null +++ b/scripts/run_windows.ps1 @@ -0,0 +1,184 @@ +<# +CodeEvolve Windows runner. +Fill in your problem name (or pass -ProblemName) and this script will point +CodeEvolve at the correct input, config, and output folders. +#> +param( + [string]$ProblemName = "", + [string]$LoadCkpt = "-1", + [string]$CpuList = "", + [string]$ConfigChoice = "", + [string]$RunName = "" +) + +$RepoRoot = (Resolve-Path (Join-Path $PSScriptRoot ".." )).Path +$ProblemsRoot = Join-Path $RepoRoot "problems" +$AvailableProblems = Get-ChildItem -Path $ProblemsRoot -Directory -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Name | Sort-Object + +if (-not $AvailableProblems) { + Write-Error "No problems found in $ProblemsRoot" + exit 1 +} + +if (-not $ProblemName) { + Write-Host "Available problems:" + foreach ($p in $AvailableProblems) { + Write-Host " - $p" + } + $DefaultProblem = $AvailableProblems[0] + $ProblemName = Read-Host "Choose problem [$DefaultProblem]" + if (-not $ProblemName) { $ProblemName = $DefaultProblem } +} + +$BaseDir = Join-Path $RepoRoot (Join-Path "problems" $ProblemName) +$InputDir = Join-Path $BaseDir "input" +$ConfigDir = Join-Path $BaseDir "configs" +$ConfigPath = "" +$DefaultRunName = (Get-Date).ToString('yyyyMMdd_HHmmss') +if (-not $RunName) { + $RunName = Read-Host "Run name under experiments/$ProblemName [$DefaultRunName]" + if (-not $RunName) { $RunName = $DefaultRunName } +} +$OutputDir = Join-Path $RepoRoot (Join-Path "experiments" (Join-Path $ProblemName $RunName)) +$ApiKeys = @{} + +Write-Host "`nOptional: set API key env vars for this run (stored only in memory)." 
+while ($true) {
+    $ApiKeyName = Read-Host "API key env var name (e.g., OPENAI_API_KEY) [press ENTER to skip]"
+    if (-not $ApiKeyName) { break }
+    $SecureValue = Read-Host "Value for $ApiKeyName" -AsSecureString
+    $Ptr = [System.Runtime.InteropServices.Marshal]::SecureStringToGlobalAllocUnicode($SecureValue)
+    try {
+        $PlainValue = [System.Runtime.InteropServices.Marshal]::PtrToStringUni($Ptr)
+    } finally {
+        [System.Runtime.InteropServices.Marshal]::ZeroFreeGlobalAllocUnicode($Ptr)
+    }
+    if (-not $PlainValue) {
+        Write-Host "Skipped empty value for $ApiKeyName"
+        continue
+    }
+    [System.Environment]::SetEnvironmentVariable($ApiKeyName, $PlainValue, "Process")
+    $ApiKeys[$ApiKeyName] = $true
+}
+
+if (-not (Test-Path $InputDir)) {
+    Write-Error "Input directory not found: $InputDir"
+    exit 1
+}
+
+if (-not (Test-Path $ConfigDir)) {
+    Write-Error "Config directory not found: $ConfigDir"
+    exit 1
+}
+
+$AvailableConfigs = Get-ChildItem -Path (Join-Path $ConfigDir '*') -File -Include *.yml, *.yaml, *.json -ErrorAction SilentlyContinue | Select-Object -ExpandProperty Name | Sort-Object
+
+function Set-ConfigFromChoice {
+    param([string]$Choice)
+    if ($Choice -match '^[0-9]+$') {
+        $idx = [int]$Choice - 1
+        if ($idx -ge 0 -and $idx -lt $AvailableConfigs.Count) {
+            $script:ConfigPath = Join-Path $ConfigDir $AvailableConfigs[$idx]
+            return $true
+        }
+    } elseif ($Choice) {
+        $candidate = Join-Path $ConfigDir $Choice
+        if (Test-Path $candidate -PathType Leaf) {
+            $script:ConfigPath = $candidate
+            return $true
+        }
+    }
+    return $false
+}
+
+if ($AvailableConfigs.Count -gt 0) {
+    Write-Host "Available configs in ${ConfigDir}:"
+    for ($i = 0; $i -lt $AvailableConfigs.Count; $i++) {
+        $slot = $i + 1
+        Write-Host "  [$slot] $($AvailableConfigs[$i])"
+    }
+    Write-Host "  [N] Provide another config file to copy here"
+    $DefaultChoice = "1"
+    if (-not $ConfigChoice) {
+        $ConfigChoice = Read-Host "Choose config [$DefaultChoice]"
+        if (-not $ConfigChoice) { $ConfigChoice = $DefaultChoice }
+    } else {
+        
Write-Host "Using requested config selector: $ConfigChoice" + } + if (-not (Set-ConfigFromChoice -Choice $ConfigChoice)) { + if ($ConfigChoice.ToLower() -ne "n") { + Write-Error "Invalid choice: $ConfigChoice" + exit 1 + } + } +} + +if (-not $ConfigPath) { + if ($ConfigChoice -and (Test-Path $ConfigChoice -PathType Leaf)) { + $CustomConfig = $ConfigChoice + Write-Host "Copying requested config file: $CustomConfig" + } else { + $CustomConfig = Read-Host "Path to config to copy into $ConfigDir" + } + if (-not $CustomConfig) { + Write-Error "No config provided" + exit 1 + } + if (-not (Test-Path $CustomConfig)) { + Write-Error "Config file not found: $CustomConfig" + exit 1 + } + + $CustomConfigAbs = [System.IO.Path]::GetFullPath((Resolve-Path -LiteralPath $CustomConfig)) + $DefaultName = [System.IO.Path]::GetFileName($CustomConfigAbs) + $CustomName = Read-Host "Save as [$DefaultName]" + if (-not $CustomName) { $CustomName = $DefaultName } + $ConfigPath = Join-Path $ConfigDir $CustomName + Copy-Item -LiteralPath $CustomConfigAbs -Destination $ConfigPath -Force + Write-Host "Copied custom config to: $ConfigPath" +} + +if (-not (Test-Path $ConfigPath)) { + Write-Error "Config file not found: $ConfigPath" + exit 1 +} + +if (-not (Test-Path $OutputDir)) { + New-Item -ItemType Directory -Path $OutputDir -Force | Out-Null +} + +if (-not (Get-Command codeevolve -ErrorAction SilentlyContinue)) { + Write-Error "'codeevolve' CLI not found in PATH. 
Activate your env first: conda activate codeevolve" + exit 1 +} + +Write-Host "➡️ Using problem: $ProblemName" +Write-Host " Input: $InputDir" +Write-Host " Config: $ConfigPath" +Write-Host " Output: $OutputDir" +Write-Host "`nTip: conda activate codeevolve # ensure the environment is ready" + +$command = @( + "codeevolve", + "--inpt_dir=$InputDir", + "--cfg_path=$ConfigPath", + "--out_dir=$OutputDir", + "--load_ckpt=$LoadCkpt", + "--terminal_logging" +) + +if ($CpuList -ne "") { + Write-Warning "CPU pinning is not set on Windows by default; set $env:OMP_NUM_THREADS or similar if needed." +} + +$process = & $command[0] $command[1..($command.Length-1)] +$status = $LASTEXITCODE + +if ($ApiKeys.Keys.Count -gt 0) { + Write-Host "Cleaning up API key variables..." + foreach ($key in $ApiKeys.Keys) { + [System.Environment]::SetEnvironmentVariable($key, $null, "Process") + } +} + +exit $status diff --git a/scripts/y b/scripts/y new file mode 100644 index 0000000..9ec6391 --- /dev/null +++ b/scripts/y @@ -0,0 +1,80 @@ +{ + "SYS_MSG": "SCENERIUSZ:\nJeste\u015b ekspertem z zakresu fizyki teoretycznej, dynamiki uk\u0142ad\u00f3w nieliniowych oraz modelowania numerycznego czasu.\nTwoj\u0105 misj\u0105 jest ewolucyjne udoskonalanie modu\u0142u Pythona, w kt\u00f3rym **czas jest aktywn\u0105 si\u0142\u0105** nap\u0119dzaj\u0105c\u0105 ewolucj\u0119 stanu uk\u0142adu.\n\nKONTEKST PROBLEMU:\n- **Cel g\u0142\u00f3wny**: Zaimplementowa\u0107 i ewoluowa\u0107 kod (wewn\u0105trz EVOLVE-BLOCK), kt\u00f3ry modeluje \u201eczas jako si\u0142\u0119\u201d\n dzia\u0142aj\u0105c\u0105 na obiekt `SystemState`.\n- **Kluczowa idea**: Czas nie jest tylko parametrem `t`, ale operatorem / polem (`TimeForce`, `EventHorizonForce`, itp.),\n kt\u00f3re aktualizuje stan uk\u0142adu.\n- **Przestrze\u0144 symulacji**: Prosty (np. 1D lub niskowymiarowy) stan fizyczny z eksplityczn\u0105 dynamik\u0105 czasow\u0105\n (np. 
pozycja, pr\u0119dko\u015b\u0107, entropia, \u201eczas subiektywny\u201d).\n- **Ograniczenia**:\n * Kod musi by\u0107 poprawnym sk\u0142adniowo Pythonem i da\u0107 si\u0119 zaimportowa\u0107.\n * Musi istnie\u0107 wyra\u017any punkt wej\u015bcia (np. funkcja `run()`), kt\u00f3ry wykonuje kr\u00f3tk\u0105 symulacj\u0119.\n * Wewn\u0105trz EVOLVE-BLOCK powinna istnie\u0107 co najmniej jedna jawna abstrakcja si\u0142y czasu\n (np. `TimeForce`, `TemporalDrift`, `EventHorizonForce`).\n * Docstringi i komentarze powinny by\u0107 po **polsku**, obja\u015bniaj\u0105c sens matematyki i metafory czasu.\n * Kod musi pozosta\u0107 \u201eewolwowalny\u201d: wyra\u017any podzia\u0142 na stan, si\u0142y, integratory i obserwator\u00f3w.\n\nZASOBY OBLICZENIOWE I WYTYCZNE IMPLEMENTACYJNE:\n**Podstawowe pakiety**: `math`, `dataclasses`, `typing`, `itertools`, `statistics`, `random`.\n\n**Dodatkowe (opcjonalne) pakiety \u2013 tylko z bezpiecznym fallbackiem**:\n- **Numeryka i wektory**: `numpy`\n- **Wizualizacja w terminalu**: `rich` (tabele, paski post\u0119pu, proste wykresy tekstowe),\n w razie braku \u2013 czyste ASCII.\n- **Narz\u0119dzia naukowe**: `scipy` (np. proste integratory ODE), importowane ostro\u017cnie.\n- **Wydajno\u015b\u0107**: `functools.lru_cache`, prosta memoizacja, lekkie triki numeryczne.\n\nJe\u017celi u\u017cywasz pakiet\u00f3w spoza standardowej biblioteki:\n - importuj je wewn\u0105trz bloku `try/except ImportError`,\n - zapewnij \u015bcie\u017ck\u0119 zapasow\u0105 dzia\u0142aj\u0105c\u0105 wy\u0142\u0105cznie na standardowej bibliotece.\n\nMETRYKI OCENY (WYKORZYSTYWANE PRZEZ EVALUATOR):\n1. **structure_score**: Z\u0142o\u017cono\u015b\u0107 i klarowno\u015b\u0107 architektury klas / funkcji\n (`TimeForce`, integratory, obserwatorzy, itp.).\n2. **physics_coherence**: Sp\u00f3jno\u015b\u0107 fizyczno-metaforyczna \u2013 czy r\u00f3wnania sensownie realizuj\u0105 ide\u0119\n \u201eczas jako si\u0142a\u201d.\n3. 
**doc_pl_quality**: Jako\u015b\u0107 docstring\u00f3w i komentarzy po polsku\n (zrozumia\u0142o\u015b\u0107 + filozoficzna g\u0142\u0119bia).\n4. **visual_clarity**: Na ile czytelnie wyj\u015bcie w terminalu pokazuje ewolucj\u0119 czasu i stanu.\n5. **stability_score**: Odporno\u015b\u0107 numeryczna (brak NaN, brak niesko\u0144czono\u015bci w typowych ustawieniach).\n\nWYMAGANIA TECHNICZNE:\n- **Deterministyczno\u015b\u0107**: Je\u017celi u\u017cywasz losowo\u015bci (np. losowe warunki pocz\u0105tkowe),\n ustaw ziarno RNG (np. `random.seed(42)`) wewn\u0105trz EVOLVE-BLOCK.\n- **Obs\u0142uga b\u0142\u0119d\u00f3w**: Chro\u0144 si\u0119 przed dzieleniem przez zero, przepe\u0142nieniem oraz osobliwo\u015bciami\n w pobli\u017cu \u201ehoryzontu zdarze\u0144\u201d.\n- **Ewolwowalno\u015b\u0107**:\n * Utrzymuj EVOLVE-BLOCK skupiony na logice fizycznej (si\u0142y, integratory, obserwatorzy),\n bez zb\u0119dnych efekt\u00f3w ubocznych.\n * Unikaj kruchych globali; preferuj przekazywanie parametr\u00f3w / stanu.\n- **Wizualizacja w terminalu**:\n * Zapewnij przynajmniej jedn\u0105 \u015bcie\u017ck\u0119, kt\u00f3ra wypisuje do terminala kr\u00f3tk\u0105 histori\u0119 ewolucji stanu\n (np. kilka\u2013kilkadziesi\u0105t krok\u00f3w).\n * Preferuj kompaktowe wizualizacje (paski, proste wykresy tekstowe, symbole) dzia\u0142aj\u0105ce w czystym tek\u015bcie.\n\n**Zalecane wzorce implementacyjne**:\n - **Architektura warstwowa**:\n * `SystemState`: przechowuje stan (np. `t`, pozycj\u0119, pr\u0119dko\u015b\u0107, entropi\u0119, \u201eczas subiektywny\u201d).\n * `TimeForce` i podklasy: aktualizuj\u0105 stan na podstawie `dt` oraz parametr\u00f3w fizycznych / metaforycznych.\n * `Integrator`: strategia ca\u0142kowania (np. 
prosty Euler, z mo\u017cliwo\u015bci\u0105 rozbudowy).\n * `Observer`: rejestruje trajektorie, liczy entropi\u0119, mierzy \u201ep\u0142yni\u0119cie\u201d czasu.\n - **Modularno\u015b\u0107**:\n * Oddziel logik\u0119 fizyki od I/O oraz od kodu odpowiedzialnego za wizualizacj\u0119.\n * Utrzymuj proste API, np. `run_simulation(steps: int) -> lista_stan\u00f3w`.\n - **Haki czasowe**:\n * Pozw\u00f3l, aby `dt` by\u0142o dynamiczne \u2013 mo\u017ce zale\u017ce\u0107 od stanu, odleg\u0142o\u015bci od horyzontu zdarze\u0144,\n poziomu entropii lub \u201enapi\u0119cia\u201d w uk\u0142adzie.\n * Zaprojektuj miejsce na odwr\u00f3cenie strza\u0142ki czasu (np. w klasie `EventHorizonForce`).\n\n UWAGI MATEMATYCZNE:\n - **Podstawowa dynamika**:\n * Standardowa aktualizacja czasu: `t_{n+1} = t_n + dt * intensity`.\n * Rozszerzenie na stan: `x_{n+1} = x_n + f(t, x) * dt`, gdzie `f` mo\u017ce zale\u017ce\u0107 od si\u0142y czasu.\n - **Czas subiektywny vs kosmiczny**:\n * Wprowad\u017a `\u03c4` jako \u201eczas odczuwany\u201d, z prost\u0105 relacj\u0105: `d\u03c4 = \u03b3(t, x) * dt`,\n gdzie `0 < \u03b3 \u2264 1` spowalnia lokalne odczuwanie czasu.\n - **Horyzont zdarze\u0144**:\n * W pobli\u017cu promienia `radius` mo\u017cesz modyfikowa\u0107 znak lub skal\u0119 `dt`.\n * Zamiast dzieli\u0107 przez zero, stosuj `max(epsilon, distance)` z ma\u0142ym `epsilon`.\n - **Entropia i strza\u0142ka czasu**:\n * Zdefiniuj funkcj\u0119 entropii `S(t, x)` i staraj si\u0119, aby w typowych scenariuszach\n ros\u0142a wraz z |t|.\n * Pozostaw jednak mo\u017cliwo\u015b\u0107 eksperymentowania z lokalnym spadkiem entropii\n w regionach \u201eodwr\u00f3conego czasu\u201d.\n\n STRATEGIE ALGORYTMICZNE, KT\u00d3RE WARTO ROZWA\u017bY\u0106:\n - **Klasy si\u0142 czasowych**:\n * `TemporalDrift`: liniowe \u201epchni\u0119cie\u201d stanu jak sta\u0142y wiatr czasu.\n * `CurvedTimeField`: nieliniowe przyspieszanie / hamowanie czasu w zale\u017cno\u015bci od po\u0142o\u017cenia.\n * `EventHorizonForce`: 
obszar, gdzie `dt` zmienia kierunek, maleje do zera albo gwa\u0142townie si\u0119 deformuje.\n - **Integratory**:\n * Zaczynaj od prostego schematu Eulera, ale zostaw interfejs na bardziej zaawansowane metody\n (np. ulepszony krok adaptacyjny).\n - **Wizualizacja w terminalu**:\n * W ka\u017cdej iteracji wypisuj kr\u00f3tk\u0105 lini\u0119 zawieraj\u0105c\u0105 `t`, wybrane komponenty stanu\n oraz prosty pasek lub symboliczny wykres (np. `t=0.30 |\u2588\u2588\u2588-----|`).\n * Je\u017celi dost\u0119pny jest `rich`, u\u017cyj tabel lub pask\u00f3w post\u0119pu do pokazywania trajektorii.\n - **Przygotowanie pod ewolucj\u0119**:\n * Projektuj r\u00f3wnania tak, aby ma\u0142e mutacje (zmiana funkcji `f`, inne parametry si\u0142)\n dawa\u0142y zauwa\u017calnie r\u00f3\u017cne, ale nadal stabilne zachowania.\n * Nie usuwaj kluczowych klas (np. `TimeForce`); lepiej rozszerzaj ich API.\n\n RAMA WALIDACYJNA (DLA EVALUATORA):\n - **Sprawdzenie poprawno\u015bci**:\n * Uruchom kr\u00f3tk\u0105 symulacj\u0119 (np. 10\u201350 krok\u00f3w) i upewnij si\u0119, \u017ce `t` oraz inne wielko\u015bci\n pozostaj\u0105 sko\u0144czone i dobrze zdefiniowane.\n * Funkcja `run()` powinna zwraca\u0107 prost\u0105 struktur\u0119 (np. s\u0142ownik lub list\u0119 s\u0142ownik\u00f3w)\n nadaj\u0105c\u0105 si\u0119 do analizy.\n - **Testy stabilno\u015bci**:\n * Przetestuj r\u00f3\u017cne warto\u015bci `dt` (mniejsze i wi\u0119ksze) i obserwuj, czy uk\u0142ad nie \u201ewybucha\u201d.\n * Przetestuj par\u0119 r\u00f3\u017cnych warunk\u00f3w pocz\u0105tkowych, aby unikn\u0105\u0107 kruchych za\u0142o\u017ce\u0144.\n - **Inspekcja wizualna**:\n * Wyj\u015bcie w terminalu powinno w przejrzysty spos\u00f3b sugerowa\u0107 \u201ep\u0142yni\u0119cie\u201d czasu\n oraz g\u0142\u00f3wne zmiany w stanie uk\u0142adu.\n - **Regresja**:\n * Nowsze wersje kodu nie powinny niszczy\u0107 najprostszych scenariuszy\n (np. 
liniowego wzrostu `t` przy sta\u0142ej sile czasu).\n\n# PROMPT-BLOCK-START\n\n OPTIMIZATION STRATEGIES TO CONSIDER:\n TODO\n\n GEOMETRIC INSIGHTS & MATHEMATICAL FOUNDATIONS:\n TODO\n\n **Recommended implementation patterns:**\n TODO\n\n VALIDATION FRAMEWORK:\n TODO\n\n# PROMPT-BLOCK-END\n", + "CODEBASE_PATH": "src/", + "INIT_FILE_DATA": { + "filename": "initial_program.py", + "language": "python" + }, + "EVAL_FILE_NAME": "evaluate.py", + "RESOURCES": { + "MAX_MEM_BYTES": 1000000000, + "MEM_CHECK_INTERVAL_S": 0.1 + }, + "EVOLVE_CONFIG": { + "fitness_key": "combined_score", + "num_epochs": 50, + "ckpt": 10, + "max_size": 100, + "init_pop": 6, + "exploration_rate": 0.3, + "selection_policy": "roulette", + "selection_kwargs": { + "roulette_by_rank": true + }, + "early_stopping_rounds": 100, + "num_islands": 4, + "migration_topology": "ring", + "migration_interval": 30, + "migration_rate": 0.1, + "meta_prompting": true, + "use_embedding": true, + "use_map_elites": true, + "num_inspirations": 3, + "max_chat_depth": 3 + }, + "ENSEMBLE": [ + { + "model_name": "qwen3-coder:480b-cloud", + "temp": 0.85, + "top_p": 0.95, + "retries": 3, + "weight": 0.8, + "verify_ssl": false + }, + { + "model_name": "rnj-1:8b", + "temp": 0.85, + "top_p": 0.95, + "retries": 3, + "weight": 0.2, + "verify_ssl": false + } + ], + "SAMPLER_AUX_LM": { + "model_name": "qwen2.5-coder:7b", + "temp": 0.7, + "top_p": 0.95, + "retries": 3, + "weight": 1, + "verify_ssl": false + }, + "EMBEDDING": { + "model_name": "embeddinggemma:300m", + "retries": 3, + "verify_ssl": false + }, + "MAP_ELITES": { + "elite_map_type": "grid", + "features": [ + { + "name": "feat1", + "min_val": 0, + "max_val": 1, + "num_bins": 10 + } + ] + }, + "object": "C:\\Users\\Michal\\Documents\\GitHub\\science-codeevolve\\problems\\F_time\\input\\src", + "evaluator": "C:\\Users\\Michal\\Documents\\GitHub\\science-codeevolve\\problems\\F_time\\input\\evaluate.py", + "t": "" +} \ No newline at end of file diff --git 
a/src/codeevolve/adversarial.py b/src/codeevolve/adversarial.py new file mode 100644 index 0000000..1d894f6 --- /dev/null +++ b/src/codeevolve/adversarial.py @@ -0,0 +1,164 @@ +# ===--------------------------------------------------------------------------------------===# +# +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 +# ===--------------------------------------------------------------------------------------===# +# +"""Adversarial multi-population utilities for CodeEvolve.""" + +from dataclasses import dataclass, field +from typing import Dict, List, Optional + +from codeevolve.database import Program + + +@dataclass +class AdversarialConfig: + """Configuration block for adversarial multi-population evolution.""" + + enabled: bool = False + teams: List[str] = field(default_factory=lambda: ["red", "blue"]) + cross_eval_interval: int = 1 + opponents_per_eval: int = 2 + fitness_metric: str = "win_rate" # supported: "win_rate", "elo", "hybrid" + base_fitness_weight: float = 0.2 + elo_k: float = 32.0 + initial_rating: float = 1000.0 + alternating_phases: bool = False + + +@dataclass +class CompetitiveResult: + """Summary of a cross-population evaluation round.""" + + win_rate: float + matches: int + rating: float + fitness: float + + +def assign_team(island_id: int, teams: List[str]) -> str: + """Assigns an island to a team based on its index.""" + + if not teams: + return "default" + return teams[island_id % len(teams)] + + +def should_cross_evaluate(epoch: int, team: str, config: AdversarialConfig) -> bool: + """Determines whether to run a cross-population evaluation this epoch.""" + + if not config.enabled: + return False + + interval = max(1, config.cross_eval_interval) + if epoch % interval != 0: + return False + + if not config.alternating_phases: + return True + + team_index: int = config.teams.index(team) 
+ return (epoch // interval) % len(config.teams) == team_index + + +def update_team_registry( + registry: Optional[Dict[str, Program]], team: str, candidate: Program +) -> None: + """Updates the shared registry with the best program for a team.""" + + if registry is None: + return + + best_prog: Optional[Program] = registry.get(team, None) + if best_prog is None or candidate.fitness > best_prog.fitness: + registry[team] = candidate + + +def sample_opponents( + registry: Optional[Dict[str, Program]], + team: str, + teams: List[str], + max_opponents: int, + random_state, +) -> List[Program]: + """Samples opponents from rival teams registered in the shared pool.""" + + if registry is None: + return [] + + rival_programs: List[Program] = [] + for rival_team in teams: + if rival_team == team: + continue + opponent: Optional[Program] = registry.get(rival_team, None) + if opponent is not None: + rival_programs.append(opponent) + + random_state.shuffle(rival_programs) + return rival_programs[:max_opponents] + + +def _pair_score(candidate_score: float, opponent_score: float) -> float: + """Returns the outcome score for Elo: 1 win, 0.5 draw, 0 loss.""" + + if candidate_score > opponent_score: + return 1.0 + if candidate_score < opponent_score: + return 0.0 + return 0.5 + + +def _elo_update(rating: float, opponent_rating: float, score: float, k: float) -> float: + """Updates an Elo rating given a single match outcome.""" + + expected: float = 1.0 / (1 + 10 ** ((opponent_rating - rating) / 400)) + return rating + k * (score - expected) + + +def compute_competitive_result( + candidate: Program, + opponents: List[Program], + base_fitness_key: str, + config: AdversarialConfig, +) -> CompetitiveResult: + """Computes win-rate and Elo-based fitness against a set of opponents.""" + + if not opponents: + return CompetitiveResult( + win_rate=0.0, + matches=0, + rating=candidate.rating, + fitness=candidate.fitness, + ) + + wins: int = 0 + draws: int = 0 + rating: float = 
candidate.rating if candidate.rating is not None else config.initial_rating + + candidate_score: float = candidate.eval_metrics.get(base_fitness_key, candidate.fitness) + for opponent in opponents: + opponent_score: float = opponent.eval_metrics.get(base_fitness_key, opponent.fitness) + score: float = _pair_score(candidate_score, opponent_score) + wins += score == 1.0 + draws += score == 0.5 + opp_rating: float = opponent.rating if opponent.rating is not None else config.initial_rating + rating = _elo_update(rating, opp_rating, score, config.elo_k) + + matches: int = len(opponents) + win_rate: float = (wins + 0.5 * draws) / matches + + if config.fitness_metric == "elo": + fitness: float = rating + elif config.fitness_metric == "hybrid": + fitness = config.base_fitness_weight * candidate_score + (1 - config.base_fitness_weight) * win_rate + else: # default to pure win rate + fitness = win_rate + + return CompetitiveResult( + win_rate=win_rate, + matches=matches, + rating=rating, + fitness=fitness, + ) diff --git a/src/codeevolve/agents.py b/src/codeevolve/agents.py new file mode 100644 index 0000000..a2cf261 --- /dev/null +++ b/src/codeevolve/agents.py @@ -0,0 +1,105 @@ +# ===--------------------------------------------------------------------------------------===# +# +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 +# +# ===--------------------------------------------------------------------------------------===# +# +# This file implements optional agent utilities for CodeEvolve. 
+# +# ===--------------------------------------------------------------------------------------===# + +import logging +from dataclasses import dataclass, field +from typing import List, Optional, Tuple + +from codeevolve.database import Program +from codeevolve.lm import OpenAILM +from codeevolve.prompt.sampler import format_prog_msg +from codeevolve.prompt.template import ( + NOVEL_AGENT_SYSTEM_PROMPT, + NOVEL_AGENT_USER_TEMPLATE, +) + + +@dataclass +class NovelAgent: + """LLM-based agent focused on injecting novelty into prompt evolution. + + The agent is designed to occasionally replace the standard meta-prompting + step with a more exploratory proposal that intentionally searches for new + algorithmic directions. It still returns a SEARCH/REPLACE diff compatible + with the existing ``apply_diff_with_fallback`` utility, so it can be + slotted directly into the current evolution loop without changing the + downstream mechanics. + + Attributes: + lm: Configured language model used to author the novel prompt diff. + exploration_rate: Probability of invoking the agent when exploration is + enabled for the epoch. + max_inspirations: Maximum number of inspiration programs to include in + the generated context. + logger: Logger instance used for tracing agent activity. + """ + + lm: OpenAILM + exploration_rate: float = 0.2 + max_inspirations: int = 2 + logger: logging.Logger = field(default_factory=lambda: logging.getLogger(__name__)) + + def should_activate(self, random_state) -> bool: + """Determines whether the agent should run in the current epoch.""" + + return random_state.uniform(0, 1) <= self.exploration_rate + + def _format_inspirations(self, inspirations: Optional[List[Program]]) -> str: + """Formats inspiration programs into a readable block for the prompt.""" + + if not inspirations: + return "No inspiration programs supplied." 
+ + insp_blocks: List[str] = [] + for idx, inspiration in enumerate(inspirations[: self.max_inspirations]): + prog_msg: str = inspiration.prog_msg + if prog_msg is None: + prog_msg = format_prog_msg(prog=inspiration) + insp_blocks.append(f"----------INSPIRATION {idx + 1}----------\n{prog_msg}") + + return "\n".join(insp_blocks) + + async def propose_prompt( + self, prompt: Program, prog: Program, inspirations: Optional[List[Program]] + ) -> Tuple[str, int, int]: + """Generates a novel prompt diff emphasizing exploration.""" + + prog_msg: str = prog.prog_msg + if prog_msg is None: + prog_msg = format_prog_msg(prog=prog) + + content: str = NOVEL_AGENT_USER_TEMPLATE.format( + prompt=prompt.code, + program=prog_msg, + inspirations=self._format_inspirations(inspirations), + ) + + messages = [ + {"role": "system", "content": NOVEL_AGENT_SYSTEM_PROMPT}, + {"role": "user", "content": content}, + ] + + self.logger.info( + "Attempting to run novel prompt proposal using %s...", self.lm.model_name + ) + + response, prompt_tok, compl_tok = await self.lm.generate(messages) + + self.logger.info( + ( + "Novel agent response received (%s prompt tok, %s completion tok)." 
+ ), + prompt_tok, + compl_tok, + ) + + return response, prompt_tok, compl_tok diff --git a/src/codeevolve/cli.py b/src/codeevolve/cli.py index 9f5e550..34a74d8 100644 --- a/src/codeevolve/cli.py +++ b/src/codeevolve/cli.py @@ -10,33 +10,38 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import Any, Dict, List, Tuple, Optional - import argparse import asyncio +import ctypes import multiprocessing as mp import multiprocessing.sharedctypes as mpsct import multiprocessing.synchronize as mps -import ctypes import os -from pathlib import Path import re import sys +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple import yaml +from codeevolve.evolution import codeevolve from codeevolve.islands import ( - PipeEdge, - IslandData, - GlobalData, GlobalBestProg, + GlobalData, + IslandData, + PipeEdge, get_edge_list, get_pipe_graph, ) -from codeevolve.evolution import codeevolve from codeevolve.utils.logging_utils import cli_logger +def async_run_evolve( + run_args: Dict[str, Any], isl_data: IslandData, global_data: GlobalData +) -> None: + asyncio.run(codeevolve(run_args, isl_data, global_data)) + + def parse_args() -> argparse.Namespace: """Parses command-line arguments for CodeEvolve execution. @@ -112,7 +117,7 @@ def setup_isl_args(args: Dict[str, Any], num_islands: int) -> Dict[int, Dict[str latest_common_ckpt = max( int(re.search(r"ckpt_(\d+)\.pkl$", f).group(1)) for f in common_ckpts ) - if args["load_ckpt"] and f"ckpt_{args["load_ckpt"]}.pkl" in common_ckpts: + if args["load_ckpt"] and f"ckpt_{args['load_ckpt']}.pkl" in common_ckpts: global_ckpt = args["load_ckpt"] print(f"Loading common checkpoint: {global_ckpt}") else: @@ -143,9 +148,6 @@ def main(): input/output paths, configuration, and execution settings. 
""" - def _async_run_evolve(run_args: Dict[str, Any], isl_data: IslandData, global_data: GlobalData): - asyncio.run(codeevolve(run_args, isl_data, global_data)) - # args args: Dict[str, Any] = vars(parse_args()) args["inpt_dir"] = Path(args["inpt_dir"]) @@ -181,6 +183,7 @@ def _async_run_evolve(run_args: Dict[str, Any], isl_data: IslandData, global_dat sys.exit(1) evolve_config: Dict[str, Any] = config["EVOLVE_CONFIG"] + adversarial_cfg: Dict[str, Any] = config.get("ADVERSARIAL", {}) isl2args: Dict[int, Dict[str, Any]] = setup_isl_args(args, evolve_config["num_islands"]) # synchronization primitives @@ -195,6 +198,9 @@ def _async_run_evolve(run_args: Dict[str, Any], isl_data: IslandData, global_dat barrier: mps.Barrier = mp.Barrier(parties=evolve_config["num_islands"]) log_queue: mp.Queue = mp.Queue() + manager = mp.Manager() + team_registry = manager.dict() if adversarial_cfg.get("enabled", False) else None + global_data: GlobalData = GlobalData( best_sol=global_best_sol, early_stop_counter=early_stop_counter, @@ -202,6 +208,8 @@ def _async_run_evolve(run_args: Dict[str, Any], isl_data: IslandData, global_dat lock=lock, barrier=barrier, log_queue=log_queue, + team_registry=team_registry, + adversarial_cfg=adversarial_cfg, ) # islands @@ -229,15 +237,18 @@ def _async_run_evolve(run_args: Dict[str, Any], isl_data: IslandData, global_dat log_formatter_daemon.start() # spawn processes + teams: List[str] = adversarial_cfg.get("teams", ["red", "blue"]) + for island_id in range(evolve_config["num_islands"]): isl_data: IslandData = IslandData( id=island_id, in_neigh=in_adj[island_id] if in_adj else None, out_neigh=out_adj[island_id] if out_adj else None, + team=teams[island_id % len(teams)] if adversarial_cfg.get("enabled", False) else None, ) process = mp.Process( - target=_async_run_evolve, args=(isl2args[island_id], isl_data, global_data) + target=async_run_evolve, args=(isl2args[island_id], isl_data, global_data) ) processes.append(process) process.start() @@ -245,6 
+256,16 @@ def _async_run_evolve(run_args: Dict[str, Any], isl_data: IslandData, global_dat for process in processes: process.join() + # If any island process crashed, surface that as a non-zero exit. + # Otherwise the CLI can incorrectly report success even though nothing ran. + bad_exitcodes = [ + (i, p.exitcode) for i, p in enumerate(processes) if p.exitcode not in (0, None) + ] + if bad_exitcodes: + for idx, code in bad_exitcodes: + print(f"Island process {idx} exited with code {code}.") + return 1 + if args.get("terminal_logging", False): # kill log daemon log_queue.put(None) diff --git a/src/codeevolve/climate.py b/src/codeevolve/climate.py new file mode 100644 index 0000000..1eb2a08 --- /dev/null +++ b/src/codeevolve/climate.py @@ -0,0 +1,159 @@ +# ===--------------------------------------------------------------------------------------===# +# +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 +# ===--------------------------------------------------------------------------------------===# +# +"""Seasonal climate utilities for thermal resilience scoring.""" + +import ast +import random +from dataclasses import dataclass, field +from typing import Dict, List, Set, Tuple + +DEFAULT_FUNCTION_POOL: List[str] = [ + "len", + "sum", + "min", + "max", + "sorted", + "enumerate", + "zip", + "range", + "map", + "filter", +] + + +@dataclass +class SeasonProfile: + """Represents the active climate season for an epoch.""" + + name: str + climate: str # "hot" or "cold" + index: int + + +@dataclass +class ClimateConfig: + """Configuration block for climate-based fitness adjustments.""" + + enabled: bool = False + seasons: List[str] = field(default_factory=lambda: ["perpetual"]) + season_length: int = 5 + function_pool: List[str] = field(default_factory=lambda: list(DEFAULT_FUNCTION_POOL)) + hot_fraction: float = 0.5 + survival_weight: float = 0.2 + neutral_baseline: float = 0.5 + seed: int | None = None + + +@dataclass +class ThermalEvaluation: + """Computed thermal resilience statistics for a program.""" + + season: SeasonProfile + hot_traits: Set[str] + cold_traits: Set[str] + hot_hits: int + cold_hits: int + total_hits: int + alignment: float + survival_chance: float + fitness_multiplier: float + + +def _clamp(value: float, lower: float = 0.0, upper: float = 1.0) -> float: + return max(lower, min(upper, value)) + + +def season_profile(epoch: int, config: ClimateConfig) -> SeasonProfile: + """Returns the active season and climate for the given epoch.""" + + season_span: int = max(1, config.season_length) + season_idx: int = ((max(epoch, 1) - 1) // season_span) % max(1, len(config.seasons)) + climate: str = "hot" if season_idx % 2 == 0 else "cold" + return SeasonProfile(name=config.seasons[season_idx], climate=climate, index=season_idx) + + +def assign_thermal_traits( + season: SeasonProfile, config: ClimateConfig, random_state: 
random.Random +) -> Tuple[Set[str], Set[str]]: + """Assigns functions to hot or cold traits for the active season.""" + + trait_rng = random.Random() + seed_base = config.seed + if seed_base is None: + seed_base = random_state.randint(0, 10_000_000) + trait_rng.seed(seed_base + season.index) + + pool: List[str] = list(dict.fromkeys(config.function_pool)) + trait_rng.shuffle(pool) + hot_cutoff: int = max(1, int(len(pool) * _clamp(config.hot_fraction))) + hot_traits: Set[str] = set(pool[:hot_cutoff]) + cold_traits: Set[str] = set(pool[hot_cutoff:]) + return hot_traits, cold_traits + + +def _count_call_names(code: str, pool: Set[str]) -> Dict[str, int]: + """Counts simple function calls in code that match the pool.""" + + counts: Dict[str, int] = {name: 0 for name in pool} + try: + tree = ast.parse(code) + except SyntaxError: + return counts + + for node in ast.walk(tree): + if isinstance(node, ast.Call): + func = node.func + name: str | None = None + if isinstance(func, ast.Name): + name = func.id + elif isinstance(func, ast.Attribute): + name = func.attr + + if name in counts: + counts[name] += 1 + return counts + + +def evaluate_heat_resilience( + code: str, epoch: int, config: ClimateConfig, random_state: random.Random +) -> ThermalEvaluation: + """Evaluates how well a program aligns with the current climate season.""" + + season = season_profile(epoch, config) + hot_traits, cold_traits = assign_thermal_traits(season, config, random_state) + + pool: Set[str] = set(config.function_pool) + counts: Dict[str, int] = _count_call_names(code=code, pool=pool) + + hot_hits: int = sum(counts[name] for name in hot_traits) + cold_hits: int = sum(counts[name] for name in cold_traits) + total_hits: int = hot_hits + cold_hits + + if total_hits == 0: + alignment = config.neutral_baseline + elif season.climate == "cold": + alignment = cold_hits / total_hits + else: + alignment = hot_hits / total_hits + + alignment = _clamp(alignment) + survival_chance: float = alignment if 
total_hits > 0 else config.neutral_baseline + fitness_multiplier: float = 1 + config.survival_weight * (survival_chance - config.neutral_baseline) + + return ThermalEvaluation( + season=season, + hot_traits=hot_traits, + cold_traits=cold_traits, + hot_hits=hot_hits, + cold_hits=cold_hits, + total_hits=total_hits, + alignment=alignment, + survival_chance=survival_chance, + fitness_multiplier=fitness_multiplier, + ) + diff --git a/src/codeevolve/database.py b/src/codeevolve/database.py index 8b0ad92..5577a3b 100644 --- a/src/codeevolve/database.py +++ b/src/codeevolve/database.py @@ -10,16 +10,16 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import Dict, List, Optional, Callable, Tuple - -from dataclasses import dataclass, field -from abc import ABC, abstractmethod -import random +import bisect import math +import random +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Callable, Dict, List, Optional, Tuple import numpy as np -from codeevolve.utils.cvt_utils import cvt, closest_centroid_idx +from codeevolve.utils.cvt_utils import closest_centroid_idx, cvt @dataclass @@ -81,6 +81,10 @@ class Program: embedding: Optional[List[float]] = None + # Adversarial evaluation metadata + rating: float = 1000.0 + matches: int = 0 + def __repr__(self) -> str: """Returns a string representation of the Program instance. 
@@ -325,6 +329,9 @@ def __init__( self._pids_pool_cache: List[str] = [] self._rank_cache: Dict[str, int] = {} + # Sorted list of (-fitness, pid) tuples for efficient insertion + # Using negative fitness to achieve descending order with bisect (which works on ascending lists) + self._sorted_pids: List[Tuple[float, str]] = [] self.elite_map_type: Optional[str] = elite_map_type.lower() if elite_map_type else None self.elite_map: Optional[EliteMap] = None @@ -365,15 +372,15 @@ def __repr__(self) -> str: return db_str # program management - ## TODO: improve insertion logic if we are to make more insertions per epoch - # (currently each insertion takes NlogN worst case, we can use bisect or - # heapq to improve this). def _update_caches(self) -> None: """Updates internal caches for programs and their fitness rankings. This method rebuilds the program cache, sorts programs by fitness, updates rank mappings, and identifies best and worst programs. + + Note: This is used for full rebuilds (e.g., after migrations). For single + insertions, use _incremental_update_cache() for better O(log N) performance. """ if getattr(self, "map", None) is not None: self._pids_pool_cache = self.elite_map.get_elite_ids() @@ -382,12 +389,47 @@ def _update_caches(self) -> None: if not self._pids_pool_cache: self._rank_cache = {} + self._sorted_pids = [] + return + + # Build sorted list of (fitness, pid) tuples in descending order + # Using negative fitness for bisect (which works with ascending order) + self._sorted_pids = sorted( + [(-self.programs[pid].fitness, pid) for pid in self._pids_pool_cache] + ) + self._rank_cache = {pid: i for i, (_, pid) in enumerate(self._sorted_pids)} + + def _incremental_update_cache(self, prog: Program) -> None: + """Incrementally updates caches when adding a single program. + + This method uses binary search (bisect) to insert the new program into + the sorted list in O(log N) time, avoiding the O(N log N) full sort. 
+ + Note: This uses bisect_right with a key function (requires Python 3.10+), + which is consistent with the project's minimum Python version requirement. + + Args: + prog: The newly added program to insert into caches. + """ + if not self.is_alive.get(prog.id, False): return - desc_pids: List[str] = sorted( - self._pids_pool_cache, key=lambda pid: self.programs[pid].fitness, reverse=True + # Insert into sorted list using bisect (negative fitness for descending order) + # Use bisect_right to maintain stable ordering (newer programs with same fitness go after older ones) + # The key function extracts fitness values for comparison, avoiding temporary list creation + neg_fitness = -prog.fitness + insertion_point = bisect.bisect_right( + self._sorted_pids, (neg_fitness, ""), key=lambda x: x[0] ) - self._rank_cache = {pid: i for i, pid in enumerate(desc_pids)} + self._sorted_pids.insert(insertion_point, (neg_fitness, prog.id)) + + # Update pool cache + self._pids_pool_cache.insert(insertion_point, prog.id) + + # Update ranks for affected programs (only those at or after insertion point) + for i in range(insertion_point, len(self._sorted_pids)): + _, pid = self._sorted_pids[i] + self._rank_cache[pid] = i def add(self, prog: Program) -> None: """Adds a program to the database. 
@@ -427,7 +469,12 @@ def add(self, prog: Program) -> None: ): self.worst_prog_id = prog.id - self._update_caches() + # Use incremental update for better performance when adding single programs + # Fall back to full update for MAP-Elites mode or when caches are empty + if self.elite_map is not None or not self._sorted_pids: + self._update_caches() + else: + self._incremental_update_cache(prog) # parent selection diff --git a/src/codeevolve/evaluator.py b/src/codeevolve/evaluator.py index a275396..018a18b 100644 --- a/src/codeevolve/evaluator.py +++ b/src/codeevolve/evaluator.py @@ -10,20 +10,30 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import Optional, Dict -import tempfile +import asyncio +import concurrent.futures +import json import logging +import pathlib +import shutil import subprocess +import sys +import tempfile import threading -import json import time +from typing import Dict, Optional + import psutil -import pathlib -import shutil -import sys + from codeevolve.database import Program -# TODO: better sandboxing (e.g. firejail) +# NOTE: For enhanced security in production environments, consider implementing +# additional sandboxing mechanisms such as: +# - Firejail: Linux namespace-based sandboxing tool +# - Docker containers: Isolated containerized execution +# - systemd-nspawn: Lightweight container manager +# - seccomp: Linux system call filtering +# Current implementation uses subprocess isolation with resource limits (memory, time) def mem_monitor( @@ -76,6 +86,7 @@ def __init__( max_mem_b: Optional[int], mem_check_interval_s: Optional[float], logger: Optional[logging.Logger] = None, + max_output_size: Optional[int] = None, ): """Initializes the evaluator with execution parameters and resource limits. @@ -87,12 +98,16 @@ def __init__( max_mem_b: Maximum memory usage in bytes. If None, no memory limit is enforced. mem_check_interval_s: Interval for memory usage checks in seconds. 
logger: Logger instance for logging evaluation activities. + max_output_size: Maximum size in characters for stdout/stderr storage. + If None, output is not stored in the Program object (default behavior). + If set, output will be truncated to this size. """ self.eval_path: pathlib.Path | str = eval_path self.cwd: Optional[pathlib.Path | str] = cwd self.timeout_s: int = timeout_s self.max_mem_b: Optional[int] = max_mem_b self.mem_check_interval_s: Optional[float] = mem_check_interval_s + self.max_output_size: Optional[int] = max_output_size self.language2extension = { "python": ".py", "javascript": ".js", @@ -155,12 +170,12 @@ def execute(self, prog: Program) -> None: # we copy cwd to temp and pass this temp directory as # the cwd for the program being executed - tmp_dir: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory(delete=False) + tmp_dir: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory() temp_cwd: Optional[tempfile.TemporaryDirectory] = None temp_cwd_dir: Optional[tempfile.TemporaryDirectory] = None if self.cwd: - temp_cwd_dir = tempfile.TemporaryDirectory(delete=False) + temp_cwd_dir = tempfile.TemporaryDirectory() temp_cwd = temp_cwd_dir.name try: shutil.copytree(self.cwd, temp_cwd, dirs_exist_ok=True) @@ -261,6 +276,51 @@ def execute(self, prog: Program) -> None: prog.error = error prog.eval_metrics = eval_metrics - # TODO: figure a good way of using stdout and warning, they might be really big - # prog.output = stdout - # prog.warning = warning + # Optionally store stdout and warning with size limits + if self.max_output_size is not None: + prog.output = stdout[: self.max_output_size] if stdout else None + # warning may be None if there were no warnings + prog.warning = warning[: self.max_output_size] if warning else None + else: + # By default, don't store output to avoid memory issues with large outputs + prog.output = None + prog.warning = None + + async def evaluate_batch( + self, progs: list[Program], max_workers: Optional[int] = 
None + ) -> list[Program]: + """Evaluates a batch of programs concurrently. + + This helper uses a thread pool to dispatch multiple ``execute`` calls in + parallel. Because program execution happens in subprocesses, threads are + sufficient to unlock parallelism without incurring the pickling + overhead required by process-based pools. + + Args: + progs: List of :class:`Program` instances to evaluate. Each program + is updated in place with its execution results. + max_workers: Optional override for the maximum number of concurrent + evaluations. If not provided, it defaults to the smaller of the + available logical CPUs and the batch size. + + Returns: + The list of input programs after evaluation. + """ + + if not progs: + return [] + + logical_cpus: int = psutil.cpu_count(logical=True) or 1 + worker_count: int = max_workers or min(len(progs), logical_cpus) + self.logger.info( + "Evaluating %d programs in parallel with %d workers...", + len(progs), + worker_count, + ) + + loop = asyncio.get_running_loop() + with concurrent.futures.ThreadPoolExecutor(max_workers=worker_count) as executor: + tasks = [loop.run_in_executor(executor, self.execute, prog) for prog in progs] + await asyncio.gather(*tasks) + + return progs diff --git a/src/codeevolve/evolution.py b/src/codeevolve/evolution.py index b21e2a7..f3da511 100644 --- a/src/codeevolve/evolution.py +++ b/src/codeevolve/evolution.py @@ -10,28 +10,38 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import Any, Dict, List, Optional -from uuid import uuid4 import logging from pathlib import Path +from typing import Any, Dict, List, Optional +from uuid import uuid4 -import yaml import numpy as np +import yaml -from codeevolve.database import Program, ProgramDatabase, EliteFeature -from codeevolve.lm import OpenAILM, LMEnsemble, OpenAIEmbedding +from codeevolve.adversarial import ( + AdversarialConfig, + CompetitiveResult, + assign_team, + 
compute_competitive_result, + sample_opponents, + should_cross_evaluate, + update_team_registry, +) +from codeevolve.climate import ClimateConfig, evaluate_heat_resilience +from codeevolve.agents import NovelAgent +from codeevolve.database import EliteFeature, Program, ProgramDatabase from codeevolve.evaluator import Evaluator -from codeevolve.prompt.sampler import PromptSampler, format_prog_msg from codeevolve.islands import ( - IslandData, GlobalData, - sync_migrate, + IslandData, early_stopping_check, + sync_migrate, ) - -from codeevolve.utils.parsing_utils import apply_diff +from codeevolve.lm import LMEnsemble, OpenAIEmbedding, OpenAILM +from codeevolve.prompt.sampler import PromptSampler, format_prog_msg +from codeevolve.utils.ckpt_utils import load_ckpt, save_ckpt from codeevolve.utils.logging_utils import get_logger -from codeevolve.utils.ckpt_utils import save_ckpt, load_ckpt +from codeevolve.utils.parsing_utils import apply_diff_with_fallback MAX_LOG_MSG_SZ: int = 256 @@ -53,6 +63,7 @@ async def evolve_loop( evaluator: Evaluator, embedding: Optional[OpenAIEmbedding], logger: logging.Logger, + novel_agent: Optional[NovelAgent] = None, ) -> None: """Executes the main evolutionary loop for program and prompt co-evolution. 
@@ -86,12 +97,43 @@ async def evolve_loop( meta_prompting: bool = evolve_config.get("meta_prompting", False) use_embedding: bool = evolve_config.get("use_embedding", False) + novel_agent_exploration_rate: float = ( + novel_agent.exploration_rate if novel_agent is not None else 0 + ) mp_start_marker: str = evolve_config.get("mp_start_marker", "# PROMPT-BLOCK-START") mp_end_marker: str = evolve_config.get("mp_end_marker", "# PROMPT-BLOCK-END") evolve_start_marker: str = evolve_config.get("evolve_start_marker", "# EVOLVE-BLOCK-START") evolve_end_marker: str = evolve_config.get("evolve_end_marker", "# EVOLVE-BLOCK-END") + adversarial_cfg_raw: Dict[str, Any] = config.get("ADVERSARIAL", {}) + default_adv_cfg: AdversarialConfig = AdversarialConfig() + adversarial_cfg: AdversarialConfig = AdversarialConfig( + **{ + field: adversarial_cfg_raw.get( + field, getattr(default_adv_cfg, field) + ) + for field in AdversarialConfig.__dataclass_fields__ + } + ) + + climate_cfg_raw: Dict[str, Any] = config.get("CLIMATE", {}) + default_climate_cfg: ClimateConfig = ClimateConfig() + climate_cfg: ClimateConfig = ClimateConfig( + **{ + field: climate_cfg_raw.get(field, getattr(default_climate_cfg, field)) + for field in ClimateConfig.__dataclass_fields__ + } + ) + team_name: str = isl_data.team or assign_team(isl_data.id, adversarial_cfg.teams) + logger.info("Adversarial team: %s | cfg: %s", team_name, adversarial_cfg) + if climate_cfg.enabled: + logger.info( + "Climate seasons enabled (%s) with span=%s epochs", + climate_cfg.seasons, + climate_cfg.season_length, + ) + for epoch in range(start_epoch + 1, evolve_config["num_epochs"] + 1): logger.info(f"========= EPOCH {epoch} =========") logger.info( @@ -157,28 +199,50 @@ async def evolve_loop( if meta_prompting and (gen_init_pop or exploration): logger.info("=== META-PROMPT STEP ===") meta_prompt_success: bool = False + use_novel_agent: bool = False + if novel_agent is not None and (gen_init_pop or exploration): + use_novel_agent = 
novel_agent.should_activate(sol_db.random_state) + logger.info( + "Novel agent active: %s (exploration rate %.2f)", + use_novel_agent, + novel_agent_exploration_rate, + ) ## GENERATE DIFF try: - # TODO: maybe move the logger from inside the sampler class to here - prompt_diff, prompt_tok, compl_tok = await prompt_sampler.meta_prompt( - prompt=parent_prompt, prog=parent_sol - ) + # Note: Logging is handled inside the sampler's meta_prompt method as it's + # directly related to the LLM operation and provides better context + if use_novel_agent: + prompt_diff, prompt_tok, compl_tok = await novel_agent.propose_prompt( + prompt=parent_prompt, + prog=parent_sol, + inspirations=inspirations, + ) + motive: str = "novel_prompt" + else: + prompt_diff, prompt_tok, compl_tok = await prompt_sampler.meta_prompt( + prompt=parent_prompt, prog=parent_sol + ) + motive = "meta_prompt" meta_prompt_success = True evolve_state["tok_usage"].append( { "epoch": epoch, - "motive": "meta_prompt", + "motive": motive, "prompt_tok": prompt_tok, "compl_tok": compl_tok, - "model_name": prompt_sampler.aux_lm.model_name, + "model_name": ( + novel_agent.lm.model_name + if use_novel_agent and novel_agent is not None + else prompt_sampler.aux_lm.model_name + ), } ) except Exception as err: logger.error(f"Error when running prompt on LM: {str(err)}.") error_info: Dict[str, Any] = { "epoch": epoch, - "motive": "meta_prompt", + "motive": "novel_prompt" if use_novel_agent else "meta_prompt", "error_msg": str(err), } evolve_state["errors"].append(error_info) @@ -187,9 +251,9 @@ async def evolve_loop( if meta_prompt_success: try: logger.info("Attempting to SEARCH/REPLACE...") - child_prompt_txt: str = apply_diff( + child_prompt_txt: str = apply_diff_with_fallback( parent_code=parent_prompt.code, - diff=prompt_diff, + diff_or_text=prompt_diff, start_marker=mp_start_marker, end_marker=mp_end_marker, ) @@ -249,7 +313,8 @@ async def evolve_loop( ## GENERATE DIFF try: - # TODO: maybe move the logger from inside 
the ensemble class to here + # Note: Logging is handled inside the ensemble's generate method as it's + # directly related to the LLM operation and provides better context model_id, sol_diff, prompt_tok, compl_tok = await ensemble.generate(messages=messages) evolve_success = True @@ -275,9 +340,9 @@ async def evolve_loop( if evolve_success: try: logger.info("Attempting to SEARCH/REPLACE...") - child_sol_code: str = apply_diff( + child_sol_code: str = apply_diff_with_fallback( parent_code=parent_sol.code, - diff=sol_diff, + diff_or_text=sol_diff, start_marker=evolve_start_marker, end_marker=evolve_end_marker, ) @@ -313,11 +378,82 @@ async def evolve_loop( ## EVALUATING CHILD PROGRAM evaluator.execute(child_sol) + base_fitness: float = 0 + climate_multiplier: float = 1.0 if child_sol.returncode == 0: - child_sol.fitness = child_sol.eval_metrics[evolve_config["fitness_key"]] + base_fitness = child_sol.eval_metrics[evolve_config["fitness_key"]] + + if climate_cfg.enabled and child_sol.language.lower() == "python": + climate_eval = evaluate_heat_resilience( + code=child_sol.code, + epoch=epoch, + config=climate_cfg, + random_state=sol_db.random_state, + ) + climate_multiplier = climate_eval.fitness_multiplier + child_sol.eval_metrics.update( + { + "climate_alignment": climate_eval.alignment, + "climate_survival_chance": climate_eval.survival_chance, + "climate_multiplier": climate_multiplier, + "climate_season_index": climate_eval.season.index, + "climate_is_hot": 1.0 + if climate_eval.season.climate == "hot" + else 0.0, + "climate_hot_hits": climate_eval.hot_hits, + "climate_cold_hits": climate_eval.cold_hits, + } + ) + logger.info( + ( + "Climate season '%s' (%s) multiplier %.3f | alignment=%.3f," + " hot_hits=%d, cold_hits=%d" + ), + climate_eval.season.name, + climate_eval.season.climate, + climate_multiplier, + climate_eval.alignment, + climate_eval.hot_hits, + climate_eval.cold_hits, + ) + + child_sol.fitness = base_fitness * climate_multiplier child_sol.prog_msg 
= format_prog_msg(prog=child_sol) child_sol.features = child_sol.eval_metrics + competitive_result: Optional[CompetitiveResult] = None + if adversarial_cfg.enabled and child_sol.returncode == 0: + if should_cross_evaluate(epoch, team_name, adversarial_cfg): + opponents: List[Program] = sample_opponents( + registry=global_data.team_registry, + team=team_name, + teams=adversarial_cfg.teams, + max_opponents=adversarial_cfg.opponents_per_eval, + random_state=sol_db.random_state, + ) + + if opponents: + competitive_result = compute_competitive_result( + candidate=child_sol, + opponents=opponents, + base_fitness_key=evolve_config["fitness_key"], + config=adversarial_cfg, + ) + child_sol.eval_metrics["adversarial_win_rate"] = ( + competitive_result.win_rate + ) + child_sol.eval_metrics["adversarial_matches"] = ( + competitive_result.matches + ) + child_sol.eval_metrics["adversarial_rating"] = ( + competitive_result.rating + ) + child_sol.matches += competitive_result.matches + child_sol.rating = competitive_result.rating + child_sol.fitness = competitive_result.fitness + else: + logger.info("Adversarial evaluation enabled but no opponents available.") + if child_sol.fitness > prompt.fitness: logger.info("Child solution improves on parent prompt fitness.") prompt.fitness = child_sol.fitness @@ -358,6 +494,13 @@ async def evolve_loop( logger.info("Adding child_sol to sol_db.") sol_db.add(child_sol) + if adversarial_cfg.enabled: + update_team_registry( + registry=global_data.team_registry, + team=team_name, + candidate=sol_db.programs[sol_db.best_prog_id], + ) + if child_sol.id == sol_db.best_prog_id: logger.info(f"New best program found -> {child_sol.fitness}.") improved_local_fitness = True @@ -527,6 +670,17 @@ async def codeevolve(args: Dict[str, Any], isl_data: IslandData, global_data: Gl config: Dict[Any, Any] = yaml.safe_load(open(args["cfg_path"], "r")) evolve_config = config["EVOLVE_CONFIG"] + adversarial_cfg_raw: Dict[str, Any] = config.get("ADVERSARIAL", {}) + 
default_adv_cfg: AdversarialConfig = AdversarialConfig() + adversarial_cfg: AdversarialConfig = AdversarialConfig( + **{ + field: adversarial_cfg_raw.get( + field, getattr(default_adv_cfg, field) + ) + for field in AdversarialConfig.__dataclass_fields__ + } + ) + team_name: str = isl_data.team or assign_team(isl_data.id, adversarial_cfg.teams) ensemble: LMEnsemble = LMEnsemble( models_cfg=config["ENSEMBLE"], @@ -544,6 +698,24 @@ async def codeevolve(args: Dict[str, Any], isl_data: IslandData, global_data: Gl logger=logger, ) + novel_agent_cfg: Dict[str, Any] = config.get("NOVEL_AGENT", {}) + novel_agent: Optional[NovelAgent] = None + if novel_agent_cfg.get("enabled", False): + assert ( + novel_agent_cfg.get("lm", None) is not None + ), "NOVEL_AGENT.lm must be defined when NOVEL_AGENT.enabled is true." + + novel_agent_lm = OpenAILM( + **novel_agent_cfg["lm"], api_key=args["api_key"], api_base=args["api_base"] + ) + + novel_agent = NovelAgent( + lm=novel_agent_lm, + exploration_rate=novel_agent_cfg.get("exploration_rate", 0.2), + max_inspirations=novel_agent_cfg.get("max_inspirations", 2), + logger=logger, + ) + evaluator: Evaluator = Evaluator( eval_path=Path(config["EVAL_FILE_NAME"]), cwd=args["inpt_dir"], @@ -571,6 +743,13 @@ async def codeevolve(args: Dict[str, Any], isl_data: IslandData, global_data: Gl init_sol: Program = sol_db.programs[sol_db.best_prog_id] init_sol.prompt_id = init_prompt.id + if adversarial_cfg.enabled: + update_team_registry( + registry=global_data.team_registry, + team=team_name, + candidate=init_sol, + ) + else: logger.info("Starting anew.") features: Optional[List[EliteFeature]] = None @@ -635,6 +814,7 @@ async def codeevolve(args: Dict[str, Any], isl_data: IslandData, global_data: Gl ) evaluator.execute(init_sol) + init_sol.fitness = 0.0 if init_sol.returncode == 0: init_sol.fitness = init_sol.eval_metrics[evolve_config["fitness_key"]] @@ -643,12 +823,21 @@ async def codeevolve(args: Dict[str, Any], isl_data: IslandData, global_data: Gl 
sol_db.add(init_sol) + if adversarial_cfg.enabled: + update_team_registry( + registry=global_data.team_registry, + team=team_name, + candidate=init_sol, + ) + logger.info(f"sol_db={sol_db}") logger.info(f"prompt_db={prompt_db}") logger.info(f"ensemble={ensemble}") logger.info(f"prompt_sampler={prompt_sampler}") logger.info(f"evaluator={evaluator}") logger.info(f"embedding={embedding}") + logger.info(f"novel_agent={novel_agent}") + logger.info(f"adversarial_team={team_name}") logger.info(f"init_prog={init_sol}") # UPDATE GLOBAL BEST @@ -682,4 +871,5 @@ async def codeevolve(args: Dict[str, Any], isl_data: IslandData, global_data: Gl evaluator, embedding, logger, + novel_agent, ) diff --git a/src/codeevolve/islands.py b/src/codeevolve/islands.py index 84a8b87..f052637 100644 --- a/src/codeevolve/islands.py +++ b/src/codeevolve/islands.py @@ -10,16 +10,15 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import List, Tuple, Dict, Optional, DefaultDict - -from collections import defaultdict -from dataclasses import dataclass -import threading +import logging import multiprocessing as mp +import multiprocessing.connection as mpc import multiprocessing.sharedctypes as mpsct import multiprocessing.synchronize as mps -import multiprocessing.connection as mpc -import logging +import threading +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, DefaultDict, Dict, List, Optional, Tuple from codeevolve.database import Program @@ -61,6 +60,7 @@ class IslandData: id: int in_neigh: Optional[List[PipeEdge]] out_neigh: Optional[List[PipeEdge]] + team: Optional[str] = None @dataclass @@ -119,6 +119,8 @@ class GlobalData: lock: mps.Lock barrier: mps.Barrier log_queue: mp.Queue + team_registry: Optional[Dict[str, Program]] = None + adversarial_cfg: Optional[Dict[str, Any]] = None def early_stopping_check( @@ -252,7 +254,15 @@ def get_pipe_graph( # migration -## TODO: async 
migration without barriers +# NOTE: Future enhancement - Asynchronous Migration +# Current implementation uses synchronization barriers for migration, which ensures +# consistency but may cause islands to wait for slower ones. Consider implementing: +# - Asynchronous migration without barriers using message queues +# - Lock-free data structures for concurrent access +# - Event-driven migration triggers based on population diversity +# - Adaptive migration intervals per island based on convergence rate +# This would improve throughput by allowing faster islands to continue evolution +# while slower islands catch up, at the cost of increased complexity def send_migrants( diff --git a/src/codeevolve/lm.py b/src/codeevolve/lm.py index bedad7c..a1b948d 100644 --- a/src/codeevolve/lm.py +++ b/src/codeevolve/lm.py @@ -10,19 +10,24 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import Any, Dict, List, Optional, Tuple - import asyncio -from dataclasses import dataclass, field import logging import random -import httpx - +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional, Tuple from uuid import uuid4 +import httpx from openai import AsyncOpenAI -# TODO: classes for open-source LM's executing locally. +# NOTE: Future enhancement - Local LM Support +# To support open-source LMs executing locally, consider implementing: +# - LocalLM class compatible with OpenAI API interface (using libraries like llama-cpp-python, vllm) +# - Support for popular models: Llama, Mistral, CodeLlama, StarCoder, etc. 
+# - Batch processing optimization for local execution +# - GPU memory management and model quantization support +# - Integration with frameworks like HuggingFace Transformers, Ollama +# Current implementation focuses on OpenAI-compatible API endpoints @dataclass diff --git a/src/codeevolve/prompt/sampler.py b/src/codeevolve/prompt/sampler.py index 9450c74..df9aa33 100644 --- a/src/codeevolve/prompt/sampler.py +++ b/src/codeevolve/prompt/sampler.py @@ -10,20 +10,20 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import Dict, List, Tuple, Optional -from collections import deque import logging +from collections import deque +from typing import Dict, List, Optional, Tuple -from codeevolve.lm import OpenAILM from codeevolve.database import Program, ProgramDatabase +from codeevolve.lm import OpenAILM from codeevolve.prompt.template import ( - PROG_TEMPLATE, EVOLVE_PROG_TASK_TEMPLATE, + EVOLVE_PROG_TEMPLATE, EVOLVE_PROG_WINSP_TASK_TEMPLATE, EVOLVE_PROMPT_TASK_TEMPLATE, EVOLVE_PROMPT_TEMPLATE, - EVOLVE_PROG_TEMPLATE, INSP_PROG_TEMPLATE, + PROG_TEMPLATE, ) diff --git a/src/codeevolve/prompt/template.py b/src/codeevolve/prompt/template.py index d384d4f..aa69314 100644 --- a/src/codeevolve/prompt/template.py +++ b/src/codeevolve/prompt/template.py @@ -90,6 +90,37 @@ def exp(a: int, b: int) -> int: >>>>>>> REPLACE """ +NOVEL_AGENT_SYSTEM_PROMPT = """ +You are NovelAgent, a specialist that proposes bold but controlled prompt updates +to encourage exploration in CodeEvolve. You must keep changes confined to the +existing PROMPT-BLOCK sections so they remain compatible with SEARCH/REPLACE +patching. When proposing changes, highlight alternative algorithms, stricter +constraints, or different exploration strategies that could yield qualitatively +new programs while preserving safety and formatting rules. 
+""" + +NOVEL_AGENT_USER_TEMPLATE = """ +CURRENT SYSTEM PROMPT +--------------------- +{prompt} + +LATEST PROGRAM AND RESULTS +-------------------------- +{program} + +INSPIRATIONS +------------- +{inspirations} + +TASK +---- +Produce a SEARCH/REPLACE diff that updates the content inside the PROMPT-BLOCK +markers. Your revisions should push for a novel search direction (e.g., new +heuristics, alternative algorithmic framing, or explicit diversity pressure) +while keeping instructions precise and executable. Do not alter content outside +the PROMPT-BLOCK markers and preserve all existing formatting. +""" + EVOLVE_PROG_WINSP_TASK_TEMPLATE = """ # TASK: CODE EVOLUTION Your goal is to evolve the provided program by modifying specific sections. diff --git a/src/codeevolve/utils/ckpt_utils.py b/src/codeevolve/utils/ckpt_utils.py index 0284143..2a7186d 100644 --- a/src/codeevolve/utils/ckpt_utils.py +++ b/src/codeevolve/utils/ckpt_utils.py @@ -10,10 +10,10 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import Any, Dict, Tuple, Optional import logging -import pickle as pkl import pathlib +import pickle as pkl +from typing import Any, Dict, Optional, Tuple from codeevolve.database import ProgramDatabase diff --git a/src/codeevolve/utils/logging_utils.py b/src/codeevolve/utils/logging_utils.py index 71336b8..0abfe75 100644 --- a/src/codeevolve/utils/logging_utils.py +++ b/src/codeevolve/utils/logging_utils.py @@ -10,21 +10,17 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import Any, Dict, Optional - import logging import multiprocessing as mp -import time -from collections import deque -import re import os import pathlib +import re +import time +from collections import deque +from typing import Any, Dict, Optional from codeevolve.islands import GlobalData -from typing import Optional -import logging - class 
SizeLimitedFormatter(logging.Formatter): """Custom logging formatter that enforces a maximum message size. diff --git a/src/codeevolve/utils/parsing_utils.py b/src/codeevolve/utils/parsing_utils.py index 04e7fb7..cb98c83 100644 --- a/src/codeevolve/utils/parsing_utils.py +++ b/src/codeevolve/utils/parsing_utils.py @@ -11,8 +11,8 @@ # # ===--------------------------------------------------------------------------------------===# -from typing import Dict, Tuple, List import re +from typing import Dict, List, Optional, Tuple class SearchAndReplaceError(Exception): @@ -46,6 +46,112 @@ class EvolveBlockError(Exception): pass +def _strip_markdown_fences(text: str) -> str: + """Remove common Markdown code fences from an LLM response. + + This is intentionally conservative: it only strips a single outermost fenced + code block and leaves inner content untouched. + """ + + s = text.strip() + if s.startswith("```"): + # Drop opening fence line (``` or ```lang) + first_newline = s.find("\n") + if first_newline != -1: + s = s[first_newline + 1 :] + # Drop closing fence if present + if s.rstrip().endswith("```"): + s = s.rstrip() + s = s[:-3] + return s.strip() + + +def extract_evolve_block_contents( + text: str, + start_marker: str = "# EVOLVE-BLOCK-START", + end_marker: str = "# EVOLVE-BLOCK-END", +) -> List[str]: + """Extracts evolve-block contents from arbitrary text. + + Returns a list of contents (without markers). If no blocks exist, returns []. + """ + + evolve_regex: str = rf"\s*{re.escape(start_marker)}\s*\n?(.*?)\n?\s*{re.escape(end_marker)}" + return [m.group(1) for m in re.finditer(evolve_regex, text, re.DOTALL)] + + +def apply_evolve_block_replacement( + parent_code: str, + replacement_text: str, + start_marker: str = "# EVOLVE-BLOCK-START", + end_marker: str = "# EVOLVE-BLOCK-END", +) -> str: + """Fallback: replace evolve-block content using raw replacement text. + + If replacement_text itself contains evolve markers, only the extracted block + contents are used. 
Otherwise the whole replacement_text is inserted into the + first evolve block. + """ + + evolve_regex: str = rf"\s*{re.escape(start_marker)}\s*\n?(.*?)\n?\s*{re.escape(end_marker)}" + evolve_spans: List[Tuple[int, int]] = find_evolve_block_spans( + parent_code=parent_code, evolve_regex=evolve_regex + ) + + cleaned = _strip_markdown_fences(replacement_text) + extracted = extract_evolve_block_contents( + cleaned, start_marker=start_marker, end_marker=end_marker + ) + if not extracted: + # Treat the full response as the evolve-block body. + extracted = [cleaned] + + # If counts match, replace all blocks in order; otherwise replace only the first. + replace_all = len(extracted) == len(evolve_spans) + + child_code_parts: List[str] = [] + last_end: int = 0 + for i, (start, end) in enumerate(evolve_spans): + child_code_parts.append(parent_code[last_end:start]) + if replace_all: + child_code_parts.append(extracted[i]) + else: + child_code_parts.append(extracted[0] if i == 0 else parent_code[start:end]) + last_end = end + child_code_parts.append(parent_code[last_end:]) + return "".join(child_code_parts) + + +def apply_diff_with_fallback( + parent_code: str, + diff_or_text: str, + start_marker: str = "# EVOLVE-BLOCK-START", + end_marker: str = "# EVOLVE-BLOCK-END", + diff_regex: str = r"<{7}\s*SEARCH\s*\n?(.*?)\n?\s*={7}\s*\n?(.*?)\n?\s*>{7}\s*REPLACE", +) -> str: + """Apply SEARCH/REPLACE diffs; if none exist, replace evolve block content. + + This makes the system robust to LLMs that return whole code instead of the + requested diff format. 
+ """ + + try: + return apply_diff( + parent_code=parent_code, + diff=diff_or_text, + start_marker=start_marker, + end_marker=end_marker, + diff_regex=diff_regex, + ) + except DiffError: + return apply_evolve_block_replacement( + parent_code=parent_code, + replacement_text=diff_or_text, + start_marker=start_marker, + end_marker=end_marker, + ) + + def _sanitize_block_content(text: str, start_marker: str, end_marker: str) -> str: """Removes specific start and end marker lines and strips surrounding whitespace. diff --git a/tests/__init__.py b/tests/__init__.py index abe52ee..a231f95 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -8,4 +8,4 @@ # # This file initializes the tests module for CodeEvolve. # -# ===--------------------------------------------------------------------------------------===# \ No newline at end of file +# ===--------------------------------------------------------------------------------------===# diff --git a/tests/test_adversarial.py b/tests/test_adversarial.py new file mode 100644 index 0000000..de6b9cf --- /dev/null +++ b/tests/test_adversarial.py @@ -0,0 +1,74 @@ +# ===--------------------------------------------------------------------------------------===# +# +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 +# +# ===--------------------------------------------------------------------------------------===# + +"""Unit tests for adversarial multi-population helpers.""" + +import random + +from codeevolve.adversarial import ( + AdversarialConfig, + assign_team, + compute_competitive_result, + sample_opponents, + should_cross_evaluate, +) +from codeevolve.database import Program + + +def _mk_prog(pid: str, fitness: float, rating: float = 1000.0) -> Program: + prog = Program(id=pid, code="print('hi')", language="python") + prog.fitness = fitness + prog.rating = rating + prog.eval_metrics = {"score": fitness} + return prog + + +def test_assign_team_round_robin(): + cfg = AdversarialConfig(enabled=True, teams=["red", "blue", "green"]) + assert assign_team(0, cfg.teams) == "red" + assert assign_team(1, cfg.teams) == "blue" + assert assign_team(4, cfg.teams) == "blue" + + +def test_sample_opponents_prefers_rivals(): + registry = { + "red": _mk_prog("r1", 0.1), + "blue": _mk_prog("b1", 0.5), + } + opponents = sample_opponents( + registry, + team="red", + teams=["red", "blue"], + max_opponents=2, + random_state=random.Random(0), + ) + assert len(opponents) == 1 + assert opponents[0].id == "b1" + + +def test_competitive_result_win_rate_and_elo(): + cfg = AdversarialConfig(enabled=True, fitness_metric="hybrid", base_fitness_weight=0.5, elo_k=16) + candidate = _mk_prog("c", fitness=0.8) + opponents = [_mk_prog("o1", fitness=0.4), _mk_prog("o2", fitness=0.4)] + + result = compute_competitive_result(candidate, opponents, base_fitness_key="score", config=cfg) + + assert result.matches == 2 + assert result.win_rate == 1.0 + assert result.rating > candidate.rating + # hybrid fitness blends base fitness and win rate + assert result.fitness > candidate.fitness + + +def test_should_cross_evaluate_with_interval_and_alternation(): + cfg = AdversarialConfig(enabled=True, cross_eval_interval=2, alternating_phases=True, teams=["red", "blue"]) + assert 
should_cross_evaluate(epoch=2, team="red", config=cfg) is False + assert should_cross_evaluate(epoch=2, team="blue", config=cfg) is True + cfg.alternating_phases = False + assert should_cross_evaluate(epoch=4, team="red", config=cfg) is True + diff --git a/tests/test_apply_diff.py b/tests/test_apply_diff.py index 3991e40..ce26bb7 100644 --- a/tests/test_apply_diff.py +++ b/tests/test_apply_diff.py @@ -13,10 +13,11 @@ import pytest from codeevolve.utils.parsing_utils import ( - apply_diff, - SearchAndReplaceError, DiffError, EvolveBlockError, + SearchAndReplaceError, + apply_diff, + apply_diff_with_fallback, ) @@ -258,6 +259,44 @@ def foobar2(x:int): def barfoo2(y:int): return y+6 # EVOLVE-BLOCK-END +""" + ) + + def test_fallback_replaces_evolve_block_when_no_diff_blocks(self): + parent_code = """ +# EVOLVE-BLOCK-START +old_code +# EVOLVE-BLOCK-END +""" + replacement = """ +# EVOLVE-BLOCK-START +new_code +# EVOLVE-BLOCK-END +""" + child_code = apply_diff_with_fallback(parent_code, replacement) + assert ( + child_code + == """ +# EVOLVE-BLOCK-START +new_code +# EVOLVE-BLOCK-END +""" + ) + + def test_fallback_uses_raw_text_when_no_markers(self): + parent_code = """ +# EVOLVE-BLOCK-START +old_code +# EVOLVE-BLOCK-END +""" + replacement = "new_code" + child_code = apply_diff_with_fallback(parent_code, replacement) + assert ( + child_code + == """ +# EVOLVE-BLOCK-START +new_code +# EVOLVE-BLOCK-END """ ) diff --git a/tests/test_climate.py b/tests/test_climate.py new file mode 100644 index 0000000..2b5e71c --- /dev/null +++ b/tests/test_climate.py @@ -0,0 +1,84 @@ +# Part of the CodeEvolve Project, under the Apache License v2.0. +# See https://github.com/inter-co/science-codeevolve/blob/main/LICENSE for license information. 
+# SPDX-License-Identifier: Apache-2.0 + +"""Unit tests for climate-based thermal resilience helpers.""" + +import random + +from codeevolve.climate import ( + ClimateConfig, + assign_thermal_traits, + evaluate_heat_resilience, + season_profile, +) + + +def _wrap_single_call(fn_name: str) -> str: + return f"def foo(xs):\n return {fn_name}(xs)\n" + + +def test_season_rotation_and_climate_flag(): + cfg = ClimateConfig(enabled=True, seasons=["summer", "winter", "spring"], season_length=2) + + summer = season_profile(epoch=1, config=cfg) + winter = season_profile(epoch=3, config=cfg) + + assert summer.name == "summer" + assert summer.climate == "hot" + assert winter.name == "winter" + assert winter.climate == "cold" + + +def test_assign_thermal_traits_is_deterministic_with_seed(): + cfg = ClimateConfig( + enabled=True, + seasons=["dry"], + function_pool=["len", "sum", "min"], + hot_fraction=0.34, + seed=123, + ) + season = season_profile(epoch=1, config=cfg) + + hot_a, cold_a = assign_thermal_traits(season, cfg, random.Random(0)) + hot_b, cold_b = assign_thermal_traits(season, cfg, random.Random(5)) + + assert hot_a == hot_b + assert cold_a == cold_b + assert len(hot_a) == 1 # max(1, hot_fraction * pool_size) + + +def test_heat_resilience_rewards_alignment_per_season(): + cfg = ClimateConfig( + enabled=True, + seasons=["hot", "cold"], + season_length=1, + function_pool=["len", "sum", "min", "max"], + hot_fraction=0.5, + survival_weight=0.5, + neutral_baseline=0.5, + seed=99, + ) + + hot_traits, cold_traits = assign_thermal_traits( + season_profile(epoch=1, config=cfg), cfg, random.Random(0) + ) + hot_favored = next(iter(hot_traits)) + hot_eval = evaluate_heat_resilience( + code=_wrap_single_call(hot_favored), epoch=1, config=cfg, random_state=random.Random(1) + ) + + assert hot_eval.survival_chance > cfg.neutral_baseline + assert hot_eval.fitness_multiplier > 1 + + cold_traits_epoch2 = assign_thermal_traits( + season_profile(epoch=2, config=cfg), cfg, random.Random(0) 
+ )[1] + cold_favored = next(iter(cold_traits_epoch2)) + cold_eval = evaluate_heat_resilience( + code=_wrap_single_call(cold_favored), epoch=2, config=cfg, random_state=random.Random(1) + ) + + assert cold_eval.survival_chance > cfg.neutral_baseline + assert cold_eval.fitness_multiplier > 1 + diff --git a/tests/test_evaluator_batch.py b/tests/test_evaluator_batch.py new file mode 100644 index 0000000..49fe8ba --- /dev/null +++ b/tests/test_evaluator_batch.py @@ -0,0 +1,36 @@ +import asyncio +import time + +from codeevolve.database import Program +from codeevolve.evaluator import Evaluator + + +def test_evaluate_batch_runs_programs_in_parallel(): + # Use a lightweight evaluator and monkeypatch execute to avoid subprocess calls. + evaluator = Evaluator( + eval_path="/dev/null", cwd=None, timeout_s=1, max_mem_b=None, mem_check_interval_s=None + ) + + # Create a few dummy programs to evaluate. + programs = [ + Program(id=f"prog-{idx}", code="", language="python") for idx in range(3) + ] + + async def run_batch(): + # Simulate work that takes time to help detect parallel execution. + def fake_execute(prog: Program): + time.sleep(0.1) + prog.eval_metrics["finished"] = True + + evaluator.execute = fake_execute # type: ignore[assignment] + + start = time.perf_counter() + await evaluator.evaluate_batch(programs, max_workers=2) + return time.perf_counter() - start + + duration = asyncio.run(run_batch()) + + # Two workers processing three ~0.1s tasks should complete in comfortably + # under 0.3s if execution overlaps. + assert duration < 0.3 + assert all("finished" in program.eval_metrics for program in programs)