33 changes: 31 additions & 2 deletions README.md
@@ -354,6 +354,29 @@ llm:
model: "moa&readurls-o3" # Test-time compute + web access
```

</details>
<details>
<summary><b>🕵 Aliyun BaiLian (CoT analysis)</b></summary>
The `enable_thinking` parameter can only be enabled on individual entries in the `models` list.

```yaml
# config.yaml
llm:
api_base: "https://dashscope.aliyuncs.com/compatible-mode/v1"
thinking_budget: 2048
models:
- name: "qwen-flash"
weight: 0.5
enable_thinking: false
- name: "qwen-plus"
weight: 0.5
enable_thinking: true
```

```bash
export OPENAI_API_KEY="your-bailian-api-key"
```

</details>

## Examples Gallery
@@ -450,12 +473,18 @@ random_seed: 42 # Full reproducibility

llm:
# Ensemble configuration
api_base: "https://dashscope.aliyuncs.com/compatible-mode/v1"
thinking_budget: 2048 # Budget for thinking models; only takes effect when enable_thinking is true
models:
- name: "gemini-2.5-pro"
- name: "qwen-plus"
weight: 0.6
- name: "gemini-2.5-flash"
enable_thinking: true # Important: if the API provider does not support this parameter, setting it will cause requests to fail!
- name: "qwen-flash"
weight: 0.4
enable_thinking: false
temperature: 0.7
max_tokens: 16384
timeout: 300

database:
# MAP-Elites quality-diversity
15 changes: 13 additions & 2 deletions openevolve/config.py
@@ -72,13 +72,17 @@ class LLMModelConfig:
timeout: int = None
retries: int = None
retry_delay: int = None
thinking_budget: Optional[int] = None

# Reproducibility
random_seed: Optional[int] = None

# Reasoning parameters
reasoning_effort: Optional[str] = None

# enable_thinking (most used in Chinese providers)
enable_thinking: Optional[bool] = None

def __post_init__(self):
"""Post-initialization to resolve ${VAR} env var references in api_key"""
self.api_key = _resolve_env_var(self.api_key)
@@ -96,6 +100,7 @@ class LLMConfig(LLMModelConfig):
temperature: float = 0.7
top_p: float = 0.95
max_tokens: int = 4096
thinking_budget: Optional[int] = None

# Request parameters
timeout: int = 60
@@ -125,7 +130,8 @@ def __post_init__(self):
if self.primary_model:
# Create primary model
primary_model = LLMModelConfig(
name=self.primary_model, weight=self.primary_model_weight or 1.0
name=self.primary_model,
weight=self.primary_model_weight or 1.0,
)
self.models.append(primary_model)

@@ -171,6 +177,8 @@ def __post_init__(self):
"retry_delay": self.retry_delay,
"random_seed": self.random_seed,
"reasoning_effort": self.reasoning_effort,
"enable_thinking": self.enable_thinking,
"thinking_budget": self.thinking_budget,
}
self.update_model_params(shared_config)

@@ -191,7 +199,8 @@ def rebuild_models(self) -> None:
if self.primary_model:
# Create primary model
primary_model = LLMModelConfig(
name=self.primary_model, weight=self.primary_model_weight or 1.0
name=self.primary_model,
weight=self.primary_model_weight or 1.0,
)
self.models.append(primary_model)

@@ -224,6 +233,8 @@ def rebuild_models(self) -> None:
"retry_delay": self.retry_delay,
"random_seed": self.random_seed,
"reasoning_effort": self.reasoning_effort,
"enable_thinking": self.enable_thinking,
"thinking_budget": self.thinking_budget,
}
self.update_model_params(shared_config)

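The `shared_config` dictionaries above forward the new `enable_thinking` and `thinking_budget` fields to `update_model_params`, whose body is outside this diff. Below is a minimal sketch of the assumed behaviour, namely that ensemble-level values only fill in fields a model entry left unset; `ModelCfg` and the free-standing `update_model_params` are illustrative stand-ins, not the project's actual API.

```python
from dataclasses import dataclass
from typing import Any, Dict, List, Optional


@dataclass
class ModelCfg:
    """Illustrative stand-in for LLMModelConfig."""
    name: str
    weight: float = 1.0
    enable_thinking: Optional[bool] = None
    thinking_budget: Optional[int] = None


def update_model_params(models: List[ModelCfg], shared: Dict[str, Any]) -> None:
    """Copy ensemble-level defaults onto models without overriding per-model values."""
    for model in models:
        for key, value in shared.items():
            if value is not None and getattr(model, key, None) is None:
                setattr(model, key, value)


models = [
    ModelCfg("qwen-plus", 0.5, enable_thinking=True),
    ModelCfg("qwen-flash", 0.5, enable_thinking=False),
]
update_model_params(models, {"enable_thinking": None, "thinking_budget": 2048})
# qwen-plus keeps enable_thinking=True and inherits thinking_budget=2048;
# qwen-flash keeps enable_thinking=False.
```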
11 changes: 8 additions & 3 deletions openevolve/database.py
@@ -1002,17 +1002,17 @@ def _llm_judge_novelty(self, program: Program, similar_program: Program) -> bool
messages=[{"role": "user", "content": user_msg}],
),
)
content: str = future.result()
content, _reasoning = future.result()
except RuntimeError:
# No event loop running, safe to use asyncio.run()
content: str = asyncio.run(
content, _reasoning = asyncio.run(
self.novelty_llm.generate_with_context(
system_message=NOVELTY_SYSTEM_MSG,
messages=[{"role": "user", "content": user_msg}],
)
)

if content is None or content is None:
if content is None:
logger.warning("Novelty LLM returned empty response")
return True

@@ -2519,6 +2519,7 @@ def log_prompt(
template_key: str,
prompt: Dict[str, str],
responses: Optional[List[str]] = None,
reasonings: Optional[List[str]] = None,
) -> None:
"""
Log a prompt for a program.
@@ -2538,6 +2539,10 @@ def log_prompt(
responses = []
prompt["responses"] = responses

# Optionally store model reasoning traces
if reasonings:
prompt["reasonings"] = reasonings

if self.prompts_by_program is None:
self.prompts_by_program = {}

9 changes: 5 additions & 4 deletions openevolve/iteration.py
@@ -77,25 +77,25 @@ async def run_iteration_with_shared_db(
iteration_start = time.time()

# Generate code modification
llm_response = await llm_ensemble.generate_with_context(
llm_content, llm_reasoning = await llm_ensemble.generate_with_context(
system_message=prompt["system"],
messages=[{"role": "user", "content": prompt["user"]}],
)

# Parse the response
if config.diff_based_evolution:
diff_blocks = extract_diffs(llm_response, config.diff_pattern)
diff_blocks = extract_diffs(llm_content, config.diff_pattern)

if not diff_blocks:
logger.warning(f"Iteration {iteration+1}: No valid diffs found in response")
return None

# Apply the diffs
child_code = apply_diff(parent.code, llm_response, config.diff_pattern)
child_code = apply_diff(parent.code, llm_content, config.diff_pattern)
changes_summary = format_diff_summary(diff_blocks)
else:
# Parse full rewrite
new_code = parse_full_rewrite(llm_response, config.language)
new_code = parse_full_rewrite(llm_content, config.language)

if not new_code:
logger.warning(f"Iteration {iteration+1}: No valid code found in response")
@@ -141,6 +141,7 @@ async def run_iteration_with_shared_db(
"system": prompt["system"],
"user": prompt["user"],
"responses": [llm_response] if llm_response is not None else [],
"reasonings": [llm_reasoning] if llm_reasoning is not None else [],
}
}
if database.config.log_prompts
4 changes: 2 additions & 2 deletions openevolve/llm/base.py
@@ -10,13 +10,13 @@ class LLMInterface(ABC):
"""Abstract base class for LLM interfaces"""

@abstractmethod
async def generate(self, prompt: str, **kwargs) -> str:
async def generate(self, prompt: str, **kwargs) -> tuple[str, Optional[str]]:
"""Generate text from a prompt"""
pass

@abstractmethod
async def generate_with_context(
self, system_message: str, messages: List[Dict[str, str]], **kwargs
) -> str:
) -> tuple[str, Optional[str]]:
"""Generate text using a system message and conversational context"""
pass
14 changes: 7 additions & 7 deletions openevolve/llm/ensemble.py
@@ -55,14 +55,14 @@ def __init__(self, models_cfg: List[LLMModelConfig]):
)
logger._ensemble_logged = True

async def generate(self, prompt: str, **kwargs) -> str:
async def generate(self, prompt: str, **kwargs) -> Tuple[str, Optional[str]]:
"""Generate text using a randomly selected model based on weights"""
model = self._sample_model()
return await model.generate(prompt, **kwargs)

async def generate_with_context(
self, system_message: str, messages: List[Dict[str, str]], **kwargs
) -> str:
) -> Tuple[str, Optional[str]]:
"""Generate text using a system message and conversational context"""
model = self._sample_model()
return await model.generate_with_context(system_message, messages, **kwargs)
@@ -74,21 +74,21 @@ def _sample_model(self) -> LLMInterface:
logger.info(f"Sampled model: {vars(sampled_model)['model']}")
return sampled_model

async def generate_multiple(self, prompt: str, n: int, **kwargs) -> List[str]:
async def generate_multiple(self, prompt: str, n: int, **kwargs) -> List[Tuple[str, Optional[str]]]:
"""Generate multiple texts in parallel"""
tasks = [self.generate(prompt, **kwargs) for _ in range(n)]
return await asyncio.gather(*tasks)

async def parallel_generate(self, prompts: List[str], **kwargs) -> List[str]:
async def parallel_generate(self, prompts: List[str], **kwargs) -> List[Tuple[str, Optional[str]]]:
"""Generate responses for multiple prompts in parallel"""
tasks = [self.generate(prompt, **kwargs) for prompt in prompts]
return await asyncio.gather(*tasks)

async def generate_all_with_context(
self, system_message: str, messages: List[Dict[str, str]], **kwargs
) -> str:
"""Generate text using a all available models and average their returned metrics"""
responses = []
) -> List[Tuple[str, Optional[str]]]:
"""Generate text using all available models and collect their responses"""
responses: List[Tuple[str, Optional[str]]] = []
for model in self.models:
responses.append(await model.generate_with_context(system_message, messages, **kwargs))
return responses
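With these signature changes, every `generate*` helper yields `(content, reasoning)` pairs instead of bare strings, so call sites unpack the tuple, as `iteration.py` does above. A hedged usage sketch follows; the ensemble construction is omitted and a configured `LLMEnsemble` instance is assumed.

```python
async def demo(ensemble) -> None:
    # Single completion: content is the answer text, reasoning is the optional
    # thinking trace (None for models that do not emit one).
    content, reasoning = await ensemble.generate_with_context(
        system_message="You are a code-evolution assistant.",
        messages=[{"role": "user", "content": "Suggest an optimization."}],
    )
    print(content)
    if reasoning is not None:
        print("reasoning trace:", reasoning[:200])

    # Batched helpers now return lists of (content, reasoning) tuples.
    results = await ensemble.generate_multiple("Summarize the change.", n=3)
    for text, trace in results:
        print(text, trace is not None)
```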
54 changes: 47 additions & 7 deletions openevolve/llm/openai.py
@@ -34,6 +34,8 @@ def __init__(
self.api_key = model_cfg.api_key
self.random_seed = getattr(model_cfg, "random_seed", None)
self.reasoning_effort = getattr(model_cfg, "reasoning_effort", None)
self.enable_thinking = getattr(model_cfg, "enable_thinking", None)
self.thinking_budget = getattr(model_cfg, "thinking_budget", None)

# Set up API client
# OpenAI client requires max_retries to be int, not None
@@ -53,7 +55,7 @@ def __init__(
logger.info(f"Initialized OpenAI LLM with model: {self.model}")
logger._initialized_models.add(self.model)

async def generate(self, prompt: str, **kwargs) -> str:
async def generate(self, prompt: str, **kwargs) -> tuple[str, Optional[str]]:
"""Generate text from a prompt"""
return await self.generate_with_context(
system_message=self.system_message,
Expand All @@ -63,7 +65,7 @@ async def generate(self, prompt: str, **kwargs) -> str:

async def generate_with_context(
self, system_message: str, messages: List[Dict[str, str]], **kwargs
) -> str:
) -> tuple[str, Optional[str]]:
"""Generate text using a system message and conversational context"""
# Prepare messages with system message
formatted_messages = [{"role": "system", "content": system_message}]
@@ -120,6 +122,30 @@ async def generate_with_context(
if reasoning_effort is not None:
params["reasoning_effort"] = reasoning_effort

# Attach provider-specific extras such as enable_thinking/thinking_budget
extra_body = dict(kwargs.get("extra_body") or {})
enable_thinking = kwargs.get("enable_thinking", self.enable_thinking)
thinking_budget = kwargs.get("thinking_budget", self.thinking_budget)

if enable_thinking is not None:
extra_body.setdefault("enable_thinking", enable_thinking)
if thinking_budget is not None:
extra_body.setdefault("thinking_budget", thinking_budget)
else:
# Warn once per model if thinking_budget is provided without enable_thinking
if thinking_budget is not None:
if not hasattr(OpenAILLM, "_warned_budget_without_thinking"):
OpenAILLM._warned_budget_without_thinking = set()
if self.model not in OpenAILLM._warned_budget_without_thinking:
logger.warning(
"thinking_budget set for model %s without enable_thinking; skipping extra_body",
self.model,
)
OpenAILLM._warned_budget_without_thinking.add(self.model)

if extra_body:
params["extra_body"] = extra_body

# Add seed parameter for reproducibility if configured
# Skip seed parameter for Google AI Studio endpoint as it doesn't support it
seed = kwargs.get("seed", self.random_seed)
@@ -139,8 +165,8 @@

for attempt in range(retries + 1):
try:
response = await asyncio.wait_for(self._call_api(params), timeout=timeout)
return response
content, reasoning_content = await asyncio.wait_for(self._call_api(params), timeout=timeout)
return content, reasoning_content
except asyncio.TimeoutError:
if attempt < retries:
logger.warning(f"Timeout on attempt {attempt + 1}/{retries + 1}. Retrying...")
@@ -158,15 +184,29 @@ async def generate_with_context(
logger.error(f"All {retries + 1} attempts failed with error: {str(e)}")
raise

async def _call_api(self, params: Dict[str, Any]) -> str:
# Safety net to satisfy type checkers; the loop above always returns or raises
raise RuntimeError("Failed to generate completion after retries")

async def _call_api(self, params: Dict[str, Any]) -> tuple[str, Optional[str]]:
"""Make the actual API call"""
# Use asyncio to run the blocking API call in a thread pool
loop = asyncio.get_event_loop()
response = await loop.run_in_executor(
None, lambda: self.client.chat.completions.create(**params)
)
message = response.choices[0].message

content = message.content
# Logging of system prompt, user message and response content
logger = logging.getLogger(__name__)
logger.debug(f"API parameters: {params}")
logger.debug(f"API response: {response.choices[0].message.content}")
return response.choices[0].message.content
logger.debug(f"API response: {content}")

# Extract reasoning content if available
reasoning_content = None
if hasattr(message, "reasoning_content"):
logger.debug(f"API reasoning content: {message.reasoning_content}")
reasoning_content = message.reasoning_content

return content, reasoning_content
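For reference, a standalone request against the BaiLian endpoint would look roughly like the sketch below. It mirrors what `_call_api` does after this change; the `extra_body` keys and the `reasoning_content` attribute are provider-specific assumptions carried over from this PR, not part of the standard OpenAI API.

```python
import os

from openai import OpenAI

client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],  # your BaiLian API key
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

response = client.chat.completions.create(
    model="qwen-plus",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Why is the sky blue?"},
    ],
    # Provider-specific switches travel in extra_body, mirroring the patch above.
    extra_body={"enable_thinking": True, "thinking_budget": 2048},
)

message = response.choices[0].message
content = message.content
# Not all providers or models attach a reasoning trace; read it defensively.
reasoning = getattr(message, "reasoning_content", None)
print(content)
print(reasoning)
```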