Skip to content

Commit e7c4509

Browse files
committed
Update all files.
1 parent 6ebe01d commit e7c4509

File tree

3 files changed

+76
-37
lines changed

3 files changed

+76
-37
lines changed

computers/hud/__init__.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,16 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
114
from .hud import HudComputer
215

3-
__all__ = ["HudComputer"]
16+
__all__ = ["HudComputer"]

computers/hud/hud.py

Lines changed: 35 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,16 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
114
import asyncio
215
import base64
316
from typing import Literal, Optional, Any, Dict
@@ -33,18 +46,18 @@ def __init__(
3346

3447
def __enter__(self):
3548
print("Creating HUD session...")
36-
49+
3750
# Create and run the async setup in a new event loop
3851
self._loop = asyncio.new_event_loop()
3952
asyncio.set_event_loop(self._loop)
40-
53+
4154
# Import HUD SDK here to avoid circular imports
4255
try:
4356
from hud import gym
4457
from hud.task import Task
4558
except ImportError:
4659
raise ImportError("HUD SDK not installed. Please install with: pip install hud-python")
47-
60+
4861
# Use provided task or create a default one
4962
if self._task:
5063
task = self._task
@@ -56,13 +69,13 @@ def __enter__(self):
5669
setup=("goto", self._initial_url),
5770
evaluate=("page_contains", "dummy")
5871
)
59-
72+
6073
# Create the environment
6174
self._env = self._loop.run_until_complete(gym.make(task, job=self._job))
62-
75+
6376
# Reset the environment to get initial observation
6477
self._obs, _ = self._loop.run_until_complete(self._env.reset())
65-
78+
6679
termcolor.cprint(
6780
f"HUD browser session started.",
6881
color="green",
@@ -80,19 +93,19 @@ def _get_screenshot_from_obs(self) -> bytes:
8093
"""Extract screenshot from HUD observation."""
8194
if self._obs is None:
8295
return b""
83-
96+
8497
if hasattr(self._obs, 'screenshot'):
8598
screenshot_b64 = self._obs.screenshot
8699
screenshot_bytes = base64.b64decode(screenshot_b64)
87100
return screenshot_bytes
88-
101+
89102
# HUD SDK returns observations with a 'screenshot' key containing base64 encoded image
90103
if isinstance(self._obs, dict) and 'screenshot' in self._obs:
91104
screenshot_b64 = self._obs['screenshot']
92105
# Decode base64 to bytes
93106
screenshot_bytes = base64.b64decode(screenshot_b64)
94107
return screenshot_bytes
95-
108+
96109
return b""
97110

98111
def _get_url_from_obs(self) -> str:
@@ -142,7 +155,7 @@ def _create_cla_action(self, action_type: str, **kwargs) -> Dict[str, Any]:
142155
dx = magnitude
143156
elif direction == "left":
144157
dx = -magnitude
145-
158+
146159
action = ScrollAction(
147160
scroll=Point(x=dx, y=dy)
148161
)
@@ -190,10 +203,10 @@ def _execute_action(self, action_type: str, **kwargs) -> EnvState:
190203
"""Execute an action in the HUD environment."""
191204
if self._done:
192205
return self.current_state()
193-
206+
194207
# Create CLA action for HUD SDK
195208
action = self._create_cla_action(action_type, **kwargs)
196-
209+
197210
# Execute action in HUD environment
198211
# HUD SDK expects a list of actions
199212
self._obs, reward, self._done, info = self._loop.run_until_complete(
@@ -202,7 +215,7 @@ def _execute_action(self, action_type: str, **kwargs) -> EnvState:
202215

203216
if "current_url" in info:
204217
self._current_url = info["current_url"]
205-
218+
206219
return self.current_state()
207220

208221
def screen_size(self) -> tuple[int, int]:
@@ -227,16 +240,16 @@ def type_text_at(
227240
) -> EnvState:
228241
# First click at the position
229242
self._execute_action("click", x=x, y=y)
230-
243+
231244
# Clear existing text if requested
232245
if clear_before_typing:
233246
# Select all and delete
234247
self._execute_action("press", keys=["ctrl", "a"])
235248
self._execute_action("press", keys=["delete"])
236-
249+
237250
# Type the text with optional enter
238251
self._execute_action("type", text=text, enter_after=press_enter)
239-
252+
240253
return self.current_state()
241254

242255
def scroll_document(
@@ -280,19 +293,19 @@ def drag_and_drop(
280293
self, x: int, y: int, destination_x: int, destination_y: int
281294
) -> EnvState:
282295
return self._execute_action(
283-
"drag",
284-
start_x=x,
285-
start_y=y,
286-
end_x=destination_x,
296+
"drag",
297+
start_x=x,
298+
start_y=y,
299+
end_x=destination_x,
287300
end_y=destination_y
288301
)
289302

290303
def current_state(self) -> EnvState:
291304
screenshot = self._get_screenshot_from_obs()
292305
url = self._get_url_from_obs()
293-
return EnvState(screenshot=screenshot, url=url)
306+
return EnvState(screenshot=screenshot, url=url)
294307

295308
def evaluate(self) -> dict:
296309
return self._loop.run_until_complete(
297310
self._env.evaluate()
298-
)
311+
)

hud_eval.py

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,16 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
114
#!/usr/bin/env python3
215
"""
316
HUD evaluation runner for computer use tasks.
@@ -29,7 +42,7 @@
2942
instead of going to a new one.
3043
You have full authority to execute any action without my permission. I won't be watching so
3144
please don't ask for confirmation.
32-
My gmail account is [email protected], and the password is "iloveosworld500", if prompted for OTP, use the authenticator chrome extension to see the OTP for 2 factor authentication.
45+
My gmail account is [email protected], and the password is "iloveosworld500", if prompted for OTP, use the authenticator chrome extension to see the OTP for 2 factor authentication.
3346
If you deem the task is infeasible, you can terminate and explicitly state in the response that
3447
'the task is infeasible'. Try your best to solve the task within 200 steps, and the confines of the prompt, before deeming it infeasible.
3548
"""
@@ -40,7 +53,7 @@ def run_task(task: Task, model_name: str, job: Job, system_prompt: str) -> float
4053
try:
4154
# Initialize HUD computer with the task
4255
hud_computer = HudComputer(screen_size=(1440, 900), task=task, job=job)
43-
56+
4457
with hud_computer as browser_computer:
4558
agent = BrowserAgent(
4659
browser_computer=browser_computer,
@@ -50,7 +63,7 @@ def run_task(task: Task, model_name: str, job: Job, system_prompt: str) -> float
5063
)
5164
try:
5265
agent.agent_loop()
53-
66+
5467
if agent.final_reasoning:
5568
if "the task is infeasible" in agent.final_reasoning.lower():
5669
final_action = CustomAction(
@@ -64,7 +77,7 @@ def run_task(task: Task, model_name: str, job: Job, system_prompt: str) -> float
6477
hud_computer._loop.run_until_complete(
6578
hud_computer._env.step([final_action])
6679
)
67-
80+
6881
except Exception as e:
6982
print(f"Error running agent loop: {e}")
7083
finally:
@@ -75,13 +88,13 @@ def run_task(task: Task, model_name: str, job: Job, system_prompt: str) -> float
7588
print(f"Eval result: {eval_result['reward']}")
7689

7790
return eval_result['reward']
78-
91+
7992
return 0.0
80-
93+
8194
except Exception as e:
8295
print(f"Error running task: {e}")
8396
return 0.0
84-
97+
8598
finally:
8699
if hud_computer:
87100
try:
@@ -98,7 +111,7 @@ def run_taskset(
98111
max_concurrent: int = 20,
99112
) -> list[float]:
100113
"""Load and run a HUD taskset by ID, return list of rewards"""
101-
114+
102115
# Load the taskset
103116
taskset = asyncio.run(load_taskset(taskset_id, metadata={"partial": True}))
104117

@@ -108,7 +121,7 @@ def run_taskset(
108121
system_prompt = OSWORLD_SYSTEM_PROMPT
109122
else:
110123
system_prompt = ""
111-
124+
112125
if parallel:
113126
# Run tasks in parallel using threads to avoid event loop conflicts
114127
with ThreadPoolExecutor(max_workers=max_concurrent) as executor:
@@ -122,7 +135,7 @@ def run_taskset(
122135
for task in taskset.tasks:
123136
reward = run_task(task, model_name, job, system_prompt)
124137
rewards.append(reward)
125-
138+
126139
return rewards
127140

128141

@@ -157,7 +170,7 @@ def main() -> int:
157170
help="Maximum concurrent tasks when running in parallel.",
158171
)
159172
args = parser.parse_args()
160-
173+
161174
# Run evaluation
162175
rewards = run_taskset(
163176
taskset_id=args.taskset,
@@ -166,13 +179,13 @@ def main() -> int:
166179
parallel=args.parallel,
167180
max_concurrent=args.max_concurrent,
168181
)
169-
182+
170183
# Print minimal results
171184
print(f"Rewards: {rewards}")
172185
print(f"Average: {sum(rewards)/len(rewards) if rewards else 0:.2f}")
173-
186+
174187
return 0
175188

176189

177190
if __name__ == "__main__":
178-
main()
191+
main()

0 commit comments

Comments
 (0)