Skip to content

Commit 318ac59

Browse files
Implement v2 actions on Cloud Run.
1 parent e23bc49 commit 318ac59

File tree

8 files changed

+343
-85
lines changed

8 files changed

+343
-85
lines changed

apiserver/commands.py

Lines changed: 32 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,23 +24,39 @@ class NavigateArgs(BaseModel):
2424

2525

2626
class ClickAtArgs(BaseModel):
27-
y: int
2827
x: int
28+
y: int
2929

3030

3131
class HoverAtArgs(BaseModel):
32-
y: int
3332
x: int
33+
y: int
3434

3535

3636
class TypeTextAtArgs(BaseModel):
37-
y: int
3837
x: int
38+
y: int
3939
text: str
40+
press_enter: bool
41+
clear_before_typing: bool
4042

4143

4244
class ScrollDocumentArgs(BaseModel):
43-
direction: Literal["up"] | Literal["down"]
45+
direction: Literal["up", "down", "left", "right"]
46+
47+
48+
class ScrollAtArgs(BaseModel):
49+
x: int
50+
y: int
51+
direction: Literal["up", "down", "left", "right"]
52+
magnitude: int
53+
54+
55+
class DragAndDropArgs(BaseModel):
56+
x: int
57+
y: int
58+
destination_x: int
59+
destination_y: int
4460

4561

4662
class KeyCombinationArgs(BaseModel):
@@ -72,6 +88,16 @@ class ScrollDocument(BaseModel):
7288
args: ScrollDocumentArgs
7389

7490

91+
class ScrollAt(BaseModel):
92+
name: Literal["scroll_at"]
93+
args: ScrollAtArgs
94+
95+
96+
class DragAndDrop(BaseModel):
97+
name: Literal["drag_and_drop"]
98+
args: DragAndDropArgs
99+
100+
75101
class Wait5Seconds(BaseModel):
76102
name: Literal["wait_5_seconds"]
77103
args: Optional[EmptyJson] = Field(None)
@@ -120,6 +146,8 @@ class Shutdown(BaseModel):
120146
HoverAt,
121147
TypeTextAt,
122148
ScrollDocument,
149+
ScrollAt,
150+
DragAndDrop,
123151
GoBack,
124152
GoForward,
125153
Search,

apiserver/test_app.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,12 @@ def test_get_command_timeout_standard_command():
114114

115115
def test_get_command_timeout_type_text_at_command():
116116
text = "hello"
117-
command = TypeTextAt(name="type_text_at", args=TypeTextAtArgs(y=0, x=0, text=text))
117+
command = TypeTextAt(
118+
name="type_text_at",
119+
args=TypeTextAtArgs(
120+
y=0, x=0, text=text, press_enter=False, clear_before_typing=True
121+
),
122+
)
118123
base_timeout = 10
119124
key_delay = 0.1
120125
expected_timeout = base_timeout + (len(text) * key_delay)
@@ -124,7 +129,12 @@ def test_get_command_timeout_type_text_at_command():
124129

125130
def test_get_command_timeout_type_text_at_command_zero_delay():
126131
text = "world"
127-
command = TypeTextAt(name="type_text_at", args=TypeTextAtArgs(y=0, x=0, text=text))
132+
command = TypeTextAt(
133+
name="type_text_at",
134+
args=TypeTextAtArgs(
135+
y=0, x=0, text=text, press_enter=False, clear_before_typing=True
136+
),
137+
)
128138
base_timeout = 20
129139
key_delay = 0.0
130140
expected_timeout = base_timeout

apiserver/test_commands.py

Lines changed: 70 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919
HoverAt,
2020
TypeTextAt,
2121
ScrollDocument,
22+
ScrollAt,
23+
DragAndDrop,
2224
GoBack,
2325
GoForward,
2426
Search,
@@ -38,21 +40,27 @@ def test_navigate(self):
3840
self.assertEqual(command.model_dump(), function_call)
3941

4042
def test_click_at(self):
41-
function_call = {"name": "click_at", "args": {"y": 1, "x": 2}}
43+
function_call = {"name": "click_at", "args": {"x": 2, "y": 1}}
4244
command = CommandModel.model_validate(function_call)
4345
self.assertIsInstance(command.root, ClickAt)
4446
self.assertEqual(command.model_dump(), function_call)
4547

4648
def test_hover_at(self):
47-
function_call = {"name": "hover_at", "args": {"y": 1, "x": 2}}
49+
function_call = {"name": "hover_at", "args": {"x": 2, "y": 1}}
4850
command = CommandModel.model_validate(function_call)
4951
self.assertIsInstance(command.root, HoverAt)
5052
self.assertEqual(command.model_dump(), function_call)
5153

5254
def test_type_text_at(self):
5355
function_call = {
5456
"name": "type_text_at",
55-
"args": {"y": 1, "x": 2, "text": "one"},
57+
"args": {
58+
"x": 2,
59+
"y": 1,
60+
"text": "one",
61+
"press_enter": True,
62+
"clear_before_typing": False,
63+
},
5664
}
5765
command = CommandModel.model_validate(
5866
function_call,
@@ -62,7 +70,13 @@ def test_type_text_at(self):
6270
command.model_dump(),
6371
{
6472
"name": "type_text_at",
65-
"args": {"y": 1, "x": 2, "text": "one"},
73+
"args": {
74+
"x": 2,
75+
"y": 1,
76+
"text": "one",
77+
"press_enter": True,
78+
"clear_before_typing": False,
79+
},
6680
},
6781
)
6882

@@ -77,6 +91,58 @@ def test_scroll_document(self):
7791
self.assertIsInstance(command.root, ScrollDocument)
7892
self.assertEqual(command.model_dump(), function_call)
7993

94+
function_call = {
95+
"name": "scroll_document",
96+
"args": {"direction": "down"},
97+
}
98+
command = CommandModel.model_validate(
99+
function_call,
100+
)
101+
self.assertIsInstance(command.root, ScrollDocument)
102+
self.assertEqual(command.model_dump(), function_call)
103+
104+
function_call = {
105+
"name": "scroll_document",
106+
"args": {"direction": "left"},
107+
}
108+
command = CommandModel.model_validate(
109+
function_call,
110+
)
111+
self.assertIsInstance(command.root, ScrollDocument)
112+
self.assertEqual(command.model_dump(), function_call)
113+
114+
function_call = {
115+
"name": "scroll_document",
116+
"args": {"direction": "right"},
117+
}
118+
command = CommandModel.model_validate(
119+
function_call,
120+
)
121+
self.assertIsInstance(command.root, ScrollDocument)
122+
self.assertEqual(command.model_dump(), function_call)
123+
124+
def test_scroll_at(self):
125+
function_call = {
126+
"name": "scroll_at",
127+
"args": {"x": 1, "y": 2, "direction": "up", "magnitude": 10},
128+
}
129+
command = CommandModel.model_validate(
130+
function_call,
131+
)
132+
self.assertIsInstance(command.root, ScrollAt)
133+
self.assertEqual(command.model_dump(), function_call)
134+
135+
def test_drag_and_drop(self):
136+
function_call = {
137+
"name": "drag_and_drop",
138+
"args": {"x": 1, "y": 2, "destination_x": 3, "destination_y": 4},
139+
}
140+
command = CommandModel.model_validate(
141+
function_call,
142+
)
143+
self.assertIsInstance(command.root, DragAndDrop)
144+
self.assertEqual(command.model_dump(), function_call)
145+
80146
def test_go_back(self):
81147
function_call = {"name": "go_back", "args": {}}
82148
command = CommandModel.model_validate(function_call)

computers/cloud_run/cloud_run.py

Lines changed: 38 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
import base64
1515
import termcolor
1616
import time
17-
from typing import Any, Optional
17+
from typing import Any, Optional, Literal
1818
import requests
1919
from urllib.parse import urljoin
2020
from ..computer import (
@@ -110,25 +110,46 @@ def _run_command(
110110
def open_web_browser(self) -> EnvState:
111111
return self._run_command("open_web_browser")
112112

113-
def click_at(self, y, x):
113+
def click_at(self, x: int, y: int) -> EnvState:
114114
return self._run_command("click_at", args={"x": x, "y": y})
115115

116-
def hover_at(self, y, x):
116+
def hover_at(self, x: int, y: int) -> EnvState:
117117
return self._run_command("hover_at", args={"x": x, "y": y})
118118

119-
def type_text_at(self, x: int, y: int, text: str) -> EnvState:
119+
def type_text_at(
120+
self,
121+
x: int,
122+
y: int,
123+
text: str,
124+
press_enter: bool,
125+
clear_before_typing: bool,
126+
) -> EnvState:
120127
return self._run_command(
121128
"type_text_at",
122129
args={
123130
"x": x,
124131
"y": y,
125132
"text": text,
133+
"press_enter": press_enter,
134+
"clear_before_typing": clear_before_typing,
126135
},
127136
)
128137

129138
def scroll_document(self, direction: str) -> EnvState:
130139
return self._run_command("scroll_document", args={"direction": direction})
131140

141+
def scroll_at(
142+
self,
143+
x: int,
144+
y: int,
145+
direction: Literal["up", "down", "left", "right"],
146+
magnitude: int,
147+
) -> EnvState:
148+
return self._run_command(
149+
"scroll_at",
150+
args={"x": x, "y": y, "direction": direction, "magnitude": magnitude},
151+
)
152+
132153
def wait_5_seconds(self) -> EnvState:
133154
return self._run_command("wait_5_seconds")
134155

@@ -147,6 +168,19 @@ def navigate(self, url: str) -> EnvState:
147168
def key_combination(self, keys: list[str]) -> EnvState:
148169
return self._run_command("key_combination", args={"keys": "+".join(keys)})
149170

171+
def drag_and_drop(
172+
self, x: int, y: int, destination_x: int, destination_y: int
173+
) -> EnvState:
174+
return self._run_command(
175+
"drag_and_drop",
176+
args={
177+
"x": x,
178+
"y": y,
179+
"destination_x": destination_x,
180+
"destination_y": destination_y,
181+
},
182+
)
183+
150184
def current_state(self) -> EnvState:
151185
return self._run_command("screenshot")
152186

computers/puppeteer/browser.ts

Lines changed: 45 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -96,30 +96,64 @@ export class BrowserShell implements ComputerShell {
9696
break;
9797
case 'type_text_at':
9898
await this.page.mouse.click(c.args.x, c.args.y);
99+
if (c.args.clear_before_typing) {
100+
await this.page.keyboard.down('Control');
101+
await this.page.keyboard.press('a');
102+
await this.page.keyboard.up('Control');
103+
await this.page.keyboard.press('Backspace');
104+
}
99105
// see https://github.com/puppeteer/puppeteer/issues/1648
100106
for (let i = 0; i < c.args.text.length; i++) {
101107
await this.page.keyboard.type(c.args.text[i]);
102108
await new Promise(resolve => setTimeout(resolve, 100));
103109
}
104-
await this.page.keyboard.press('Enter');
110+
if (c.args.press_enter) {
111+
await this.page.keyboard.press('Enter');
112+
}
105113
break;
106114
case 'scroll_document':
115+
if (c.args.direction === 'up') {
116+
await this.page.keyboard.press('PageUp');
117+
} else if (c.args.direction === 'down') {
118+
await this.page.keyboard.press('PageDown');
119+
} else {
120+
let deltaX = 0;
121+
if (c.args.direction === 'left') {
122+
let viewport = this.page.viewport();
123+
if (viewport) {
124+
deltaX = -viewport.width / 2;
125+
}
126+
}
127+
if (c.args.direction === 'right') {
128+
let viewport = this.page.viewport();
129+
if (viewport) {
130+
deltaX = viewport.width / 2;
131+
}
132+
}
133+
await this.page.mouse.wheel({ deltaX });
134+
}
135+
break;
136+
case 'scroll_at':
137+
await this.page.mouse.move(c.args.x, c.args.y);
107138
let deltaX = 0;
108139
let deltaY = 0;
109-
if (c.args.direction === 'left') {
110-
deltaX = -900;
111-
}
112-
if (c.args.direction === 'right') {
113-
deltaX = 900;
114-
}
115140
if (c.args.direction === 'up') {
116-
deltaY = -900;
117-
}
118-
if (c.args.direction === 'down') {
119-
deltaY = 900;
141+
deltaY = -c.args.magnitude;
142+
} else if (c.args.direction === 'down') {
143+
deltaY = c.args.magnitude;
144+
} else if (c.args.direction === 'left') {
145+
deltaX = -c.args.magnitude;
146+
} else if (c.args.direction === 'right') {
147+
deltaX = c.args.magnitude;
120148
}
121149
await this.page.mouse.wheel({deltaY, deltaX});
122150
break;
151+
case 'drag_and_drop':
152+
await this.page.mouse.move(c.args.x, c.args.y);
153+
await this.page.mouse.down();
154+
await this.page.mouse.move(c.args.destination_x, c.args.destination_y);
155+
await this.page.mouse.up();
156+
break;
123157
case 'wait_5_seconds':
124158
await new Promise(resolve => setTimeout(resolve, 5000));
125159
break;

0 commit comments

Comments
 (0)