Skip to content

Commit c069d61

Browse files
Merge pull request #37 from ryanguo-google/main
Add FunctionResponsePart Support, and fix issues when using Vertex API
1 parent ac86771 commit c069d61

File tree

4 files changed

+51
-11
lines changed

4 files changed

+51
-11
lines changed

agent.py

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,24 @@
3030

3131
from computers import EnvState, Computer
3232

33+
MAX_RECENT_TURN_WITH_SCREENSHOTS = 3
34+
PREDEFINED_COMPUTER_USE_FUNCTIONS = [
35+
"open_web_browser",
36+
"click_at",
37+
"hover_at",
38+
"type_text_at",
39+
"scroll_document",
40+
"scroll_at",
41+
"wait_5_seconds",
42+
"go_back",
43+
"go_forward",
44+
"search",
45+
"navigate",
46+
"key_combination",
47+
"drag_and_drop",
48+
]
49+
50+
3351
console = Console()
3452

3553
# Built-in Computer Use tools will return "EnvState".
@@ -60,12 +78,6 @@ def __init__(
6078
vertexai=os.environ.get("USE_VERTEXAI", "0").lower() in ["true", "1"],
6179
project=os.environ.get("VERTEXAI_PROJECT"),
6280
location=os.environ.get("VERTEXAI_LOCATION"),
63-
http_options=types.HttpOptions(
64-
api_version="v1alpha",
65-
base_url=os.environ.get(
66-
"GEMINI_API_SERVER", "https://generativelanguage.googleapis.com"
67-
),
68-
),
6981
)
7082
self._contents: list[Content] = [
7183
Content(
@@ -101,7 +113,6 @@ def __init__(
101113
),
102114
types.Tool(function_declarations=custom_functions),
103115
],
104-
thinking_config=types.ThinkingConfig(include_thoughts=True),
105116
)
106117

107118
def handle_action(self, action: types.FunctionCall) -> FunctionResponseT:
@@ -321,9 +332,9 @@ def run_one_iteration(self) -> Literal["COMPLETE", "CONTINUE"]:
321332
"url": fc_result.url,
322333
**extra_fr_fields,
323334
},
324-
data=[
325-
types.Part(
326-
inline_data=types.Blob(
335+
parts=[
336+
types.FunctionResponsePart(
337+
inline_data=types.FunctionResponseBlob(
327338
mime_type="image/png", data=fc_result.screenshot
328339
)
329340
)
@@ -342,6 +353,35 @@ def run_one_iteration(self) -> Literal["COMPLETE", "CONTINUE"]:
342353
)
343354
)
344355

356+
# Only keep screenshots in the few most recent turns; remove the screenshot images from older turns.
357+
turn_with_screenshots_found = 0
358+
for content in reversed(self._contents):
359+
if content.role == "user" and content.parts:
360+
# Check whether this content has a screenshot from one of the predefined computer use functions.
361+
has_screenshot = False
362+
for part in content.parts:
363+
if (
364+
part.function_response
365+
and part.function_response.parts
366+
and part.function_response.name
367+
in PREDEFINED_COMPUTER_USE_FUNCTIONS
368+
):
369+
has_screenshot = True
370+
break
371+
372+
if has_screenshot:
373+
turn_with_screenshots_found += 1
374+
# Remove the screenshot images once the number of turns with screenshots exceeds the limit.
375+
if turn_with_screenshots_found > MAX_RECENT_TURN_WITH_SCREENSHOTS:
376+
for part in content.parts:
377+
if (
378+
part.function_response
379+
and part.function_response.parts
380+
and part.function_response.name
381+
in PREDEFINED_COMPUTER_USE_FUNCTIONS
382+
):
383+
part.function_response.parts = None
384+
345385
return "CONTINUE"
346386

347387
def _get_safety_confirmation(

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
termcolor==3.1.0
22
pydantic==2.11.4
3-
./sdk/google_genai-1.27.0-py3-none-any.whl
3+
google-genai==1.39.0
44
playwright==1.52.0
55
browserbase==1.3.0
66
rich

sdk/google-genai-1.11.0.tgz

-1.12 MB
Binary file not shown.
-205 KB
Binary file not shown.

0 commit comments

Comments
 (0)