1+ # main.py
2+
3+ # Important Instructions:
4+ # 1. Close any existing Chrome instances.
5+ # 2. Start Chrome with remote debugging enabled:
6+ # /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222
7+ # 3. Run the FastAPI server:
8+ # uvicorn main:app --host 127.0.0.1 --port 8888 --reload --workers 1
# 4. Make sure GEMINI_API_KEY=yourGeminiKeyHere is set in the .env file (this is the variable the code reads).
10+
11+ import os
12+ os .environ ["PYDANTIC_V1_COMPAT_MODE" ] = "true"
13+
14+ from langchain_google_genai import ChatGoogleGenerativeAI
15+ from browser_use import Agent
16+ from dotenv import load_dotenv
17+ import platform
18+ import asyncio
19+ from fastapi import FastAPI , HTTPException , Query , BackgroundTasks
20+ from pydantic import BaseModel
21+ from pydantic import SecretStr
22+ from browser_use .browser .browser import Browser , BrowserConfig
23+ import logging
24+ import traceback
25+ from datetime import datetime
26+ from typing import List , Optional
27+ from enum import Enum
28+ from fastapi .middleware .cors import CORSMiddleware
29+
30+
31+
32+ # ----------------------------
33+ # 1. Configure Logging
34+ # ----------------------------
# Route INFO-and-above records through the root logger's default configuration.
logging.basicConfig(level=logging.INFO)
# Module-scoped logger used by the endpoints and the background task below.
logger = logging.getLogger(__name__)
37+
38+ # ----------------------------
39+ # 2. Load Environment Variables
40+ # ----------------------------
# Load environment variables through python-dotenv before reading any of them.
load_dotenv()

# Fail fast at import time when the Gemini API key is missing or empty.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError(
        "GEMINI_API_KEY not found in .env file. Make sure your .env file is set up correctly."
    )
49+
50+ # ----------------------------
51+ # 3. Initialize FastAPI App
52+ # ----------------------------
app = FastAPI(title="AI Agent API with BrowserUse", version="1.0")


# Configure CORS.
# Allowed origins come from the optional CORS_ALLOW_ORIGINS environment
# variable (comma-separated list). When it is unset or empty, the development
# default of allowing every origin ("*") is kept.
_cors_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # A wildcard origin must not be combined with credentialed requests, so
    # credentials are only enabled once explicit origins are configured.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
64+
65+ # ----------------------------
66+ # 4. Define Pydantic Models
67+ # ----------------------------
68+
class TaskRequest(BaseModel):
    """Request body of ``POST /run``."""

    # The task description for the AI agent.
    task: str
71+
class TaskResponse(BaseModel):
    """Response body returned by both ``/run`` endpoints."""

    # Human-readable acknowledgement message sent back to the caller.
    result: str
74+
class TaskStatus(str, Enum):
    """Lifecycle states of a background task.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
79+
class TaskRecord(BaseModel):
    """Bookkeeping entry describing one agent task and its outcome."""

    # Sequential identifier assigned when the task is submitted.
    id: int
    # The task description that was handed to the agent.
    task: str
    # Current lifecycle state of the task.
    status: TaskStatus
    # Moment the background task began executing.
    start_time: datetime
    # Moment the task finished; stays None while it is still running.
    end_time: Optional[datetime] = None
    # Duration in seconds, computed as end_time - start_time.
    duration: Optional[float] = None
    # Outcome recorded when the task completes.
    result: Optional[str] = None
    # Error text recorded when the task fails.
    error: Optional[str] = None
89+
90+ # ----------------------------
91+ # 5. Initialize Task Registry
92+ # ----------------------------
# In-memory list of every task record created since the process started.
task_records: List[TaskRecord] = []
# Most recently issued task ID; the /run endpoints increment it per request.
task_id_counter: int = 0
# Guards concurrent access to task_records and task_id_counter.
task_lock = asyncio.Lock()
96+
97+ # ----------------------------
98+ # 6. Define Background Task Function
99+ # ----------------------------
100+
101+
def get_chrome_path(system: Optional[str] = None) -> str:
    """
    Return the most common Chrome executable path for an operating system.

    Args:
        system: Platform name in the form returned by ``platform.system()``
            ("Windows", "Darwin" or "Linux"). Defaults to the platform the
            process is running on.

    Returns:
        The path of the Chrome executable, verified to exist on disk.

    Raises:
        FileNotFoundError: If the operating system is unsupported or Chrome is
            not found in the expected path.
    """
    if system is None:
        system = platform.system()

    if system == "Windows":
        # Build the path from separate components so that no stray characters
        # (such as spaces after a separator) end up inside directory names.
        chrome_path = os.path.join(
            os.environ.get("PROGRAMFILES", "C:\\Program Files"),
            "Google",
            "Chrome",
            "Application",
            "chrome.exe",
        )
    elif system == "Darwin":
        # Common installation path for macOS
        chrome_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
    elif system == "Linux":
        # Common installation path for Linux
        chrome_path = "/usr/bin/google-chrome"
    else:
        raise FileNotFoundError(f"Unsupported operating system: {system} ")

    # Verify that the Chrome executable exists at the determined path
    if not os.path.exists(chrome_path):
        raise FileNotFoundError(f"Google Chrome executable not found at: {chrome_path} ")

    return chrome_path
130+
131+
132+
def _agent_result_to_text(result) -> Optional[str]:
    """Convert the value returned by ``Agent.run()`` into text for ``TaskRecord.result``."""
    if result is None or isinstance(result, str):
        return result
    # NOTE(review): browser_use history objects appear to expose
    # final_result(); confirm against the installed browser_use version.
    extract_final = getattr(result, "final_result", None)
    if callable(extract_final):
        final_text = extract_final()
        if final_text is not None:
            return str(final_text)
    # Fall back to the object's string form so the str-typed field stays valid.
    return str(result)


async def _finish_task_record(record, status, *, result=None, error=None) -> None:
    """Stamp *record* with its final status, end time, duration and outcome."""
    async with task_lock:
        record.status = status
        record.end_time = datetime.utcnow()
        record.duration = (record.end_time - record.start_time).total_seconds()
        if result is not None:
            record.result = result
        if error is not None:
            record.error = error


async def execute_task(task_id: int, task: str):
    """
    Background task to execute the AI agent.

    Initializes a new browser instance for each task to ensure isolation,
    records the task in ``task_records`` and always closes the browser when
    the run ends.

    Args:
        task_id: Identifier assigned to this task by the endpoint.
        task: The task description handed to the agent.

    Exceptions raised while running are logged and stored on the task record
    (status ``failed``) instead of being propagated.
    """
    browser = None  # Browser instance for this task, created further below
    task_record = None  # Stays None if the record could not be created
    try:
        logger.info(f"Starting background task ID {task_id} : {task} ")

        # Create and add the task record with status 'running'
        async with task_lock:
            task_record = TaskRecord(
                id=task_id,
                task=task,
                status=TaskStatus.RUNNING,
                start_time=datetime.utcnow(),
            )
            task_records.append(task_record)

        # Initialize a new browser instance for this task
        logger.info(f"Task ID {task_id} : Initializing new browser instance.")
        browser = Browser(
            config=BrowserConfig(
                chrome_instance_path=get_chrome_path(),  # Update if different
                disable_security=True,
                headless=False,  # Set to True for headless mode
                # Removed 'remote_debugging_port' as it caused issues
            )
        )
        logger.info(f"Task ID {task_id} : Browser initialized successfully.")

        # Initialize and run the Agent with the new browser instance
        agent = Agent(
            task=task,
            llm=ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key)),
            browser=browser,
        )
        logger.info(f"Task ID {task_id} : Agent initialized. Running task.")
        result = await agent.run()
        logger.info(f"Task ID {task_id} : Agent.run() completed successfully.")

        # Update the task record with status 'completed'. The held reference
        # is the same object that was appended to task_records above.
        await _finish_task_record(
            task_record,
            TaskStatus.COMPLETED,
            result=_agent_result_to_text(result),
        )

    except Exception as e:
        logger.error(f"Error in background task ID {task_id} : {e} ")
        logger.error(traceback.format_exc())

        # Update the task record with status 'failed' (if it was created)
        if task_record is not None:
            await _finish_task_record(task_record, TaskStatus.FAILED, error=str(e))
    finally:
        # Ensure that the browser is closed in case of failure or success
        if browser:
            try:
                logger.info(f"Task ID {task_id} : Closing browser instance.")
                await browser.close()
                logger.info(f"Task ID {task_id} : Browser instance closed successfully.")
            except Exception as close_e:
                logger.error(f"Task ID {task_id} : Error closing browser: {close_e} ")
                logger.error(traceback.format_exc())
208+
209+ # ----------------------------
210+ # 7. Define POST /run Endpoint
211+ # ----------------------------
async def _enqueue_agent_task(task: str, background_tasks: BackgroundTasks) -> TaskResponse:
    """Validate *task*, reserve the next task ID and schedule the agent run."""
    global task_id_counter

    # Reject blank descriptions before a browser and an LLM call are spent on them.
    if not task or not task.strip():
        raise HTTPException(status_code=400, detail="Task description must not be empty.")

    # Increment task ID
    async with task_lock:
        task_id_counter += 1
        current_task_id = task_id_counter

    # Enqueue the background task
    background_tasks.add_task(execute_task, current_task_id, task)

    # Respond immediately
    return TaskResponse(result="Task is being processed.")


@app.post("/run", response_model=TaskResponse)
async def run_task_post(request: TaskRequest, background_tasks: BackgroundTasks):
    """
    POST Endpoint to run the AI agent with a specified task.

    - **task**: The task description for the AI agent.

    Responds immediately; the task itself runs in the background.
    Returns HTTP 400 when the task description is empty or whitespace only.
    """
    task = request.task
    logger.info(f"Received task via POST: {task} ")
    return await _enqueue_agent_task(task, background_tasks)

# ----------------------------
# 8. Define GET /run Endpoint
# ----------------------------
@app.get("/run", response_model=TaskResponse)
async def run_task_get(
    task: str = Query(..., description="The task description for the AI agent."),
    # The None default only satisfies Python's parameter ordering rules.
    # NOTE(review): FastAPI is expected to inject BackgroundTasks based on the
    # annotation - confirm against the FastAPI version in use.
    background_tasks: BackgroundTasks = None
):
    """
    GET Endpoint to run the AI agent with a specified task.

    - **task**: The task description for the AI agent.

    Responds immediately; the task itself runs in the background.
    Returns HTTP 400 when the task description is empty or whitespace only.
    """
    logger.info(f"Received task via GET: {task} ")
    return await _enqueue_agent_task(task, background_tasks)
260+
261+ # ----------------------------
262+ # 9. Define GET /lastResponses Endpoint
263+ # ----------------------------
@app.get("/lastResponses", response_model=List[TaskRecord])
async def get_last_responses(
    # ge=0 rejects negative limits, which would otherwise be applied as a
    # negative slice bound and silently drop records from the end of the list.
    limit: Optional[int] = Query(100, ge=0, description="Maximum number of task records to return"),
    status: Optional[TaskStatus] = Query(None, description="Filter by task status")
):
    """
    GET Endpoint to retrieve the last task responses.

    - **limit**: The maximum number of task records to return (default: 100).
      Must be zero or greater.
    - **status**: (Optional) Filter tasks by status ('running', 'completed', 'failed').

    Returns a list of task records in descending order of task ID.
    """
    # Snapshot the matching records while holding the lock.
    async with task_lock:
        matching = [
            record
            for record in task_records
            if status is None or record.status == status
        ]

    # Sort and limit
    matching.sort(key=lambda record: record.id, reverse=True)
    return matching[:limit]
284+
285+ # ----------------------------
286+ # 10. Define Root Endpoint
287+ # ----------------------------
@app.get("/")
def read_root():
    """Return a short banner confirming the API is up and explaining how to call /run."""
    banner = "AI Agent API with BrowserUse is running. Use the /run endpoint with a 'task' field in the POST request body or as a query parameter in a GET request to execute tasks."
    return {"message": banner}
293+
294+ #For executable.
295+ # ----------------------------
296+ # 12. Entry Point
297+ # ----------------------------
if __name__ == "__main__":
    # Imported lazily: only needed when the file is launched as a script.
    import uvicorn

    # NOTE(review): the import string names module "mainGemini" while the
    # header comment calls this file main.py - confirm the actual file name.
    uvicorn.run(
        "mainGemini:app",
        host="127.0.0.1",
        port=8888,
        reload=True,
        workers=1,
    )
0 commit comments