11import asyncio
2+ import datetime
23from pathlib import Path
4+ from typing import Dict , Optional
35
46from inference .core import logger
57from inference .core .env import (
2022 WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT ,
2123 WEBRTC_MODAL_FUNCTION_GPU ,
2224 WEBRTC_MODAL_FUNCTION_MAX_INPUTS ,
25+ WEBRTC_MODAL_FUNCTION_MAX_TIME_LIMIT ,
2326 WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS ,
2427 WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW ,
2528 WEBRTC_MODAL_FUNCTION_TIME_LIMIT ,
3336 WEBRTC_MODAL_TOKEN_SECRET ,
3437 WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE ,
3538)
39+ from inference .core .exceptions import RoboflowAPIUnsuccessfulRequestError
3640from inference .core .interfaces .webrtc_worker .entities import (
3741 WebRTCWorkerRequest ,
3842 WebRTCWorkerResult ,
4145 init_rtc_peer_connection_with_loop ,
4246)
4347from inference .core .version import __version__
48+ from inference .usage_tracking .collector import usage_collector
49+ from inference .usage_tracking .plan_details import WebRTCPlan
4450
4551try :
4652 import modal
118124 }
119125
120126 class RTCPeerConnectionModal :
127+ _webrtc_request : Optional [WebRTCWorkerRequest ] = modal .parameter (default = None )
128+ _exec_session_started : Optional [datetime .datetime ] = modal .parameter (
129+ default = None
130+ )
131+ _exec_session_stopped : Optional [datetime .datetime ] = modal .parameter (
132+ default = None
133+ )
134+
121135 @modal .method ()
122136 def rtc_peer_connection_modal (
123137 self ,
124138 webrtc_request : WebRTCWorkerRequest ,
125139 q : modal .Queue ,
126140 ):
127- logger .info ("Received webrtc offer" )
141+ logger .info ("*** Spawning %s:" , self .__class__ .__name__ )
142+ logger .info (
143+ "webrtc_realtime_processing: %s" ,
144+ webrtc_request .webrtc_realtime_processing ,
145+ )
146+ logger .info ("stream_output: %s" , webrtc_request .stream_output )
147+ logger .info ("data_output: %s" , webrtc_request .data_output )
148+ logger .info ("declared_fps: %s" , webrtc_request .declared_fps )
149+ logger .info ("rtsp_url: %s" , webrtc_request .rtsp_url )
150+ logger .info ("processing_timeout: %s" , webrtc_request .processing_timeout )
151+ logger .info ("requested_plan: %s" , webrtc_request .requested_plan )
152+ logger .info ("requested_gpu: %s" , webrtc_request .requested_gpu )
153+ logger .info ("requested_region: %s" , webrtc_request .requested_region )
154+ logger .info (
155+ "ICE servers: %s" ,
156+ len (
157+ webrtc_request .webrtc_config .iceServers
158+ if webrtc_request .webrtc_config
159+ else []
160+ ),
161+ )
162+ self ._webrtc_request = webrtc_request
128163
129164 def send_answer (obj : WebRTCWorkerResult ):
130165 logger .info ("Sending webrtc answer" )
@@ -137,22 +172,56 @@ def send_answer(obj: WebRTCWorkerResult):
137172 )
138173 )
139174
175+ # https://modal.com/docs/reference/modal.enter
176+ # Modal usage calculation relies on no concurrency and no hot instances
@modal.enter()
def start(self):
    """Remember when this execution session began.

    Registered through ``modal.enter()``. The timestamp stored here is the
    starting point that ``stop`` subtracts from its own timestamp to obtain
    the session duration, so both must stay naive ``datetime`` values taken
    from the same clock.
    """
    session_began_at = datetime.datetime.now()
    self._exec_session_started = session_began_at
180+
@modal.exit()
def stop(self):
    """Report usage for the finished session and flush it.

    Registered through ``modal.exit()``. When no request was ever stored on
    the instance there is nothing to bill and the method returns early.
    Otherwise a single ``"modal"`` usage entry is recorded, covering the
    time elapsed since ``start`` ran, and the collected payloads are pushed.
    """
    request = self._webrtc_request
    if not request:
        return

    self._exec_session_stopped = datetime.datetime.now()

    # Usage is attributed to the workflow id when present; otherwise to a
    # hash of the inline specification, and to "unknown" as a last resort.
    workflow_config = request.workflow_configuration
    usage_source = workflow_config.workflow_id
    if not usage_source:
        specification = workflow_config.workflow_specification
        if specification:
            usage_source = usage_collector._calculate_resource_hash(
                resource_details=specification
            )
        else:
            usage_source = "unknown"

    # The plan is expected to have been resolved by the validation in
    # spawn_rtc_peer_connection_modal before this instance was spawned.
    # NOTE(review): that validation appears to run only when plan details
    # are available -- confirm the plan can never be None here.
    plan_details = {"plan": request.requested_plan}

    session_length = self._exec_session_stopped - self._exec_session_started
    usage_collector.record_usage(
        source=usage_source,
        category="modal",
        api_key=request.api_key,
        resource_details=plan_details,
        execution_duration=session_length.total_seconds(),
    )
    usage_collector.push_usage_payloads()
208+
140209 # Modal derives function name from class name
141210 # https://modal.com/docs/reference/modal.App#cls
@app.cls(**decorator_kwargs)
class RTCPeerConnectionModalCPU(RTCPeerConnectionModal):
    """Variant registered with the shared ``decorator_kwargs`` only.

    No ``gpu`` or snapshot override is added at this registration; the class
    body is intentionally empty and all behaviour comes from
    ``RTCPeerConnectionModal``.
    """
150216
151217 @app .cls (
152218 ** {
153219 ** decorator_kwargs ,
154- "gpu" : WEBRTC_MODAL_FUNCTION_GPU ,
155- "experimental_options" : {"enable_gpu_snapshot" : True },
220+ "enable_memory_snapshot" : False ,
221+ "gpu" : WEBRTC_MODAL_FUNCTION_GPU , # https://modal.com/docs/guide/gpu#specifying-gpu-type
222+ "experimental_options" : {
223+ "enable_gpu_snapshot" : WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT
224+ },
156225 }
157226 )
158227 class RTCPeerConnectionModalGPU (RTCPeerConnectionModal ):
@@ -161,6 +230,31 @@ class RTCPeerConnectionModalGPU(RTCPeerConnectionModal):
161230 def spawn_rtc_peer_connection_modal (
162231 webrtc_request : WebRTCWorkerRequest ,
163232 ) -> WebRTCWorkerResult :
233+ webrtc_plans : Optional [Dict [str , WebRTCPlan ]] = (
234+ usage_collector ._plan_details .get_webrtc_plans (
235+ api_key = webrtc_request .api_key
236+ )
237+ )
238+ if webrtc_plans and webrtc_request .requested_plan :
239+ if webrtc_request .requested_plan not in webrtc_plans :
240+ raise RoboflowAPIUnsuccessfulRequestError (
241+ f"Unknown requested plan { webrtc_request .requested_plan } "
242+ )
243+ webrtc_request .requested_gpu = webrtc_plans [
244+ webrtc_request .requested_plan
245+ ].gpu
246+ if (
247+ webrtc_plans
248+ and not webrtc_request .requested_plan
249+ and webrtc_request .requested_gpu
250+ ):
251+ gpu_to_plan = {v .gpu : k for k , v in webrtc_plans .items ()}
252+ if webrtc_request .requested_gpu not in gpu_to_plan :
253+ raise RoboflowAPIUnsuccessfulRequestError (
254+ f"Requested gpu { webrtc_request .requested_gpu } not associated with any plan"
255+ )
256+ webrtc_request .requested_plan = gpu_to_plan [webrtc_request .requested_gpu ]
257+
164258 # https://modal.com/docs/reference/modal.Client#from_credentials
165259 client = modal .Client .from_credentials (
166260 token_id = WEBRTC_MODAL_TOKEN_ID ,
@@ -186,27 +280,32 @@ def spawn_rtc_peer_connection_modal(
186280 )
187281 deployed_cls .hydrate (client = client )
188282 if webrtc_request .processing_timeout is None :
189- logger . warning ( "Spawning webrtc modal function without timeout" )
190- else :
191- logger .info (
192- "Spawning webrtc modal function with timeout %s" ,
193- webrtc_request .processing_timeout ,
194- )
283+ webrtc_request . processing_timeout = WEBRTC_MODAL_FUNCTION_MAX_TIME_LIMIT
284+ logger . warning ( "No timeout specified, using max timeout" )
285+ logger .info (
286+ "Spawning webrtc modal function with timeout %s" ,
287+ webrtc_request .processing_timeout ,
288+ )
195289 # https://modal.com/docs/reference/modal.Cls#with_options
196290 cls_with_options = deployed_cls .with_options (
197291 timeout = webrtc_request .processing_timeout ,
198292 )
199- if (
200- webrtc_request .requested_gpu is not None
201- and webrtc_request .requested_gpu != WEBRTC_MODAL_FUNCTION_GPU
202- ):
203- logger .warning (
204- "Spawning webrtc modal function with custom gpu %s" ,
293+ if webrtc_request .requested_gpu is not None :
294+ logger .info (
295+ "Spawning webrtc modal function with gpu %s" ,
205296 webrtc_request .requested_gpu ,
206297 )
207298 cls_with_options = cls_with_options .with_options (
208299 gpu = webrtc_request .requested_gpu ,
209300 )
301+ if webrtc_request .requested_region :
302+ logger .info (
303+ "Spawning webrtc modal function with region %s" ,
304+ webrtc_request .requested_region ,
305+ )
306+ cls_with_options = cls_with_options .with_options (
307+ region = webrtc_request .requested_region ,
308+ )
210309 rtc_modal_obj : RTCPeerConnectionModal = cls_with_options ()
211310 # https://modal.com/docs/reference/modal.Queue#ephemeral
212311 with modal .Queue .ephemeral (client = client ) as q :
0 commit comments