Skip to content

Commit 7f5f7a4

Browse files
Merge pull request #1722 from roboflow/feat/modal-exec-time
Modal parametrization / webrtc turn config / perspective correction fix
2 parents 10f69ed + 5399eb1 commit 7f5f7a4

File tree

11 files changed

+260
-59
lines changed

11 files changed

+260
-59
lines changed

inference/core/env.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,10 @@
705705
WEBRTC_MODAL_FUNCTION_TIME_LIMIT = int(
706706
os.getenv("WEBRTC_MODAL_FUNCTION_TIME_LIMIT", "3600")
707707
)
708+
# seconds
709+
WEBRTC_MODAL_FUNCTION_MAX_TIME_LIMIT = int(
710+
os.getenv("WEBRTC_MODAL_FUNCTION_MAX_TIME_LIMIT", "604800") # 7 days
711+
)
708712
WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT = str2bool(
709713
os.getenv("WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT", "True")
710714
)

inference/core/interfaces/webrtc_worker/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from inference.core.env import WEBRTC_MODAL_TOKEN_ID, WEBRTC_MODAL_TOKEN_SECRET
55
from inference.core.interfaces.webrtc_worker.cpu import rtc_peer_connection_process
66
from inference.core.interfaces.webrtc_worker.entities import (
7+
RTCIceServer,
8+
WebRTCConfig,
79
WebRTCWorkerRequest,
810
WebRTCWorkerResult,
911
)
@@ -12,6 +14,17 @@
1214
async def start_worker(
1315
webrtc_request: WebRTCWorkerRequest,
1416
) -> WebRTCWorkerResult:
17+
if webrtc_request.webrtc_turn_config:
18+
webrtc_request.webrtc_config = WebRTCConfig(
19+
iceServers=[
20+
RTCIceServer(
21+
urls=[webrtc_request.webrtc_turn_config.urls],
22+
username=webrtc_request.webrtc_turn_config.username,
23+
credential=webrtc_request.webrtc_turn_config.credential,
24+
)
25+
]
26+
)
27+
1528
if WEBRTC_MODAL_TOKEN_ID and WEBRTC_MODAL_TOKEN_SECRET:
1629
try:
1730
from inference.core.interfaces.webrtc_worker.modal import (

inference/core/interfaces/webrtc_worker/entities.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,22 @@
1414
)
1515

1616

17+
class RTCIceServer(BaseModel):
18+
urls: List[str]
19+
username: Optional[str] = None
20+
credential: Optional[str] = None
21+
22+
23+
class WebRTCConfig(BaseModel):
24+
iceServers: List[RTCIceServer]
25+
26+
1727
class WebRTCWorkerRequest(BaseModel):
1828
api_key: Optional[str] = None
1929
workflow_configuration: WorkflowConfiguration
2030
webrtc_offer: WebRTCOffer
31+
webrtc_config: Optional[WebRTCConfig] = None
32+
# TODO: to be removed, replaced with webrtc_config
2133
webrtc_turn_config: Optional[WebRTCTURNConfig] = None
2234
webrtc_realtime_processing: bool = (
2335
WEBRTC_REALTIME_PROCESSING # when set to True, MediaRelay.subscribe will be called with buffered=False
@@ -27,21 +39,11 @@ class WebRTCWorkerRequest(BaseModel):
2739
declared_fps: Optional[float] = None
2840
rtsp_url: Optional[str] = None
2941
processing_timeout: Optional[int] = WEBRTC_MODAL_FUNCTION_TIME_LIMIT
30-
# https://modal.com/docs/guide/gpu#specifying-gpu-type
31-
requested_gpu: Optional[
32-
Literal[
33-
"T4",
34-
"L4",
35-
"A10",
36-
"A100",
37-
"A100-40GB",
38-
"A100-80GB",
39-
"L40S",
40-
"H100/H100!",
41-
"H200",
42-
"B200",
43-
]
44-
] = "T4"
42+
requested_plan: Optional[str] = "webrtc-gpu-small"
43+
# TODO: replaced with requested_plan
44+
requested_gpu: Optional[str] = None
45+
# must be valid region: https://modal.com/docs/guide/region-selection#region-options
46+
requested_region: Optional[str] = None
4547

4648

4749
class WebRTCVideoMetadata(BaseModel):

inference/core/interfaces/webrtc_worker/modal.py

Lines changed: 118 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import asyncio
2+
import datetime
23
from pathlib import Path
4+
from typing import Dict, Optional
35

46
from inference.core import logger
57
from inference.core.env import (
@@ -20,6 +22,7 @@
2022
WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT,
2123
WEBRTC_MODAL_FUNCTION_GPU,
2224
WEBRTC_MODAL_FUNCTION_MAX_INPUTS,
25+
WEBRTC_MODAL_FUNCTION_MAX_TIME_LIMIT,
2326
WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS,
2427
WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW,
2528
WEBRTC_MODAL_FUNCTION_TIME_LIMIT,
@@ -33,6 +36,7 @@
3336
WEBRTC_MODAL_TOKEN_SECRET,
3437
WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE,
3538
)
39+
from inference.core.exceptions import RoboflowAPIUnsuccessfulRequestError
3640
from inference.core.interfaces.webrtc_worker.entities import (
3741
WebRTCWorkerRequest,
3842
WebRTCWorkerResult,
@@ -41,6 +45,8 @@
4145
init_rtc_peer_connection_with_loop,
4246
)
4347
from inference.core.version import __version__
48+
from inference.usage_tracking.collector import usage_collector
49+
from inference.usage_tracking.plan_details import WebRTCPlan
4450

4551
try:
4652
import modal
@@ -118,13 +124,42 @@
118124
}
119125

120126
class RTCPeerConnectionModal:
127+
_webrtc_request: Optional[WebRTCWorkerRequest] = modal.parameter(default=None)
128+
_exec_session_started: Optional[datetime.datetime] = modal.parameter(
129+
default=None
130+
)
131+
_exec_session_stopped: Optional[datetime.datetime] = modal.parameter(
132+
default=None
133+
)
134+
121135
@modal.method()
122136
def rtc_peer_connection_modal(
123137
self,
124138
webrtc_request: WebRTCWorkerRequest,
125139
q: modal.Queue,
126140
):
127-
logger.info("Received webrtc offer")
141+
logger.info("*** Spawning %s:", self.__class__.__name__)
142+
logger.info(
143+
"webrtc_realtime_processing: %s",
144+
webrtc_request.webrtc_realtime_processing,
145+
)
146+
logger.info("stream_output: %s", webrtc_request.stream_output)
147+
logger.info("data_output: %s", webrtc_request.data_output)
148+
logger.info("declared_fps: %s", webrtc_request.declared_fps)
149+
logger.info("rtsp_url: %s", webrtc_request.rtsp_url)
150+
logger.info("processing_timeout: %s", webrtc_request.processing_timeout)
151+
logger.info("requested_plan: %s", webrtc_request.requested_plan)
152+
logger.info("requested_gpu: %s", webrtc_request.requested_gpu)
153+
logger.info("requested_region: %s", webrtc_request.requested_region)
154+
logger.info(
155+
"ICE servers: %s",
156+
len(
157+
webrtc_request.webrtc_config.iceServers
158+
if webrtc_request.webrtc_config
159+
else []
160+
),
161+
)
162+
self._webrtc_request = webrtc_request
128163

129164
def send_answer(obj: WebRTCWorkerResult):
130165
logger.info("Sending webrtc answer")
@@ -137,22 +172,56 @@ def send_answer(obj: WebRTCWorkerResult):
137172
)
138173
)
139174

175+
# https://modal.com/docs/reference/modal.enter
176+
# Modal usage calculation is relying on no concurrency and no hot instances
177+
@modal.enter()
178+
def start(self):
179+
self._exec_session_started = datetime.datetime.now()
180+
181+
@modal.exit()
182+
def stop(self):
183+
if not self._webrtc_request:
184+
return
185+
self._exec_session_stopped = datetime.datetime.now()
186+
workflow_id = self._webrtc_request.workflow_configuration.workflow_id
187+
if not workflow_id:
188+
if self._webrtc_request.workflow_configuration.workflow_specification:
189+
workflow_id = usage_collector._calculate_resource_hash(
190+
resource_details=self._webrtc_request.workflow_configuration.workflow_specification
191+
)
192+
else:
193+
workflow_id = "unknown"
194+
195+
# requested plan is guaranteed to be set due to validation in spawn_rtc_peer_connection_modal
196+
webrtc_plan = self._webrtc_request.requested_plan
197+
198+
usage_collector.record_usage(
199+
source=workflow_id,
200+
category="modal",
201+
api_key=self._webrtc_request.api_key,
202+
resource_details={"plan": webrtc_plan},
203+
execution_duration=(
204+
self._exec_session_stopped - self._exec_session_started
205+
).total_seconds(),
206+
)
207+
usage_collector.push_usage_payloads()
208+
140209
# Modal derives function name from class name
141210
# https://modal.com/docs/reference/modal.App#cls
142211
@app.cls(
143-
**{
144-
**decorator_kwargs,
145-
"enable_memory_snapshot": True,
146-
}
212+
**decorator_kwargs,
147213
)
148214
class RTCPeerConnectionModalCPU(RTCPeerConnectionModal):
149215
pass
150216

151217
@app.cls(
152218
**{
153219
**decorator_kwargs,
154-
"gpu": WEBRTC_MODAL_FUNCTION_GPU,
155-
"experimental_options": {"enable_gpu_snapshot": True},
220+
"enable_memory_snapshot": False,
221+
"gpu": WEBRTC_MODAL_FUNCTION_GPU, # https://modal.com/docs/guide/gpu#specifying-gpu-type
222+
"experimental_options": {
223+
"enable_gpu_snapshot": WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT
224+
},
156225
}
157226
)
158227
class RTCPeerConnectionModalGPU(RTCPeerConnectionModal):
@@ -161,6 +230,31 @@ class RTCPeerConnectionModalGPU(RTCPeerConnectionModal):
161230
def spawn_rtc_peer_connection_modal(
162231
webrtc_request: WebRTCWorkerRequest,
163232
) -> WebRTCWorkerResult:
233+
webrtc_plans: Optional[Dict[str, WebRTCPlan]] = (
234+
usage_collector._plan_details.get_webrtc_plans(
235+
api_key=webrtc_request.api_key
236+
)
237+
)
238+
if webrtc_plans and webrtc_request.requested_plan:
239+
if webrtc_request.requested_plan not in webrtc_plans:
240+
raise RoboflowAPIUnsuccessfulRequestError(
241+
f"Unknown requested plan {webrtc_request.requested_plan}"
242+
)
243+
webrtc_request.requested_gpu = webrtc_plans[
244+
webrtc_request.requested_plan
245+
].gpu
246+
if (
247+
webrtc_plans
248+
and not webrtc_request.requested_plan
249+
and webrtc_request.requested_gpu
250+
):
251+
gpu_to_plan = {v.gpu: k for k, v in webrtc_plans.items()}
252+
if webrtc_request.requested_gpu not in gpu_to_plan:
253+
raise RoboflowAPIUnsuccessfulRequestError(
254+
f"Requested gpu {webrtc_request.requested_gpu} not associated with any plan"
255+
)
256+
webrtc_request.requested_plan = gpu_to_plan[webrtc_request.requested_gpu]
257+
164258
# https://modal.com/docs/reference/modal.Client#from_credentials
165259
client = modal.Client.from_credentials(
166260
token_id=WEBRTC_MODAL_TOKEN_ID,
@@ -186,27 +280,32 @@ def spawn_rtc_peer_connection_modal(
186280
)
187281
deployed_cls.hydrate(client=client)
188282
if webrtc_request.processing_timeout is None:
189-
logger.warning("Spawning webrtc modal function without timeout")
190-
else:
191-
logger.info(
192-
"Spawning webrtc modal function with timeout %s",
193-
webrtc_request.processing_timeout,
194-
)
283+
webrtc_request.processing_timeout = WEBRTC_MODAL_FUNCTION_MAX_TIME_LIMIT
284+
logger.warning("No timeout specified, using max timeout")
285+
logger.info(
286+
"Spawning webrtc modal function with timeout %s",
287+
webrtc_request.processing_timeout,
288+
)
195289
# https://modal.com/docs/reference/modal.Cls#with_options
196290
cls_with_options = deployed_cls.with_options(
197291
timeout=webrtc_request.processing_timeout,
198292
)
199-
if (
200-
webrtc_request.requested_gpu is not None
201-
and webrtc_request.requested_gpu != WEBRTC_MODAL_FUNCTION_GPU
202-
):
203-
logger.warning(
204-
"Spawning webrtc modal function with custom gpu %s",
293+
if webrtc_request.requested_gpu is not None:
294+
logger.info(
295+
"Spawning webrtc modal function with gpu %s",
205296
webrtc_request.requested_gpu,
206297
)
207298
cls_with_options = cls_with_options.with_options(
208299
gpu=webrtc_request.requested_gpu,
209300
)
301+
if webrtc_request.requested_region:
302+
logger.info(
303+
"Spawning webrtc modal function with region %s",
304+
webrtc_request.requested_region,
305+
)
306+
cls_with_options = cls_with_options.with_options(
307+
region=webrtc_request.requested_region,
308+
)
210309
rtc_modal_obj: RTCPeerConnectionModal = cls_with_options()
211310
# https://modal.com/docs/reference/modal.Queue#ephemeral
212311
with modal.Queue.ephemeral(client=client) as q:

inference/core/interfaces/webrtc_worker/webrtc.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -446,12 +446,18 @@ async def _wait_ice_complete(peer_connection: RTCPeerConnectionWithLoop, timeout
446446

447447
@peer_connection.on("icegatheringstatechange")
448448
def _():
449+
logger.info(
450+
"ICE gathering state changed to %s", peer_connection.iceGatheringState
451+
)
449452
if not fut.done() and peer_connection.iceGatheringState == "complete":
450453
fut.set_result(True)
451454

452455
try:
456+
logger.info("Waiting for ICE gathering to complete...")
453457
await asyncio.wait_for(fut, timeout)
458+
logger.info("ICE gathering completed")
454459
except asyncio.TimeoutError:
460+
logger.info("ICE gathering did not complete in %s seconds", timeout)
455461
pass
456462

457463

@@ -466,10 +472,16 @@ async def init_rtc_peer_connection_with_loop(
466472
if webrtc_request.processing_timeout is not None:
467473
try:
468474
time_limit_seconds = int(webrtc_request.processing_timeout)
469-
termination_date = datetime.datetime.now() + datetime.timedelta(
475+
datetime_now = datetime.datetime.now()
476+
termination_date = datetime_now + datetime.timedelta(
470477
seconds=time_limit_seconds - 1
471478
)
472-
logger.info("Setting termination date to %s", termination_date)
479+
logger.info(
480+
"Setting termination date to %s (%s seconds from %s)",
481+
termination_date,
482+
time_limit_seconds,
483+
datetime_now,
484+
)
473485
except (TypeError, ValueError):
474486
pass
475487
if webrtc_request.stream_output is None:
@@ -578,20 +590,22 @@ async def init_rtc_peer_connection_with_loop(
578590
)
579591
return
580592

581-
if webrtc_request.webrtc_turn_config:
582-
turn_server = RTCIceServer(
583-
urls=[webrtc_request.webrtc_turn_config.urls],
584-
username=webrtc_request.webrtc_turn_config.username,
585-
credential=webrtc_request.webrtc_turn_config.credential,
586-
)
587-
peer_connection = RTCPeerConnectionWithLoop(
588-
configuration=RTCConfiguration(iceServers=[turn_server]),
589-
asyncio_loop=asyncio_loop,
590-
)
593+
if webrtc_request.webrtc_config is not None:
594+
ice_servers = []
595+
for ice_server in webrtc_request.webrtc_config.iceServers:
596+
ice_servers.append(
597+
RTCIceServer(
598+
urls=ice_server.urls,
599+
username=ice_server.username,
600+
credential=ice_server.credential,
601+
)
602+
)
591603
else:
592-
peer_connection = RTCPeerConnectionWithLoop(
593-
asyncio_loop=asyncio_loop,
594-
)
604+
ice_servers = None
605+
peer_connection = RTCPeerConnectionWithLoop(
606+
configuration=RTCConfiguration(iceServers=ice_servers) if ice_servers else None,
607+
asyncio_loop=asyncio_loop,
608+
)
595609

596610
relay = MediaRelay()
597611

inference/core/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.60.1rc2"
1+
__version__ = "0.60.1"
22

33

44
if __name__ == "__main__":

0 commit comments

Comments (0)