Skip to content

Commit 15c8b17

Browse files
Merge pull request #1729 from roboflow/feat/modal-tags
Modal tags
2 parents 88adbdc + f3d4385 commit 15c8b17

File tree

5 files changed

+89
-51
lines changed

5 files changed

+89
-51
lines changed

inference/core/env.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,8 @@
709709
WEBRTC_MODAL_FUNCTION_MAX_TIME_LIMIT = int(
710710
os.getenv("WEBRTC_MODAL_FUNCTION_MAX_TIME_LIMIT", "604800") # 7 days
711711
)
712+
# seconds
713+
WEBRTC_MODAL_SHUTDOWN_RESERVE = int(os.getenv("WEBRTC_MODAL_SHUTDOWN_RESERVE", "1"))
712714
WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT = str2bool(
713715
os.getenv("WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT", "True")
714716
)

inference/core/interfaces/webrtc_worker/entities.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import datetime
12
from enum import Enum
23
from typing import Any, Dict, List, Literal, Optional, Union
34

@@ -39,6 +40,7 @@ class WebRTCWorkerRequest(BaseModel):
3940
declared_fps: Optional[float] = None
4041
rtsp_url: Optional[str] = None
4142
processing_timeout: Optional[int] = WEBRTC_MODAL_FUNCTION_TIME_LIMIT
43+
processing_session_started: Optional[datetime.datetime] = None
4244
requested_plan: Optional[str] = "webrtc-gpu-small"
4345
# TODO: replaced with requested_plan
4446
requested_gpu: Optional[str] = None

inference/core/interfaces/webrtc_worker/modal.py

Lines changed: 75 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
MODELS_CACHE_AUTH_CACHE_MAX_SIZE,
1616
MODELS_CACHE_AUTH_CACHE_TTL,
1717
MODELS_CACHE_AUTH_ENABLED,
18+
PRELOAD_HF_IDS,
1819
PROJECT,
1920
ROBOFLOW_INTERNAL_SERVICE_SECRET,
2021
WEBRTC_MODAL_APP_NAME,
@@ -44,6 +45,7 @@
4445
from inference.core.interfaces.webrtc_worker.webrtc import (
4546
init_rtc_peer_connection_with_loop,
4647
)
48+
from inference.core.roboflow_api import get_roboflow_workspace
4749
from inference.core.version import __version__
4850
from inference.usage_tracking.collector import usage_collector
4951
from inference.usage_tracking.plan_details import WebRTCPlan
@@ -55,11 +57,10 @@
5557

5658

5759
if modal is not None:
60+
docker_tag: str = WEBRTC_MODAL_IMAGE_TAG if WEBRTC_MODAL_IMAGE_TAG else __version__
5861
# https://modal.com/docs/reference/modal.Image
5962
video_processing_image = (
60-
modal.Image.from_registry(
61-
f"{WEBRTC_MODAL_IMAGE_NAME}:{WEBRTC_MODAL_IMAGE_TAG if WEBRTC_MODAL_IMAGE_TAG else __version__}"
62-
)
63+
modal.Image.from_registry(f"{WEBRTC_MODAL_IMAGE_NAME}:{docker_tag}")
6364
.pip_install("modal")
6465
.entrypoint([])
6566
)
@@ -71,6 +72,7 @@
7172
app = modal.App(
7273
name=WEBRTC_MODAL_APP_NAME,
7374
image=video_processing_image,
75+
tags={"tag": docker_tag},
7476
)
7577

7678
decorator_kwargs = {
@@ -81,56 +83,51 @@
8183
"enable_memory_snapshot": WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT,
8284
"max_inputs": WEBRTC_MODAL_FUNCTION_MAX_INPUTS,
8385
"env": {
84-
"ROBOFLOW_INTERNAL_SERVICE_SECRET": ROBOFLOW_INTERNAL_SERVICE_SECRET,
85-
"ROBOFLOW_INTERNAL_SERVICE_NAME": WEBRTC_MODAL_ROBOFLOW_INTERNAL_SERVICE_NAME,
86-
"PROJECT": PROJECT,
87-
"LOG_LEVEL": LOG_LEVEL,
88-
"INTERNAL_WEIGHTS_URL_SUFFIX": INTERNAL_WEIGHTS_URL_SUFFIX,
89-
"MODELS_CACHE_AUTH_ENABLED": str(MODELS_CACHE_AUTH_ENABLED),
90-
"MODELS_CACHE_AUTH_CACHE_TTL": str(MODELS_CACHE_AUTH_CACHE_TTL),
91-
"MODELS_CACHE_AUTH_CACHE_MAX_SIZE": str(MODELS_CACHE_AUTH_CACHE_MAX_SIZE),
92-
"METRICS_ENABLED": "False",
9386
"ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS": str(
9487
ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS
9588
),
96-
"WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE": WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE,
89+
"ALLOW_WORKFLOW_BLOCKS_ACCESSING_ENVIRONMENTAL_VARIABLES": "False",
90+
"DISABLE_INFERENCE_CACHE": "True",
91+
"DISABLE_VERSION_CHECK": "True",
92+
"HF_HOME": Path(MODEL_CACHE_DIR).joinpath("hf_home").as_posix(),
93+
"INTERNAL_WEIGHTS_URL_SUFFIX": INTERNAL_WEIGHTS_URL_SUFFIX,
94+
"METRICS_ENABLED": "False",
9795
"MODAL_TOKEN_ID": MODAL_TOKEN_ID,
9896
"MODAL_TOKEN_SECRET": MODAL_TOKEN_SECRET,
9997
"MODAL_WORKSPACE_NAME": MODAL_WORKSPACE_NAME,
100-
"ALLOW_WORKFLOW_BLOCKS_ACCESSING_ENVIRONMENTAL_VARIABLES": "False",
101-
"DISABLE_VERSION_CHECK": "True",
10298
"MODEL_CACHE_DIR": MODEL_CACHE_DIR,
103-
"HF_HOME": Path(MODEL_CACHE_DIR).joinpath("hf_home").as_posix(),
99+
"MODELS_CACHE_AUTH_CACHE_MAX_SIZE": str(MODELS_CACHE_AUTH_CACHE_MAX_SIZE),
100+
"MODELS_CACHE_AUTH_CACHE_TTL": str(MODELS_CACHE_AUTH_CACHE_TTL),
101+
"MODELS_CACHE_AUTH_ENABLED": str(MODELS_CACHE_AUTH_ENABLED),
102+
"LOG_LEVEL": LOG_LEVEL,
103+
"ONNXRUNTIME_EXECUTION_PROVIDERS": "[CUDAExecutionProvider,CPUExecutionProvider]",
104+
"PRELOAD_HF_IDS": PRELOAD_HF_IDS,
105+
"PROJECT": PROJECT,
106+
"ROBOFLOW_INTERNAL_SERVICE_NAME": WEBRTC_MODAL_ROBOFLOW_INTERNAL_SERVICE_NAME,
107+
"ROBOFLOW_INTERNAL_SERVICE_SECRET": ROBOFLOW_INTERNAL_SERVICE_SECRET,
108+
"WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE": WORKFLOWS_CUSTOM_PYTHON_EXECUTION_MODE,
104109
"TELEMETRY_USE_PERSISTENT_QUEUE": "False",
105-
"DISABLE_INFERENCE_CACHE": "True",
106-
"WEBRTC_MODAL_FUNCTION_GPU": WEBRTC_MODAL_FUNCTION_GPU,
107-
"WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW": str(
108-
WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW
109-
),
110110
"WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS": str(
111111
WEBRTC_MODAL_FUNCTION_BUFFER_CONTAINERS
112112
),
113+
"WEBRTC_MODAL_FUNCTION_GPU": WEBRTC_MODAL_FUNCTION_GPU,
113114
"WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS": str(
114115
WEBRTC_MODAL_FUNCTION_MIN_CONTAINERS
115116
),
117+
"WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW": str(
118+
WEBRTC_MODAL_FUNCTION_SCALEDOWN_WINDOW
119+
),
116120
"WEBRTC_MODAL_FUNCTION_TIME_LIMIT": str(WEBRTC_MODAL_FUNCTION_TIME_LIMIT),
117121
"WEBRTC_MODAL_IMAGE_NAME": WEBRTC_MODAL_IMAGE_NAME,
118122
"WEBRTC_MODAL_IMAGE_TAG": WEBRTC_MODAL_IMAGE_TAG,
119123
"WEBRTC_MODAL_RTSP_PLACEHOLDER": WEBRTC_MODAL_RTSP_PLACEHOLDER,
120124
"WEBRTC_MODAL_RTSP_PLACEHOLDER_URL": WEBRTC_MODAL_RTSP_PLACEHOLDER_URL,
121-
"ONNXRUNTIME_EXECUTION_PROVIDERS": "[CUDAExecutionProvider,CPUExecutionProvider]",
122125
},
123126
"volumes": {MODEL_CACHE_DIR: rfcache_volume},
124127
}
125128

126129
class RTCPeerConnectionModal:
127130
_webrtc_request: Optional[WebRTCWorkerRequest] = modal.parameter(default=None)
128-
_exec_session_started: Optional[datetime.datetime] = modal.parameter(
129-
default=None
130-
)
131-
_exec_session_stopped: Optional[datetime.datetime] = modal.parameter(
132-
default=None
133-
)
134131

135132
@modal.method()
136133
def rtc_peer_connection_modal(
@@ -139,6 +136,12 @@ def rtc_peer_connection_modal(
139136
q: modal.Queue,
140137
):
141138
logger.info("*** Spawning %s:", self.__class__.__name__)
139+
logger.info("Inference tag: %s", docker_tag)
140+
_exec_session_started = datetime.datetime.now()
141+
webrtc_request.processing_session_started = _exec_session_started
142+
logger.info(
143+
"WebRTC session started at %s", _exec_session_started.isoformat()
144+
)
142145
logger.info(
143146
"webrtc_realtime_processing: %s",
144147
webrtc_request.webrtc_realtime_processing,
@@ -171,18 +174,11 @@ def send_answer(obj: WebRTCWorkerResult):
171174
send_answer=send_answer,
172175
)
173176
)
174-
175-
# https://modal.com/docs/reference/modal.enter
176-
# Modal usage calculation is relying on no concurrency and no hot instances
177-
@modal.enter()
178-
def start(self):
179-
self._exec_session_started = datetime.datetime.now()
180-
181-
@modal.exit()
182-
def stop(self):
183-
if not self._webrtc_request:
184-
return
185-
self._exec_session_stopped = datetime.datetime.now()
177+
_exec_session_stopped = datetime.datetime.now()
178+
logger.info(
179+
"WebRTC session stopped at %s",
180+
_exec_session_stopped.isoformat(),
181+
)
186182
workflow_id = self._webrtc_request.workflow_configuration.workflow_id
187183
if not workflow_id:
188184
if self._webrtc_request.workflow_configuration.workflow_specification:
@@ -195,16 +191,38 @@ def stop(self):
195191
# requested plan is guaranteed to be set due to validation in spawn_rtc_peer_connection_modal
196192
webrtc_plan = self._webrtc_request.requested_plan
197193

194+
video_source = "realtime browser stream"
195+
if self._webrtc_request.rtsp_url:
196+
video_source = "rtsp"
197+
elif not self._webrtc_request.webrtc_realtime_processing:
198+
video_source = "buffered browser stream"
199+
198200
usage_collector.record_usage(
199201
source=workflow_id,
200202
category="modal",
201203
api_key=self._webrtc_request.api_key,
202-
resource_details={"plan": webrtc_plan},
204+
resource_details={
205+
"plan": webrtc_plan,
206+
"billable": True,
207+
"video_source": video_source,
208+
},
203209
execution_duration=(
204-
self._exec_session_stopped - self._exec_session_started
210+
_exec_session_stopped - _exec_session_started
205211
).total_seconds(),
206212
)
207213
usage_collector.push_usage_payloads()
214+
logger.info("Function completed")
215+
216+
# https://modal.com/docs/reference/modal.enter
217+
# https://modal.com/docs/guide/memory-snapshot#gpu-memory-snapshot
218+
@modal.enter(snap=True)
219+
def start(self):
220+
# TODO: pre-load models
221+
logger.info("Starting container")
222+
223+
@modal.exit()
224+
def stop(self):
225+
logger.info("Stopping container")
208226

209227
# Modal derives function name from class name
210228
# https://modal.com/docs/reference/modal.App#cls
@@ -217,7 +235,6 @@ class RTCPeerConnectionModalCPU(RTCPeerConnectionModal):
217235
@app.cls(
218236
**{
219237
**decorator_kwargs,
220-
"enable_memory_snapshot": False,
221238
"gpu": WEBRTC_MODAL_FUNCTION_GPU, # https://modal.com/docs/guide/gpu#specifying-gpu-type
222239
"experimental_options": {
223240
"enable_gpu_snapshot": WEBRTC_MODAL_FUNCTION_ENABLE_MEMORY_SNAPSHOT
@@ -266,7 +283,21 @@ def spawn_rtc_peer_connection_modal(
266283
)
267284
except modal.exception.NotFoundError:
268285
logger.info("Deploying webrtc modal app %s", WEBRTC_MODAL_APP_NAME)
269-
app.deploy(name=WEBRTC_MODAL_APP_NAME, client=client)
286+
app.deploy(name=WEBRTC_MODAL_APP_NAME, client=client, tag=docker_tag)
287+
288+
workspace_id = webrtc_request.workflow_configuration.workspace_name
289+
if not workspace_id:
290+
try:
291+
workspace_id = get_roboflow_workspace(api_key=webrtc_request.api_key)
292+
webrtc_request.workflow_configuration.workspace_name = workspace_id
293+
except Exception:
294+
pass
295+
296+
tags = {"tag": docker_tag}
297+
if workspace_id:
298+
tags["workspace_id"] = workspace_id
299+
300+
# TODO: tag function run
270301

271302
if webrtc_request.requested_gpu:
272303
RTCPeerConnectionModal = RTCPeerConnectionModalGPU

inference/core/interfaces/webrtc_worker/webrtc.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from inference.core.env import (
2424
WEBRTC_MODAL_RTSP_PLACEHOLDER,
2525
WEBRTC_MODAL_RTSP_PLACEHOLDER_URL,
26+
WEBRTC_MODAL_SHUTDOWN_RESERVE,
2627
)
2728
from inference.core.exceptions import (
2829
MissingApiKeyError,
@@ -465,22 +466,25 @@ async def init_rtc_peer_connection_with_loop(
465466
webrtc_request: WebRTCWorkerRequest,
466467
send_answer: Callable[[WebRTCWorkerResult], None],
467468
asyncio_loop: Optional[asyncio.AbstractEventLoop] = None,
469+
shutdown_reserve: int = WEBRTC_MODAL_SHUTDOWN_RESERVE,
468470
) -> RTCPeerConnectionWithLoop:
469471
termination_date = None
470472
terminate_event = asyncio.Event()
471473

472474
if webrtc_request.processing_timeout is not None:
473475
try:
474476
time_limit_seconds = int(webrtc_request.processing_timeout)
475-
datetime_now = datetime.datetime.now()
477+
datetime_now = webrtc_request.processing_session_started
478+
if datetime_now is None:
479+
datetime_now = datetime.datetime.now()
476480
termination_date = datetime_now + datetime.timedelta(
477-
seconds=time_limit_seconds - 1
481+
seconds=time_limit_seconds - shutdown_reserve
478482
)
479483
logger.info(
480484
"Setting termination date to %s (%s seconds from %s)",
481-
termination_date,
485+
termination_date.isoformat(),
482486
time_limit_seconds,
483-
datetime_now,
487+
datetime_now.isoformat(),
484488
)
485489
except (TypeError, ValueError):
486490
pass
@@ -653,7 +657,7 @@ def on_track(track: RemoteStreamTrack):
653657

654658
@peer_connection.on("connectionstatechange")
655659
async def on_connectionstatechange():
656-
logger.info("Connection state is %s", peer_connection.connectionState)
660+
logger.info("on_connectionstatechange: %s", peer_connection.connectionState)
657661
if peer_connection.connectionState in {"failed", "closed"}:
658662
if video_processor.track:
659663
logger.info("Stopping video processor track")
@@ -662,7 +666,6 @@ async def on_connectionstatechange():
662666
logger.info("Stopping WebRTC peer")
663667
await peer_connection.close()
664668
terminate_event.set()
665-
logger.info("'connectionstatechange' event handler finished")
666669

667670
@peer_connection.on("datachannel")
668671
def on_datachannel(channel: RTCDataChannel):

inference/core/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.61.0"
1+
__version__ = "0.61.1"
22

33

44
if __name__ == "__main__":

0 commit comments

Comments (0)