
Commit c617216

Merge pull request #1728 from roboflow/feature/inference-exp-jp-5
Feature/inference exp jp 5
2 parents d9dfeda + 0fc6cf1 · commit c617216

17 files changed: +681 −57 lines

.github/workflows/docker.jetson.6.2.0.yml

Lines changed: 15 additions & 1 deletion
@@ -12,9 +12,14 @@ on:
         type: boolean
         description: "Do you want to push image after build?"
         default: false
+      custom_tag:
+        type: string
+        description: "Custom tag to use for the image (overrides VERSION)"
+        default: ""
 
 env:
   VERSION: "0.0.0" # Default version, will be overwritten
+  BASE_IMAGE: "roboflow/roboflow-inference-server-jetson-6.2.0"
 
 jobs:
   docker:
@@ -35,13 +40,22 @@ jobs:
         uses: actions/checkout@v4
       - name: Read version from file
         run: echo "VERSION=$(DISABLE_VERSION_CHECK=true python ./inference/core/version.py)" >> $GITHUB_ENV
+      - name: Determine Image Tags
+        id: tags
+        uses: ./.github/actions/determine-tags
+        with:
+          custom_tag: ${{ github.event.inputs.custom_tag }}
+          version: ${{ env.VERSION }}
+          base_image: ${{ env.BASE_IMAGE }}
+          force_push: ${{ github.event.inputs.force_push }}
+          token: ${{ secrets.GITHUB_TOKEN }}
       - name: Set up Depot CLI
         uses: depot/setup-action@v1
       - name: Build and Push
         uses: depot/build-push-action@v1
         with:
           push: ${{ github.event_name == 'release' || (github.event.inputs.force_push == 'true')}}
           project: grl7ffzxd7
-          tags: roboflow/roboflow-inference-server-jetson-6.2.0:latest,roboflow/roboflow-inference-server-jetson-6.2.0:${{ env.VERSION}}
+          tags: ${{ steps.tags.outputs.image_tags }}
           platforms: linux/arm64
           file: ./docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0
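
The tag list that was previously hard-coded in this workflow now comes from the determine-tags composite action, which is referenced here but not shown on this page. Judging by the input descriptions (custom_tag "overrides VERSION") and the inline value it replaces, the action presumably reduces to something like the following Python sketch; the function name and the example version are illustrative only:

# Hypothetical sketch of the tag derivation performed by
# ./.github/actions/determine-tags; the real action is not shown in this
# commit view and may differ.
def determine_tags(base_image: str, version: str, custom_tag: str = "") -> str:
    if custom_tag:
        # A custom tag overrides VERSION entirely, per the workflow input description.
        return f"{base_image}:{custom_tag}"
    # Otherwise keep the old inline behavior: tag both :latest and the release version.
    return f"{base_image}:latest,{base_image}:{version}"

print(determine_tags("roboflow/roboflow-inference-server-jetson-6.2.0", "0.56.0"))
# roboflow/roboflow-inference-server-jetson-6.2.0:latest,roboflow/roboflow-inference-server-jetson-6.2.0:0.56.0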

docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0

Lines changed: 25 additions & 7 deletions
@@ -3,9 +3,9 @@
 FROM nvcr.io/nvidia/l4t-jetpack:r36.4.0 AS builder
 
 ARG DEBIAN_FRONTEND=noninteractive
-ARG CMAKE_VERSION=3.31.10
-ARG PYTORCH_VERSION=2.8.0
-ARG TORCHVISION_VERSION=0.23.0
+ARG CMAKE_VERSION=4.2.0
+ARG PYTORCH_VERSION=2.6.0
+ARG TORCHVISION_VERSION=0.21.0
 ARG OPENCV_VERSION=4.10.0
 ARG ONNXRUNTIME_VERSION=1.20.0
 ENV LANG=en_US.UTF-8
@@ -94,6 +94,7 @@ RUN git clone --recursive --branch v${PYTORCH_VERSION} https://github.com/pytorc
     export PYTORCH_BUILD_VERSION=${PYTORCH_VERSION} PYTORCH_BUILD_NUMBER=1 && \
     export CMAKE_BUILD_TYPE=Release BUILD_SHARED_LIBS=ON USE_PRIORITIZED_TEXT_FOR_LD=1 && \
     export MAX_JOBS=12 && \
+    export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
     python3 setup.py bdist_wheel && \
     python3 -m pip install dist/torch-*.whl
 
@@ -113,6 +114,15 @@ ENV CUDA_HOME=/usr/local/cuda \
     PATH=/usr/local/cuda/bin:$PATH \
     LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 
+# Install Tensorrt
+RUN apt remove -y 'libnvinfer*' 'libnvonnxparsers*' 'libnvparsers*' 'libnvinfer-plugin*' 'python3-libnvinfer*' 'tensorrt*'
+WORKDIR /build/tensorrt-10.x
+RUN wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/local_repo/nv-tensorrt-local-tegra-repo-ubuntu2204-10.7.0-cuda-12.6_1.0-1_arm64.deb && \
+    dpkg -i nv-tensorrt-local-tegra-repo-ubuntu2204-10.7.0-cuda-12.6_1.0-1_arm64.deb && \
+    cp /var/nv-tensorrt-local-tegra-repo-ubuntu2204-10.7.0-cuda-12.6/nv-tensorrt-local-tegra-C50F04B9-keyring.gpg /usr/share/keyrings/ && \
+    apt-get update && \
+    apt-get install -y tensorrt
+
 # Build onnxruntime-gpu from source with TensorRT support
 WORKDIR /build/onnxruntime
 RUN git clone --recursive --branch v${ONNXRUNTIME_VERSION} https://github.com/microsoft/onnxruntime.git && \
@@ -178,6 +188,7 @@ RUN uv pip install --system --break-system-packages --index-strategy unsafe-best
     -r requirements.sdk.http.txt \
     -r requirements.easyocr.txt \
     -r requirements.jetson.txt \
+    "pycuda>=2025.0.0,<2026.0.0" \
     "setuptools<=75.5.0" \
     packaging \
     && rm -rf ~/.cache/uv
@@ -205,7 +216,6 @@ RUN ln -sf /usr/bin/python3 /usr/bin/python && \
 RUN cd /usr/local/lib/python3.10/dist-packages && \
     find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
     rm -rf debugpy* jupyterlab* jupyter_* notebook* ipython* ipykernel* || true && \
-    rm -rf torch/bin torch/include || true && \
     rm -rf onnx/backend/test onnx/test || true && \
     rm -rf scipy/*/tests pandas/tests || true && \
     rm -rf */examples */benchmarks */docs || true && \
@@ -285,6 +295,12 @@ RUN ldconfig
 
 # Copy Python packages
 COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
+COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt /usr/local/lib/python3.10/dist-packages/tensorrt
+COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt-10.7.0.dist-info /usr/local/lib/python3.10/dist-packages/tensorrt-10.7.0.dist-info
+COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_dispatch /usr/local/lib/python3.10/dist-packages/tensorrt_dispatch
+COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_dispatch-10.7.0.dist-info /usr/local/lib/python3.10/dist-packages/tensorrt_dispatch-10.7.0.dist-info
+COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_lean /usr/local/lib/python3.10/dist-packages/tensorrt_lean
+COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_lean-10.7.0.dist-info /usr/local/lib/python3.10/dist-packages/tensorrt_lean-10.7.0.dist-info
 COPY --from=builder /usr/local/bin/inference /usr/local/bin/inference
 
 ENV PYTHONPATH=/usr/local/lib/python3.10/dist-packages:$PYTHONPATH
@@ -295,6 +311,8 @@ COPY inference_cli inference_cli
 COPY inference_sdk inference_sdk
 COPY docker/config/gpu_http.py gpu_http.py
 
+RUN python -m pip uninstall -y boto3 botocore && python -m pip install "boto3>=1.40.0,<=1.41.5" "botocore>=1.40.0,<=1.41.5"
+
 # Environment variables
 ENV VERSION_CHECK_MODE=once \
     CORE_MODEL_SAM2_ENABLED=True \
@@ -306,14 +324,14 @@ ENV VERSION_CHECK_MODE=once \
     ORT_TENSORRT_ENGINE_CACHE_PATH=/tmp/ort_cache \
     ORT_TENSORRT_MAX_WORKSPACE_SIZE=4294967296 \
     ORT_TENSORRT_BUILDER_OPTIMIZATION_LEVEL=5 \
-    ONNXRUNTIME_EXECUTION_PROVIDERS=[TensorrtExecutionProvider] \
-    REQUIRED_ONNX_PROVIDERS=TensorrtExecutionProvider \
     OPENBLAS_CORETYPE=ARMV8 \
     LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 \
     WORKFLOWS_STEP_EXECUTION_MODE=local \
     WORKFLOWS_MAX_CONCURRENT_STEPS=4 \
     API_LOGGING_ENABLED=True \
-    DISABLE_WORKFLOW_ENDPOINTS=false
+    DISABLE_WORKFLOW_ENDPOINTS=false \
+    ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS=True \
+    USE_INFERENCE_EXP_MODELS=False
 
 LABEL org.opencontainers.image.description="Inference Server - Jetson 6.2.0 (PyTorch from source, numpy 2.x)"
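
Since the runtime stage now receives the TensorRT 10.7 Python packages by copying them out of the builder stage, and the ONNXRUNTIME_EXECUTION_PROVIDERS / REQUIRED_ONNX_PROVIDERS pins are gone, a quick in-container check confirms everything still resolves. A minimal sketch using public onnxruntime/tensorrt APIs; the expected values come from this diff:

# Run inside the built image: verify the copied TensorRT packages import and
# that onnxruntime (built from source with TensorRT support) exposes the provider.
import onnxruntime as ort
import tensorrt as trt

print("TensorRT version:", trt.__version__)            # expected: 10.7.0 per this Dockerfile
print("ORT providers:", ort.get_available_providers())
# TensorrtExecutionProvider should appear in the list; it is no longer
# force-required at startup now that REQUIRED_ONNX_PROVIDERS was dropped.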

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+FROM roboflow/l4t-ml:r35.2.1-py3.12-cu118-trt-10-v0.0.1
+
+COPY requirements/requirements.clip.txt \
+    requirements/requirements.http.txt \
+    requirements/requirements.doctr.txt \
+    requirements/requirements.groundingdino.txt \
+    requirements/requirements.sdk.http.txt \
+    requirements/requirements.yolo_world.txt \
+    requirements/_requirements.txt \
+    requirements/requirements.easyocr.txt \
+    requirements/requirements.gpu.txt \
+    ./
+
+RUN python -m pip install \
+    -r _requirements.txt \
+    -r requirements.clip.txt \
+    -r requirements.http.txt \
+    -r requirements.doctr.txt \
+    -r requirements.groundingdino.txt \
+    -r requirements.sdk.http.txt \
+    -r requirements.yolo_world.txt \
+    -r requirements.easyocr.txt \
+    -r requirements.gpu.txt \
+    "pycuda>=2025.0.0,<2026.0.0"
+
+
+WORKDIR /app/
+COPY inference inference
+COPY inference_cli inference_cli
+COPY inference_sdk inference_sdk
+COPY docker/config/gpu_http.py gpu_http.py
+COPY .release .release
+COPY requirements requirements
+COPY Makefile Makefile
+
+RUN make create_inference_cli_whl PYTHON=python3.12
+RUN python -m pip install dist/inference_cli*.whl
+
+ENV VERSION_CHECK_MODE=continuous \
+    PROJECT=roboflow-platform \
+    ORT_TENSORRT_FP16_ENABLE=1 \
+    ORT_TENSORRT_ENGINE_CACHE_ENABLE=1 \
+    CORE_MODEL_SAM_ENABLED=False \
+    PROJECT=roboflow-platform \
+    NUM_WORKERS=1 \
+    HOST=0.0.0.0 \
+    PORT=9001 \
+    OPENBLAS_CORETYPE=ARMV8 \
+    LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 \
+    WORKFLOWS_STEP_EXECUTION_MODE=local \
+    WORKFLOWS_MAX_CONCURRENT_STEPS=2 \
+    API_LOGGING_ENABLED=True \
+    CORE_MODEL_TROCR_ENABLED=false \
+    RUNS_ON_JETSON=True \
+    ENABLE_PROMETHEUS=True \
+    ENABLE_STREAM_API=True \
+    STREAM_API_PRELOADED_PROCESSES=2 \
+    PYTHONPATH=/app:$PYTHONPATH
+ENV CORE_MODEL_SAM3_ENABLED=False \
+    ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS=True \
+    USE_INFERENCE_EXP_MODELS=False
+
+ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
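
This new Dockerfile (its path is not captured on this page; the base image is L4T r35.2.1, i.e. a JetPack 5 target, matching the PR branch name) serves the HTTP API via uvicorn on $PORT, 9001 by default. A minimal smoke test, assuming the container was started with the NVIDIA runtime and the port published; exact routes depend on the server build:

# Hypothetical smoke test for a container started along the lines of:
#   docker run --runtime nvidia -p 9001:9001 <image>
import requests

response = requests.get("http://localhost:9001/", timeout=10)
print(response.status_code)  # any HTTP answer proves uvicorn is serving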

inference/core/env.py

Lines changed: 3 additions & 0 deletions
@@ -208,6 +208,9 @@
 
 # Enable experimental RFDETR backend (inference_exp) rollout, default is True
 USE_INFERENCE_EXP_MODELS = str2bool(os.getenv("USE_INFERENCE_EXP_MODELS", "False"))
+ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS = str2bool(
+    os.getenv("ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS", "False")
+)
 
 # ID of host device, default is None
 DEVICE_ID = os.getenv("DEVICE_ID", None)
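
Note the pre-existing comment still says "default is True" while both flags actually default to "False". Like the other toggles in this module, the new flag is parsed with str2bool once at import time, so it has to be exported before inference is imported. A short sketch:

# The module-level flags in inference.core.env are evaluated on first import,
# so set the environment beforehand.
import os

os.environ["USE_INFERENCE_EXP_MODELS"] = "True"
os.environ["ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS"] = "False"

from inference.core.env import (
    ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS,
    USE_INFERENCE_EXP_MODELS,
)

print(USE_INFERENCE_EXP_MODELS, ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS)  # True False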

inference/core/models/exp_adapter.py

Lines changed: 5 additions & 2 deletions
@@ -15,7 +15,7 @@
     ObjectDetectionInferenceResponse,
     ObjectDetectionPrediction,
 )
-from inference.core.env import API_KEY
+from inference.core.env import ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS, API_KEY
 from inference.core.logger import logger
 from inference.core.models.base import Model
 from inference.core.utils.image_utils import load_image_rgb
@@ -37,7 +37,10 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
         from inference_exp import AutoModel  # type: ignore
 
         self._exp_model: ObjectDetectionModel = AutoModel.from_pretrained(
-            model_id_or_path=model_id, api_key=self.api_key
+            model_id_or_path=model_id,
+            api_key=self.api_key,
+            allow_untrusted_packages=ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS,
+            allow_direct_local_storage_loading=False,
         )
         # if hasattr(self._exp_model, "optimize_for_inference"):
         #     self._exp_model.optimize_for_inference()
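
For reference, the same loading path can be exercised outside the adapter; the keyword arguments below are exactly the ones the adapter now passes, while the model ID is a placeholder:

# Sketch of the adapter's hardened loading call. The env flag decides whether
# untrusted (e.g. client-registered) packages may be loaded; direct local
# storage loading stays disabled, matching the adapter's new fixed argument.
import os
from inference_exp import AutoModel

model = AutoModel.from_pretrained(
    model_id_or_path="my-project/1",   # placeholder model ID
    api_key=os.environ["ROBOFLOW_API_KEY"],
    allow_untrusted_packages=False,    # adapter passes ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS here
    allow_direct_local_storage_loading=False,
)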

inference_cli/benchmark.py

Lines changed: 105 additions & 0 deletions
@@ -7,6 +7,7 @@
 from inference_cli.lib.benchmark.dataset import PREDEFINED_DATASETS
 from inference_cli.lib.benchmark_adapter import (
     run_infer_api_speed_benchmark,
+    run_inference_experimental_benchmark,
     run_python_package_speed_benchmark,
     run_workflow_api_speed_benchmark,
 )
@@ -269,5 +270,109 @@ def python_package_speed(
         raise typer.Exit(code=1)
 
 
+@benchmark_app.command(
+    help="This command provides a benchmark of inference-exp package. Currently, support for this feature "
+    "is experimental."
+)
+def inference_experimental_speed(
+    model_id: Annotated[
+        str,
+        typer.Option(
+            "--model_id",
+            "-m",
+            help="Model ID in format project/version.",
+        ),
+    ],
+    dataset_reference: Annotated[
+        str,
+        typer.Option(
+            "--dataset_reference",
+            "-d",
+            help=f"Name of predefined dataset (one of {list(PREDEFINED_DATASETS.keys())}) or path to directory with images",
+        ),
+    ] = "coco",
+    warm_up_inferences: Annotated[
+        int,
+        typer.Option("--warm_up_inferences", "-wi", help="Number of warm-up requests"),
+    ] = 10,
+    benchmark_inferences: Annotated[
+        int,
+        typer.Option(
+            "--benchmark_requests", "-bi", help="Number of benchmark requests"
+        ),
+    ] = 1000,
+    batch_size: Annotated[
+        int,
+        typer.Option("--batch_size", "-bs", help="Batch size of single request"),
+    ] = 1,
+    api_key: Annotated[
+        Optional[str],
+        typer.Option(
+            "--api-key",
+            "-a",
+            help="Roboflow API key for your workspace. If not given - env variable `ROBOFLOW_API_KEY` will be used",
+        ),
+    ] = None,
+    model_configuration: Annotated[
+        Optional[str],
+        typer.Option(
+            "--model_config", "-mc", help="Location of yaml file with model config"
+        ),
+    ] = None,
+    output_location: Annotated[
+        Optional[str],
+        typer.Option(
+            "--output_location",
+            "-o",
+            help="Location where to save the result (path to file or directory)",
+        ),
+    ] = None,
+    model_package_id: Annotated[
+        Optional[str],
+        typer.Option(
+            "--model_package_id",
+            "-o",
+            help="Selected model package ID (leave blank to run auto-negotiation)",
+        ),
+    ] = None,
+    turn_images_to_tensors: Annotated[
+        bool,
+        typer.Option(
+            "--images-as-tensors/--no-images-as-tensors",
+            help="Boolean flag to decide if input images are to be loaded as tensors on the device that model "
+            "is running, or should be left as np.arrays.",
+        ),
+    ] = True,
+    allow_untrusted_packages: Annotated[
+        bool,
+        typer.Option(
+            "--allow-untrusted-packages/--no-allow-untrusted-packages",
+            help="Boolean flag to decide if untrusted packages (for example the ones registered by clients) are "
+            "allowed to be loaded.",
+        ),
+    ] = True,
+):
+    try:
+        run_inference_experimental_benchmark(
+            model_id=model_id,
+            dataset_reference=dataset_reference,
+            warm_up_inferences=warm_up_inferences,
+            benchmark_inferences=benchmark_inferences,
+            batch_size=batch_size,
+            api_key=api_key,
+            model_configuration=model_configuration,
+            output_location=output_location,
+            model_package_id=model_package_id,
+            turn_images_to_tensors=turn_images_to_tensors,
+            allow_untrusted_packages=allow_untrusted_packages,
+        )
+    except KeyboardInterrupt:
+        print("Benchmark interrupted.")
+        return
+    except Exception as error:
+        typer.echo(f"Command failed. Cause: {error}")
+        raise typer.Exit(code=1)
+
+
 if __name__ == "__main__":
     benchmark_app()
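
One wrinkle worth noting: --model_package_id reuses the "-o" shorthand already taken by --output_location, so the long-form flags are the safe way to pass both. The command can also be driven programmatically through the adapter function it wraps; the keyword arguments below mirror the CLI defaults, with a placeholder model ID:

# Sketch: invoking the new benchmark entry point directly with the CLI defaults.
from inference_cli.lib.benchmark_adapter import run_inference_experimental_benchmark

run_inference_experimental_benchmark(
    model_id="my-project/1",        # placeholder; format is project/version
    dataset_reference="coco",
    warm_up_inferences=10,
    benchmark_inferences=1000,
    batch_size=1,
    api_key=None,                   # falls back to the ROBOFLOW_API_KEY env variable
    model_configuration=None,
    output_location=None,
    model_package_id=None,          # None triggers auto-negotiation
    turn_images_to_tensors=True,
    allow_untrusted_packages=True,
)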
