NVIDIA
diff --git a/examples/rl/environment_configs/countdown.yaml b/examples/rl/environment_configs/countdown.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 2 additions & 0 deletions
diff --git a/tests/functional_tests/python_test_utils/test_grpo_training_loop.py b/tests/functional_tests/python_test_utils/test_grpo_training_loop.py
Lines changed: 58 additions & 0 deletions
diff --git a/tests/functional_tests/shell_test_utils/run_ci_test.sh b/tests/functional_tests/shell_test_utils/run_ci_test.sh
Lines changed: 18 additions & 0 deletions
diff --git a/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt_grpo_tp1_pp1_dp8_583m_throughputtest/golden_values_dev_dgx_h100.json
Lines changed: 287 additions & 0 deletions
@@ -0,0 +1,3 @@
+- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent
+  agent_args: {}
+  weight: 1.0
@@ -85,6 +85,7 @@ dev = [
     "wget",
     "onnxscript",
     "fastapi~=0.50",                                          # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0
+    "datasets",
 ]
 
 lts = [
@@ -103,6 +104,7 @@ lts = [
     "wget",
     "onnxscript",
     "fastapi~=0.50",                      # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0
+    "datasets",
 ]
 
 [dependency-groups]
 
@@ -0,0 +1,58 @@
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+import json
+import logging
+import math
+from statistics import median
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def test_grpo_training_loop(golden_values_path: str, test_values_path: str) -> None:
+
+    with open(golden_values_path, 'r') as f1, open(test_values_path, 'r') as f2:
+        golden_values_content = f1.read()
+        tensorboard_content = f2.read()
+
+    output_groundtruth = json.loads(golden_values_content)
+
+    if isinstance(output_groundtruth, str):
+        # Handle JSONL output, assume only one line in this case.
+        output_groundtruth = json.loads(output_groundtruth)
+
+    output_current = json.loads(tensorboard_content)
+    if isinstance(output_current, str):
+        # Handle JSONL output, assume only one line in this case.
+        output_current = json.loads(output_current)
+
+    assert set(output_groundtruth.keys()).issuperset(
+        set(output_current.keys())
+    ), f"Some IDs from groundtruth are missing in current: {output_groundtruth.keys()} vs {output_current.keys()}"
+    if set(output_groundtruth.keys()) != set(output_current.keys()):
+        logger.warning(
+            f"Some IDs from groundtruth are missing in output, only the subset of ids in groundtruth will be tested: {output_groundtruth.keys()} vs {output_current.keys()}"
+        )
+    assert len(output_groundtruth) > 0, "No test performed for output"
+
+    if "iteration-time" in output_groundtruth.keys():
+
+        # First warmup iteration is excluded from iteration-time statistics.
+        iteration_time_sampled = median(
+            [l for l in output_current["iteration-time"]['values'].values()][1:]
+        )
+        iteration_time_golden = median(
+            [l for l in output_groundtruth["iteration-time"]['values'].values()][1:]
+        )
+
+        # 10% is empirically observed to be within hardware variance.
+        assert (
+            0.9 * iteration_time_golden <= iteration_time_sampled <= 1.2 * iteration_time_golden
+        ), (
+            f"Iteration time {iteration_time_sampled} ms not within 10% below or 20% above "
+            f"golden value ~{iteration_time_golden} ms. "
+            f"Sampled: {output_current['iteration-time']} ms. "
+            f"Please update golden values in the functional tests if this is expected."
+        )
+
+        output_groundtruth.pop('iteration-time')
@@ -314,6 +314,24 @@ for i in $(seq 1 $N_REPEAT); do
             fi
         fi
 
+        # For rl jobs
+        if [[ "$MODE" == "rl" && ("$TRAINING_EXIT_CODE" -eq 0 || "$TEST_TYPE" == "release") ]]; then
+            if [[ "$TEST_TYPE" == "frozen-start" ]]; then
+                TRAIN_ITERS=$(cat $TRAINING_PARAMS_PATH |
+                    /usr/local/bin/yq '.MODEL_ARGS."--exit-interval" // "50"')
+                uv run --no-sync python $ROOT_DIR/tests/functional_tests/python_test_utils/get_test_results_from_tensorboard_logs.py \
+                    --logs-dir $TENSORBOARD_PATH \
+                    --train-iters $TRAIN_ITERS \
+                    --output-path ${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH) \
+                    "${EXTRACT_ARGS[@]}"
+                uv run --no-sync pytest -s -o log_cli=true --log-cli-level=info $ROOT_DIR/tests/functional_tests/python_test_utils/test_grpo_training_loop.py \
+                    --golden-values-path $GOLDEN_VALUES_PATH \
+                    --test-values-path ${OUTPUT_PATH}/$(basename $GOLDEN_VALUES_PATH) \
+                    --model-config-path ${TRAINING_PARAMS_PATH} \
+                    $ALLOW_NONDETERMINISTIC_ALGO_ARG
+            fi
+        fi
+
         # Abort if training failed
         if [[ "$TRAINING_EXIT_CODE" -ne 0 && "$TEST_TYPE" != "release" ]]; then
             echo "Training failed. Aborting."
 
@@ -0,0 +1,287 @@
+{
+    "lm loss": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 0.0,
+            "2": 0.04415,
+            "3": 0.0378,
+            "4": 0.02944,
+            "5": 0.0,
+            "6": 0.0,
+            "7": 0.0,
+            "8": 0.08111,
+            "9": 0.0,
+            "10": 0.0,
+            "11": 0.0,
+            "12": 0.0,
+            "13": 0.0,
+            "14": 0.05935,
+            "15": 0.0,
+            "16": 0.05496,
+            "17": 0.0,
+            "18": 0.0,
+            "19": 0.0,
+            "20": 0.04534,
+            "21": 0.0,
+            "22": 0.0,
+            "23": 0.0,
+            "24": 0.0,
+            "25": 0.0,
+            "26": 0.0,
+            "27": 0.0,
+            "28": 0.0,
+            "29": 0.0,
+            "30": 0.0,
+            "31": 0.0,
+            "32": 0.0,
+            "33": 0.0,
+            "34": 0.0,
+            "35": 0.0,
+            "36": 0.0,
+            "37": 0.0099,
+            "38": 0.0,
+            "39": 0.0,
+            "40": 0.0,
+            "41": 0.03221,
+            "42": 0.0,
+            "43": 0.0,
+            "44": 0.0,
+            "45": 0.0,
+            "46": 0.0,
+            "47": 0.0,
+            "48": 0.0,
+            "49": 0.0,
+            "50": 0.0
+        }
+    },
+    "num-zeros": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 583687296.0,
+            "2": 0.0,
+            "3": 0.0,
+            "4": 49.0,
+            "5": 583687296.0,
+            "6": 583687296.0,
+            "7": 583687296.0,
+            "8": 12.0,
+            "9": 583687296.0,
+            "10": 583687296.0,
+            "11": 583687296.0,
+            "12": 583687296.0,
+            "13": 583687296.0,
+            "14": 6.0,
+            "15": 583687296.0,
+            "16": 62.0,
+            "17": 583687296.0,
+            "18": 583687296.0,
+            "19": 583687296.0,
+            "20": 23.0,
+            "21": 583687296.0,
+            "22": 583687296.0,
+            "23": 583687296.0,
+            "24": 583687296.0,
+            "25": 583687296.0,
+            "26": 583687296.0,
+            "27": 583687296.0,
+            "28": 583687296.0,
+            "29": 583687296.0,
+            "30": 583687296.0,
+            "31": 583687296.0,
+            "32": 583687296.0,
+            "33": 583687296.0,
+            "34": 583687296.0,
+            "35": 583687296.0,
+            "36": 583687296.0,
+            "37": 37.0,
+            "38": 583687296.0,
+            "39": 583687296.0,
+            "40": 583687296.0,
+            "41": 53.0,
+            "42": 583687296.0,
+            "43": 583687296.0,
+            "44": 583687296.0,
+            "45": 583687296.0,
+            "46": 583687296.0,
+            "47": 583687296.0,
+            "48": 583687296.0,
+            "49": 583687296.0,
+            "50": 583687296.0
+        }
+    },
+    "mem-allocated-bytes": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 55320928256.0,
+            "2": 55319695360.0,
+            "3": 55319674880.0,
+            "4": 55319638016.0,
+            "5": 55319638016.0,
+            "6": 55319638016.0,
+            "7": 55319633920.0,
+            "8": 55319625728.0,
+            "9": 55319621632.0,
+            "10": 55319625728.0,
+            "11": 55319625728.0,
+            "12": 55319629824.0,
+            "13": 55319547904.0,
+            "14": 55319552000.0,
+            "15": 55319552000.0,
+            "16": 55319552000.0,
+            "17": 55319552000.0,
+            "18": 55319552000.0,
+            "19": 55319556096.0,
+            "20": 55319556096.0,
+            "21": 55319556096.0,
+            "22": 55319556096.0,
+            "23": 55319556096.0,
+            "24": 55319560192.0,
+            "25": 55319560192.0,
+            "26": 55319560192.0,
+            "27": 55319560192.0,
+            "28": 55319552000.0,
+            "29": 55319552000.0,
+            "30": 55319552000.0,
+            "31": 55319552000.0,
+            "32": 55319552000.0,
+            "33": 55319552000.0,
+            "34": 55319556096.0,
+            "35": 55319556096.0,
+            "36": 55319556096.0,
+            "37": 55319560192.0,
+            "38": 55319560192.0,
+            "39": 55319560192.0,
+            "40": 55319556096.0,
+            "41": 55319552000.0,
+            "42": 55319552000.0,
+            "43": 55319552000.0,
+            "44": 55319552000.0,
+            "45": 55319552000.0,
+            "46": 55319552000.0,
+            "47": 55319556096.0,
+            "48": 55319556096.0,
+            "49": 55319556096.0,
+            "50": 55319552000.0
+        }
+    },
+    "mem-max-allocated-bytes": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 64753942528.0,
+            "2": 69804253184.0,
+            "3": 69804253184.0,
+            "4": 69804253184.0,
+            "5": 69804253184.0,
+            "6": 69804253184.0,
+            "7": 69804253184.0,
+            "8": 69804253184.0,
+            "9": 69804253184.0,
+            "10": 69804253184.0,
+            "11": 69804253184.0,
+            "12": 69804253184.0,
+            "13": 69804253184.0,
+            "14": 69804253184.0,
+            "15": 69804253184.0,
+            "16": 69804253184.0,
+            "17": 69804253184.0,
+            "18": 69804253184.0,
+            "19": 69804253184.0,
+            "20": 69804253184.0,
+            "21": 69804253184.0,
+            "22": 69804253184.0,
+            "23": 69804253184.0,
+            "24": 69804253184.0,
+            "25": 69804253184.0,
+            "26": 69804253184.0,
+            "27": 69804253184.0,
+            "28": 69804253184.0,
+            "29": 69804253184.0,
+            "30": 69804253184.0,
+            "31": 69804253184.0,
+            "32": 69804253184.0,
+            "33": 69804253184.0,
+            "34": 69804253184.0,
+            "35": 69804253184.0,
+            "36": 69804253184.0,
+            "37": 69804253184.0,
+            "38": 69804253184.0,
+            "39": 69804253184.0,
+            "40": 69804253184.0,
+            "41": 69804253184.0,
+            "42": 69804253184.0,
+            "43": 69804253184.0,
+            "44": 69804253184.0,
+            "45": 69804253184.0,
+            "46": 69804253184.0,
+            "47": 69804253184.0,
+            "48": 69804253184.0,
+            "49": 69804253184.0,
+            "50": 69804253184.0
+        }
+    },
+    "iteration-time": {
+        "start_step": 1,
+        "end_step": 50,
+        "step_interval": 1,
+        "values": {
+            "1": 74.35665,
+            "2": 5.25731,
+            "3": 5.75582,
+            "4": 4.02061,
+            "5": 3.8529,
+            "6": 3.91732,
+            "7": 4.14616,
+            "8": 3.83737,
+            "9": 3.75158,
+            "10": 3.91902,
+            "11": 3.96073,
+            "12": 3.83611,
+            "13": 3.86989,
+            "14": 3.88658,
+            "15": 4.46432,
+            "16": 3.90389,
+            "17": 3.8143,
+            "18": 3.86593,
+            "19": 3.78307,
+            "20": 3.90922,
+            "21": 3.82247,
+            "22": 3.76037,
+            "23": 4.00863,
+            "24": 3.74678,
+            "25": 3.86492,
+            "26": 3.83492,
+            "27": 3.86387,
+            "28": 3.99894,
+            "29": 3.85812,
+            "30": 4.34066,
+            "31": 3.88411,
+            "32": 3.80617,
+            "33": 3.90347,
+            "34": 3.7771,
+            "35": 3.84701,
+            "36": 3.81111,
+            "37": 3.75554,
+            "38": 3.99552,
+            "39": 3.87227,
+            "40": 3.81079,
+            "41": 3.83039,
+            "42": 3.74567,
+            "43": 3.82531,
+            "44": 3.78258,
+            "45": 3.73294,
+            "46": 4.579,
+            "47": 3.72516,
+            "48": 3.8117,
+            "49": 3.80651,
+            "50": 3.78283
+        }
+    }
+}
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+- agent_type: examples.rl.environments.countdown.countdown_agent.CountdownAgent`
	`2`	`+ agent_args: {}`
	`3`	`+ weight: 1.0`
Original file line number	Diff line number	Diff line change
`@@ -85,6 +85,7 @@ dev = [`
`85`	`85`	`"wget",`
`86`	`86`	`"onnxscript",`
`87`	`87`	`"fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0`
	`88`	`+ "datasets",`
`88`	`89`	`]`
`89`	`90`
`90`	`91`	`lts = [`
`@@ -103,6 +104,7 @@ lts = [`
`103`	`104`	`"wget",`
`104`	`105`	`"onnxscript",`
`105`	`106`	`"fastapi~=0.50", # Forcing a little bit more recent version of fastapi to be compatible with pydantic 2.0`
	`107`	`+ "datasets",`
`106`	`108`	`]`
`107`	`109`
`108`	`110`	`[dependency-groups]`