@@ -93,8 +93,25 @@ def _get_model(initialize_megatron=True):
 
     model = _get_model()
 
+    # Set seeds for deterministic dummy input generation AFTER model initialization
+    # (get_mcore_gpt_model calls initialize_for_megatron which sets seed=1234)
+    torch.manual_seed(1234)
+    torch.cuda.manual_seed_all(1234)
+    # Enable deterministic behavior for cuDNN
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
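+    # Note: the cuDNN flags above only affect cuDNN-backed kernels. A stricter (and
+    # slower) alternative would be torch.use_deterministic_algorithms(True); it is
+    # not enabled here since only the dummy-input RNG needs to be pinned.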
+
     sd = model.state_dict()
 
+    # Debug: Check weight initialization
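+    # (the mean of the first layer's QKV weight serves as a cheap fingerprint of the
+    # post-init RNG state; printing at 16 decimals lets runs be compared exactly)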
+    if rank == 0:
+        print("\n=== Weight Initialization Check ===")
+        qkv_key = "decoder.layers.0.self_attention.linear_qkv.weight"
+        if qkv_key in sd:
+            qkv_weight = sd[qkv_key]
+            print(f"{qkv_key}: mean={qkv_weight.mean().item():.16f}")
+        print("=" * 50 + "\n")
+
     def forward_loop(m):
         for _ in range(5):
             run_mcore_inference_with_dummy_input(m, batch_size, hidden_size)
@@ -145,17 +162,30 @@ def forward_loop(m):
     # Test case 1: MHA - pruned ffn/4 (num_attention_heads=8, num_query_groups=8, ffn_div=4)
     if pruned_ffn_div == 4:
         # Layer scores
+        if rank == 0:
+            print("\n=== TEST CASE 1 ===")
+            print(f"layer_scores[1] = {pruning_scores['layer_scores'][1]:.16f}")
+            print(f"layer_scores[2] = {pruning_scores['layer_scores'][2]:.16f}")
         assert pruning_scores["layer_scores"][1] == pytest.approx(2.0868452191352844, abs=1e-5)
         assert pruning_scores["layer_scores"][2] == pytest.approx(1.7638601660728455, abs=1e-5)
 
         # Validate decoder.layers.0.mlp activations
         mlp_0_acts = rank_0_activations["decoder.layers.0.mlp"]
+        if rank == 0:
+            print(f"mlp_0_acts.min() = {mlp_0_acts.min().item():.16f}")
+            print(f"mlp_0_acts.max() = {mlp_0_acts.max().item():.16f}")
+            print(f"mlp_0_acts.mean() = {mlp_0_acts.mean().item():.16f}")
         assert mlp_0_acts.min().item() == pytest.approx(0.0015609927941114, abs=1e-5)
         assert mlp_0_acts.max().item() == pytest.approx(0.3844809532165527, abs=1e-5)
         assert mlp_0_acts.mean().item() == pytest.approx(0.0629318505525589, abs=1e-5)
 
         # Validate decoder.layers.1.mlp activations
         mlp_1_acts = rank_0_activations["decoder.layers.1.mlp"]
+        if rank == 0:
+            print(f"mlp_1_acts.min() = {mlp_1_acts.min().item():.16f}")
+            print(f"mlp_1_acts.max() = {mlp_1_acts.max().item():.16f}")
+            print(f"mlp_1_acts.mean() = {mlp_1_acts.mean().item():.16f}")
+            print("=" * 50 + "\n")
         assert mlp_1_acts.min().item() == pytest.approx(0.0001484956446802, abs=1e-5)
         assert mlp_1_acts.max().item() == pytest.approx(0.7835369110107422, abs=1e-5)
         assert mlp_1_acts.mean().item() == pytest.approx(0.0926810950040817, abs=1e-5)
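+        # The golden values above are tied to seed 1234 and the dummy-input shapes;
+        # abs=1e-5 presumably leaves headroom for cross-device floating-point noise.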
@@ -244,14 +274,14 @@ def forward_loop(m):
     [
         # MHA - pruned ffn/4
         (8, 8, "squared_relu", "LayerNorm", 4, 1, 1, 1, 1, False, "rope", False, False),
-        # GQA - pruned attention/2
-        (8, 4, "squared_relu", "RMSNorm", 1, 2, 2, 1, 1, False, "rope", False, False),
-        # GQA - pruned hidden_size/4
-        (8, 4, "swiglu", "RMSNorm", 1, 1, 1, 4, 1, False, "rope", True, False),
-        # MHA - pruned num_layers/2
-        (8, 8, "swiglu", "LayerNorm", 1, 1, 1, 1, 2, False, "rope", False, False),
-        # GQA - pruned all/2, uneven pp
-        (8, 4, "swiglu", "RMSNorm", 2, 2, 2, 2, 2, True, "yarn", False, True),
+        # # GQA - pruned attention/2
+        # (8, 4, "squared_relu", "RMSNorm", 1, 2, 2, 1, 1, False, "rope", False, False),
+        # # GQA - pruned hidden_size/4
+        # (8, 4, "swiglu", "RMSNorm", 1, 1, 1, 4, 1, False, "rope", True, False),
+        # # MHA - pruned num_layers/2
+        # (8, 8, "swiglu", "LayerNorm", 1, 1, 1, 1, 2, False, "rope", False, False),
+        # # GQA - pruned all/2, uneven pp
+        # (8, 4, "swiglu", "RMSNorm", 2, 2, 2, 2, 2, True, "yarn", False, True),
     ],
 )
 def test_mcore_gpt_pruning(