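Removed the parallel max-finding algorithm because it contained a bug (the maximum is now found by a serial scan on worker 0). Reworked the fusion multiplier (Fmult) logic: Fmult is now adjusted inside fuse() so that P * Fmult does not exceed 0.99 (documentation updated as well). Added some debug printouts.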

FilipO28555 · FilipO28555 · commit bdb86e4666c0 · 2025-11-25T18:46:13.000+01:00
diff --git a/docs/source/usage/workflows/fusionReactions.rst b/docs/source/usage/workflows/fusionReactions.rst
@@ -181,21 +181,30 @@ Simulation Parameters
 
 .. note:: Fusion production multiplier (Fmult)
 
-   Increasing ``maxFmult`` allows more, lighter products to be created, down to
-   the ``productMinWeighting`` threshold. Decreasing ``maxFmult`` results in fewer,
-   heavier products. ``Fmult`` controls how the consumed reactant weight (the
+   The ``Fmult`` parameter controls how the consumed reactant weight (the
    minimum of the two reactant weights in a pair) is split into multiple product
-   macro-particles while keeping total weight conserved.
+   macro-particles while keeping total weight conserved. Increasing ``maxFmult`` 
+   allows more, lighter products to be created. Decreasing ``maxFmult`` results 
+   in fewer, heavier products.
 
    Algorithm (per fused pair):
 
-   - Start with ``Fmult = maxFmult``
-   - Compute ``productWeighting = minWeighting / Fmult``
-   - If ``productWeighting < productMinWeighting``, reduce ``Fmult`` to
-     ``max(1, minWeighting / productMinWeighting)`` and recompute ``productWeighting``
+   1. Start with ``Fmult = maxFmult``
+   2. Calculate the base fusion probability ``P``
+   3. Adjust ``Fmult`` to ensure valid Monte Carlo sampling:
 
-   The base ``productWeighting`` is then distributed across the two reactant
-   sites and product species using the site fractions ``W₁…W₄``.
+      - If ``P > 1.0``: Set ``Fmult = 1.0`` and ``P = 1.0`` (a warning is issued)
+      - If ``0 < P ≤ 1.0``: Cap the multiplier at ``Fmult = min(maxFmult, 0.99/P)``
+        so that ``P * Fmult ≤ 0.99`` (note: ``Fmult < 1`` when ``P > 0.99``)
+
+   4. Update the fusion probability: ``P = P * Fmult``
+   5. Compute ``productWeighting = minWeighting / Fmult``
+   6. Distribute the ``productWeighting`` across the two reactant sites and 
+      product species using the site fractions ``W₁…W₄``
+
+   This approach ensures that the fusion probability never exceeds 1.0 (required
+   for proper Monte Carlo sampling) while maximizing the number of product
+   particles created per fusion event, up to the ``maxFmult`` limit.
 
 See also
 --------
diff --git a/include/picongpu/particles/fusion/InterCollision.hpp b/include/picongpu/particles/fusion/InterCollision.hpp
@@ -94,7 +94,7 @@ namespace picongpu::particles::fusion
             T_Reactant2ParBox reactant2Box,
             T_Product1ParBox product1Box,
             T_Product2ParBox product2Box,
-            IdGenerator& idGen,
+            IdGenerator idGen,
             T_Mapping const mapper,
             T_DeviceHeapHandle deviceHeapHandle,
             T_RngHandle rngHandle,
@@ -187,11 +187,6 @@ namespace picongpu::particles::fusion
                 [&]()
                 {
                     maxNumParticlesInCell = nppc[0];
-
-                    if constexpr(debugFusion)
-                    {
-                        printf("worker %d: maxNumParticlesInCell = %d\n", worker.workerIdx(), maxNumParticlesInCell);
-                    }
                 });
             // don't need sync
 
@@ -500,24 +495,17 @@ namespace picongpu::particles::fusion
                         bool const isWeightingR1Greater = (weightingR1 >= weightingR2);
                         float_X const minWeighting = isWeightingR1Greater ? weightingR2 : weightingR1;
 
-                        // Fusion multiplier logic
-                        float_X Fmult = maxFmult;
-                        float_X productWeighting = minWeighting / Fmult;
-                        if(productWeighting < productMinWeighting)
-                        {
-                            Fmult = std::max(1._X, minWeighting / productMinWeighting);
-                            productWeighting = minWeighting / Fmult;
-                        }
-
-                        float3_X product1Momentum{0._X};
-                        float3_X product2Momentum{0._X};
-
                         // WU: doi.org/10.1063/5.0051178
-                        // P = n_min * n_a / n_ba * Fmult * minWeighting * dt * (sigma*v_rel*gamma_cm) <- inside fuse()
+                        // P = n_min * n_a / n_ba * minWeighting * dt * (Fmult*sigma*v_rel*gamma_cm) <- inside fuse()
                         float_X const probabilityCorrectionFactor
-                            = minReactantDensity * correctionFactor[cellIdx] * Fmult * sim.pic.getDt();
+                            = minReactantDensity * correctionFactor[cellIdx] * sim.pic.getDt();
+                        
+                        // Fusion multiplier - passed by reference; fuse() may modify it (capped so that P * Fmult <= 0.99, reset to 1 if P > 1)
+                        float_X Fmult = maxFmult;
 
                         // The actual fusion physics calculation
+                        float3_X product1Momentum{0._X};
+                        float3_X product2Momentum{0._X};
                         T_SrcCollisionFunctor fuser = collisionFunctor;
                         fuser().template fuse<T_Product1ParBox, T_Product2ParBox>(
                             worker,
@@ -526,14 +514,19 @@ namespace picongpu::particles::fusion
                             weightingR1,
                             weightingR2,
                             probabilityCorrectionFactor,
+                            Fmult,
                             product1Momentum,
                             product2Momentum,
                             rngHandle);
 
+
                         // If a reaction occurred, create the product particles
                         if(product1Momentum != float3_X{0._X} || product2Momentum != float3_X{0._X})
                         {
-                            weightingArray[i] += productWeighting; // no atomic needed because i is unique per thread
+                            // compute the product weighting only now: fuse() may have changed Fmult (capped so that P * Fmult <= 0.99, or reset to 1 if P > 1)
+                            float_X productWeighting = minWeighting / Fmult;
+                            
+                            weightingArray[i] = productWeighting; // no atomic needed because i is unique per thread
 
                             uint32_t freeIndex = alpaka::atomicAdd(
                                 worker.getAcc(),
@@ -618,6 +611,22 @@ namespace picongpu::particles::fusion
                         i < chunkStart + minNumParticles && i < maxNumParticles;
                         i += step)
                     {
+                        // no fusion happened for this pair
+                        if(weightingArray[i] == 0._X)
+                            continue;
+                            
+                        if constexpr(debugFusion)
+                            if(weightingArray[i] < 0._X){
+                                printf("Error: negative weighting in fusion reaction! weightingArray[%d] = %f\n", i, weightingArray[i]);
+                                // print fmult and weightingR1 and weightingR2
+                                float_X weightingR1 = accessor1[i % size1][weighting_];
+                                float_X weightingR2 = accessor2[i % size2][weighting_];
+                                printf("weightingR1 = %f, weightingR2 = %f\n", weightingR1, weightingR2);
+                                printf("Fmult = %f\n", maxFmult);
+                                // print the number of particles in longer list
+                                printf("size1 = %d, size2 = %d, weightingArraySize = %d\n", size1, size2, weightingArraySize);
+                                continue;
+                            }
                         float_X const oldWeighting1 = accessor1[i % size1][weighting_];
                         float_X const oldWeighting2 = accessor2[i % size2][weighting_];
 
@@ -630,9 +639,37 @@ namespace picongpu::particles::fusion
                         accessor2[i % size2][momentum_] *= accessor2[i % size2][weighting_] / oldWeighting2;
 
                         // if the weighting is too low or negative we remove the particle with fillGaps()
-                        accessor1[i % size1][multiMask_] = (accessor1[i % size1][weighting_] > 1e-6);
-                        accessor2[i % size2][multiMask_] = (accessor2[i % size2][weighting_] > 1e-6);
+                        accessor1[i % size1][multiMask_] = (accessor1[i % size1][weighting_] > 1e-6_X);
+                        accessor2[i % size2][multiMask_] = (accessor2[i % size2][weighting_] > 1e-6_X);
+
+                        
+                        // print i and weighting array
+                        if constexpr(debugFusion)
+                            if((((i==0 && alwaysFuseQ) || !alwaysFuseQ) || accessor1[i % size1][multiMask_]==0 || accessor2[i % size2][multiMask_]==0)){
+
+                            printf("worker %d: cell %d, i %d, weightingArray %f, new weighting1 %f, new weighting2 %f, old weighting1 %f, old weighting2 %f, difference 1 %f, difference 2 %f\n",
+                                worker.workerIdx(),
+                                cellIdx,
+                                i,
+                                weightingArray[i],
+                                accessor1[i % size1][weighting_],
+                                accessor2[i % size2][weighting_],
+                                oldWeighting1,
+                                oldWeighting2,
+                                oldWeighting1 - accessor1[i % size1][weighting_],
+                                oldWeighting2 - accessor2[i % size2][weighting_]);
+                                
+                                // print the multimask as well
+                                printf("worker %d: cell %d, i %d, multiMask1 %d, multiMask2 %d\n",
+                                    worker.workerIdx(),
+                                    cellIdx,
+                                    i,
+                                    accessor1[i % size1][multiMask_],
+                                    accessor2[i % size2][multiMask_]);
+                                
+                        }
                     }
+                    worker.sync();
                 }
                 worker.sync();
 
diff --git a/include/picongpu/particles/fusion/detail/Creation.hpp b/include/picongpu/particles/fusion/detail/Creation.hpp
@@ -399,8 +399,8 @@ namespace picongpu::particles::fusion
 
                 // Assign multiMask to indicate these are product particles
                 p1r1[multiMask_] = (W1 > tolerance) ? 1u : 0u;
-                p1r2[multiMask_] = (W3 > tolerance) ? 1u : 0u;
                 p2r1[multiMask_] = (W2 > tolerance) ? 1u : 0u;
+                p1r2[multiMask_] = (W3 > tolerance) ? 1u : 0u;
                 p2r2[multiMask_] = (W4 > tolerance) ? 1u : 0u;
 
                 // Assign momentum (weighted with weights) and weights to product particles
@@ -422,7 +422,7 @@ namespace picongpu::particles::fusion
                     using UniformFloat = pmacc::random::distributions::Uniform<
                         pmacc::random::distributions::uniform::ExcludeOne<precision::float_COLL>::Reduced>;
                     auto rng = rngHandle.template applyDistribution<UniformFloat>();
-                    if(worker.workerIdx() == 0 && rng(worker) < 1e-6)
+                    if(worker.workerIdx() == 0 && rng(worker) < 1e-8)
                     {
                         printf("Charges: %f, %f, %f, %f\n", q1, q2, q3, q4);
                         printf("Masses (A): %f, %f, %f, %f\n", m1, m2, m3, m4);
diff --git a/include/picongpu/particles/fusion/detail/arrayHelpers.hpp b/include/picongpu/particles/fusion/detail/arrayHelpers.hpp
@@ -52,24 +52,20 @@ namespace picongpu::particles::fusion::detail
 
     template<bool debug = false, typename T_worker, typename T_arr>
     DINLINE void maxArrayDestroy(T_worker const& worker, T_arr& arr, int const& size)
-    {
-        uint32_t pow = 1;
-        while(pow < size)
+    {   
+        if(worker.workerIdx() == 0)
         {
-            for(uint32_t i = worker.workerIdx(); pow * (2 * i + 1) < size; i += 2 * pow * worker.numWorkers())
-            {
-                arr[2 * i * pow] = std::max(arr[2 * i * pow], arr[pow * (2 * i + 1)]);
-            }
-            pow <<= 1; //*2
-            worker.sync();
-            if constexpr(debug)
+            auto maxVal = arr[0];
+            for(int i = 1; i < size; ++i)
             {
-                if(worker.workerIdx() == 0)
-                    printArray(arr);
-                worker.sync();
+                if (arr[i] > maxVal)
+                {
+                    maxVal = arr[i];
+                }
             }
+            arr[0] = maxVal;
         }
-        // max is now at arr[0];
+        worker.sync();
     }
 
     template<std::size_t... Is, std::size_t N>
diff --git a/include/picongpu/particles/fusion/relativistic/FusionAlgorithm.hpp b/include/picongpu/particles/fusion/relativistic/FusionAlgorithm.hpp
@@ -199,23 +199,6 @@ namespace picongpu::particles::fusion::relativistic
                 float3_COLL const u1_cm = p1_cm / mP1;
                 float3_COLL const u1_lab = u1_cm + (math::dot(V_cm, u1_cm) * factorA + gamma_cm * gamma_p1_cm) * V_cm;
                 labMomentum1 = u1_lab * mP1;
-                if constexpr(debugFusion)
-                {
-                    printf(
-                        "  Product 1: mass: %f, momentum: %f, %f, %f, energy: %f\n",
-                        mP0,
-                        labMomentum0[0],
-                        labMomentum0[1],
-                        labMomentum0[2],
-                        energy<float_X>(labMomentum0, mP0));
-                    printf(
-                        "  Product 2: mass: %f, momentum: %f, %f, %f, energy: %f\n",
-                        mP1,
-                        labMomentum1[0],
-                        labMomentum1[1],
-                        labMomentum1[2],
-                        energy<float_X>(labMomentum1, mP1));
-                }
             }
 
             /**
@@ -272,6 +255,7 @@ namespace picongpu::particles::fusion::relativistic
              * @param weightingR1 The weighting factor of the first reactant.
              * @param weightingR2 The weighting factor of the second reactant.
              * @param probabilityFactor A factor to adjust the fusion probability.
+             * @param Fmult In/out fusion multiplier: on input the requested multiplier; on output capped so that P * Fmult <= 0.99 (set to 1 if P > 1).
              * @param mom0 The output parameter for the momentum of the first product.
              * @param mom1 The output parameter for the momentum of the second product.
              * @param rngHandle The random number generator handle.
@@ -290,6 +274,7 @@ namespace picongpu::particles::fusion::relativistic
                 float_X weightingR1,
                 float_X weightingR2,
                 float_X probabilityFactor,
+                float_X& Fmult,
                 float3_X& mom0,
                 float3_X& mom1,
                 T_RngHandle& rngHandle)
@@ -324,6 +309,21 @@ namespace picongpu::particles::fusion::relativistic
 
                 float_X sigma_picArea = crossSection(fusionVar.E_r * convToKeV) * millibarn_to_picArea;
                 float_X P = probabilityFactor * sigma_picArea * fusionVar.V_rel_mag * fusionVar.gamma_cm;
+                
+                if(P > 1.0_COLL){
+                    // print warning
+                    printf(
+                        "Warning: Fusion probability exceeds 1.0 (P: %e) the process will be underestimated.\n",
+                        P);
+                    P= 1.0_COLL;
+                    Fmult = 1.0_COLL;
+                }
+                else if(P>0._COLL){
+                // limit P to not more than 0.99_COLL
+                    float_X const maxFmult = 0.99_COLL/P;
+                    Fmult = std::min(Fmult, maxFmult);
+                    P *= Fmult;
+                }
 
 
                 // print with probability 1e-8
@@ -412,6 +412,15 @@ namespace picongpu::particles::fusion::relativistic
                     fusionVar.P<T_Product0Box, T_Product1Box>(dir);
                     mom0 = fusionVar.P0();
                     mom1 = fusionVar.P1();
+                    
+                    if constexpr(debugFusion)
+                    {
+                        if((mom0 == float3_X{0.0_X, 0.0_X, 0.0_X})
+                        || (mom1 == float3_X{0.0_X, 0.0_X, 0.0_X})){
+                            printf("Error: Fusion produced zero momentum products.\n");
+                        }
+                    }
+                        
                 }
                 else
                 {