diff --git a/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl new file mode 100644 index 00000000000..de491a3761b --- /dev/null +++ b/benchmark/CUDA/elixir_euler_taylor_green_vortex.jl @@ -0,0 +1,79 @@ +using OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations3D(1.4) + +function initial_condition_taylor_green_vortex(x, t, + equations::CompressibleEulerEquations3D) + A = 1.0 # magnitude of speed + Ms = 0.1 # maximum Mach number + + rho = 1.0 + v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3]) + v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3]) + v3 = 0.0 + p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms + p = p + + 1.0 / 16.0 * A^2 * rho * + (cos(2 * x[1]) * cos(2 * x[3]) + + 2 * cos(2 * x[2]) + 2 * cos(2 * x[1]) + cos(2 * x[2]) * cos(2 * x[3])) + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end + +initial_condition = initial_condition_taylor_green_vortex + +# TODO Undefined external symbol "log" +#volume_flux = flux_ranocha +volume_flux = flux_lax_friedrichs +solver = DGSEM(polydeg = 5, surface_flux = volume_flux) +# TODO flux diff +#volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = (-1.0, -1.0, -1.0) .* pi +coordinates_max = (1.0, 1.0, 1.0) .* pi + +initial_refinement_level = 1 +trees_per_dimension = (4, 4, 4) + +mesh = P4estMesh(trees_per_dimension, polydeg = 1, + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + periodicity = true, initial_refinement_level = initial_refinement_level) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +tspan = (0.0, 100.0) +ode = semidiscretize(semi, tspan; storage_type = nothing, real_type = nothing) + +summary_callback = SummaryCallback() + +stepsize_callback = StepsizeCallback(cfl = 0.1) + +callbacks = CallbackSet(summary_callback, + stepsize_callback) + +############################################################################### +# run the simulation + +maxiters = 200 +run_profiler = false + +# disable warnings when maxiters is reached +integrator = init(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, + save_everystep = false, callback = callbacks, + maxiters = maxiters, verbose = false) +if run_profiler + prof_result = CUDA.@profile solve!(integrator) +else + solve!(integrator) + prof_result = nothing +end + +finalize(mesh) diff --git a/benchmark/CUDA/run.jl b/benchmark/CUDA/run.jl new file mode 100644 index 00000000000..70c840722af --- /dev/null +++ b/benchmark/CUDA/run.jl @@ -0,0 +1,89 @@ +using Trixi +using CUDA +using TimerOutputs +using JSON + +function main(elixir_path) + + # setup + maxiters = 50 + initial_refinement_level = 3 + storage_type = CuArray + real_type = Float64 + + println("Warming up...") + + # start simulation with tiny final time to trigger compilation + duration_compile = @elapsed begin + trixi_include(elixir_path, + tspan = (0.0, 1e-14), + storage_type = storage_type, + real_type = real_type) + trixi_include(elixir_path, + tspan = (0.0, 1e-14), + storage_type = storage_type, + real_type = Float32) + end + + println("Finished warm-up in $duration_compile seconds\n") + println("Starting simulation...") + + # start the real simulation + duration_elixir = @elapsed trixi_include(elixir_path, + maxiters = maxiters, + initial_refinement_level = initial_refinement_level, + storage_type = storage_type, + real_type = real_type) + + # store metrics (on every rank!) + metrics = Dict{String, Float64}("elapsed time" => duration_elixir) + + # read TimerOutputs timings + timer = Trixi.timer() + metrics["total time"] = 1.0e-9 * TimerOutputs.tottime(timer) + metrics["rhs! 
time"] = 1.0e-9 * TimerOutputs.time(timer["rhs!"]) + + # compute performance index + nrhscalls = Trixi.ncalls(semi.performance_counter) + walltime = 1.0e-9 * take!(semi.performance_counter) + metrics["PID"] = walltime * Trixi.mpi_nranks() / (Trixi.ndofsglobal(semi) * nrhscalls) + + # write json file + open("metrics.out", "w") do f + indent = 2 + JSON.print(f, metrics, indent) + end + + # run profiler + maxiters = 5 + initial_refinement_level = 1 + + println("Running profiler (Float64)...") + trixi_include(elixir_path, + maxiters = maxiters, + initial_refinement_level = initial_refinement_level, + storage_type = storage_type, + real_type = Float64, + run_profiler = true) + + open("profile_float64.txt", "w") do io + show(io, prof_result) + end + + println("Running profiler (Float32)...") + trixi_include(elixir_path, + maxiters = maxiters, + initial_refinement_level = initial_refinement_level, + storage_type = storage_type, + real_type = Float32, + run_profiler = true) + + open("profile_float32.txt", "w") do io + show(io, prof_result) + end +end + +# hardcoded elixir +elixir_path = joinpath(@__DIR__(), "elixir_euler_taylor_green_vortex.jl") + +main(elixir_path) diff --git a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl index 6f9e8e56986..ac3934eca7a 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_basic_gpu.jl @@ -48,9 +48,8 @@ save_solution = SaveSolutionCallback(interval = 100, stepsize_callback = StepsizeCallback(cfl = 1.6) # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver -callbacks = CallbackSet(summary_callback, stepsize_callback) -# TODO: GPU. The `analysis_callback` needs to be updated for GPU support -# analysis_callback, save_solution, stepsize_callback) +callbacks = CallbackSet(summary_callback, analysis_callback, + save_solution, stepsize_callback) ############################################################################### # run the simulation diff --git a/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl new file mode 100644 index 00000000000..801ae4cb6bc --- /dev/null +++ b/examples/p4est_3d_dgsem/elixir_advection_basic_gpu.jl @@ -0,0 +1,60 @@ +# The same setup as tree_3d_dgsem/elixir_advection_basic.jl +# to verify the P4estMesh implementation against TreeMesh + +using OrdinaryDiffEqLowStorageRK +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7, 0.5) +equations = LinearScalarAdvectionEquation3D(advection_velocity) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-1.0, -1.0, -1.0) # minimum coordinates (min(x), min(y), min(z)) +coordinates_max = (1.0, 1.0, 1.0) # maximum coordinates (max(x), max(y), max(z)) + +# Create P4estMesh with 8 x 8 x 8 elements (note `refinement_level=1`) +trees_per_dimension = (4, 4, 4) +mesh = P4estMesh(trees_per_dimension, polydeg = 3, + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + initial_refinement_level = 1) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, + solver) + 
+############################################################################### +# ODE solvers, callbacks etc. + +# Create ODE problem with time span from 0.0 to 1.0 +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan; real_type = nothing, storage_type = nothing) + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval = 100) + +# The SaveSolutionCallback allows to save the solution to a file in regular intervals +save_solution = SaveSolutionCallback(interval = 100, + solution_variables = cons2prim) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl = 1.2) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, + save_solution, stepsize_callback) + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false); + dt = 0.05, # solve needs some value here but it will be overwritten by the stepsize_callback + ode_default_options()..., callback = callbacks); diff --git a/src/Trixi.jl b/src/Trixi.jl index 21a73867718..095da4f6a4c 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -59,7 +59,8 @@ using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect using FillArrays: Ones, Zeros using ForwardDiff: ForwardDiff using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace -using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend +using KernelAbstractions: KernelAbstractions, @index, @kernel, get_backend, Backend, + allocate using LinearMaps: LinearMap if _PREFERENCE_LOOPVECTORIZATION using LoopVectorization: LoopVectorization, @turbo, indices diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index fa18c5af63a..0c4b1bc0b22 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -138,7 +138,7 @@ function calc_error_norms(func, u, t, analyzer, return l2_error, linf_error end -function calc_error_norms(func, u, t, analyzer, +function calc_error_norms(func, _u, t, analyzer, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, @@ -146,9 +146,19 @@ function calc_error_norms(func, u, t, analyzer, equations, initial_condition, dg::DGSEM, cache, cache_analysis) @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack node_coordinates, inverse_jacobian = cache.elements + u = _u + else + node_coordinates = Array(cache.elements.node_coordinates) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end + # Set up data structures l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) linf_error = copy(l2_error) @@ -210,13 +220,23 @@ function integrate_via_indices(func::Func, u, 
return integral end -function integrate_via_indices(func::Func, u, +function integrate_via_indices(func::Func, _u, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}, equations, dg::DGSEM, cache, args...; normalize = true) where {Func} - @unpack weights = dg.basis + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack weights = dg.basis + @unpack inverse_jacobian = cache.elements + u = _u + else + weights = Array(dg.basis.weights) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end # Initialize integral with zeros of the right shape integral = zero(func(u, 1, 1, 1, equations, dg, args...)) @@ -226,7 +246,7 @@ function integrate_via_indices(func::Func, u, @batch reduction=((+, integral), (+, total_volume)) for element in eachelement(dg, cache) for j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) + volume_jacobian = abs(inv(inverse_jacobian[i, j, element])) integral += volume_jacobian * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) total_volume += volume_jacobian * weights[i] * weights[j] @@ -271,10 +291,19 @@ function integrate(func::Func, u, end end -function analyze(::typeof(entropy_timederivative), du, u, t, +function analyze(::typeof(entropy_timederivative), _du, u, t, mesh::Union{TreeMesh{2}, StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}, equations, dg::DG, cache) + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(u) + if backend isa Nothing # TODO GPU KA CPU backend + du = _du + else + du = Array(_du) + end + + # Calculate # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, element, equations, dg, du diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 072ffc16096..d9bd08a868d 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -161,14 +161,24 @@ function calc_error_norms(func, u, t, analyzer, return l2_error, linf_error end -function calc_error_norms(func, u, t, analyzer, +function calc_error_norms(func, _u, t, analyzer, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack node_coordinates, inverse_jacobian = cache.elements + u = _u + else + node_coordinates = Array(cache.elements.node_coordinates) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end + # Set up data structures l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) linf_error = copy(l2_error) @@ -234,12 +244,22 @@ function integrate_via_indices(func::Func, u, return integral end -function integrate_via_indices(func::Func, u, +function integrate_via_indices(func::Func, _u, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::DGSEM, cache, args...; normalize = true) where {Func} - @unpack weights = dg.basis + # TODO GPU AnalysiCallback currently lives on CPU + 
backend = trixi_backend(_u) + if backend isa Nothing # TODO GPU KA CPU backend + @unpack weights = dg.basis + @unpack inverse_jacobian = cache.elements + u = _u + else + weights = Array(dg.basis.weights) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + u = Array(_u) + end # Initialize integral with zeros of the right shape integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) @@ -249,7 +269,7 @@ function integrate_via_indices(func::Func, u, @batch reduction=((+, integral), (+, total_volume)) for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) + volume_jacobian = abs(inv(inverse_jacobian[i, j, k, element])) integral += volume_jacobian * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) total_volume += volume_jacobian * weights[i] * weights[j] * weights[k] @@ -295,10 +315,18 @@ function integrate(func::Func, u, end end -function analyze(::typeof(entropy_timederivative), du, u, t, +function analyze(::typeof(entropy_timederivative), _du, u, t, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::DG, cache) + # TODO GPU AnalysiCallback currently lives on CPU + backend = trixi_backend(u) + if backend isa Nothing # TODO GPU KA CPU backend + du = _du + else + du = Array(_du) + end + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, k, element, equations, dg, du diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index ac40bc42de0..12f63792281 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -285,6 +285,11 @@ end element_variables = Dict{Symbol, Any}(), node_variables = Dict{Symbol, Any}(); system = "") + # TODO GPU currently on CPU + backend = trixi_backend(u_ode) + if backend !== nothing + u_ode = Array(u_ode) + end mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array_native(u_ode, mesh, equations, solver, cache) save_solution_file(u, t, dt, iter, mesh, equations, solver, cache, diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index da984e0b4b8..464777c3684 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -142,8 +142,9 @@ function calculate_dt(u_ode, t, cfl_advective, cfl_diffusive, semi::AbstractSemidiscretization) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - return cfl_advective(t) * max_dt(u, t, mesh, + return cfl_advective(t) * max_dt(backend, u, t, mesh, have_constant_speed(equations), equations, solver, cache) end @@ -153,8 +154,9 @@ function calculate_dt(u_ode, t, cfl_advective::Real, cfl_diffusive::Real, semi::AbstractSemidiscretization) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - return cfl_advective * max_dt(u, t, mesh, + return cfl_advective * max_dt(backend, u, t, mesh, have_constant_speed(equations), equations, solver, cache) end @@ -166,14 +168,15 @@ function calculate_dt(u_ode, t, cfl_advective, cfl_diffusive, equations_parabolic = semi.equations_parabolic u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - dt_advective = cfl_advective(t) * max_dt(u, t, mesh, + dt_advective = cfl_advective(t) 
* max_dt(backend, u, t, mesh, have_constant_speed(equations), equations, solver, cache) cfl_diff = cfl_diffusive(t) if cfl_diff > 0 # Check if diffusive CFL should be considered - dt_diffusive = cfl_diff * max_dt(u, t, mesh, + dt_diffusive = cfl_diff * max_dt(backend, u, t, mesh, have_constant_diffusivity(equations_parabolic), equations, equations_parabolic, solver, cache) diff --git a/src/callbacks_step/stepsize_dg1d.jl b/src/callbacks_step/stepsize_dg1d.jl index 61a9eb2e7f5..613bf3198b2 100644 --- a/src/callbacks_step/stepsize_dg1d.jl +++ b/src/callbacks_step/stepsize_dg1d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, @@ -29,7 +29,7 @@ function max_dt(u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_diffusivity::False, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -52,7 +52,7 @@ function max_dt(u, t, mesh::TreeMesh{1}, return 4 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, @@ -72,7 +72,7 @@ function max_dt(u, t, mesh::TreeMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{1}, constant_diffusivity::True, equations, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache) @@ -91,7 +91,7 @@ function max_dt(u, t, mesh::TreeMesh{1}, return 4 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::StructuredMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::StructuredMesh{1}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, @@ -119,7 +119,7 @@ function max_dt(u, t, mesh::StructuredMesh{1}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::StructuredMesh{1}, +function max_dt(backend::Nothing, u, t, mesh::StructuredMesh{1}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index d0bd3d49f32..d0612cc1d60 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(u, t, mesh::TreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -29,7 +29,7 @@ function max_dt(u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{2}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. 
for steady-state linear advection @@ -48,174 +48,248 @@ function max_dt(u, t, mesh::TreeMesh{2}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::ParallelTreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelTreeMesh{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), TreeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::ParallelTreeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelTreeMesh{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::TreeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), TreeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end - -function max_dt(u, t, +function max_dt(backend::Nothing, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors, inverse_jacobian = cache.elements - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda1 = max_lambda2 = zero(max_scaled_speed) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - lambda1, lambda2 = max_abs_speeds(u_node, equations) + max_lambda = max_scaled_speed_per_element(u, typeof(mesh), equations, dg, + contravariant_vectors, + inverse_jacobian, element) + # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate + # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 + max_scaled_speed = Base.max(max_scaled_speed, max_lambda) + end + return 2 / (nnodes(dg) * max_scaled_speed) +end - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, - i, j, element) - lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, - i, j, element) - lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2) +function max_dt(backend::Backend, u, t, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}, StructuredMeshView{2}}, + constant_speed::False, equations, dg::DG, cache) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg, cache) + max_scaled_speeds = allocate(backend, eltype(t), num_elements) + + kernel! = max_scaled_speed_KAkernel!(backend) + kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg, + contravariant_vectors, inverse_jacobian, ndrange = num_elements) + # TODO GPU dt on CPU? (time integration happens on CPU) + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) + return 2 / (nnodes(dg) * max_scaled_speed) +end - inv_jacobian = abs(inverse_jacobian[i, j, element]) +# works for both constant and non-constant speed +@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, + mT::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed, equations, + dg::DG, contravariant_vectors, + inverse_jacobian) + element = @index(Global) + max_scaled_speeds[element] = max_scaled_speed_per_element(u, mT, constant_speed, + equations, dg, + contravariant_vectors, + inverse_jacobian, + element) +end - max_lambda1 = Base.max(max_lambda1, lambda1_transformed * inv_jacobian) - max_lambda2 = Base.max(max_lambda2, lambda2_transformed * inv_jacobian) - end +function max_scaled_speed_per_element(u, + mT::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::False, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) + max_lambda1 = max_lambda2 = zero(max_scaled_speed) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + lambda1, lambda2 = max_abs_speeds(u_node, equations) + + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, + i, j, element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, + i, j, element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2) + + inv_jacobian = abs(inverse_jacobian[i, j, element]) + + max_lambda1 = Base.max(max_lambda1, lambda1_transformed * inv_jacobian) + max_lambda2 = Base.max(max_lambda2, lambda2_transformed * inv_jacobian) + end + return max_lambda1 + max_lambda2 +end + +function max_dt(backend::Nothing, u, t, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, + constant_speed::True, equations, dg::DG, cache) + max_scaled_speed = nextfloat(zero(t)) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) + max_lambda = max_scaled_speed_per_element(u, typeof(mesh), constant_speed, + 
equations, dg, contravariant_vectors, + inverse_jacobian, element) # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, max_lambda1 + max_lambda2) + max_scaled_speed = Base.max(max_scaled_speed, max_lambda) end return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, +function max_dt(backend::Backend, u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, StructuredMeshView{2}}, constant_speed::True, equations, dg::DG, cache) @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg, cache) + max_scaled_speeds = allocate(backend, eltype(t), num_elements) + + kernel! = max_scaled_speed_KAkernel!(backend) + kernel!(max_scaled_speeds, u, typeof(mesh), constant_speed, equations, dg, + contravariant_vectors, inverse_jacobian, ndrange = num_elements) + # TODO GPU dt on CPU? (time integration happens on CPU) + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) + return 2 / (nnodes(dg) * max_scaled_speed) +end - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - +function max_scaled_speed_per_element(u, + ::Type{<:Union{StructuredMesh{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}, + StructuredMeshView{2}}}, + constant_speed::True, equations, dg::DG, + contravariant_vectors, inverse_jacobian, + element) + max_lambda1_loc = max_lambda2_loc = nextfloat(zero(eltype(u))) max_lambda1, max_lambda2 = max_abs_speeds(equations) - - @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, - i, j, element) - lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, - i, j, element) - lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2) - - inv_jacobian = abs(inverse_jacobian[i, j, element]) - # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate - # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, - inv_jacobian * - (lambda1_transformed + lambda2_transformed)) - end + for j in eachnode(dg), i in eachnode(dg) + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, + i, j, element) + lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, + i, j, element) + lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2) + + inv_jacobian = abs(inverse_jacobian[i, j, element]) + + max_lambda1_loc = max(max_lambda1_loc, inv_jacobian * lambda1_transformed) + max_lambda2_loc = max(max_lambda2_loc, inv_jacobian * lambda2_transformed) end - return 2 / (nnodes(dg) * max_scaled_speed) + return max_lambda1_loc + max_lambda2_loc end -function max_dt(u, t, mesh::ParallelP4estMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelP4estMesh{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::ParallelP4estMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelP4estMesh{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::ParallelT8codeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{2}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
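Aside (illustrative sketch, not Trixi API): the `max_dt` methods above are split by a leading backend argument. `backend::Nothing` keeps the existing threaded CPU reduction, while `backend::Backend` launches a KernelAbstractions kernel that writes one maximal speed per element into an `allocate`d buffer and then reduces it with `maximum`. A self-contained toy version of that pattern, with stand-in functions instead of Trixi's:

using KernelAbstractions
using KernelAbstractions: Backend, CPU, allocate, synchronize

# Toy per-element quantity standing in for `max_scaled_speed_per_element` above.
function per_element_speed(u, element)
    s = zero(eltype(u))
    for i in axes(u, 1)
        s = max(s, abs(u[i, element]))
    end
    return s
end

# CPU path: plain reduction over elements (Trixi uses `@batch reduction=...`).
function max_speed(::Nothing, u)
    s = zero(eltype(u))
    for element in axes(u, 2)
        s = max(s, per_element_speed(u, element))
    end
    return s
end

# Device path: one work-item per element writes into an `allocate`d buffer;
# the scalar result is then obtained with a `maximum` reduction.
@kernel function max_speed_kernel!(speeds, u)
    element = @index(Global)
    speeds[element] = per_element_speed(u, element)
end

function max_speed(backend::Backend, u)
    speeds = allocate(backend, eltype(u), size(u, 2))
    kernel! = max_speed_kernel!(backend)
    kernel!(speeds, u; ndrange = size(u, 2))
    synchronize(backend)
    return maximum(speeds)
end

u = rand(4, 16)   # 4 nodes per element, 16 elements
@assert max_speed(nothing, u) ≈ max_speed(CPU(), u)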
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::ParallelT8codeMesh{2}, +function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{2}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{2}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{2}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{2}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index dc1e7af6be8..3f50d618fd1 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function max_dt(u, t, mesh::TreeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -31,7 +31,7 @@ function max_dt(u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::TreeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::TreeMesh{3}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -51,53 +51,96 @@ function max_dt(u, t, mesh::TreeMesh{3}, return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, +function max_dt(backend::Nothing, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, constant_speed::False, equations, dg::DG, cache) + # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors = cache.elements + @unpack contravariant_vectors, inverse_jacobian = cache.elements @batch reduction=(max, max_scaled_speed) for element in eachelement(dg, cache) - max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + max_lambda = max_scaled_speed_element(u, typeof(mesh), equations, dg, + contravariant_vectors, inverse_jacobian, + element) + # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate + # `NaN`s properly. 
See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 + max_scaled_speed = Base.max(max_scaled_speed, max_lambda) + end - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, - i, j, k, element) - lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, - i, j, k, element) - lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, - i, j, k, element) - lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) + return 2 / (nnodes(dg) * max_scaled_speed) +end - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) +function max_dt(backend::Backend, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + constant_speed::False, equations, dg::DG, cache) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + num_elements = nelements(dg, cache) + max_scaled_speeds = allocate(backend, eltype(t), num_elements) - max_lambda1 = Base.max(max_lambda1, inv_jacobian * lambda1_transformed) - max_lambda2 = Base.max(max_lambda2, inv_jacobian * lambda2_transformed) - max_lambda3 = Base.max(max_lambda3, inv_jacobian * lambda3_transformed) - end + kernel! = max_scaled_speed_KAkernel!(backend) + kernel!(max_scaled_speeds, u, typeof(mesh), equations, dg, contravariant_vectors, + inverse_jacobian; + ndrange = num_elements) - # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate - # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 - max_scaled_speed = Base.max(max_scaled_speed, - max_lambda1 + max_lambda2 + max_lambda3) - end + # TODO GPU dt on CPU? 
(time integration happens on CPU) + max_scaled_speed = max(nextfloat(zero(t)), maximum(max_scaled_speeds)) return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, +@kernel function max_scaled_speed_KAkernel!(max_scaled_speeds, u, meshT, equations, + dg, contravariant_vectors, inverse_jacobian) + element = @index(Global) + max_scaled_speeds[element] = max_scaled_speed_element(u, meshT, equations, dg, + contravariant_vectors, + inverse_jacobian, + element) +end + +function max_scaled_speed_element(u, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, equations, dg, + contravariant_vectors, inverse_jacobian, element) + max_lambda1 = max_lambda2 = max_lambda3 = zero(eltype(u)) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, + i, j, k, element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, + i, j, k, element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, + i, j, k, element) + lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) + + inv_jacobian = abs(inverse_jacobian[i, j, k, element]) + + max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed) + max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed) + max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed) + end + return max_lambda1 + max_lambda2 + max_lambda3 +end + +function max_dt(backend, u, t, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, constant_speed::True, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors = cache.elements + @unpack contravariant_vectors, inverse_jacobian = cache.elements + if backend !== nothing + # TODO: Port to GPU + contravariant_vectors = Array(cache.elements.contravariant_vectors) + inverse_jacobian = Array(cache.elements.inverse_jacobian) + end max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) @@ -116,7 +159,7 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3} lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 + Ja33 * max_lambda3) - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) + inv_jacobian = abs(inverse_jacobian[i, j, k, element]) # Use `Base.max` to prevent silent failures, as `max` from `@fastmath` doesn't propagate # `NaN`s properly. See https://github.com/trixi-framework/Trixi.jl/pull/2445#discussion_r2336812323 @@ -130,68 +173,68 @@ function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3} return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::ParallelP4estMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::ParallelP4estMesh{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. 
# Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::ParallelP4estMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::ParallelP4estMesh{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::P4estMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), P4estMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::ParallelT8codeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{3}, constant_speed::False, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] return dt end -function max_dt(u, t, mesh::ParallelT8codeMesh{3}, +function max_dt(backend::Nothing, u, t, mesh::ParallelT8codeMesh{3}, constant_speed::True, equations, dg::DG, cache) # call the method accepting a general `mesh::T8codeMesh{3}` # TODO: MPI, we should improve this; maybe we should dispatch on `u` # and create some MPI array type, overloading broadcasting and mapreduce etc. # Then, this specific array type should also work well with DiffEq etc. 
dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), T8codeMesh{3}, + Tuple{typeof(backend), typeof(u), typeof(t), T8codeMesh{3}, typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) + backend, u, t, mesh, constant_speed, equations, dg, cache) # Base.min instead of min needed, see comment in src/auxiliary/math.jl dt = MPI.Allreduce!(Ref(dt), Base.min, mpi_comm())[] diff --git a/src/semidiscretization/semidiscretization_euler_gravity.jl b/src/semidiscretization/semidiscretization_euler_gravity.jl index 0b77f04e7c1..a05e8988605 100644 --- a/src/semidiscretization/semidiscretization_euler_gravity.jl +++ b/src/semidiscretization/semidiscretization_euler_gravity.jl @@ -307,6 +307,7 @@ function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) u_euler = wrap_array(u_ode, semi_euler) u_gravity = wrap_array(cache.u_ode, semi_gravity) du_gravity = wrap_array(cache.du_ode, semi_gravity) + backend = trixi_backend(u_ode) # set up main loop finalstep = false @@ -318,7 +319,7 @@ function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) @unpack equations = semi_gravity while !finalstep dtau = @trixi_timeit timer() "calculate dtau" begin - cfl * max_dt(u_gravity, tau, semi_gravity.mesh, + cfl * max_dt(backend, u_gravity, tau, semi_gravity.mesh, have_constant_speed(equations), equations, semi_gravity.solver, semi_gravity.cache) end diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index a13ccb4cadd..d67db7f8c02 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -399,10 +399,11 @@ function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) # TODO: Taal decide, do we need to pass the mesh? time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, + @trixi_timeit timer() "rhs!" rhs!(backend, du, u, t, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter, runtime) diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl index f8d01b0da9a..ef760bdf891 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl @@ -347,10 +347,11 @@ function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolicParabolic, t) u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) # TODO: Taal decide, do we need to pass the mesh? time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, + @trixi_timeit timer() "rhs!" rhs!(backend, du, u, t, mesh, equations, boundary_conditions, source_terms, solver, cache) runtime = time_ns() - time_start put!(semi.performance_counter.counters[1], runtime) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 9c33a26af71..9d730b2675e 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -709,6 +709,13 @@ end return u_ll, u_rr end +# As above but dispatches on an type argument +@inline function get_surface_node_vars(u, equations, ::Type{<:DG}, indices...) 
+ u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]), Val(nvariables(equations)))) + u_rr = SVector(ntuple(@inline(v->u[2, v, indices...]), Val(nvariables(equations)))) + return u_ll, u_rr +end + @inline function set_node_vars!(u, u_node, equations, solver::DG, indices...) for v in eachvariable(equations) u[v, indices...] = u_node[v] @@ -873,54 +880,46 @@ function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{ return nothing end -function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{2}, +function compute_coefficients!(backend::Nothing, u, func, t, + mesh::Union{AbstractMesh{2}, AbstractMesh{3}}, equations, dg::DG, cache) @unpack node_coordinates = cache.elements + node_indices = CartesianIndices(ntuple(_ -> nnodes(dg), ndims(mesh))) @threaded for element in eachelement(dg, cache) compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, - element) + element, node_indices) end return nothing end -function compute_coefficients!(backend::Backend, u, func, t, mesh::AbstractMesh{2}, +function compute_coefficients!(backend::Backend, u, func, t, + mesh::Union{AbstractMesh{2}, AbstractMesh{3}}, equations, dg::DG, cache) nelements(dg, cache) == 0 && return nothing + @unpack node_coordinates = cache.elements - kernel! = compute_coefficients_kernel!(backend) - kernel!(u, func, t, equations, dg, node_coordinates, + node_indices = CartesianIndices(ntuple(_ -> nnodes(dg), ndims(mesh))) + + kernel! = compute_coefficients_KAkernel!(backend) + kernel!(u, func, t, equations, dg, node_coordinates, node_indices, ndrange = nelements(dg, cache)) return nothing end -@kernel function compute_coefficients_kernel!(u, func, t, equations, - dg::DG, node_coordinates) +@kernel function compute_coefficients_KAkernel!(u, func, t, equations, + dg::DG, node_coordinates, node_indices) element = @index(Global) - compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element) + compute_coefficients_element!(u, func, t, equations, dg, node_coordinates, element, + node_indices) end function compute_coefficients_element!(u, func, t, equations, dg::DG, - node_coordinates, element) - for j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(node_coordinates, equations, dg, i, - j, element) + node_coordinates, element, node_indices) + for indices in node_indices + x_node = get_node_coords(node_coordinates, equations, dg, indices, element) u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, element) - end - - return nothing -end - -function compute_coefficients!(backend::Nothing, u, func, t, mesh::AbstractMesh{3}, - equations, dg::DG, cache) - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, - j, k, element) - u_node = func(x_node, t, equations) - set_node_vars!(u, u_node, equations, dg, i, j, k, element) - end + set_node_vars!(u, u_node, equations, dg, indices, element) end return nothing diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index 5a1036487af..5917a63c74c 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -240,7 +240,7 @@ function dt_polydeg_scaling(dg::DGMulti{3, <:Wedge, <:TensorProductWedge}) end # for the stepsize callback -function max_dt(u, t, mesh::DGMultiMesh, +function max_dt(backend, u, t, mesh::DGMultiMesh, constant_speed::False, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} @unpack md = mesh @@ 
-263,7 +263,7 @@ function max_dt(u, t, mesh::DGMultiMesh, return 2 * dt_min * dt_polydeg_scaling(dg) end -function max_dt(u, t, mesh::DGMultiMesh, +function max_dt(backend, u, t, mesh::DGMultiMesh, constant_speed::True, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} @unpack md = mesh @@ -662,7 +662,7 @@ function calc_sources!(du, u, t, source_terms, return nothing end -function rhs!(du, u, t, mesh, equations, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMulti, cache) where {BC, Source} @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 0c7530a558b..ab9f8876238 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -616,7 +616,7 @@ end # an entropy conservative/stable discretization. For modal DG schemes, an extra `entropy_projection!` # is required (see https://doi.org/10.1016/j.jcp.2018.02.033, Section 4.3). # Also called by DGMultiFluxDiff{<:GaussSBP} solvers. -function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff, cache) where {Source, BC} @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) @@ -661,7 +661,7 @@ end # integral, e.g., an entropy conservative/stable discretization. The implementation of `rhs!` # for such schemes is very similar to the implementation of `rhs!` for standard DG methods, # but specializes `calc_volume_integral`. -function rhs!(du, u, t, mesh, equations, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiffSBP, cache) where {BC, Source} @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index f02d05e5a97..8e3e628ed84 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -582,7 +582,7 @@ end # Specialize RHS so that we can call `invert_jacobian_and_interpolate!` instead of just `invert_jacobian!`, # since `invert_jacobian!` is also used in other places (e.g., parabolic terms). -function rhs!(du, u, t, mesh, equations, boundary_conditions::BC, +function rhs!(backend, du, u, t, mesh, equations, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff{<:GaussSBP}, cache) where {Source, BC} @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgsem/calc_volume_integral.jl b/src/solvers/dgsem/calc_volume_integral.jl index 5739a28b202..84c914c340f 100644 --- a/src/solvers/dgsem/calc_volume_integral.jl +++ b/src/solvers/dgsem/calc_volume_integral.jl @@ -14,7 +14,7 @@ end # The following `calc_volume_integral!` functions are # dimension and meshtype agnostic, i.e., valid for all 1D, 2D, and 3D meshes. 
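Aside (standalone illustration, not Trixi code): the unified `compute_coefficients!` in `src/solvers/dg.jl` above expresses the node loop through `CartesianIndices(ntuple(_ -> nnodes(dg), ndims(mesh)))`, so one method covers the 2D and 3D meshes. A small sketch of that loop shape:

# Build the Cartesian node-index range once per mesh dimension; iterating it
# replaces the explicit `for j in eachnode(dg), i in eachnode(dg)` (2D) and
# `for k ..., j ..., i ...` (3D) nests with a single dimension-agnostic loop.
nnodes = 4
for dims in (2, 3)
    node_indices = CartesianIndices(ntuple(_ -> nnodes, dims))
    for idx in node_indices
        ijk = Tuple(idx)   # (i, j) in 2D, (i, j, k) in 3D
        # the real code passes these indices on to get_node_coords/set_node_vars!
    end
end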
-function calc_volume_integral!(du, u, mesh, +function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) @@ -27,7 +27,7 @@ function calc_volume_integral!(du, u, mesh, return nothing end -function calc_volume_integral!(du, u, mesh, +function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) @@ -40,7 +40,7 @@ function calc_volume_integral!(du, u, mesh, return nothing end -function calc_volume_integral!(du, u, mesh, +function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) @@ -79,7 +79,7 @@ function calc_volume_integral!(du, u, mesh, return nothing end -function calc_volume_integral!(du, u, mesh, +function calc_volume_integral!(backend::Nothing, du, u, mesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DGSEM, cache) diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 2f660bd4bca..5544f525481 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -933,6 +933,18 @@ end end end +@inline function indices2direction2d(indices) + if indices[1] === :begin + return 1 + elseif indices[1] === :end + return 2 + elseif indices[2] === :begin + return 3 + else # if indices[2] === :end + return 4 + end +end + include("containers_2d.jl") include("containers_3d.jl") include("containers_parallel.jl") diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index fc87127e5a7..11b19e19ffd 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -63,125 +63,215 @@ end end end -function prolong2interfaces!(cache, u, - mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, +function prolong2interfaces!(backend::Nothing, cache, u, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, equations, dg::DG) @unpack interfaces = cache + @unpack neighbor_ids, node_indices = cache.interfaces index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the index of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] + prolong2interfaces_per_interface!(interfaces.u, u, interface, typeof(mesh), + equations, neighbor_ids, node_indices, + index_range) + end + return nothing +end + +function prolong2interfaces!(backend::Backend, cache, u, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + equations, dg::DG) + @unpack interfaces = cache + ninterfaces(interfaces) == 0 && return nothing + @unpack neighbor_ids, node_indices = cache.interfaces + index_range = eachnode(dg) + + kernel! 
= prolong2interfaces_KAkernel!(backend) + kernel!(interfaces.u, u, typeof(mesh), equations, neighbor_ids, node_indices, + index_range, ndrange = ninterfaces(interfaces)) + return nothing +end + +@kernel function prolong2interfaces_KAkernel!(interfaces_u, u, + mT::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, neighbor_ids, + node_indices, index_range) + interface = @index(Global) + prolong2interfaces_per_interface!(interfaces_u, u, interface, mT, equations, + neighbor_ids, node_indices, index_range) +end + +function prolong2interfaces_per_interface!(interfaces_u, u, interface, + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, neighbor_ids, node_indices, + index_range) + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + for i in index_range + for v in eachvariable(equations) + interfaces_u[1, v, i, interface] = u[v, i_primary, j_primary, + primary_element] + end + i_primary += i_primary_step + j_primary += j_primary_step + end - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + + i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], index_range) - i_primary = i_primary_start - j_primary = j_primary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i_primary, j_primary, - primary_element] - end - i_primary += i_primary_step - j_primary += j_primary_step + i_secondary = i_secondary_start + j_secondary = j_secondary_start + for i in index_range + for v in eachvariable(equations) + interfaces_u[2, v, i, interface] = u[v, i_secondary, j_secondary, + secondary_element] end + i_secondary += i_secondary_step + j_secondary += j_secondary_step + end - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. 
- secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] + return nothing +end - i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], - index_range) - j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], - index_range) +function calc_interface_flux!(backend::Nothing, surface_flux_values, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + have_nonconservative_terms, + equations, surface_integral, dg::DG, cache) + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) - i_secondary = i_secondary_start - j_secondary = j_secondary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i_secondary, j_secondary, - secondary_element] - end - i_secondary += i_secondary_step - j_secondary += j_secondary_step - end + @threaded for interface in eachinterface(dg, cache) + calc_interface_flux_per_interface!(surface_flux_values, typeof(mesh), + have_nonconservative_terms, + equations, surface_integral, typeof(dg), + cache.interfaces.u, interface, + neighbor_ids, node_indices, + contravariant_vectors, index_range) end return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Backend, surface_flux_values, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, have_nonconservative_terms, equations, surface_integral, dg::DG, cache) + ninterfaces(cache.interfaces) == 0 && return nothing @unpack neighbor_ids, node_indices = cache.interfaces @unpack contravariant_vectors = cache.elements index_range = eachnode(dg) + + kernel! = calc_interface_flux_KAkernel!(backend) + kernel!(surface_flux_values, typeof(mesh), have_nonconservative_terms, + equations, surface_integral, typeof(dg), cache.interfaces.u, + neighbor_ids, node_indices, contravariant_vectors, index_range, + ndrange = ninterfaces(cache.interfaces)) + + return nothing +end + +@kernel function calc_interface_flux_KAkernel!(surface_flux_values, + mt::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms, + equations, surface_integral, + st::Type{<:DG}, u_interface, + neighbor_ids, node_indices, + contravariant_vectors, index_range) + interface = @index(Global) + calc_interface_flux_per_interface!(surface_flux_values, mt, + have_nonconservative_terms, equations, + surface_integral, st, u_interface, + interface, neighbor_ids, node_indices, + contravariant_vectors, index_range) +end + +function calc_interface_flux_per_interface!(surface_flux_values, + mt::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms, + equations, surface_integral, st::Type{<:DG}, + u_interface, interface, neighbor_ids, + node_indices, contravariant_vectors, + index_range) index_end = last(index_range) - @threaded for interface in eachinterface(dg, cache) - # Get element and side index information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) + # Get element and side index information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction2d(primary_indices) - # Create the local i,j indexing on the primary element used to pull normal direction information - 
i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], - index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], - index_range) + # Create the local i,j indexing on the primary element used to pull normal direction information + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) - i_primary = i_primary_start - j_primary = j_primary_start - - # Get element and side index information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - - # Initiate the secondary index to be used in the surface for loop. - # This index on the primary side will always run forward but - # the secondary index might need to run backwards for flipped sides. - if :i_backward in secondary_indices - node_secondary = index_end - node_secondary_step = -1 - else - node_secondary = 1 - node_secondary_step = 1 - end + i_primary = i_primary_start + j_primary = j_primary_start - for node in eachnode(dg) - # Get the normal direction on the primary element. - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(primary_direction, - contravariant_vectors, - i_primary, j_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, have_nonconservative_terms, - equations, - surface_integral, dg, cache, - interface, normal_direction, - node, primary_direction, primary_element, - node_secondary, secondary_direction, secondary_element) - - # Increment primary element indices to pull the normal direction - i_primary += i_primary_step - j_primary += j_primary_step - # Increment the surface node index along the secondary element - node_secondary += node_secondary_step - end + # Get element and side index information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction2d(secondary_indices) + + # Initiate the secondary index to be used in the surface for loop. + # This index on the primary side will always run forward but + # the secondary index might need to run backwards for flipped sides. + if :i_backward in secondary_indices + node_secondary = index_end + node_secondary_step = -1 + else + node_secondary = 1 + node_secondary_step = 1 + end + + for node in index_range + # Get the normal direction on the primary element. + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. 
+ normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, mt, have_nonconservative_terms, + equations, surface_integral, st, u_interface, interface, + normal_direction, node, primary_direction, + primary_element, node_secondary, + secondary_direction, secondary_element) + + # Increment primary element indices to pull the normal direction + i_primary += i_primary_step + j_primary += j_primary_step + # Increment the surface node index along the secondary element + node_secondary += node_secondary_step end return nothing @@ -189,19 +279,22 @@ end # Inlined version of the interface flux computation for conservation laws @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, P4estMeshView{2}, - T8codeMesh{2}}, + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, have_nonconservative_terms::False, equations, - surface_integral, dg::DG, cache, - interface_index, normal_direction, - primary_node_index, primary_direction_index, + surface_integral, st::Type{<:DG}, + u_interface, interface_index, + normal_direction, primary_node_index, + primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, + secondary_node_index, + secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, st, + primary_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -216,20 +309,22 @@ end # Inlined version of the interface flux computation for equations with conservative and nonconservative terms @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + meshT::Type{<:Union{P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, equations, - surface_integral, dg::DG, cache, - interface_index, normal_direction, - primary_node_index, primary_direction_index, + surface_integral, st::Type{<:DG}, + u_interface, interface_index, + normal_direction, primary_node_index, + primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, + secondary_node_index, + secondary_direction_index, secondary_element_index) @unpack surface_flux = surface_integral - calc_interface_flux!(surface_flux_values, mesh, have_nonconservative_terms, + calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(surface_flux, equations), equations, - surface_integral, dg, cache, + surface_integral, st, u_interface, interface_index, normal_direction, primary_node_index, primary_direction_index, primary_element_index, @@ -239,20 +334,19 @@ end end @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, - surface_integral, dg::DG, cache, + surface_integral, st::Type{<:DG}, u_interface, interface_index, normal_direction, primary_node_index, primary_direction_index, primary_element_index, secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces surface_flux, nonconservative_flux = surface_integral.surface_flux - u_ll, u_rr = 
get_surface_node_vars(u, equations, dg, primary_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, st, primary_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -266,32 +360,31 @@ end # Note the factor 0.5 necessary for the nonconservative fluxes based on # the interpretation of global SBP operators coupled discontinuously via # central fluxes/SATs - surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = (flux_[v] + - 0.5f0 * - noncons_primary[v]) - surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -(flux_[v] + + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = Float64(flux_[v] + 0.5f0 * - noncons_secondary[v]) + noncons_primary[v]) + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = Float64(-(flux_[v] + + 0.5f0 * + noncons_secondary[v])) end return nothing end @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + ::Type{<:Union{P4estMesh{2}, T8codeMesh{2}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, - surface_integral, dg::DG, cache, + surface_integral, st::Type{<:DG}, u_interface, interface_index, normal_direction, primary_node_index, primary_direction_index, primary_element_index, secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, st, primary_node_index, interface_index) flux_left, flux_right = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -755,47 +848,86 @@ end return nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). 
- factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, element] = (du[v, 1, l, element] + - surface_flux_values[v, l, 1, element] * - factor_1) - - # surface at +x - du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + - surface_flux_values[v, l, 2, element] * - factor_2) - - # surface at -y - du[v, l, 1, element] = (du[v, l, 1, element] + - surface_flux_values[v, l, 3, element] * - factor_1) - - # surface at +y - du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + - surface_flux_values[v, l, 4, element] * - factor_2) - end - end + calc_surface_integral_per_element!(du, typeof(mesh), equations, + surface_integral, dg, + surface_flux_values, element) end +end + +function calc_surface_integral!(backend::Backend, du, u, + mesh::Union{P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) + nelements(dg, cache) == 0 && return nothing + @unpack surface_flux_values = cache.elements + + kernel! = calc_surface_integral_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, surface_integral, dg, + surface_flux_values, ndrange = nelements(dg, cache)) + return nothing +end +@kernel function calc_surface_integral_KAkernel!(du, + mT::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, + surface_flux_values) + element = @index(Global) + calc_surface_integral_per_element!(du, mT, equations, surface_integral, + dg, surface_flux_values, element) +end + +function calc_surface_integral_per_element!(du, + ::Type{<:Union{P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, surface_flux_values, + element) + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop (outside this function) + # to increase performance. We also use explicit assignments instead of `+=` + # to let `@muladd` turn these into FMAs (see comment at the top of the file). 
+ factor_1 = dg.basis.boundary_interpolation[1, 1] + factor_2 = dg.basis.boundary_interpolation[nnodes(dg), 2] + for l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, element] = (du[v, 1, l, element] + + surface_flux_values[v, l, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + + surface_flux_values[v, l, 2, element] * + factor_2) + + # surface at -y + du[v, l, 1, element] = (du[v, l, 1, element] + + surface_flux_values[v, l, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + + surface_flux_values[v, l, 4, element] * + factor_2) + end + end return nothing end end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl index 778bccff717..e470bd4b1e1 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -138,7 +138,7 @@ function rhs_parabolic!(du, u, t, mesh::Union{P4estMesh{2}, P4estMesh{3}}, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations_parabolic, + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) end @@ -171,14 +171,14 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces. # This reuses `prolong2interfaces` for the purely hyperbolic case. @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache_parabolic, u_transformed, mesh, + prolong2interfaces!(nothing, cache_parabolic, u_transformed, mesh, equations_parabolic, dg) end # Calculate interface fluxes for the gradient. # This reuses `calc_interface_flux!` for the purely hyperbolic case. 
@trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache_parabolic.elements.surface_flux_values, + calc_interface_flux!(nothing, cache_parabolic.elements.surface_flux_values, mesh, False(), # False() = no nonconservative terms equations_parabolic, dg.surface_integral, dg, cache_parabolic) diff --git a/src/solvers/dgsem_p4est/dg_2d_parallel.jl b/src/solvers/dgsem_p4est/dg_2d_parallel.jl index 5079086c964..5790a6ed75b 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parallel.jl @@ -140,7 +140,7 @@ end @inline function calc_mpi_interface_flux!(surface_flux_values, mesh::Union{ParallelP4estMesh{2}, ParallelT8codeMesh{2}}, - nonconservative_terms::True, equations, + have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, interface_node_index, local_side, @@ -321,7 +321,7 @@ end @inline function calc_mpi_mortar_flux!(fstar_primary, fstar_secondary, mesh::Union{ParallelP4estMesh{2}, ParallelT8codeMesh{2}}, - nonconservative_terms::True, equations, + have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, node_index) diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index 88444995ba3..c92a69777ef 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -92,85 +92,116 @@ end return (i1, i2) end -function prolong2interfaces!(cache, u, +function prolong2interfaces!(backend::Nothing, cache, u, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, dg::DG) @unpack interfaces = cache + @unpack neighbor_ids, node_indices = cache.interfaces index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the indices of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], - index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], - index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, j, interface] = u[v, - i_primary, j_primary, - k_primary, - primary_element] - end - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i + prolong2interfaces_interface!(interfaces.u, u, typeof(mesh), equations, + neighbor_ids, node_indices, index_range, + interface) + end + return nothing +end + +function prolong2interfaces!(backend::Backend, cache, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, dg::DG) + @unpack interfaces = cache + @unpack neighbor_ids, node_indices = cache.interfaces + index_range = eachnode(dg) + + kernel! 
= prolong2interfaces_KAkernel!(backend) + kernel!(interfaces.u, u, typeof(mesh), equations, neighbor_ids, node_indices, + index_range, + ndrange = ninterfaces(interfaces)) + return nothing +end + +@kernel function prolong2interfaces_KAkernel!(interface_u, u, meshT, equations, + neighbor_ids, node_indices, index_range) + interface = @index(Global) + prolong2interfaces_interface!(interface_u, u, meshT, equations, neighbor_ids, + node_indices, index_range, interface) +end + +function prolong2interfaces_interface!(u_interface, u, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, + equations, neighbor_ids, node_indices, + index_range, interface) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the indices of the primary side + # will always run forwards. + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + for j in index_range + for i in index_range + for v in eachvariable(equations) + u_interface[1, v, i, j, interface] = u[v, + i_primary, j_primary, + k_primary, + primary_element] end - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i end + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + end - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. - secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] - - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], - index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], - index_range) - k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], - index_range) - - i_secondary = i_secondary_start - j_secondary = j_secondary_start - k_secondary = k_secondary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, j, interface] = u[v, - i_secondary, j_secondary, - k_secondary, - secondary_element] - end - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - k_secondary += k_secondary_step_i + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. 
+ secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], + index_range) + k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], + index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + k_secondary = k_secondary_start + for j in index_range + for i in index_range + for v in eachvariable(equations) + u_interface[2, v, i, j, interface] = u[v, + i_secondary, j_secondary, + k_secondary, + secondary_element] end - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j - k_secondary += k_secondary_step_j + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + k_secondary += k_secondary_step_i end + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + k_secondary += k_secondary_step_j end - return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, have_nonconservative_terms, equations, surface_integral, dg::DG, cache) @@ -179,93 +210,139 @@ function calc_interface_flux!(surface_flux_values, index_range = eachnode(dg) @threaded for interface in eachinterface(dg, cache) - # Get element and side information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], - index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], - index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], - index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - - # Get element and side information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - secondary_surface_indices = surface_indices(secondary_indices) - - # Get the surface indexing on the secondary element. - # Note that the indices of the primary side will always run forward but - # the secondary indices might need to run backwards for flipped sides. 
- i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], - index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], - index_range) - i_secondary = i_secondary_start - j_secondary = j_secondary_start + calc_interface_flux_interface!(surface_flux_values, + typeof(mesh), + have_nonconservative_terms, + equations, surface_integral, typeof(dg), + cache.interfaces.u, neighbor_ids, node_indices, + contravariant_vectors, index_range, interface) + end + return nothing +end + +function calc_interface_flux!(backend::Backend, surface_flux_values, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + have_nonconservative_terms, + equations, surface_integral, dg::DG, cache) + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + + kernel! = calc_interface_flux_KAkernel!(backend) + kernel!(surface_flux_values, typeof(mesh), have_nonconservative_terms, equations, + surface_integral, typeof(dg), cache.interfaces.u, + neighbor_ids, node_indices, contravariant_vectors, index_range, + ndrange = ninterfaces(cache.interfaces)) + return nothing +end + +@kernel function calc_interface_flux_KAkernel!(surface_flux_values, meshT, + have_nonconservative_terms, equations, + surface_integral, solverT, u_interface, + neighbor_ids, node_indices, + contravariant_vectors, index_range) + interface = @index(Global) + calc_interface_flux_interface!(surface_flux_values, + meshT, + have_nonconservative_terms, + equations, surface_integral, solverT, u_interface, + neighbor_ids, node_indices, contravariant_vectors, + index_range, interface) +end + +function calc_interface_flux_interface!(surface_flux_values, + meshT::Type{<:Union{P4estMesh{3}, + T8codeMesh{3}}}, + have_nonconservative_terms, + equations, surface_integral, + solverT::Type{<:DG}, u_interface, neighbor_ids, + node_indices, contravariant_vectors, + index_range, interface) + # Get element and side information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + + # Get element and side information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + secondary_surface_indices = surface_indices(secondary_indices) + + # Get the surface indexing on the secondary element. + # Note that the indices of the primary side will always run forward but + # the secondary indices might need to run backwards for flipped sides. 
+ i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], + index_range) + i_secondary = i_secondary_start + j_secondary = j_secondary_start + + for j in index_range + for i in index_range + # Get the normal direction from the primary element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, k_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, + equations, + surface_integral, solverT, u_interface, + interface, normal_direction, + i, j, primary_direction, primary_element, + i_secondary, j_secondary, secondary_direction, + secondary_element) - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction from the primary element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(primary_direction, - contravariant_vectors, - i_primary, j_primary, k_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, - have_nonconservative_terms, - equations, - surface_integral, dg, cache, - interface, normal_direction, - i, j, primary_direction, primary_element, - i_secondary, j_secondary, secondary_direction, - secondary_element) - - # Increment the primary element indices - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i - # Increment the secondary element surface indices - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - end # Increment the primary element indices - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i # Increment the secondary element surface indices - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i end + # Increment the primary element indices + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + # Increment the secondary element surface indices + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j end - return nothing end # Inlined function for interface flux computation for conservative flux terms @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::False, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, interface_index) flux_ = surface_flux(u_ll, 
u_rr, normal_direction, equations) @@ -282,19 +359,21 @@ end # Inlined function for interface flux computation for flux + nonconservative terms @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + meshT::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - calc_interface_flux!(surface_flux_values, mesh, have_nonconservative_terms, + calc_interface_flux!(surface_flux_values, meshT, have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(surface_integral.surface_flux, equations), - equations, surface_integral, dg, cache, interface_index, + equations, surface_integral, solverT, u_interface, + interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, @@ -303,21 +382,21 @@ end end @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces surface_flux, nonconservative_flux = surface_integral.surface_flux - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, interface_index) flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -344,21 +423,22 @@ end end @inline function calc_interface_flux!(surface_flux_values, - mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, - primary_j_node_index, interface_index) + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, + interface_index) flux_left, flux_right = surface_flux(u_ll, u_rr, normal_direction, equations) @@ -464,7 +544,7 @@ end # inlined version of the boundary flux calculation along a physical interface @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::False, equations, + have_nonconservative_terms::False, equations, surface_integral, dg::DG, cache, i_index, j_index, k_index, 
i_node_index, j_node_index, direction_index, @@ -500,13 +580,13 @@ end # inlined version of the boundary flux calculation along a physical interface @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::True, equations, + have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache, i_index, j_index, k_index, i_node_index, j_node_index, direction_index, element_index, boundary_index) calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh, - nonconservative_terms, + have_nonconservative_terms, combine_conservative_and_nonconservative_fluxes(surface_integral.surface_flux, equations), equations, @@ -518,7 +598,7 @@ end @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::True, + have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::False, equations, surface_integral, dg::DG, cache, i_index, j_index, @@ -561,7 +641,7 @@ end @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, - nonconservative_terms::True, + have_nonconservative_terms::True, combine_conservative_and_nonconservative_fluxes::True, equations, surface_integral, dg::DG, cache, i_index, j_index, @@ -927,7 +1007,7 @@ end return nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{P4estMesh{3}, T8codeMesh{3}}, equations, surface_integral::SurfaceIntegralWeakForm, @@ -935,51 +1015,86 @@ function calc_surface_integral!(du, u, @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements + @threaded for element in eachelement(dg, cache) + calc_surface_integral_element!(du, typeof(mesh), + equations, + surface_integral, dg, surface_flux_values, + element) + end + return nothing +end + +function calc_surface_integral!(backend::Backend, du, u, + mesh::Union{P4estMesh{3}, T8codeMesh{3}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, cache) + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + kernel! = calc_surface_integral_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, surface_integral, dg, surface_flux_values, + ndrange = nelements(cache.elements)) + return nothing +end + +@kernel function calc_surface_integral_KAkernel!(du, meshT, equations, + surface_integral, dg, + surface_flux_values) + element = @index(Global) + calc_surface_integral_element!(du, meshT, + equations, + surface_integral, dg, surface_flux_values, element) +end + +function calc_surface_integral_element!(du, + ::Type{<:Union{P4estMesh{3}, T8codeMesh{3}}}, + equations, + surface_integral::SurfaceIntegralWeakForm, + dg::DGSEM, surface_flux_values, element) # Note that all fluxes have been computed with outward-pointing normal vectors. # Access the factors only once before beginning the loop to increase performance. # We also use explicit assignments instead of `+=` to let `@muladd` turn these # into FMAs (see comment at the top of the file). 
- factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, m, element] = (du[v, 1, l, m, element] + - surface_flux_values[v, l, m, 1, element] * - factor_1) - - # surface at +x - du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + - surface_flux_values[v, l, m, 2, - element] * - factor_2) - - # surface at -y - du[v, l, 1, m, element] = (du[v, l, 1, m, element] + - surface_flux_values[v, l, m, 3, element] * - factor_1) - - # surface at +y - du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + - surface_flux_values[v, l, m, 4, - element] * - factor_2) - - # surface at -z - du[v, l, m, 1, element] = (du[v, l, m, 1, element] + - surface_flux_values[v, l, m, 5, element] * - factor_1) - - # surface at +z - du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + - surface_flux_values[v, l, m, 6, - element] * - factor_2) - end + # TODO GPU: `dg` is adapted to the backend, so accessing these scalars outside of the kernel is therefore not useful + factor_1 = dg.basis.boundary_interpolation[1, 1] + factor_2 = dg.basis.boundary_interpolation[nnodes(dg), 2] + for m in eachnode(dg), l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, m, element] = (du[v, 1, l, m, element] + + surface_flux_values[v, l, m, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + + surface_flux_values[v, l, m, 2, + element] * + factor_2) + + # surface at -y + du[v, l, 1, m, element] = (du[v, l, 1, m, element] + + surface_flux_values[v, l, m, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + + surface_flux_values[v, l, m, 4, + element] * + factor_2) + + # surface at -z + du[v, l, m, 1, element] = (du[v, l, m, 1, element] + + surface_flux_values[v, l, m, 5, element] * + factor_1) + + # surface at +z + du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + + surface_flux_values[v, l, m, 6, + element] * + factor_2) end end - return nothing end end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl index bc4ac8f875b..0befe4143fa 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl @@ -48,14 +48,14 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache_parabolic, u_transformed, mesh, + prolong2interfaces!(nothing, cache_parabolic, u_transformed, mesh, equations_parabolic, dg) end # Calculate interface fluxes for the gradient. This reuses P4est `calc_interface_flux!` along with a # specialization for AbstractEquationsParabolic.
@trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache_parabolic.elements.surface_flux_values, + calc_interface_flux!(nothing, cache_parabolic.elements.surface_flux_values, mesh, False(), # False() = no nonconservative terms equations_parabolic, dg.surface_integral, dg, cache_parabolic) @@ -202,21 +202,22 @@ end end # This version is used for parabolic gradient computations -@inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{3}, +@inline function calc_interface_flux!(surface_flux_values, + ::Type{<:Union{P4estMesh{3}}}, have_nonconservative_terms::False, equations::AbstractEquationsParabolic, - surface_integral, dg::DG, cache, + surface_integral, solverT::Type{<:DG}, + u_interface, interface_index, normal_direction, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, - primary_j_node_index, + u_ll, u_rr = get_surface_node_vars(u_interface, equations, solverT, + primary_i_node_index, primary_j_node_index, interface_index) flux_ = 0.5f0 * (u_ll + u_rr) # we assume that the gradient computations utilize a central flux diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl index 5314beec7b8..188560fa95f 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl @@ -5,7 +5,7 @@ @muladd begin #! format: noindent -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{ParallelP4estMesh{3}, ParallelT8codeMesh{3}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -33,19 +33,19 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -95,11 +95,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, dg.surface_integral, dg, cache) + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, + cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index 6cc2791c27e..8828c32666f 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ b/src/solvers/dgsem_structured/dg.jl @@ -35,7 +35,7 @@ function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionP @assert isperiodic(mesh) 
end -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{StructuredMesh, StructuredMeshView{2}}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -44,7 +44,7 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end @@ -64,12 +64,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/dgsem_structured/dg_1d.jl b/src/solvers/dgsem_structured/dg_1d.jl index cb98c45aed3..433d34e199f 100644 --- a/src/solvers/dgsem_structured/dg_1d.jl +++ b/src/solvers/dgsem_structured/dg_1d.jl @@ -69,7 +69,7 @@ function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, return nothing end -function apply_jacobian!(du, mesh::StructuredMesh{1}, +function apply_jacobian!(backend::Nothing, du, mesh::StructuredMesh{1}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 81e0a6a9e0f..dc2dc3a119b 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -5,6 +5,50 @@ @muladd begin #! format: noindent +function calc_volume_integral!(::Nothing, du, u, + mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}}, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) + @unpack contravariant_vectors = cache.elements + @threaded for element in eachelement(dg, cache) + weak_form_kernel_per_element!(du, u, element, typeof(mesh), + have_nonconservative_terms, equations, dg, + contravariant_vectors) + end + return nothing +end + +function calc_volume_integral!(backend::Backend, du, u, + mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}}, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) + nelements(dg, cache) == 0 && return nothing + @unpack contravariant_vectors = cache.elements + kernel! 
= weak_form_KAkernel!(backend) + kernel!(du, u, typeof(mesh), have_nonconservative_terms, equations, dg, + contravariant_vectors, ndrange = nelements(dg, cache)) + return nothing +end + +@kernel function weak_form_KAkernel!(du, u, + mT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms, equations, + dg::DGSEM, contravariant_vectors) + element = @index(Global) + weak_form_kernel_per_element!(du, u, element, mT, have_nonconservative_terms, + equations, dg, contravariant_vectors) +end #= `weak_form_kernel!` is only implemented for conserved terms as non-conservative terms should always be discretized in conjunction with a flux-splitting scheme, @@ -12,17 +56,19 @@ see `flux_differencing_kernel!`. This treatment is required to achieve, e.g., entropy-stability or well-balancedness. See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# -@inline function weak_form_kernel!(du, u, - element, - mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, - UnstructuredMesh2D, P4estMesh{2}, - P4estMeshView{2}, T8codeMesh{2}}, - have_nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha = true) +@inline function weak_form_kernel_per_element!(du, u, element, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + have_nonconservative_terms::False, + equations, dg::DGSEM, + contravariant_vectors, alpha = true) # true * [some floating point value] == [exactly the same floating point value] # This can (hopefully) be optimized away due to constant propagation. @unpack derivative_dhat = dg.basis - @unpack contravariant_vectors = cache.elements for j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, element) @@ -679,23 +725,55 @@ function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, return nothing end -function apply_jacobian!(du, +function apply_jacobian!(backend::Nothing, du, mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - factor = -inverse_jacobian[i, j, element] + apply_jacobian_per_element!(du, typeof(mesh), equations, dg, inverse_jacobian, + element) + end +end - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end +function apply_jacobian!(backend::Backend, du, + mesh::Union{StructuredMesh{2}, StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, P4estMeshView{2}, + T8codeMesh{2}}, + equations, dg::DG, cache) + nelements(dg, cache) == 0 && return nothing + @unpack inverse_jacobian = cache.elements + kernel! 
= apply_jacobian_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, dg, inverse_jacobian, + ndrange = nelements(dg, cache)) +end + +@kernel function apply_jacobian_KAkernel!(du, + mT::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, + P4estMesh{2}, + P4estMeshView{2}, + T8codeMesh{2}}}, + equations, dg::DG, inverse_jacobian) + element = @index(Global) + apply_jacobian_per_element!(du, mT, equations, dg, inverse_jacobian, element) +end + +function apply_jacobian_per_element!(du, + ::Type{<:Union{StructuredMesh{2}, + StructuredMeshView{2}, + UnstructuredMesh2D, P4estMesh{2}, + P4estMeshView{2}, T8codeMesh{2}}}, + equations, dg::DG, inverse_jacobian, element) + for j in eachnode(dg), i in eachnode(dg) + factor = -inverse_jacobian[i, j, element] + + for v in eachvariable(equations) + du[v, i, j, element] *= factor end end - return nothing end end # @muladd diff --git a/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl index 88d069b502a..32f02ee4da4 100644 --- a/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_structured/dg_2d_subcell_limiters.jl @@ -122,7 +122,7 @@ end # @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, mesh::Union{StructuredMesh{2}, P4estMesh{2}}, - nonconservative_terms::True, equations, + have_nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, cache) where { @@ -336,7 +336,7 @@ end # terms that can be written as a product of local and jump contributions. @inline function calcflux_fhat!(fhat1_L, fhat1_R, fhat2_L, fhat2_R, u, mesh::Union{StructuredMesh{2}, P4estMesh{2}}, - nonconservative_terms::True, equations, + have_nonconservative_terms::True, equations, volume_flux::Tuple{F_CONS, F_NONCONS}, dg::DGSEM, element, cache) where { diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index afc5a21d99e..6600f99ca52 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -5,6 +5,50 @@ @muladd begin #! format: noindent +function calc_volume_integral!(backend::Nothing, du, u, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) + @unpack contravariant_vectors = cache.elements + @threaded for element in eachelement(dg, cache) + weak_form_kernel_element!(du, u, element, typeof(mesh), + have_nonconservative_terms, equations, + dg, contravariant_vectors) + end + return nothing +end + +function calc_volume_integral!(backend::Backend, du, u, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralWeakForm, + dg::DGSEM, cache) + nelements(dg, cache) == 0 && return nothing + @unpack contravariant_vectors = cache.elements + + kernel! 
= weak_form_KAkernel!(backend) + kernel!(du, u, typeof(mesh), have_nonconservative_terms, equations, dg, + contravariant_vectors, + ndrange = nelements(dg, cache)) + return nothing +end + +@kernel function weak_form_KAkernel!(du, u, + meshT::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, + have_nonconservative_terms, + equations, + dg::DGSEM, contravariant_vectors) + element = @index(Global) + weak_form_kernel_element!(du, u, element, meshT, + have_nonconservative_terms, equations, + dg, contravariant_vectors) +end + #= `weak_form_kernel!` is only implemented for conserved terms as non-conservative terms should always be discretized in conjunction with a flux-splitting scheme, @@ -12,16 +56,17 @@ see `flux_differencing_kernel!`. This treatment is required to achieve, e.g., entropy-stability or well-balancedness. See also https://github.com/trixi-framework/Trixi.jl/issues/1671#issuecomment-1765644064 =# -@inline function weak_form_kernel!(du, u, - element, - mesh::Union{StructuredMesh{3}, P4estMesh{3}, - T8codeMesh{3}}, - have_nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha = true) +@inline function weak_form_kernel_element!(du, u, + element, + ::Type{<:Union{StructuredMesh{3}, + P4estMesh{3}, + T8codeMesh{3}}}, + have_nonconservative_terms::False, equations, + dg::DGSEM, contravariant_vectors, + alpha = true) # true * [some floating point value] == [exactly the same floating point value] # This can (hopefully) be optimized away due to constant propagation. @unpack derivative_dhat = dg.basis - @unpack contravariant_vectors = cache.elements for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations, dg, i, j, k, element) @@ -867,19 +912,45 @@ function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, return nothing end -function apply_jacobian!(du, +function apply_jacobian!(backend::Nothing, du, mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, dg::DG, cache) + @unpack inverse_jacobian = cache.elements @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - factor = -cache.elements.inverse_jacobian[i, j, k, element] + apply_jacobian_element!(du, typeof(mesh), equations, dg, inverse_jacobian, + element) + end + return nothing +end - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end +function apply_jacobian!(backend::Backend, du, + mesh::Union{StructuredMesh{3}, P4estMesh{3}, T8codeMesh{3}}, + equations, dg::DG, cache) + @unpack inverse_jacobian = cache.elements + + kernel! 
= apply_jacobian_KAkernel!(backend) + kernel!(du, typeof(mesh), equations, dg, inverse_jacobian, + ndrange = nelements(cache.elements)) + return nothing +end + +@kernel function apply_jacobian_KAkernel!(du, meshT, equations, dg::DG, + inverse_jacobian) + element = @index(Global) + apply_jacobian_element!(du, meshT, equations, dg, inverse_jacobian, element) +end + +function apply_jacobian_element!(du, + ::Type{<:Union{StructuredMesh{3}, P4estMesh{3}, + T8codeMesh{3}}}, + equations, dg, inverse_jacobian, element) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + factor = -inverse_jacobian[i, j, k, element] + + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor end end - return nothing end end # @muladd diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index 7186587c40b..5b8c9334d7c 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -64,7 +64,7 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::TreeMesh{1}, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -73,7 +73,7 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end @@ -104,12 +104,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin @@ -239,7 +240,8 @@ end return nothing end -function calc_volume_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, +function calc_volume_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, have_nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolumeO2, dg::DGSEM, cache) @@ -405,7 +407,8 @@ end return nothing end -function prolong2interfaces!(cache, u, mesh::TreeMesh{1}, equations, dg::DG) +function prolong2interfaces!(cache, u, mesh::TreeMesh{1}, equations, + dg::DG) @unpack interfaces = cache @unpack neighbor_ids = interfaces interfaces_u = interfaces.u @@ -612,7 +615,8 @@ function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:A return nothing end -function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, +function calc_surface_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, surface_integral, dg::DGSEM, cache) @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements @@ -638,7 +642,7 @@ function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1 return nothing end -function apply_jacobian!(du, mesh::TreeMesh{1}, +function apply_jacobian!(backend::Nothing, du, mesh::TreeMesh{1}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_tree/dg_1d_parabolic.jl b/src/solvers/dgsem_tree/dg_1d_parabolic.jl index b8fd31d37df..0fa4c8f1c44 
100644 --- a/src/solvers/dgsem_tree/dg_1d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_1d_parabolic.jl @@ -90,7 +90,7 @@ function rhs_parabolic!(du, u, t, mesh::TreeMesh{1}, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations_parabolic, + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) end diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 59f192ba266..953a2ed78b8 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -104,7 +104,7 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}, P4estMeshView{2}, T8codeMesh{2}, TreeMesh{3}, P4estMesh{3}, T8codeMesh{3}}, equations, @@ -115,19 +115,19 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -159,12 +159,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin @@ -441,7 +442,8 @@ end return nothing end -function prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, dg::DG) +function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{2}, equations, + dg::DG) @unpack interfaces = cache @unpack orientations, neighbor_ids = interfaces interfaces_u = interfaces.u @@ -468,7 +470,7 @@ function prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, dg::DG) return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, surface_integral, dg::DG, cache) @@ -502,7 +504,7 @@ function calc_interface_flux!(surface_flux_values, return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{2}, have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache) @@ -1024,7 +1026,7 @@ end return nothing end -function calc_surface_integral!(du, u, +function calc_surface_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}, StructuredMeshView{2}}, equations, surface_integral::SurfaceIntegralWeakForm, @@ -1067,7 +1069,7 @@ function calc_surface_integral!(du, u, return 
nothing end -function apply_jacobian!(du, mesh::TreeMesh{2}, +function apply_jacobian!(backend::Nothing, du, mesh::TreeMesh{2}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl index cbbc8ea6593..af5bf87336e 100644 --- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl @@ -103,7 +103,7 @@ function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{2}, TreeMesh{3}}, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations_parabolic, + calc_surface_integral!(nothing, du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) end @@ -918,7 +918,7 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache_parabolic, u_transformed, mesh, + prolong2interfaces!(nothing, cache_parabolic, u_transformed, mesh, equations_parabolic, dg) end diff --git a/src/solvers/dgsem_tree/dg_2d_parallel.jl b/src/solvers/dgsem_tree/dg_2d_parallel.jl index 381ca713d82..614af8e0da1 100644 --- a/src/solvers/dgsem_tree/dg_2d_parallel.jl +++ b/src/solvers/dgsem_tree/dg_2d_parallel.jl @@ -447,7 +447,7 @@ function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars end -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::Union{ParallelTreeMesh{2}, ParallelP4estMesh{2}, ParallelT8codeMesh{2}}, equations, boundary_conditions, source_terms::Source, @@ -476,7 +476,7 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end @@ -484,12 +484,12 @@ function rhs!(du, u, t, # Prolong solution to interfaces # TODO: Taal decide order of arguments, consistent vs. modified cache first? 
@trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache, u, mesh, equations, dg) + prolong2interfaces!(backend, cache, u, mesh, equations, dg) end # Calculate interface fluxes @trixi_timeit timer() "interface flux" begin - calc_interface_flux!(cache.elements.surface_flux_values, mesh, + calc_interface_flux!(backend, cache.elements.surface_flux_values, mesh, have_nonconservative_terms(equations), equations, dg.surface_integral, dg, cache) end @@ -540,12 +540,13 @@ function rhs!(du, u, t, # Calculate surface integrals @trixi_timeit timer() "surface integral" begin - calc_surface_integral!(du, u, mesh, equations, + calc_surface_integral!(backend, du, u, mesh, equations, dg.surface_integral, dg, cache) end # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl index 2cdf23acbbd..19514efebd2 100644 --- a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl +++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl @@ -66,7 +66,7 @@ function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, P4estMesh{2}}, end # Subcell limiting currently only implemented for certain mesh types -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}, P4estMesh{2}, P4estMesh{3}}, have_nonconservative_terms, equations, diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index b8def478249..2ed174ee727 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -432,7 +432,8 @@ end return nothing end -function prolong2interfaces!(cache, u, mesh::TreeMesh{3}, equations, dg::DG) +function prolong2interfaces!(backend::Nothing, cache, u, mesh::TreeMesh{3}, equations, + dg::DG) @unpack interfaces = cache @unpack orientations, neighbor_ids = interfaces interfaces_u = interfaces.u @@ -470,7 +471,7 @@ function prolong2interfaces!(cache, u, mesh::TreeMesh{3}, equations, dg::DG) return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, surface_integral, dg::DG, cache) @@ -505,7 +506,7 @@ function calc_interface_flux!(surface_flux_values, return nothing end -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::True, equations, surface_integral, dg::DG, cache) @@ -1246,7 +1247,8 @@ end return nothing end -function calc_surface_integral!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}}, +function calc_surface_integral!(backend::Nothing, du, u, + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, equations, surface_integral, dg::DGSEM, cache) @unpack boundary_interpolation = dg.basis @unpack surface_flux_values = cache.elements @@ -1298,7 +1300,7 @@ function calc_surface_integral!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3 return nothing end -function apply_jacobian!(du, mesh::TreeMesh{3}, +function apply_jacobian!(backend::Nothing, du, mesh::TreeMesh{3}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git 
a/src/solvers/dgsem_tree/dg_3d_parabolic.jl b/src/solvers/dgsem_tree/dg_3d_parabolic.jl index fa6f42561c5..02c89e2aed3 100644 --- a/src/solvers/dgsem_tree/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_3d_parabolic.jl @@ -1104,7 +1104,7 @@ function calc_gradient!(gradients, u_transformed, t, # Prolong solution to interfaces @trixi_timeit timer() "prolong2interfaces" begin - prolong2interfaces!(cache_parabolic, u_transformed, mesh, + prolong2interfaces!(nothing, cache_parabolic, u_transformed, mesh, equations_parabolic, dg) end diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index 1c785765d73..0ffa99c8240 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -34,7 +34,7 @@ function create_cache(mesh::UnstructuredMesh2D, equations, return cache end -function rhs!(du, u, t, +function rhs!(backend, du, u, t, mesh::UnstructuredMesh2D, equations, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} @@ -43,7 +43,7 @@ function rhs!(du, u, t, # Calculate volume integral @trixi_timeit timer() "volume integral" begin - calc_volume_integral!(du, u, mesh, + calc_volume_integral!(backend, du, u, mesh, have_nonconservative_terms(equations), equations, dg.volume_integral, dg, cache) end @@ -80,7 +80,8 @@ function rhs!(du, u, t, # Apply Jacobian from mapping to reference element # Note! this routine is reused from dgsem_structured/dg_2d.jl - @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" apply_jacobian!(backend, du, mesh, equations, dg, + cache) # Calculate source terms @trixi_timeit timer() "source terms" begin diff --git a/src/solvers/fdsbp_tree/fdsbp_1d.jl b/src/solvers/fdsbp_tree/fdsbp_1d.jl index 93336c2f70d..c6388aa79e0 100644 --- a/src/solvers/fdsbp_tree/fdsbp_1d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_1d.jl @@ -40,7 +40,7 @@ function create_cache(mesh::TreeMesh{1}, equations, end # 2D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, @@ -87,7 +87,7 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, @@ -139,7 +139,7 @@ function calc_volume_integral!(du, u, return nothing end -function calc_surface_integral!(du, u, mesh::TreeMesh{1}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -166,7 +166,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{1}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh1D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh1D, equations, surface_integral::SurfaceIntegralStrongForm, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 @@ -220,7 +220,7 @@ end # in the specialized `calc_interface_flux` routine. 
These SATs are still of # a strong form penalty type, except that the interior flux at a particular # side of the element are computed in the upwind direction. -function calc_surface_integral!(du, u, mesh::TreeMesh{1}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -248,7 +248,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{1}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh1D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh1D, equations, surface_integral::SurfaceIntegralUpwind, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 diff --git a/src/solvers/fdsbp_tree/fdsbp_2d.jl b/src/solvers/fdsbp_tree/fdsbp_2d.jl index bd561f860ce..c1486303cbd 100644 --- a/src/solvers/fdsbp_tree/fdsbp_2d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_2d.jl @@ -40,7 +40,7 @@ function create_cache(mesh::Union{TreeMesh{2}, UnstructuredMesh2D}, equations, end # 2D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, @@ -96,7 +96,7 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, @@ -159,7 +159,7 @@ function calc_volume_integral!(du, u, return nothing end -function calc_surface_integral!(du, u, mesh::TreeMesh{2}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -202,7 +202,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{2}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh2D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh2D, equations, surface_integral::SurfaceIntegralStrongForm, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 @@ -214,7 +214,7 @@ end # already separates the solution information into right-traveling and # left-traveling information. So we only need to compute the appropriate # flux information at each side of an interface. -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{2}, have_nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, @@ -260,7 +260,7 @@ end # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular # side of the element are computed in the upwind direction. 
-function calc_surface_integral!(du, u, mesh::TreeMesh{2}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -304,7 +304,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{2}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh2D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh2D, equations, surface_integral::SurfaceIntegralUpwind, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 diff --git a/src/solvers/fdsbp_tree/fdsbp_3d.jl b/src/solvers/fdsbp_tree/fdsbp_3d.jl index b21b8d0eea3..1b380618688 100644 --- a/src/solvers/fdsbp_tree/fdsbp_3d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_3d.jl @@ -40,7 +40,7 @@ function create_cache(mesh::TreeMesh{3}, equations, end # 3D volume integral contributions for `VolumeIntegralStrongForm` -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, @@ -103,7 +103,7 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, @@ -181,7 +181,7 @@ function calc_volume_integral!(du, u, return nothing end -function calc_surface_integral!(du, u, mesh::TreeMesh{3}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -238,7 +238,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{3}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh3D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh3D, equations, surface_integral::SurfaceIntegralStrongForm, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 @@ -250,7 +250,7 @@ end # already separates the solution information into right-traveling and # left-traveling information. So we only need to compute the appropriate # flux information at each side of an interface. -function calc_interface_flux!(surface_flux_values, +function calc_interface_flux!(backend::Nothing, surface_flux_values, mesh::TreeMesh{3}, have_nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, @@ -297,7 +297,7 @@ end # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular # side of the element are computed in the upwind direction. 
-function calc_surface_integral!(du, u, mesh::TreeMesh{3}, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) inv_weight_left = inv(left_boundary_weight(dg.basis)) @@ -355,7 +355,7 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{3}, end # Periodic FDSBP operators need to use a single element without boundaries -function calc_surface_integral!(du, u, mesh::TreeMesh3D, +function calc_surface_integral!(backend::Nothing, du, u, mesh::TreeMesh3D, equations, surface_integral::SurfaceIntegralUpwind, dg::PeriodicFDSBP, cache) @assert nelements(dg, cache) == 1 diff --git a/src/solvers/fdsbp_unstructured/fdsbp_2d.jl b/src/solvers/fdsbp_unstructured/fdsbp_2d.jl index ac7e4c36758..5b3bd95b8cd 100644 --- a/src/solvers/fdsbp_unstructured/fdsbp_2d.jl +++ b/src/solvers/fdsbp_unstructured/fdsbp_2d.jl @@ -28,7 +28,7 @@ end # 2D volume integral contributions for `VolumeIntegralStrongForm` # OBS! This is the standard (not de-aliased) form of the volume integral. # So it is not provably stable for variable coefficients due to the the metric terms. -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::UnstructuredMesh2D, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, @@ -91,7 +91,7 @@ end # the finite difference stencils. Thus, the D^- operator acts on the positive # part of the flux splitting f^+ and the D^+ operator acts on the negative part # of the flux splitting f^-. -function calc_volume_integral!(du, u, +function calc_volume_integral!(backend::Nothing, du, u, mesh::UnstructuredMesh2D, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, diff --git a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl index 40a6c4d6be5..78b029ae3c0 100644 --- a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl +++ b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl @@ -57,8 +57,9 @@ function calculate_cfl(ode_algorithm::AbstractPairedExplicitRK, ode) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array(u_ode, mesh, equations, solver, cache) + backend = trixi_backend(u_ode) - cfl_number = dt_opt / max_dt(u, t0, mesh, + cfl_number = dt_opt / max_dt(backend, u, t0, mesh, have_constant_speed(equations), equations, solver, cache) return cfl_number diff --git a/test/runtests.jl b/test/runtests.jl index 8f35e1fb58d..df348546130 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -113,7 +113,8 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time if TRIXI_TEST == "all" || TRIXI_TEST == "CUDA" import CUDA if CUDA.functional() - include("test_cuda.jl") + include("test_cuda_2d.jl") + include("test_cuda_3d.jl") else @warn "Unable to run CUDA tests on this machine" end diff --git a/test/test_cuda.jl b/test/test_cuda_2d.jl similarity index 89% rename from test/test_cuda.jl rename to test/test_cuda_2d.jl index 87c32ed1e73..c13c0a4af2b 100644 --- a/test/test_cuda.jl +++ b/test/test_cuda_2d.jl @@ -5,11 +5,14 @@ using Trixi include("test_trixi.jl") +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") + # Start with a clean environment: remove Trixi.jl output directory if it exists outdir = "out" isdir(outdir) && rm(outdir, recursive = true) -EXAMPLES_DIR = joinpath(examples_dir(), "p4est_2d_dgsem") 
+@testset "CUDA 2D" begin +#! format: noindent @trixi_testset "elixir_advection_basic_gpu.jl native" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), @@ -39,12 +42,11 @@ end using CUDA @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), # Expected errors are exactly the same as with TreeMesh! - l2=nothing, # TODO: GPU. [Float32(8.311947673061856e-6)], - linf=nothing, # TODO: GPU. [Float32(6.627000273229378e-5)], + l2=[Float32(8.311947673061856e-6)], + linf=[Float32(6.627000273229378e-5)], RealT=Float32, real_type=Float32, - storage_type=CuArray, - sol=nothing,) # TODO: GPU. Remove this once we can run the simulation on the GPU + storage_type=CuArray) # # Ensure that we do not have excessive memory allocations # # (e.g., from type instabilities) # @test_allocations(Trixi.rhs!, semi, sol, 1000) @@ -65,5 +67,5 @@ end # Clean up afterwards: delete Trixi.jl output directory @test_nowarn isdir(outdir) && rm(outdir, recursive = true) - +end end # module diff --git a/test/test_cuda_3d.jl b/test/test_cuda_3d.jl new file mode 100644 index 00000000000..f4281e880e4 --- /dev/null +++ b/test/test_cuda_3d.jl @@ -0,0 +1,73 @@ +module TestCUDA + +using Test +using Trixi + +include("test_trixi.jl") + +EXAMPLES_DIR = joinpath(examples_dir(), "p4est_3d_dgsem") + +# Start with a clean environment: remove Trixi.jl output directory if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) + +@testset "CUDA 3D" begin +#! format: noindent + +@trixi_testset "elixir_advection_basic_gpu.jl native" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=[0.00016263963870641478], + linf=[0.0014537194925779984]) + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + @test real(ode.p.solver) == Float64 + @test real(ode.p.solver.basis) == Float64 + @test real(ode.p.solver.mortar) == Float64 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa Array + @test ode.p.solver.basis.derivative_matrix isa Array + + @test Trixi.storage_type(ode.p.cache.elements) === Array + @test Trixi.storage_type(ode.p.cache.interfaces) === Array + @test Trixi.storage_type(ode.p.cache.boundaries) === Array + @test Trixi.storage_type(ode.p.cache.mortars) === Array +end + +@trixi_testset "elixir_advection_basic_gpu.jl Float32 / CUDA" begin + # Using CUDA inside the testset since otherwise the bindings are hidden by the anonymous modules + using CUDA + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic_gpu.jl"), + # Expected errors similar to reference on CPU + l2=[Float32(0.00016263963870641478)], + linf=[Float32(0.0014537194925779984)], + RealT=Float32, + real_type=Float32, + storage_type=CuArray) + @test real(ode.p.solver) == Float32 + @test real(ode.p.solver.basis) == Float32 + @test real(ode.p.solver.mortar) == Float32 + # TODO: remake ignores the mesh itself as well + @test real(ode.p.mesh) == Float64 + + @test ode.u0 isa CuArray + @test ode.p.solver.basis.derivative_matrix isa CuArray + + @test Trixi.storage_type(ode.p.cache.elements) === CuArray + @test Trixi.storage_type(ode.p.cache.interfaces) === CuArray + @test Trixi.storage_type(ode.p.cache.boundaries) === CuArray + @test Trixi.storage_type(ode.p.cache.mortars) === CuArray
+end + +# Clean up afterwards: delete Trixi.jl output directory +@test_nowarn isdir(outdir) && rm(outdir, recursive = true) +end +end # module
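Note on the kernel structure introduced by this diff: every hot loop is split into three pieces, a `backend::Nothing` method that keeps the existing serial/`@threaded` CPU loop, a `backend::KernelAbstractions.Backend` method that launches a `@kernel` with one work item per element via `ndrange`, and a shared per-element body called from both. The following minimal, self-contained sketch illustrates that structure outside of Trixi.jl; the names (`scale_by_jacobian!`, `scale_KAkernel!`, `scale_element!`) and the toy array layout are illustrative assumptions, not code from this PR.

using KernelAbstractions
using KernelAbstractions: Backend, @kernel, @index

# CPU fallback: `nothing` selects the plain element loop (Trixi uses `@threaded` here).
function scale_by_jacobian!(backend::Nothing, du, inverse_jacobian)
    for element in axes(du, 3)
        scale_element!(du, inverse_jacobian, element)
    end
    return nothing
end

# Device path: any `KernelAbstractions.Backend` launches one work item per element.
# Launches are asynchronous; callers synchronize the backend before reading results.
function scale_by_jacobian!(backend::Backend, du, inverse_jacobian)
    size(du, 3) == 0 && return nothing  # guard against an empty ndrange
    kernel! = scale_KAkernel!(backend)
    kernel!(du, inverse_jacobian, ndrange = size(du, 3))
    return nothing
end

@kernel function scale_KAkernel!(du, inverse_jacobian)
    element = @index(Global)
    scale_element!(du, inverse_jacobian, element)
end

# Shared per-element body, reused by the CPU loop and the device kernel alike.
@inline function scale_element!(du, inverse_jacobian, element)
    for i in axes(du, 2), v in axes(du, 1)
        du[v, i, element] *= -inverse_jacobian[i, element]
    end
    return nothing
end

# CPU usage; with CUDA.jl one would pass `CUDABackend()` and `CuArray`s instead.
du = rand(4, 6, 8)
inverse_jacobian = rand(6, 8)
scale_by_jacobian!(nothing, du, inverse_jacobian)

Dispatching on `Nothing` instead of introducing a dedicated CPU backend type leaves the legacy code path untouched, which is why the parabolic and gradient helpers in this diff simply pass `nothing` explicitly.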
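The backend itself is derived from the storage of the solution vector, as in the `calculate_cfl` hunk above where `backend = trixi_backend(u_ode)` is forwarded to `max_dt`. A plausible reduced version of such a helper is sketched below; `select_backend` is a hypothetical name and the real `trixi_backend` implementation is not part of this diff.

using KernelAbstractions

# Plain `Array` storage keeps the legacy `nothing` (CPU/`@threaded`) path;
# any other array type is mapped to its KernelAbstractions device backend,
# e.g. a `CuArray` yields `CUDABackend()`.
select_backend(u::Array) = nothing
select_backend(u::AbstractArray) = KernelAbstractions.get_backend(u)

Together with the `storage_type = CuArray` and `real_type = Float32` keywords exercised by the new test files, this is what lets the same `rhs!(backend, du, u, t, ...)` entry point serve both the CPU runs and the CUDA runs.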