|
12 | 12 |
|
13 | 13 | #include <AMReX_OpenMP.H> |
14 | 14 |
|
| 15 | +#include <algorithm> |
| 16 | +#include <cstdlib> |
| 17 | +#include <functional> |
15 | 18 | #include <iostream> |
16 | 19 |
|
17 | 20 | namespace amrex { |
@@ -1887,6 +1890,173 @@ bool match (const BoxArray& x, const BoxArray& y) |
1887 | 1890 | } |
1888 | 1891 | } |
1889 | 1892 |
|
| 1893 | +BoxArray decompose (Box const& domain, int nboxes, |
| 1894 | + Array<bool,AMREX_SPACEDIM> const& decomp) |
| 1895 | +{ |
| 1896 | + auto ndecomp = std::count(decomp.begin(), decomp.end(), true); |
| 1897 | + |
| 1898 | + if (nboxes <= 1 || ndecomp == 0) { |
| 1899 | + return BoxArray(domain); |
| 1900 | + } |
| 1901 | + |
| 1902 | + Box const& ccdomain = amrex::enclosedCells(domain); |
| 1903 | + IntVect const& ncells = ccdomain.length(); |
| 1904 | + IntVect nprocs(1); |
| 1905 | + |
| 1906 | + if (ndecomp == 1) { |
| 1907 | + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { |
| 1908 | + if (decomp[idim]) { |
| 1909 | + nprocs[idim] = nboxes; |
| 1910 | + } |
| 1911 | + } |
| 1912 | + } else { |
| 1913 | + // Factorization of nboxes |
| 1914 | + Vector<int> factors; |
| 1915 | + { |
| 1916 | + int x = 2; |
| 1917 | + int n = nboxes; |
| 1918 | + while (x*x <= n) { |
| 1919 | + std::div_t dv = std::div(n, x); |
| 1920 | + if (dv.rem == 0) { |
| 1921 | + factors.push_back(x); |
| 1922 | + n = dv.quot; |
| 1923 | + } else { |
| 1924 | + ++x; |
| 1925 | + } |
| 1926 | + } |
| 1927 | + if (n != 1) { |
| 1928 | + factors.push_back(n); |
| 1929 | + } |
| 1930 | + AMREX_ALWAYS_ASSERT(nboxes == std::accumulate(factors.begin(), factors.end(), |
| 1931 | + 1, std::multiplies<>())); |
| 1932 | + } |
| 1933 | + |
| 1934 | + struct ProcDim |
| 1935 | + { |
| 1936 | + int nproc; |
| 1937 | + int idim; |
| 1938 | + Vector<int> procs; |
| 1939 | + ProcDim (int np, int dim) : nproc(np), idim(dim) {} |
| 1940 | + }; |
| 1941 | + |
| 1942 | + Vector<ProcDim> procdim; |
| 1943 | + procdim.reserve(AMREX_SPACEDIM); |
| 1944 | + |
| 1945 | + Array<Long,AMREX_SPACEDIM> nblocks; |
| 1946 | + |
| 1947 | + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { |
| 1948 | + if (decomp[idim]) { |
| 1949 | + nblocks[idim] = ncells[idim]; |
| 1950 | + procdim.emplace_back(1,idim); |
| 1951 | + } else { |
| 1952 | + nblocks[idim] = 0; // This dimension will not be decomposed. |
| 1953 | + } |
| 1954 | + } |
| 1955 | + |
| 1956 | + auto comp = [&] (ProcDim const& a, ProcDim const& b) { |
| 1957 | + if (nblocks[a.idim]*b.nproc < |
| 1958 | + nblocks[b.idim]*a.nproc) { |
| 1959 | + return true; |
| 1960 | + } else if (nblocks[a.idim]*b.nproc > |
| 1961 | + nblocks[b.idim]*a.nproc) { |
| 1962 | + return false; |
| 1963 | + } else { |
| 1964 | + return a.procs.size() > b.procs.size(); |
| 1965 | + } |
| 1966 | + }; |
| 1967 | + |
| 1968 | + int nprocs_tot = 1; |
| 1969 | + while (!factors.empty()) { |
| 1970 | + std::sort(procdim.begin(), procdim.end(), comp); |
| 1971 | + auto f = factors.back(); |
| 1972 | + factors.pop_back(); |
| 1973 | + procdim.back().nproc *= f; |
| 1974 | + procdim.back().procs.push_back(f); |
| 1975 | + nprocs_tot *= f; |
| 1976 | + if (nprocs_tot == nboxes) { |
| 1977 | + break; |
| 1978 | + } |
| 1979 | + } |
| 1980 | + |
| 1981 | + // swap to see if the decomposition can be improved. |
| 1982 | + while (true) |
| 1983 | + { |
| 1984 | + std::sort(procdim.begin(), procdim.end(), comp); |
| 1985 | + auto fit = std::find_if(procdim.begin(),procdim.end(), |
| 1986 | + [] (ProcDim const& x) { return x.nproc > 1; }); |
| 1987 | + if (fit == procdim.end()) { break; } // This should not actually happen. |
| 1988 | + auto& light = *fit; |
| 1989 | + auto& heavy = procdim.back(); |
| 1990 | + Long w0 = nblocks[light.idim] * heavy.nproc; |
| 1991 | + Long w1 = nblocks[heavy.idim] * light.nproc; |
| 1992 | + if (w0 >= w1) { break; } |
| 1993 | + bool swapped = false; |
| 1994 | + for (auto& f0 : light.procs) { |
| 1995 | + for (auto& f1 : heavy.procs) { |
| 1996 | + if ((f0 > f1) && (w0*f0 < w1*f1)) { |
| 1997 | + light.nproc /= f0; |
| 1998 | + light.nproc *= f1; |
| 1999 | + heavy.nproc /= f1; |
| 2000 | + heavy.nproc *= f0; |
| 2001 | + std::swap(f0,f1); |
| 2002 | + swapped = true; |
| 2003 | + break; |
| 2004 | + } |
| 2005 | + } |
| 2006 | + if (swapped) { break;} |
| 2007 | + } |
| 2008 | + if (!swapped) { break; } |
| 2009 | + } |
| 2010 | + |
| 2011 | + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { |
| 2012 | + if (!decomp[idim]) { |
| 2013 | + procdim.emplace_back(1,idim); |
| 2014 | + } |
| 2015 | + } |
| 2016 | + for (auto const& pd : procdim) { |
| 2017 | + nprocs[pd.idim] = pd.nproc; |
| 2018 | + } |
| 2019 | + } |
| 2020 | + |
| 2021 | + AMREX_ALWAYS_ASSERT(AMREX_D_TERM(nprocs[0],*nprocs[1],*nprocs[2]) == nboxes); |
| 2022 | + |
| 2023 | + IntVect const domlo = ccdomain.smallEnd(); |
| 2024 | + IntVect const sz = ncells / nprocs; |
| 2025 | + IntVect const extra = ncells - sz*nprocs; |
| 2026 | + auto ixtyp = domain.ixType(); |
| 2027 | + BoxList bl(ixtyp); |
| 2028 | +#if (AMREX_SPACEDIM == 3) |
| 2029 | + for (int k = 0; k < nprocs[2]; ++k) { |
| 2030 | + // The first extra[2] blocks get one extra cell with a total of |
| 2031 | + // sz[2]+1. The rest get sz[2] cells. The decomposition in y |
| 2032 | + // and x directions are similar. |
| 2033 | + int klo = (k < extra[2]) ? k*(sz[2]+1) : (k*sz[2]+extra[2]); |
| 2034 | + int khi = (k < extra[2]) ? klo+(sz[2]+1)-1 : klo+sz[2]-1; |
| 2035 | + klo += domlo[2]; |
| 2036 | + khi += domlo[2]; |
| 2037 | +#endif |
| 2038 | +#if (AMREX_SPACEDIM >= 2) |
| 2039 | + for (int j = 0; j < nprocs[1]; ++j) { |
| 2040 | + int jlo = (j < extra[1]) ? j*(sz[1]+1) : (j*sz[1]+extra[1]); |
| 2041 | + int jhi = (j < extra[1]) ? jlo+(sz[1]+1)-1 : jlo+sz[1]-1; |
| 2042 | + jlo += domlo[1]; |
| 2043 | + jhi += domlo[1]; |
| 2044 | +#endif |
| 2045 | + for (int i = 0; i < nprocs[0]; ++i) { |
| 2046 | + int ilo = (i < extra[0]) ? i*(sz[0]+1) : (i*sz[0]+extra[0]); |
| 2047 | + int ihi = (i < extra[0]) ? ilo+(sz[0]+1)-1 : ilo+sz[0]-1; |
| 2048 | + ilo += domlo[0]; |
| 2049 | + ihi += domlo[0]; |
| 2050 | + Box b{IntVect(AMREX_D_DECL(ilo,jlo,klo)), |
| 2051 | + IntVect(AMREX_D_DECL(ihi,jhi,khi))}; |
| 2052 | + if (b.ok()) { |
| 2053 | + bl.push_back(b.convert(ixtyp)); |
| 2054 | + } |
| 2055 | + AMREX_D_TERM(},},}) |
| 2056 | + |
| 2057 | + return BoxArray(std::move(bl)); |
| 2058 | +} |
| 2059 | + |
1890 | 2060 | std::ostream& |
1891 | 2061 | operator<< (std::ostream& os, const BoxArray::RefID& id) |
1892 | 2062 | { |
|
0 commit comments