diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index bae786e4b..3f93144da 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -33,7 +33,7 @@ jobs: -v $GITHUB_WORKSPACE:/workspace \ -v ~/.cache/pip:/root/.cache/pip \ pyscf/gpu4pyscf-devel:pyscf-2.8 \ - /bin/bash -c "cd /workspace && pip3 install -r requirements.txt && source build.sh && pytest -m 'not slow and not benchmark' --cov=/workspace --durations=50 && rm -rf .pytest_cache" + /bin/bash -c "cd /workspace && pip3 install -r requirements.txt && pip3 install MCfun && source build.sh && pytest -m 'not slow and not benchmark' --cov=/workspace --durations=50 && rm -rf .pytest_cache" multi-gpu: runs-on: [self-hosted, Linux, X64, 2T4] diff --git a/gpu4pyscf/dft/gks.py b/gpu4pyscf/dft/gks.py index 8d83e3f92..bd9259a6f 100644 --- a/gpu4pyscf/dft/gks.py +++ b/gpu4pyscf/dft/gks.py @@ -12,19 +12,166 @@ # See the License for the specific language governing permissions and # limitations under the License. +import cupy as cp +from pyscf import lib from pyscf.dft import gks -from gpu4pyscf.dft import numint +from gpu4pyscf.dft import numint2c from gpu4pyscf.dft import rks from gpu4pyscf.scf.ghf import GHF +from gpu4pyscf.lib import logger +from gpu4pyscf.lib import utils +from gpu4pyscf.lib.cupy_helper import tag_array -class GKS(gks.GKS, GHF): - from gpu4pyscf.lib.utils import to_cpu, to_gpu, device + +def get_veff(ks, mol=None, dm=None, dm_last=0, vhf_last=0, hermi=1): + '''Coulomb + XC functional + + .. note:: + This function will change the ks object. + + Args: + ks : an instance of :class:`RKS` + XC functional are controlled by ks.xc attribute. Attribute + ks.grids might be initialized. + dm : ndarray or list of ndarrays + A density matrix or a list of density matrices + + Kwargs: + dm_last : ndarray or a list of ndarrays or 0 + The density matrix baseline. If not 0, this function computes the + increment of HF potential w.r.t. the reference HF potential matrix. + vhf_last : ndarray or a list of ndarrays or 0 + The reference Vxc potential matrix. + hermi : int + Whether J, K matrix is hermitian + + | 0 : no hermitian or symmetric + | 1 : hermitian + | 2 : anti-hermitian + + Returns: + matrix Veff = J + Vxc. Veff can be a list matrices, if the input + dm is a list of density matrices. + ''' + if mol is None: mol = ks.mol + if dm is None: dm = ks.make_rdm1() + ks.initialize_grids(mol, dm) + + t0 = (logger.process_clock(), logger.perf_counter()) + + ground_state = isinstance(dm, cp.ndarray) and dm.ndim == 2 + + if hermi == 2: # because rho = 0 + n, exc, vxc = 0, 0, 0 + else: + max_memory = ks.max_memory - lib.current_memory()[0] + ni = ks._numint + if ni.collinear[0].lower() != 'm': + raise NotImplementedError('Only multi-colinear GKS is implemented') + n, exc, vxc = ni.get_vxc(mol, ks.grids, ks.xc, dm, + hermi=hermi, max_memory=max_memory) + logger.debug(ks, 'nelec by numeric integration = %s', n) + if ks.do_nlc(): + if ni.libxc.is_nlc(ks.xc): + xc = ks.xc + else: + assert ni.libxc.is_nlc(ks.nlc) + xc = ks.nlc + n, enlc, vnlc = ni.nr_nlc_vxc(mol, ks.nlcgrids, xc, dm, + hermi=hermi, max_memory=max_memory) + exc += enlc + vxc += vnlc + logger.debug(ks, 'nelec with nlc grids = %s', n) + t0 = logger.timer(ks, 'vxc', *t0) + + incremental_jk = (ks._eri is None and ks.direct_scf and + getattr(vhf_last, 'vj', None) is not None) + if incremental_jk: + _dm = cp.asarray(dm) - cp.asarray(dm_last) + else: + _dm = dm + if not ni.libxc.is_hybrid_xc(ks.xc): + vk = None + vj = ks.get_j(mol, _dm, hermi) + if incremental_jk: + vj += vhf_last.vj + vxc += vj + else: + omega, alpha, hyb = ni.rsh_and_hybrid_coeff(ks.xc, spin=mol.spin) + if omega == 0: + vj, vk = ks.get_jk(mol, _dm, hermi) + vk *= hyb + elif alpha == 0: # LR=0, only SR exchange + vj = ks.get_j(mol, _dm, hermi) + vk = ks.get_k(mol, _dm, hermi, omega=-omega) + vk *= hyb + elif hyb == 0: # SR=0, only LR exchange + vj = ks.get_j(mol, _dm, hermi) + vk = ks.get_k(mol, _dm, hermi, omega=omega) + vk *= alpha + else: # SR and LR exchange with different ratios + vj, vk = ks.get_jk(mol, _dm, hermi) + vk *= hyb + vklr = ks.get_k(mol, _dm, hermi, omega=omega) + vklr *= (alpha - hyb) + vk += vklr + if incremental_jk: + vj += vhf_last.vj + vk += vhf_last.vk + vxc += vj - vk + + if ground_state: + exc -= cp.einsum('ij,ji', dm, vk).real * .5 + + if ground_state: + ecoul = cp.einsum('ij,ji', dm, vj).real * .5 + else: + ecoul = None + + vxc = tag_array(vxc, ecoul=ecoul, exc=exc, vj=vj, vk=vk) + return vxc + + +class GKS(rks.KohnShamDFT, GHF): + to_gpu = utils.to_gpu + device = utils.device def __init__(self, mol, xc='LDA,VWN'): - raise NotImplementedError + GHF.__init__(self, mol) + rks.KohnShamDFT.__init__(self, xc) + self._numint = numint2c.NumInt2C() + + def dump_flags(self, verbose=None): + GHF.dump_flags(self, verbose) + rks.KohnShamDFT.dump_flags(self, verbose) + logger.info(self, 'collinear = %s', self._numint.collinear) + if self._numint.collinear[0] == 'm': + logger.info(self, 'mcfun spin_samples = %s', self._numint.spin_samples) + logger.info(self, 'mcfun collinear_thrd = %s', self._numint.collinear_thrd) + logger.info(self, 'mcfun collinear_samples = %s', self._numint.collinear_samples) + return self + + @property + def collinear(self): + return self._numint.collinear + @collinear.setter + def collinear(self, val): + self._numint.collinear = val + @property + def spin_samples(self): + return self._numint.spin_samples + @spin_samples.setter + def spin_samples(self, val): + self._numint.spin_samples = val + + get_veff = get_veff reset = rks.RKS.reset energy_elec = rks.RKS.energy_elec - get_veff = NotImplemented nuc_grad_method = NotImplemented to_hf = NotImplemented + + def to_cpu(self): + mf = gks.GKS(self.mol) + utils.to_cpu(self, out=mf) + return mf diff --git a/gpu4pyscf/dft/mcfun_gpu.py b/gpu4pyscf/dft/mcfun_gpu.py new file mode 100644 index 000000000..ed010311c --- /dev/null +++ b/gpu4pyscf/dft/mcfun_gpu.py @@ -0,0 +1,380 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import warnings +import cupy as cp +import numpy as np +from pyscf.dft.LebedevGrid import MakeAngularGrid + +MAX_GRIDS_PER_TASK = 4096 + +def eval_xc_eff(func, rho_tm, deriv=1, spin_samples=770, + collinear_threshold=None, collinear_samples=200): + '''Multi-collinear effective potential and effective kernel. + + Parameters + ---------- + func : Function to evaluate collinear functionals. + The function signature is + exc, vxc, fxc, ... = func((rho, s), deriv) + The input rho and s have shape + * (1, Ngrids) for LDA functionals + * (4, Ngrids) for GGA functionals where the four variables are + rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * (5, Ngrids) for meta-GGA functionals where the five variables are + rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + The returns of func should have the shape + * (2, Nvar, Ngrids) for vxc + * (2, Nvar, 2, Nvar, Ngrids) for fxc + * (2, Nvar, 2, Nvar, 2, Nvar, Ngrids) for kxc + The dimension Nvar can be + * 1 for LDA + * 4 for GGA: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * 5 for meta-GGA: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + Note: for GGA functionals the required returns have different conventions + to the returns of libxc or xcfun in functional derivatives. Here func + needs to return the derivatives to rho, nabla_x(rho), nabla_y(rho), + nabla_z(rho) while libxc (or xcfun) returns derivatives to rho, sigma_uu, + sigma_ud, sigma_dd (sigma_ud = nabla(rho_u) dot nabla(rho_d)). Please + see example 04-xc_wrapper.py for the transformation between the two + conventions. + rho_tm : cp array with shape (4, Nvar, Ngrids) + rho_tm[0] is density. rho_tm[1:4] is the magnetization spin vector. Nvar can be + * 1 for LDA functionals + * 4 for GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * 5 for meta-GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + deriv : int, optional + Functional derivatives. The current version (the kernel) supports maximum value 2. + spin_samples : int, optional + Number of grid points on spherical surface + collinear_threshold : float, optional + if specified, filters the points of strongly polarized spins and calls the + eval_xc_collinear_spin for these points. Recommended value 0.99. + collinear_samples : int, optional + Number of samples for eval_xc_collinear_spin. + + Returns + ------- + XC functional and derivatives for each grid. + * [exc] if deriv = 0 + * [exc, vxc] if deriv = 1 + * [exc, vxc, fxc] if deriv = 2 + ''' + + assert deriv < 5 + rho_tm = cp.asarray(rho_tm) + if rho_tm.dtype != cp.double: + raise RuntimeError('rho and m must be real') + ngrids = rho_tm.shape[-1] + + results = [] + for p0, p1 in _prange(0, ngrids, MAX_GRIDS_PER_TASK): + r = _eval_xc_lebedev(func, rho_tm[...,p0:p1], deriv, spin_samples, + collinear_threshold, collinear_samples) + results.append(r) + + return [None if x[0] is None else cp.concatenate(x, axis=-1) for x in zip(*results)] + + +def eval_xc_collinear_spin(func, rho_tm, deriv, spin_samples): + '''Multi-collinear functional derivatives for collinear spins + + Parameters + ---------- + func : Function to evaluate collinear functionals. + The function signature is + exc, vxc, fxc, ... = func((rho, s), deriv) + The input rho and s have shape + * (1, Ngrids) for LDA functionals + * (4, Ngrids) for GGA functionals where the four variables are + rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * (5, Ngrids) for meta-GGA functionals where the five variables are + rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + The returns of func should have the shape + * (2, Nvar, Ngrids) for vxc + * (2, Nvar, 2, Nvar, Ngrids) for fxc + * (2, Nvar, 2, Nvar, 2, Nvar, Ngrids) for kxc + The dimension Nvar can be + * 1 for LDA + * 4 for GGA: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * 5 for meta-GGA: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + Note: for GGA functionals the required returns have different conventions + to the returns of libxc or xcfun in functional derivatives. Here func + needs to return the derivatives to rho, nabla_x(rho), nabla_y(rho), + nabla_z(rho) while libxc (or xcfun) returns derivatives to rho, sigma_uu, + sigma_ud, sigma_dd (sigma_ud = nabla(rho_u) dot nabla(rho_d)). + rho_tm : cp array with shape (4, Nvar, Ngrids) + rho_tm[0] is density. rho_tm[1:4] is the magnetization spin vector. Nvar can be + * 1 for LDA functionals + * 4 for GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho) + * 5 for meta-GGA functionals: rho, nabla_x(rho), nabla_y(rho), nabla_z(rho), tau + deriv : int, optional + Functional derivatives. The current version (the kernel) supports maximum value 2. + spin_samples : int, optional + Number of grid points on principal axis + + Returns + ------- + XC functional and derivatives for each grid. + * [exc] if deriv = 0 + * [exc, vxc] if deriv = 1 + * [exc, vxc, fxc] if deriv = 2 + ''' + ngrids = rho_tm.shape[-1] + # samples on z=cos(theta) and their weights between [0, 1] + sgridz, weights = _make_paxis_samples(spin_samples) + blksize = int(cp.ceil(1e5 / ngrids)) * 8 + + if rho_tm.ndim == 2: + nvar = 1 + else: + nvar = rho_tm.shape[1] + + rho_ts = _project_spin_paxis(rho_tm) + + # TODO: filter s, nabla(s) ~= 0 + m = rho_tm[1:].reshape(3, nvar, ngrids) + s = rho_ts[1].reshape(nvar, ngrids)[0] + omega = cp.where(s == 0, 0, m[:, 0] / s) + + xc_orig = func(rho_ts, deriv) + exc_eff = xc_orig[0] + exc_eff = exc_eff[:,0] + + omega = omega.reshape(3, ngrids) + if deriv > 0: + vxc = xc_orig[1].reshape(2, nvar, ngrids) + vxc_eff = cp.vstack((vxc[:1], cp.einsum('xg,rg->rxg', vxc[1], omega))) + + if deriv > 1: + # spin-conserve part + fxc = xc_orig[2].reshape(2, nvar, 2, nvar, ngrids) + fxc_eff = cp.empty((4, nvar, 4, nvar, ngrids)) + fxc_eff[0,:,0] = fxc[0,:,0] + fz1 = cp.einsum('xyg,rg->rxyg', fxc[1,:,0], omega) + fxc_eff[1:,:,0 ] = fz1 + fxc_eff[0 ,:,1:] = fz1.transpose(1,0,2,3) + tmp = cp.einsum('xyg,rg->rxyg', fxc[1,:,1,:], omega) + fxc_eff[1:,:,1:] = cp.einsum('rxyg,sg->rxsyg', tmp, omega) + + # spin-flip part + fxc_sf = 0 + for p0, p1 in _prange(0, weights.size, blksize): + rho = _project_spin_paxis(rho_tm, sgridz[p0:p1]) + fxc = func(rho, deriv)[2] + fxc = fxc.reshape(2, nvar, 2, nvar, ngrids, p1 - p0) + # only considers the xx+yy part + fxc_sf += fxc[1,:,1].dot(weights[p0:p1]) + + for i in range(1, 4): + fxc_eff[i,:,i] += fxc_sf + tmp = cp.einsum('xyg,rg->rxyg', fxc_sf, omega) + fxc_eff[1:,:,1:] -= cp.einsum('rxyg,sg->rxsyg', tmp, omega) + + ret = [exc_eff] + if deriv > 0: + ret.append(vxc_eff) + if deriv > 1: + ret.append(fxc_eff) + if deriv > 2: + raise NotImplementedError + return ret + +def _eval_xc_lebedev(func, rho_tm, deriv, spin_samples, + collinear_threshold=None, collinear_samples=200): + '''Multi-collinear effective potential and effective kernel with projection + samples on spherical surface (the Lebedev grid samples) + ''' + ngrids = rho_tm.shape[-1] + sgrids, weights = _make_sph_samples(spin_samples) + sgrids = cp.asarray(sgrids) + weights = cp.asarray(weights) + blksize = int(cp.ceil(1e4 / ngrids)) * 8 + # import pdb + # pdb.set_trace() + if rho_tm.ndim == 2: + nvar = 1 + else: + nvar = rho_tm.shape[1] + exc_eff = vxc_eff = fxc_eff = kxc_eff = 0 + for p0, p1 in _prange(0, weights.size, blksize): + nsg = p1 - p0 + p_sgrids = sgrids[p0:p1] + p_weights = weights[p0:p1] + rho = _project_spin_sph(rho_tm, p_sgrids) + + xc_orig = func(rho, deriv+1) + + exc = xc_orig[0].reshape(ngrids, nsg) + vxc = xc_orig[1].reshape(2, nvar, ngrids, nsg) + + rho = rho.reshape(2, nvar, ngrids, nsg) + s = rho[1] + rho_pure = rho[0,0] + exc_rho = exc * rho_pure + cp.einsum('xgo,xgo->go', vxc[1], s) + exc_eff += cp.einsum('go,o->g', exc_rho, p_weights) + + if deriv > 0: + fxc = xc_orig[2].reshape(2, nvar, 2, nvar, ngrids, nsg) + # vs * 2 + s*f_s_st + vxc[1] *= 2 + vxc += cp.einsum('xbygo,xgo->bygo', fxc[1], s) + c_tm = _ts2tm_transformation(p_sgrids) + cw_tm = c_tm * p_weights + vxc_eff += cp.einsum('rao,axgo->rxg', cw_tm, vxc) + + if deriv > 1: + kxc = xc_orig[3].reshape(2, nvar, 2, nvar, 2, nvar, ngrids, nsg) + fxc[1,:,1] *= 3 + fxc[0,:,1] *= 2 + fxc[1,:,0] *= 2 + fxc += cp.einsum('xbyczgo,xgo->byczgo', kxc[1], s) + fxc = cp.einsum('rao,axbygo->rxbygo', c_tm, fxc) + fxc_eff += cp.einsum('sbo,rxbygo->rxsyg', cw_tm, fxc) + + if deriv > 2: + lxc = xc_orig[4].reshape(2, nvar, 2, nvar, 2, nvar, 2, nvar, ngrids, nsg) + kxc[1,:,1,:,1] *= 4 + kxc[1,:,1,:,0] *= 3 + kxc[1,:,0,:,1] *= 3 + kxc[0,:,1,:,1] *= 3 + kxc[1,:,0,:,0] *= 2 + kxc[0,:,1,:,0] *= 2 + kxc[0,:,0,:,1] *= 2 + + kxc += cp.einsum('wbxcydzgo,wgo->bxcydzgo', lxc[1], s) + kxc = cp.einsum('rao,axbyczgo->rxbyczgo', c_tm, kxc) + kxc = cp.einsum('sbo,rxbyczgo->rxsyczgo', c_tm, kxc) + # kxc = cp.einsum('rao,sbo,axbyczgo->rxsyczgo', c_tm, c_tm,kxc) + kxc_eff += cp.einsum('tco,rxsyczgo->rxsytzg', cw_tm, kxc) + + # exc in libxc is defined as Exc per particle. exc_eff calculated above is exc*rho. + # Divide exc_eff by rho so as to follow the convention of libxc + if rho_tm.ndim == 2: + rho_pure = rho_tm[0] + else: + rho_pure = rho_tm[0,0] + + exc_eff[rho_pure == 0] = 0 + exc_eff[rho_pure != 0] /= rho_pure[rho_pure != 0] + + # Strongly spin-polarized points (rho ~= |m|) can be considered as collinear spins + if collinear_threshold is not None: + rho, s = _project_spin_paxis(rho_tm.reshape(4, nvar, ngrids)[:,0]) + cs_idx = cp.where(s >= rho * collinear_threshold)[0] + if cs_idx.size > 0: + xc_cs = eval_xc_collinear_spin(func, rho_tm[...,cs_idx], deriv, + collinear_samples) + print("debug 1") + print("exc_eff", exc_eff.shape) + print("cs_idx", cs_idx.shape) + print("xc_cs", len(xc_cs)) + print("xc_cs[0]", xc_cs[0].shape) + print("rho_tm", rho_tm.shape) + exc_eff[...,cs_idx] = xc_cs[0] + if deriv > 0: + vxc_eff[...,cs_idx] = xc_cs[1] + if deriv > 1: + fxc_eff[...,cs_idx] = xc_cs[2] + if deriv > 2: + kxc_eff[...,cs_idx] = xc_cs[3] + + ret = [exc_eff] + if deriv > 0: + ret.append(vxc_eff) + if deriv > 1: + ret.append(fxc_eff) + if deriv > 2: + ret.append(kxc_eff) + if deriv > 3: + raise NotImplementedError + return ret + +def _make_sph_samples(spin_samples): + '''Integration samples on spherical surface''' + ang_grids = MakeAngularGrid(spin_samples) + directions = ang_grids[:,:3].copy(order='F') + weights = ang_grids[:,3].copy() + return directions, weights + +def _prange(start, end, step): + '''Partitions range into segments: i0:i1, i1:i2, i2:i3, ...''' + if start < end: + for i in range(start, end, step): + yield i, min(i+step, end) + +def _project_spin_sph(rho_tm, sgrids): + '''Projects spin onto spherical surface''' + rho = rho_tm[0] + m = rho_tm[1:] + nsg = sgrids.shape[0] + ngrids = rho.shape[-1] + if rho_tm.ndim == 2: + rho_ts = cp.empty((2, ngrids, nsg)) + rho_ts[0] = rho[:, cp.newaxis] + rho_ts[1] = cp.einsum('mg,om->go', m, sgrids) + rho_ts = rho_ts.reshape(2, ngrids*nsg) + else: + nvar = rho_tm.shape[1] + rho_ts = cp.empty((2, nvar, ngrids, nsg)) + rho_ts[0] = rho[:, :, cp.newaxis] + rho_ts[1] = cp.einsum('mxg,om->xgo', m, sgrids) + rho_ts = rho_ts.reshape(2, nvar, ngrids*nsg) + return rho_ts + +def _ts2tm_transformation(sgrids): + ''' + Transformation that projects v_ts(rho,s) to rho/m representation (rho,mx,my,mz) + ''' + nsg = sgrids.shape[0] + c_tm = cp.zeros((4, 2, nsg)) + c_tm[0,0] = 1 + c_tm[1:,1] = sgrids.T + return c_tm + +def _make_paxis_samples(spin_samples): + '''Samples on principal axis between [0, 1]''' + rt, wt = np.polynomial.legendre.leggauss(spin_samples) + rt = cp.asarray(rt) + wt = cp.asarray(wt) + rt = rt * .5 + .5 + wt *= .5 # normalized to 1 + return rt, wt + +def _project_spin_paxis(rho_tm, sgridz=None): + '''Projects spins onto the principal axis''' + rho = rho_tm[0] + m = rho_tm[1:] + + s = cp.linalg.norm(m, axis=0) + if sgridz is None: + rho_ts = cp.stack([rho, s]) + else: + ngrids = rho.shape[-1] + nsg = sgridz.shape[0] + if rho_tm.ndim == 2: + rho_ts = cp.empty((2, ngrids, nsg)) + rho_ts[0] = rho[:,cp.newaxis] + rho_ts[1] = s[:,cp.newaxis] * sgridz + rho_ts = rho_ts.reshape(2, ngrids * nsg) + else: + print('222') + nvar = rho_tm.shape[1] + rho_ts = cp.empty((2, nvar, ngrids, nsg)) + rho_ts[0] = rho[:,:,cp.newaxis] + rho_ts[1] = s[:,:,cp.newaxis] * sgridz + rho_ts = rho_ts.reshape(2, nvar, ngrids * nsg) + return rho_ts + diff --git a/gpu4pyscf/dft/numint.py b/gpu4pyscf/dft/numint.py index 55c25dfab..81e1be013 100644 --- a/gpu4pyscf/dft/numint.py +++ b/gpu4pyscf/dft/numint.py @@ -2429,6 +2429,74 @@ def unsort_orbitals(self, sorted_mat, axis=[], out=None): out[tuple(fancy_index)] = sorted_mat return out + def sort_orbitals_2c(self, mat, axis=[]): + ''' Transform given axis of a 2-component matrix (GKS) into sorted AO + + This assumes the axes specified in 'axis' have a dimension of 2*nao, + representing alpha (:nao) and beta (nao:) components. Both components + are sorted using the same AO sorting index. + ''' + idx = self._ao_idx + nao = len(idx) + + # Create the 2-component sorting index: + # [sorted_alpha_indices, sorted_beta_indices] + # E.g., if idx = [0, 2, 1] (nao=3), + # idx_2c = [0, 2, 1, 3+0, 3+2, 3+1] = [0, 2, 1, 3, 5, 4] + # We use numpy to build the index, consistent with self._ao_idx + idx_2c = np.concatenate([idx, idx + nao]) + + shape_ones = (1,) * mat.ndim + fancy_index = [] + for dim, n in enumerate(mat.shape): + if dim in axis: + # Check if the dimension matches the 2-component size + if n != 2 * nao: + raise ValueError(f"Axis {dim} has dimension {n}, expected {2*nao} for 2-component sorting") + indices = idx_2c + else: + # Use cupy.arange for non-sorted axes, as in the original sort_orbitals + indices = cupy.arange(n) + + idx_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] + fancy_index.append(indices.reshape(idx_shape)) + + # Perform the sorting using advanced indexing + return mat[tuple(fancy_index)] + + def unsort_orbitals_2c(self, sorted_mat, axis=[], out=None): + ''' Transform given axis of a 2-component matrix from sorted AO to original AO + + This assumes the axes specified in 'axis' have a dimension of 2*nao. + This is the inverse operation of sort_orbitals_2c. + ''' + idx = self._ao_idx + nao = len(idx) + + # The 2-component index is created identically to sort_orbitals_2c + idx_2c = np.concatenate([idx, idx + nao]) + + shape_ones = (1,) * sorted_mat.ndim + fancy_index = [] + for dim, n in enumerate(sorted_mat.shape): + if dim in axis: + # Check if the dimension matches the 2-component size + if n != 2 * nao: + raise ValueError(f"Axis {dim} has dimension {n}, expected {2*nao} for 2-component unsorting") + indices = idx_2c + else: + indices = cupy.arange(n) + + idx_shape = shape_ones[:dim] + (-1,) + shape_ones[dim+1:] + fancy_index.append(indices.reshape(idx_shape)) + + if out is None: + out = cupy.empty_like(sorted_mat) + + # Perform the unsorting assignment + out[tuple(fancy_index)] = sorted_mat + return out + class GTOValEnvVars(ctypes.Structure): _fields_ = [ ("natm", ctypes.c_int), diff --git a/gpu4pyscf/dft/numint2c.py b/gpu4pyscf/dft/numint2c.py new file mode 100644 index 000000000..cdc78e087 --- /dev/null +++ b/gpu4pyscf/dft/numint2c.py @@ -0,0 +1,499 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' +Numerical integration functions for (2-component) GKS with real AO basis +''' + +import functools +import numpy as np +import cupy as cp +from pyscf import lib +from pyscf.dft import numint2c +from gpu4pyscf.dft import numint, mcfun_gpu +from gpu4pyscf.dft.numint import _dot_ao_dm, _dot_ao_ao, _scale_ao +from gpu4pyscf.dft import xc_deriv +from gpu4pyscf.lib import utils +from pyscf import __config__ + + +def eval_rho(mol, ao, dm, non0tab=None, xctype='LDA', hermi=0, + with_lapl=True, verbose=None): + nao = ao.shape[-2] + assert dm.ndim == 2 and nao * 2 == dm.shape[0] + if not isinstance(dm, cp.ndarray): + dm = cp.asarray(dm) + + nao, ngrids = ao.shape[-2:] + xctype = xctype.upper() + shls_slice = (0, mol.nbas) + ao_loc = mol.ao_loc + + if xctype == 'LDA': + c0a = _dot_ao_dm(mol, ao, dm[:nao], non0tab, shls_slice, ao_loc) + c0b = _dot_ao_dm(mol, ao, dm[nao:], non0tab, shls_slice, ao_loc) + rho_m = _contract_rho_m((ao, ao), (c0a, c0b), hermi, True) + elif xctype == 'GGA': + # first 4 ~ (rho, m), second 4 ~ (0th order, dx, dy, dz) + if hermi: + rho_m = cp.empty((4, 4, ngrids)) + else: + rho_m = cp.empty((4, 4, ngrids), dtype=cp.complex128) + c0a = _dot_ao_dm(mol, ao[0], dm[:nao], non0tab, shls_slice, ao_loc) + c0b = _dot_ao_dm(mol, ao[0], dm[nao:], non0tab, shls_slice, ao_loc) + c0 = (c0a, c0b) + rho_m[:,0] = _contract_rho_m((ao[0], ao[0]), c0, hermi, True) + for i in range(1, 4): + rho_m[:,i] = _contract_rho_m((ao[i], ao[i]), c0, hermi, False) + if hermi: + rho_m[:,1:4] *= 2 # *2 for |ao> dm < dx ao| + |dx ao> dm < ao| + else: + for i in range(1, 4): + c1a = _dot_ao_dm(mol, ao[i], dm[:nao], non0tab, shls_slice, ao_loc) + c1b = _dot_ao_dm(mol, ao[i], dm[nao:], non0tab, shls_slice, ao_loc) + rho_m[:,i] += _contract_rho_m((ao[0], ao[0]), (c1a, c1b), hermi, False) + else: # meta-GGA + if hermi: + dtype = cp.double + else: + dtype = cp.complex128 + if with_lapl: + rho_m = cp.empty((4, 6, ngrids), dtype=dtype) + tau_idx = 5 + else: + rho_m = cp.empty((4, 5, ngrids), dtype=dtype) + tau_idx = 4 + c0a = _dot_ao_dm(mol, ao[0], dm[:nao], non0tab, shls_slice, ao_loc) + c0b = _dot_ao_dm(mol, ao[0], dm[nao:], non0tab, shls_slice, ao_loc) + c0 = (c0a, c0b) + rho_m[:,0] = _contract_rho_m((ao[0], ao[0]), c0, hermi, True) + rho_m[:,tau_idx] = 0 + for i in range(1, 4): + c1a = _dot_ao_dm(mol, ao[i], dm[:nao], non0tab, shls_slice, ao_loc) + c1b = _dot_ao_dm(mol, ao[i], dm[nao:], non0tab, shls_slice, ao_loc) + rho_m[:,tau_idx] += _contract_rho_m((ao[i], ao[i]), (c1a, c1b), hermi, True) + + rho_m[:,i] = _contract_rho_m((ao[i], ao[i]), c0, hermi, False) + if hermi: + rho_m[:,i] *= 2 + else: + rho_m[:,i] += _contract_rho_m((ao[0], ao[0]), (c1a, c1b), hermi, False) + if with_lapl: + # TODO: rho_m[:,4] = \nabla^2 rho + raise NotImplementedError + # tau = 1/2 (\nabla f)^2 + rho_m[:,tau_idx] *= .5 + return rho_m + +def _gks_mcol_vxc(ni, mol, grids, xc_code, dms, relativity=0, hermi=0, + max_memory=2000, verbose=None): + xctype = ni._xc_type(xc_code) + shls_slice = (0, mol.nbas) + ao_loc = mol.ao_loc_nr() + n2c = dms[0].shape[-1] + nao = n2c // 2 + opt = ni.gdftopt + _sorted_mol = opt._sorted_mol + + nelec = 0 + excsum = 0 + vmat = cp.zeros((n2c,n2c), dtype=cp.complex128) + + if xctype in ('LDA', 'GGA', 'MGGA'): + f_eval_mat = { + ('LDA' , 'n'): (_ncol_lda_vxc_mat , 0), + ('LDA' , 'm'): (_mcol_lda_vxc_mat , 0), + ('GGA' , 'm'): (_mcol_gga_vxc_mat , 1), + ('MGGA', 'm'): (_mcol_mgga_vxc_mat, 1), + } + fmat, ao_deriv = f_eval_mat[(xctype, ni.collinear[0])] + + if ni.collinear[0] == 'm': # mcol + eval_xc = ni.mcfun_eval_xc_adapter(xc_code) + else: + raise NotImplementedError('locally-collinear vxc is not implemented') + + for ao, mask, weight, coords \ + in ni.block_loop(_sorted_mol, grids, nao, ao_deriv, max_memory): + mask_2c = np.concatenate([mask, mask + nao]) + dm_mask = dms[mask_2c[:,None],mask_2c] + rho = eval_rho(_sorted_mol, ao, dm_mask, non0tab=None, xctype=xctype, hermi=hermi, + with_lapl=False, verbose=None) + exc, vxc = eval_xc(xc_code, rho, deriv=1, xctype=xctype)[:2] + if xctype == 'LDA': + den = rho[0] * weight + else: + den = rho[0,0] * weight + nelec += den.sum() + excsum += cp.dot(den, exc) + vmat += fmat(mol, ao, weight, rho, vxc, mask, shls_slice, + ao_loc, hermi) + + elif xctype == 'HF': + pass + else: + raise NotImplementedError(f'numint2c.get_vxc for functional {xc_code}') + + if hermi: + vmat = vmat + vmat.conj().transpose(1,0) + + return nelec, excsum, vmat + +def _gks_mcol_fxc(ni, mol, grids, xc_code, dm0, dms, relativity=0, hermi=0, + rho0=None, vxc=None, fxc=None, max_memory=2000, verbose=None): + raise NotImplementedError('non-collinear lda fxc') + +def _ncol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): + '''Vxc matrix of non-collinear LDA''' + # NOTE vxc in u/d representation + raise NotImplementedError('non-collinear lda vxc mat') + + +# * Mcfun requires functional derivatives to total-density and spin-density. +# * Make it a global function than a closure so as to be callable by multiprocessing +def __mcfun_fn_eval_xc(ni, xc_code, xctype, rho, deriv): + t, s = rho + if not isinstance(t, cp.ndarray): + t = cp.asarray(t) + if not isinstance(s, cp.ndarray): + s = cp.asarray(s) + rho = cp.stack([(t + s) * .5, (t - s) * .5]) + spin = 1 + evfk = ni.eval_xc_eff(xc_code, rho, deriv=deriv, xctype=xctype, spin=spin) + evfk = list(evfk) + for order in range(1, deriv+1): + if evfk[order] is not None: + evfk[order] = xc_deriv.ud2ts(evfk[order]) + return evfk + +def mcfun_eval_xc_adapter(ni, xc_code): + '''Wrapper to generate the eval_xc function required by mcfun''' + + xctype = ni._xc_type(xc_code) + fn_eval_xc = functools.partial(__mcfun_fn_eval_xc, ni, xc_code, xctype) + def eval_xc_eff(xc_code, rho, deriv=1, omega=None, xctype=None, + verbose=None, spin=None): + return mcfun_gpu.eval_xc_eff( + fn_eval_xc, rho, deriv, spin_samples=ni.spin_samples, + collinear_threshold=ni.collinear_thrd, + collinear_samples=ni.collinear_samples) + return eval_xc_eff + +def _mcol_lda_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): + '''Vxc matrix of multi-collinear LDA''' + wv = weight * vxc + if hermi: + wv *= .5 # * .5 because of v+v.conj().T in r_vxc + wr, wmx, wmy, wmz = wv + + # einsum('g,g,xgi,xgj->ij', vxc, weight, ao, ao) + # + einsum('xy,g,g,xgi,ygj->ij', sx, vxc, weight, ao, ao) + # + einsum('xy,g,g,xgi,ygj->ij', sy, vxc, weight, ao, ao) + # + einsum('xy,g,g,xgi,ygj->ij', sz, vxc, weight, ao, ao) + aow = None + aow = _scale_ao(ao, wmx[0], out=aow) # Mx + tmpx = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao, wmy[0], out=aow) # My + tmpy = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + if hermi: + # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real + matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j + matab = cp.zeros_like(matba) + else: + # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex + matba = tmpx + tmpy * 1j + matab = tmpx - tmpy * 1j + tmpx = tmpy = None + aow = _scale_ao(ao, wr[0]+wmz[0], out=aow) # Mz + mataa = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao, wr[0]-wmz[0], out=aow) # Mz + matbb = _dot_ao_ao(mol, ao, aow, mask, shls_slice, ao_loc) + row1 = cp.concatenate([mataa, matab], axis=1) + row2 = cp.concatenate([matba, matbb], axis=1) + + mat = cp.concatenate([row1, row2], axis=0) + return mat + +def _mcol_gga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): + '''Vxc matrix of multi-collinear LDA''' + wv = weight * vxc + if hermi: + wv[:,0] *= .5 # * .5 because of v+v.conj().T in r_vxc + wr, wmx, wmy, wmz = wv + + aow = None + aow = _scale_ao(ao[:4], wr[:4]+wmz[:4], out=aow) # Mz + mataa = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wr[:4]-wmz[:4], out=aow) # Mz + matbb = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wmx[:4], out=aow) # Mx + tmpx = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wmy[:4], out=aow) # My + tmpy = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + if hermi: + assert vxc.dtype == cp.double + # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real + matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j + matab = cp.zeros_like(matba) + else: + # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex + aow = _scale_ao(ao[1:4], wmx[1:4].conj(), out=aow) # Mx + tmpx += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[1:4], wmy[1:4].conj(), out=aow) # My + tmpy += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + matba = tmpx + tmpy * 1j + matab = tmpx - tmpy * 1j + aow = _scale_ao(ao[1:4], (wr[1:4]+wmz[1:4]).conj(), out=aow) # Mz + mataa += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[1:4], (wr[1:4]-wmz[1:4]).conj(), out=aow) # Mz + matbb += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + + row1 = cp.concatenate([mataa, matab], axis=1) + row2 = cp.concatenate([matba, matbb], axis=1) + + mat = cp.concatenate([row1, row2], axis=0) + return mat + +def _tau_dot(mol, bra, ket, wv, mask, shls_slice, ao_loc): + '''nabla_ao dot nabla_ao + numpy.einsum('p,xpi,xpj->ij', wv, bra[1:4].conj(), ket[1:4]) + ''' + aow = _scale_ao(ket[1], wv) + mat = _dot_ao_ao(mol, bra[1], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ket[2], wv, aow) + mat += _dot_ao_ao(mol, bra[2], aow, mask, shls_slice, ao_loc) + aow = _scale_ao(ket[3], wv, aow) + mat += _dot_ao_ao(mol, bra[3], aow, mask, shls_slice, ao_loc) + return mat + +def _mcol_mgga_vxc_mat(mol, ao, weight, rho, vxc, mask, shls_slice, ao_loc, hermi): + '''Vxc matrix of multi-collinear MGGA''' + wv = weight * vxc + tau_idx = 4 + wv[:,tau_idx] *= .5 # *.5 for 1/2 in tau + if hermi: + wv[:,0] *= .5 # * .5 because of v+v.conj().T in r_vxc + wv[:,tau_idx] *= .5 + wr, wmx, wmy, wmz = wv + + aow = None + aow = _scale_ao(ao[:4], wr[:4]+wmz[:4], out=aow) # Mz + mataa = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + mataa += _tau_dot(mol, ao, ao, wr[tau_idx]+wmz[tau_idx], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wr[:4]-wmz[:4], out=aow) # Mz + matbb = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + matbb += _tau_dot(mol, ao, ao, wr[tau_idx]-wmz[tau_idx], mask, shls_slice, ao_loc) + + aow = _scale_ao(ao[:4], wmx[:4], out=aow) # Mx + tmpx = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + tmpx += _tau_dot(mol, ao, ao, wmx[tau_idx], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[:4], wmy[:4], out=aow) # My + tmpy = _dot_ao_ao(mol, ao[0], aow, mask, shls_slice, ao_loc) + tmpy += _tau_dot(mol, ao, ao, wmy[tau_idx], mask, shls_slice, ao_loc) + if hermi: + assert vxc.dtype == cp.double + # conj(mx+my*1j) == mx-my*1j, tmpx and tmpy should be real + matba = (tmpx + tmpx.T) + (tmpy + tmpy.T) * 1j + matab = cp.zeros_like(matba) + else: + # conj(mx+my*1j) != mx-my*1j, tmpx and tmpy should be complex + aow = _scale_ao(ao[1:4], wmx[1:4].conj(), out=aow) # Mx + tmpx += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[1:4], wmy[1:4].conj(), out=aow) # My + tmpy += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + matba = tmpx + tmpy * 1j + matab = tmpx - tmpy * 1j + aow = _scale_ao(ao[1:4], (wr[1:4]+wmz[1:4]).conj(), out=aow) # Mz + mataa += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + aow = _scale_ao(ao[1:4], (wr[1:4]-wmz[1:4]).conj(), out=aow) # Mz + matbb += _dot_ao_ao(mol, aow, ao[0], mask, shls_slice, ao_loc) + + row1 = cp.concatenate([mataa, matab], axis=1) + row2 = cp.concatenate([matba, matbb], axis=1) + + mat = cp.concatenate([row1, row2], axis=0) + return mat + +def _mcol_lda_fxc_mat(mol, ao, weight, rho0, rho1, fxc, + mask, shls_slice, ao_loc, hermi): + raise NotImplementedError('non-collinear lda fxc') + +def _mcol_gga_fxc_mat(mol, ao, weight, rho0, rho1, fxc, + mask, shls_slice, ao_loc, hermi): + raise NotImplementedError('non-collinear gga fxc') + +def _mcol_mgga_fxc_mat(mol, ao, weight, rho0, rho1, fxc, + mask, shls_slice, ao_loc, hermi): + raise NotImplementedError('non-collinear mgga fxc') + +def _contract_rho_m(bra, ket, hermi=0, bra_eq_ket=False): + ''' + hermi indicates whether the density matrix is hermitian. + bra_eq_ket indicates whether bra and ket basis are the same AOs. + ''' + # rho = einsum('xgi,ij,xgj->g', ket, dm, bra.conj()) + # mx = einsum('xy,ygi,ij,xgj->g', sx, ket, dm, bra.conj()) + # my = einsum('xy,ygi,ij,xgj->g', sy, ket, dm, bra.conj()) + # mz = einsum('xy,ygi,ij,xgj->g', sz, ket, dm, bra.conj()) + ket_a, ket_b = ket + bra_a, bra_b = bra + nao = min(ket_a.shape[-2], bra_a.shape[-2]) + ngrids = ket_a.shape[-1] + if hermi: + raa = cp.einsum('ip,ip->p', bra_a.real, ket_a[:nao].real) + raa+= cp.einsum('ip,ip->p', bra_a.imag, ket_a[:nao].imag) + rab = cp.einsum('ip,ip->p', bra_a.conj(), ket_b[:nao]) + rbb = cp.einsum('ip,ip->p', bra_b.real, ket_b[nao:].real) + rbb+= cp.einsum('ip,ip->p', bra_b.imag, ket_b[nao:].imag) + rho_m = cp.empty((4, ngrids)) + rho_m[0,:] = raa + rbb # rho + rho_m[1,:] = rab.real # mx + rho_m[2,:] = rab.imag # my + rho_m[3,:] = raa - rbb # mz + if bra_eq_ket: + rho_m[1,:] *= 2 + rho_m[2,:] *= 2 + else: + rba = cp.einsum('ip,ip->p', bra_b.conj(), ket_a[nao:]) + rho_m[1,:] += rba.real + rho_m[2,:] -= rba.imag + else: + raa = cp.einsum('ip,ip->p', bra_a.conj(), ket_a[:nao]) + rba = cp.einsum('ip,ip->p', bra_b.conj(), ket_a[nao:]) + rab = cp.einsum('ip,ip->p', bra_a.conj(), ket_b[:nao]) + rbb = cp.einsum('ip,ip->p', bra_b.conj(), ket_b[nao:]) + rho_m = cp.empty((4, ngrids), dtype=cp.complex128) + rho_m[0,:] = raa + rbb # rho + rho_m[1,:] = rab + rba # mx + rho_m[2,:] = (rba - rab) * 1j # my + rho_m[3,:] = raa - rbb # mz + return rho_m + + +class NumInt2C(lib.StreamObject, numint.LibXCMixin): + '''Numerical integration methods for 2-component basis (used by GKS)''' + _keys = {'gdftopt'} + to_gpu = utils.to_gpu + device = utils.device + + gdftopt = None + + # collinear schemes: + # 'col' (collinear, by default) + # 'ncol' (non-collinear, also known as locally collinear) + # 'mcol' (multi-collinear) + collinear = getattr(__config__, 'dft_numint_RnumInt_collinear', 'col') + spin_samples = getattr(__config__, 'dft_numint_RnumInt_spin_samples', 770) + collinear_thrd = getattr(__config__, 'dft_numint_RnumInt_collinear_thrd', 0.99) + collinear_samples = getattr(__config__, 'dft_numint_RnumInt_collinear_samples', 200) + + eval_ao = staticmethod(numint.eval_ao) + eval_rho = staticmethod(eval_rho) + + def build(self, mol, coords): + self.gdftopt = numint._GDFTOpt.from_mol(mol) + self.grid_blksize = None + self.non0ao_idx = {} + return self + + def eval_rho1(self, mol, ao, dm, screen_index=None, xctype='LDA', hermi=0, + with_lapl=True, cutoff=None, ao_cutoff=None, pair_mask=None, + verbose=None): + return self.eval_rho(mol, ao, dm, screen_index, xctype, hermi, + with_lapl, verbose=verbose) + + def eval_rho2(self, mol, ao, mo_coeff, mo_occ, non0tab=None, xctype='LDA', + with_lapl=True, verbose=None): + '''Calculate the electron density for LDA functional and the density + derivatives for GGA functional in the framework of 2-component basis. + ''' + if not isinstance(mo_occ, cp.ndarray): + mo_occ = cp.asarray(mo_occ) + if not isinstance(mo_coeff, cp.ndarray): + mo_coeff = cp.asarray(mo_coeff) + if self.collinear[0] in ('n', 'm'): + # TODO: + dm = cp.dot(mo_coeff * mo_occ, mo_coeff.conj().T) + hermi = 1 + rho = self.eval_rho(mol, ao, dm, non0tab, xctype, hermi, with_lapl, verbose) + return rho + + raise NotImplementedError(self.collinear) + + def cache_xc_kernel(self, mol, grids, xc_code, mo_coeff, mo_occ, spin=0, + max_memory=2000): + '''Compute the 0th order density, Vxc and fxc. They can be used in TDDFT, + DFT hessian module etc. + ''' + raise NotImplementedError("Kxc calculation is not supported.") + + def get_rho(self, mol, dm, grids, max_memory=2000): + '''Density in real space + ''' + nao = dm.shape[-1] // 2 + dm_a = dm[:nao,:nao].real + dm_b = dm[nao:,nao:].real + ni = self._to_numint1c() + return ni.get_rho(mol, dm_a+dm_b, grids, max_memory) + + _gks_mcol_vxc = _gks_mcol_vxc + _gks_mcol_fxc = _gks_mcol_fxc + + @lib.with_doc(numint.nr_rks.__doc__) + def nr_vxc(self, mol, grids, xc_code, dms, relativity=0, hermi=1, + max_memory=2000, verbose=None): + if not isinstance(dms, cp.ndarray): + dms = cp.asarray(dms) + if self.collinear[0] in ('m',): # mcol or ncol + opt = getattr(self, 'gdftopt', None) + if opt is None: + self.build(mol, grids.coords) + opt = self.gdftopt + assert dms.ndim == 2 + dms = cp.asarray(dms) + dms = opt.sort_orbitals_2c(dms, axis=[0,1]) + n, exc, vmat = self._gks_mcol_vxc(mol, grids, xc_code, dms, relativity, + hermi, max_memory, verbose) + vmat = opt.unsort_orbitals_2c(vmat, axis=[0,1]) + else: + raise NotImplementedError("Locally collinear and collinear is not implemented") + return n.sum(), exc, vmat + get_vxc = nr_gks_vxc = nr_vxc + + @lib.with_doc(numint.nr_nlc_vxc.__doc__) + def nr_nlc_vxc(self, mol, grids, xc_code, dm, spin=0, relativity=0, hermi=1, + max_memory=2000, verbose=None): + raise NotImplementedError('non-collinear nlc vxc') + + @lib.with_doc(numint.nr_rks_fxc.__doc__) + def nr_fxc(self, mol, grids, xc_code, dm0, dms, spin=0, relativity=0, hermi=0, + rho0=None, vxc=None, fxc=None, max_memory=2000, verbose=None): + raise NotImplementedError('non-collinear fxc') + get_fxc = nr_gks_fxc = nr_fxc + + def _init_xcfuns(self, xc_code, spin=0): + return numint._init_xcfuns(xc_code, spin) + eval_xc_eff = numint.eval_xc_eff + mcfun_eval_xc_adapter = mcfun_eval_xc_adapter + + block_loop = numint.NumInt.block_loop + reset = numint.NumInt.reset + + def _to_numint1c(self): + '''Converts to the associated class to handle collinear systems''' + return self.view(numint.NumInt) + + def to_cpu(self): + ni = numint2c.NumInt2C() + return ni \ No newline at end of file diff --git a/gpu4pyscf/dft/tests/test_gks.py b/gpu4pyscf/dft/tests/test_gks.py new file mode 100644 index 000000000..74a999f93 --- /dev/null +++ b/gpu4pyscf/dft/tests/test_gks.py @@ -0,0 +1,147 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from pyscf import gto +from pyscf import lib +from gpu4pyscf.dft import gks +from pyscf.dft import gks as gks_cpu +try: + import mcfun +except ImportError: + mcfun = None + + +def setUpModule(): + global mol, mol1 + mol = gto.Mole() + mol.atom = ''' + O 0 0 0 + H 0. -0.757 0.587 + H 0. 0.757 0.587''' + mol.spin = None + mol.basis = 'sto3g' + mol.output = '/dev/null' + mol.build() + + mol1 = gto.M( + atom = ''' + O 0 0 0 + H 0. -0.757 0.587 + H 0. 0.757 0.587''', + charge = 1, + spin = 1, + basis = 'sto3g', + output = '/dev/null' + ) + + +def tearDownModule(): + global mol, mol1 + mol.stdout.close() + mol1.stdout.close() + del mol, mol1 + + +class KnownValues(unittest.TestCase): + def test_mcol_gks_lda(self): + + mf_gpu = gks.GKS(mol) + mf_gpu.xc = 'lda,' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 6 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -74.0600297733097, 6) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -26.421986983504258, 5) + + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'lda,vwn' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -74.3741809222222, 6) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.63368769213053, 5) + + def test_mcol_gks_gga(self): + + mf_gpu = gks.GKS(mol) + mf_gpu.xc = 'pbe' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 6 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -75.2256398121708, 6) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -26.81184613393452, 5) + + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'pbe' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -74.869954771937, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.92164954706164, 5) # pyscf result + + def test_mcol_gks_hyb(self): + mf_gpu = gks.GKS(mol) + mf_gpu.xc = 'b3lyp' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 6 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -75.312587317089, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.2469582128507, 5) # pyscf result + + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'b3lyp' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -74.9528036305753, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -28.49145406025193, 5) # pyscf result + + def test_mcol_gks_mgga(self): + mf_gpu = gks.GKS(mol) + mf_gpu.xc = 'm06l' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 6 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -75.3053691716776, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.03099891671804, 5) # pyscf result + + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'm06l' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + self.assertAlmostEqual(eks4_gpu, -74.9468853267496, 6) # pyscf result + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -28.188215296679516, 5) # pyscf result + + @unittest.skipIf(mcfun is None, "mcfun library not found.") + def test_to_cpu(self): + mf_gpu = gks.GKS(mol1) + mf_gpu.xc = 'lda,vwn' + mf_gpu.collinear = 'mcol' + mf_gpu._numint.spin_samples = 50 + eks4_gpu = mf_gpu.kernel() + + mf_cpu = mf_gpu.to_cpu() + eks4_cpu = mf_cpu.kernel() + + self.assertAlmostEqual(eks4_gpu, eks4_cpu, 6) + self.assertAlmostEqual(eks4_gpu, -74.3741809222222, 6) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), lib.fp(mf_cpu.mo_energy), 5) + self.assertAlmostEqual(lib.fp(mf_gpu.mo_energy.get()), -27.63368769213053, 5) + + +if __name__ == "__main__": + print("Test GKS") + unittest.main() diff --git a/gpu4pyscf/dft/tests/test_numint2c.py b/gpu4pyscf/dft/tests/test_numint2c.py new file mode 100644 index 000000000..4eb5ff678 --- /dev/null +++ b/gpu4pyscf/dft/tests/test_numint2c.py @@ -0,0 +1,392 @@ +# Copyright 2021-2024 The PySCF Developers. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import pyscf +import cupy +from pyscf import lib, scf +from pyscf.dft.numint2c import NumInt2C as pyscf_numint2c +from pyscf.dft.numint import NumInt as pyscf_numint +from gpu4pyscf.dft import Grids +from gpu4pyscf.dft import numint2c +from gpu4pyscf.dft import numint +from pyscf.dft import numint2c as pyscf_numint2c_file +from gpu4pyscf.dft.numint2c import NumInt2C +from gpu4pyscf.dft.numint import NumInt +from gpu4pyscf import dft +from gpu4pyscf.dft import gen_grid +try: + import mcfun +except ImportError: + mcfun = None + +def setUpModule(): + global mol, grids_cpu, grids_gpu, dm, dm0, dm1, mo_occ, mo_coeff + mol = pyscf.M( + atom = ''' +O 0.000000 0.000000 0.117790 +H 0.000000 0.755453 -0.471161 +H 0.000000 -0.755453 -0.471161''', + basis = 'ccpvdz', + charge = 1, + spin = 1, # = 2S = spin_up - spin_down + output = '/dev/null' + ) + + np.random.seed(2) + mf = scf.GHF(mol) + mf.kernel() + dm1 = mf.make_rdm1().copy() + dm = dm1 + mo_coeff = mf.mo_coeff + mo_occ = mf.mo_occ + dm0 = (mo_coeff*mo_occ).dot(mo_coeff.T) + + grids_gpu = Grids(mol) + grids_gpu.level = 1 + grids_gpu.build() + + grids_cpu = grids_gpu.to_cpu() + grids_cpu.weights = cupy.asnumpy(grids_gpu.weights) + grids_cpu.coords = cupy.asnumpy(grids_gpu.coords) + +def tearDownModule(): + global mol, grids_cpu, grids_gpu + mol.stdout.close() + del mol, grids_cpu, grids_gpu + +LDA = 'LDA_C_VWN' +GGA_PBE = 'GGA_C_PBE' +MGGA_M06 = 'MGGA_C_M06' + +class KnownValues(unittest.TestCase): + + def test_eval_rho(self): + np.random.seed(1) + dm = np.random.random(dm0.shape) + np.random.random(dm0.shape)*1.0j + dm = dm + dm.conj().T + ni_gpu = NumInt2C() + ni_cpu = pyscf_numint2c() + for xctype in ('LDA', 'GGA', 'MGGA'): + deriv = 1 + if xctype == 'LDA': + deriv = 0 + ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) + ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) + + rho = ni_gpu.eval_rho(mol, ao_gpu, dm, xctype=xctype, hermi=0, with_lapl=False) + ref = ni_cpu.eval_rho(mol, ao_cpu, dm, xctype=xctype, hermi=0, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + rho = ni_gpu.eval_rho(mol, ao_gpu, dm0, xctype=xctype, hermi=1, with_lapl=False) + ref = ni_cpu.eval_rho(mol, ao_cpu, dm0, xctype=xctype, hermi=1, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + def test_eval_rho2(self): + np.random.seed(1) + mo_coeff_test = np.random.random(mo_coeff.shape) + np.random.random(mo_coeff.shape)*1.0j + ni_gpu = NumInt2C() + ni_gpu.collinear='m' + ni_cpu = pyscf_numint2c() + ni_cpu.collinear='m' + for xctype in ('LDA', 'GGA', 'MGGA'): + deriv = 1 + if xctype == 'LDA': + deriv = 0 + ao_gpu = ni_gpu.eval_ao(mol, grids_gpu.coords, deriv=deriv, transpose=False) + ao_cpu = ni_cpu.eval_ao(mol, grids_cpu.coords, deriv=deriv) + + rho = ni_gpu.eval_rho2(mol, ao_gpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) + ref = ni_cpu.eval_rho2(mol, ao_cpu, mo_coeff_test, mo_occ, xctype=xctype, with_lapl=False) + self.assertAlmostEqual(abs(rho[...,:grids_cpu.size].get() - ref).max(), 0, 10) + + def test_get_rho(self): + ni_gpu = NumInt2C() + ni_gpu.collinear='m' + + np.random.seed(1) + ni_gpu_1c = NumInt() + dm_test = np.random.random(dm0.shape) + np.random.random(dm0.shape)*1.0j + dm_test = dm_test + dm_test.T.conj() + + n2c = dm_test.shape[0] + nao = n2c//2 + dm_1c_test = dm_test[:nao,:nao] + dm_test[nao:,nao:] + rho_gpu = ni_gpu.get_rho(mol, dm_test, grids_gpu) + rho_1c_gpu = ni_gpu_1c.get_rho(mol, dm_1c_test.real, grids_gpu) + self.assertAlmostEqual(abs(rho_gpu.get() - rho_1c_gpu.get()).max(), 0, 10) + + @unittest.skipIf(mcfun is None, "mcfun library not found.") + def test_eval_xc_eff(self): + ni_gpu = NumInt2C() + ni_gpu.collinear='m' + ni_cpu = pyscf_numint2c() + ni_cpu.collinear='m' + np.random.seed(1) + dm = dm0*1.0 + dm0 * 0.1j + dm = dm + dm.T.conj() + for xc_code in (LDA, GGA_PBE, MGGA_M06): + n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, xc_code, dm) + n_cpu, exc_cpu, vmat_cpu = ni_cpu.nr_vxc(mol, grids_cpu, xc_code, dm) + self.assertAlmostEqual(abs(n_gpu.get() - n_cpu).max(), 0, 10) + self.assertAlmostEqual(abs(exc_gpu.get() - exc_cpu).max(), 0, 10) + self.assertAlmostEqual(abs(vmat_gpu.get() - vmat_cpu).max(), 0, 10) + + def test_eval_xc_eff_fp(self): + ni_gpu = NumInt2C() + ni_gpu.collinear='m' + np.random.seed(1) + dm = dm0*1.0 + dm0 * 0.1j + dm = dm + dm.T.conj() + + n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, LDA, dm) + self.assertAlmostEqual(abs(n_gpu.get() - 17.9999262659497).max(), 0, 10) + self.assertAlmostEqual(abs(exc_gpu.get() - -1.310501342423071).max(), 0, 10) + self.assertAlmostEqual(abs(lib.fp(vmat_gpu.get()) + - (-0.20448306536588537-6.75460139752253e-21j)).max(), 0, 10) + + n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, GGA_PBE, dm) + self.assertAlmostEqual(abs(n_gpu.get() - 17.9999262659497).max(), 0, 10) + self.assertAlmostEqual(abs(exc_gpu.get() - -0.7237150857425112).max(), 0, 10) + self.assertAlmostEqual(abs(lib.fp(vmat_gpu.get()) + - (-0.05446425800187435-4.486282070082083e-21j)).max(), 0, 10) + + n_gpu, exc_gpu, vmat_gpu = ni_gpu.nr_vxc(mol, grids_gpu, MGGA_M06, dm) + self.assertAlmostEqual(abs(n_gpu.get() - 17.9999262659497).max(), 0, 10) + self.assertAlmostEqual(abs(exc_gpu.get() - -0.7703982586705045).max(), 0, 10) + self.assertAlmostEqual(abs(lib.fp(vmat_gpu.get()) + - (-0.18688247306409317+7.50400133342109e-20j)).max(), 0, 10) + + @unittest.skipIf(mcfun is None, "mcfun library not found.") + def test_mcol_lda_vxc_mat(self): + xc_code = 'lda,' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .0001j + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=0, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='LDA', hermi=1, with_lapl=False) + ni_CPU = pyscf_numint2c() + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + ni_CPU.collinear = 'mcol' + eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='LDA')[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_cpu = pyscf_numint2c_file._mcol_lda_vxc_mat(mol, ao.transpose(1,0).get(), weight, + rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) + self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + + def test_mcol_lda_vxc_mat_fp(self): + xc_code = 'lda,' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .0001j + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=0, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='LDA', hermi=1, with_lapl=False) + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='LDA')[1] + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_lda_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + self.assertAlmostEqual(abs(lib.fp(v0_gpu.get()) - + (-9.596802282363786+0.000531850990220307j)).max(), 0, 13) + self.assertAlmostEqual(abs(lib.fp(v1_gpu.get()) - + (-9.596802282363786+0.000531850990220307j)).max(), 0, 13) + + @unittest.skipIf(mcfun is None, "mcfun library not found.") + def test_mcol_gga_vxc_mat(self): + xc_code = 'pbe,' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .001j + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='GGA', hermi=1, with_lapl=False) + ni_CPU = pyscf_numint2c() + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + ni_CPU.collinear = 'mcol' + eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='GGA')[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_cpu = pyscf_numint2c_file._mcol_gga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, + rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) + self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + + def test_mcol_gga_vxc_mat_fp(self): + xc_code = 'pbe,' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + np.random.rand(n2c, n2c) * .001j + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='GGA', hermi=1, with_lapl=False) + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='GGA')[1] + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_gga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + self.assertAlmostEqual(abs(lib.fp(v1_gpu.get()) - + (-9.624252836509262+0.005273283360578891j)).max(), 0, 13) + self.assertAlmostEqual(abs(lib.fp(v0_gpu.get()) - + (-9.624252836509262+0.005273283360578891j)).max(), 0, 13) + + @unittest.skipIf(mcfun is None, "mcfun library not found.") + def test_mcol_mgga_vxc_mat(self): + xc_code = 'tpss' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='MGGA', hermi=1, with_lapl=False) + ni_CPU = pyscf_numint2c() + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + ni_CPU.collinear = 'mcol' + eval_xc_cpu = ni_CPU.mcfun_eval_xc_adapter(xc_code) + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_cpu = eval_xc_cpu(xc_code, rho.get(), deriv=1)[1] + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='MGGA')[1] + mask = np.ones((8, mol.nbas), dtype=np.uint8) + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_cpu = pyscf_numint2c_file._mcol_mgga_vxc_mat(mol, ao.transpose(0,2,1).get(), weight, + rho.get(), vxc_cpu.copy(), mask, shls_slice, ao_loc, 0) + v0_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + self.assertAlmostEqual(abs(v0_gpu.get() - v1_gpu.get()).max(), 0, 13) + self.assertAlmostEqual(abs(v0_gpu.get() - v0_cpu).max(), 0, 13) + + def test_mcol_mgga_vxc_mat_fp(self): + xc_code = 'tpss' + + nao = mol.nao + n2c = nao * 2 + ao_loc = mol.ao_loc + np.random.seed(12) + dm = np.random.rand(n2c, n2c) * .001 + dm += np.eye(n2c) + dm = dm + dm.T + ngrids = 8 + coords = np.random.rand(ngrids,3) + weight = np.random.rand(ngrids) + + ao = numint.eval_ao(mol, coords, deriv=1, transpose=False) + rho = numint2c.eval_rho(mol, ao, dm, xctype='MGGA', hermi=1, with_lapl=False) + ni_GPU = NumInt2C() + ni_GPU.collinear = 'mcol' + eval_xc_GPU = ni_GPU.mcfun_eval_xc_adapter(xc_code) + vxc_gpu = eval_xc_GPU(xc_code, rho, deriv=1, xctype='MGGA')[1] + mask_gpu = cupy.array([i for i in range(mol.nbas)]) + + shls_slice = (0, mol.nbas) + v0_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 0) + v1_gpu = numint2c._mcol_mgga_vxc_mat(mol, ao, cupy.asarray(weight), rho, vxc_gpu.copy(), + mask_gpu, shls_slice, ao_loc, 1) + v1_gpu = v1_gpu + v1_gpu.conj().T + self.assertAlmostEqual(abs(lib.fp(v1_gpu.get()) + - (-11.359687631112195+6.13828986953566e-22j)).max(), 0, 13) + self.assertAlmostEqual(abs(lib.fp(v0_gpu.get()) + - (-11.359687631112195+6.13828986953566e-22j)).max(), 0, 13) + + +if __name__ == "__main__": + print("Full Tests for dft numint2c") + unittest.main() + diff --git a/gpu4pyscf/scf/ghf.py b/gpu4pyscf/scf/ghf.py index cd8336d7f..a2fcc5613 100644 --- a/gpu4pyscf/scf/ghf.py +++ b/gpu4pyscf/scf/ghf.py @@ -22,6 +22,15 @@ from gpu4pyscf.lib.cupy_helper import asarray, return_cupy_array from gpu4pyscf.lib import utils +def _from_rhf_init_dm(dma, breaksym=True): + dma = dma * .5 + dm = block_diag(dma, dma) + if breaksym: + nao = dma.shape[0] + idx, idy = cp.diag_indices(nao) + dm[idx+nao,idy] = dm[idx,idy+nao] = dma.diagonal() * .05 + return dm + class GHF(hf.SCF): to_gpu = utils.to_gpu device = utils.device @@ -55,7 +64,7 @@ class GHF(hf.SCF): def get_init_guess(self, mol=None, key='minao', **kwargs): dma = hf.RHF.get_init_guess(self, mol, key, **kwargs) - return block_diag(dma, dma) + return _from_rhf_init_dm(dma) def get_hcore(self, mol=None): if mol is None: mol = self.mol @@ -85,6 +94,7 @@ def get_jk(self, mol=None, dm=None, hermi=0, with_j=True, with_k=True, return vj, vk def get_j(self, mol=None, dm=None, hermi=1, omega=None): + assert hermi == 1, 'hermi must be 1' dm = asarray(dm) dm_shape = dm.shape nso = dm.shape[-1] @@ -92,8 +102,8 @@ def get_j(self, mol=None, dm=None, hermi=1, omega=None): dm = dm.reshape(-1,nso,nso) n_dm = dm.shape[0] dm = dm[:,:nao,:nao] + dm[:,nao:,nao:] - jtmp = hf.SCF.get_j(self, mol, dm, hermi, omega) - vj = cp.zeros((n_dm,nso,nso)) + jtmp = hf.SCF.get_j(self, mol, dm.real, hermi, omega) + vj = cp.zeros((n_dm,nso,nso), dtype=dm.dtype) vj[:,:nao,:nao] = vj[:,nao:,nao:] = jtmp return vj.reshape(dm_shape) @@ -108,14 +118,27 @@ def get_k(self, mol=None, dm=None, hermi=1, omega=None): dmbb = dm[:,nao:,nao:] dmab = dm[:,:nao,nao:] dmba = dm[:,nao:,:nao] - dm = cp.vstack((dmaa, dmbb, dmab, dmba)) - ktmp = super().get_k(mol, dm, hermi=0, omega=omega) - ktmp = ktmp.reshape(4,n_dm,nao,nao) - vk = cp.zeros((n_dm,nso,nso), dm.dtype) - vk[:,:nao,:nao] = ktmp[0] - vk[:,nao:,nao:] = ktmp[1] - vk[:,:nao,nao:] = ktmp[2] - vk[:,nao:,:nao] = ktmp[3] + if dm.dtype == cp.complex128: + dm_real = cp.vstack((dmaa.real, dmbb.real, dmab.real, dmba.real)) + ktmp_real = super().get_k(mol, dm_real, hermi=0, omega=omega) + ktmp_real = ktmp_real.reshape(4,n_dm,nao,nao) + dm_imag = cp.vstack((dmaa.imag, dmbb.imag, dmab.imag, dmba.imag)) + ktmp_imag = super().get_k(mol, dm_imag, hermi=0, omega=omega) + ktmp_imag = ktmp_imag.reshape(4,n_dm,nao,nao) + vk = cp.zeros((n_dm,nso,nso), dm.dtype) + vk[:,:nao,:nao] = ktmp_real[0] + 1j*ktmp_imag[0] + vk[:,nao:,nao:] = ktmp_real[1] + 1j*ktmp_imag[1] + vk[:,:nao,nao:] = ktmp_real[2] + 1j*ktmp_imag[2] + vk[:,nao:,:nao] = ktmp_real[3] + 1j*ktmp_imag[3] + else: + dm = cp.vstack((dmaa, dmbb, dmab, dmba)) + ktmp = super().get_k(mol, dm, hermi=0, omega=omega) + ktmp = ktmp.reshape(4,n_dm,nao,nao) + vk = cp.zeros((n_dm,nso,nso), dm.dtype) + vk[:,:nao,:nao] = ktmp[0] + vk[:,nao:,nao:] = ktmp[1] + vk[:,:nao,nao:] = ktmp[2] + vk[:,nao:,:nao] = ktmp[3] return vk.reshape(dm_shape) def get_veff(mf, mol=None, dm=None, dm_last=None, vhf_last=None, hermi=1): diff --git a/gpu4pyscf/scf/hf.py b/gpu4pyscf/scf/hf.py index f384f78dd..fbce65008 100644 --- a/gpu4pyscf/scf/hf.py +++ b/gpu4pyscf/scf/hf.py @@ -145,6 +145,8 @@ def get_fock(mf, h1e=None, s1e=None, vhf=None, dm=None, cycle=-1, diis=None, if dm is None: dm = mf.make_rdm1() s1e = cupy.asarray(s1e) dm = cupy.asarray(dm) + if dm.dtype == cupy.complex128: + s1e = s1e.astype(cupy.complex128) if diis_start_cycle is None: diis_start_cycle = mf.diis_start_cycle if level_shift_factor is None: @@ -661,6 +663,8 @@ def eig(self, fock, s): x = None if hasattr(self, 'overlap_canonical_decomposed_x') and self.overlap_canonical_decomposed_x is not None: x = cupy.asarray(self.overlap_canonical_decomposed_x) + if fock.dtype == cupy.complex128: + s = s.astype(cupy.complex128) if x is None: mo_energy, mo_coeff = eigh(fock, s) return mo_energy, mo_coeff diff --git a/gpu4pyscf/scf/tests/test_ghf.py b/gpu4pyscf/scf/tests/test_ghf.py index 3bea9800a..d82b77e2e 100644 --- a/gpu4pyscf/scf/tests/test_ghf.py +++ b/gpu4pyscf/scf/tests/test_ghf.py @@ -14,21 +14,108 @@ import unittest import pyscf +import numpy +import cupy as cp + + +def setUpModule(): + global mol, mol1 + mol = pyscf.gto.Mole() + mol.atom = ''' + O 0 0 0 + H 0. -0.757 0.587 + H 0. 0.757 0.587''' + mol.spin = None + mol.basis = 'cc-pvdz' + mol.verbose = 7 + mol.output = '/dev/null' + mol.build() + + mol1 = pyscf.gto.M( + verbose = 0, + atom = ''' + O 0 0 0 + H 0. -0.757 0.587 + H 0. 0.757 0.587''', + charge = 1, + spin = 1, + basis = 'cc-pvdz', + output = '/dev/null') + + +def tearDownModule(): + global mol, mol1 + mol.stdout.close() + mol1.stdout.close() + del mol, mol1 class KnownValues(unittest.TestCase): def test_ghf_scf(self): - mol = pyscf.M(atom=''' -O 0 0 0 -H 0 -0.757 0.587 -H 0 0.757 0.587''', basis = 'cc-pvdz') mf = mol.GHF().to_gpu() assert mf.device == 'gpu' e_tot = mf.kernel() e_ref = mf.to_cpu().kernel() assert abs(e_tot - e_ref) < 1e-5 - #def test_ghf_x2c(self): - # pass + def test_ghf_scf_complex_dm(self): + mf = mol.GHF().to_gpu() + assert mf.device == 'gpu' + e_tot = mf.kernel() + dm1 = mf.make_rdm1() + numpy.random.seed(1) + n2c = mol.nao_nr() * 2 + dm_perturb = numpy.random.random((n2c,n2c)) + 1j*numpy.random.random((n2c,n2c)) + dm_perturb = dm_perturb + dm_perturb.T.conj() + dm_perturb = cp.asarray(dm_perturb) + dm_1 = dm1 + dm_perturb*0.001 + e_ref = mf.to_cpu().kernel(dm_1.get()) + e_tot = mf.kernel(dm_1) + assert abs(e_tot - e_ref) < 1e-5 + + def test_get_jk_complex(self): + mf = mol.GHF().to_gpu() + nao = mol.nao_nr()*2 + numpy.random.seed(1) + d1 = numpy.random.random((nao,nao)) + 1j*numpy.random.random((nao,nao)) + d = d1 + d1.T.conj() + d_real = d.real + vj_gpu = mf.get_j(mol, d_real) + vk_gpu = mf.get_k(mol, d) + + mf_cpu = mf.to_cpu() + vj_cpu = mf_cpu.get_j(mol, d_real) + vk_cpu = mf_cpu.get_k(mol, d) + + assert numpy.allclose(vj_gpu, vj_cpu) + assert numpy.allclose(vk_gpu, vk_cpu) + + def test_get_jk_real(self): + mf = mol.GHF().to_gpu() + + nao = mol.nao_nr()*2 + numpy.random.seed(1) + d1 = numpy.random.random((nao,nao)) + d = d1 + d1.T.conj() + d_real = d.real + vj_gpu = mf.get_j(mol, d_real) + vk_gpu = mf.get_k(mol, d) + + mf_cpu = mf.to_cpu() + vj_cpu = mf_cpu.get_j(mol, d_real) + vk_cpu = mf_cpu.get_k(mol, d) + + assert numpy.allclose(vj_gpu, vj_cpu) + assert numpy.allclose(vk_gpu, vk_cpu) + + def test_to_cpu(self): + mf = mol.GHF().to_gpu() + assert mf.device == 'gpu' + e_tot = mf.kernel() + mf = mf.to_cpu() + assert getattr(mf, 'device', None) is None + e_ref = mf.kernel() + assert abs(e_tot - e_ref) < 1e-5 + if __name__ == "__main__": print("Full Tests for ghf")