Skip to content

Commit 65dcf10

Browse files
wxj6000sunqm
authored and committed
copy instead of casting
1 parent cbb33a4 commit 65dcf10

File tree

1 file changed

+61
-14
lines changed

1 file changed

+61
-14
lines changed

gpu4pyscf/lib/gdft/libxc.cu

Lines changed: 61 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,47 @@ static void _add_out(double *out, const double *buf, double coef, int np, int di
6868

6969
extern "C" {
7070

71+
/*
 * Fill an LDA output-parameter struct from a GGA one.
 *
 * Transfers the five members shared by both structs (zk, vrho, v2rho2,
 * v3rho3, v4rho4) by plain assignment, in ascending derivative order.
 * Members are copied as-is; presumably they are device-buffer pointers
 * (zk is handed to cudaMemset elsewhere in this file), in which case the
 * two structs end up referring to the same buffers -- TODO confirm.
 *
 * gga : source struct, read only.
 * lda : destination struct, overwritten member by member.
 */
__host__
void copy_gga2lda(xc_gga_out_params *gga, xc_lda_out_params *lda){
    const xc_gga_out_params &src = *gga;
    xc_lda_out_params &dst = *lda;

    dst.zk     = src.zk;      /* energy term        */
    dst.vrho   = src.vrho;    /* 1st-order term     */
    dst.v2rho2 = src.v2rho2;  /* 2nd-order term     */
    dst.v3rho3 = src.v3rho3;  /* 3rd-order term     */
    dst.v4rho4 = src.v4rho4;  /* 4th-order term     */
}
79+
80+
/*
 * Fill an LDA output-parameter struct from a meta-GGA one.
 *
 * Transfers the five members shared by both structs (zk, vrho, v2rho2,
 * v3rho3, v4rho4) by plain assignment, in ascending derivative order.
 * Members are copied as-is; presumably they are device-buffer pointers,
 * so no underlying array is duplicated -- TODO confirm against the
 * struct definitions.
 *
 * mgga : source struct, read only.
 * lda  : destination struct, overwritten member by member.
 */
__host__
void copy_mgga2lda(xc_mgga_out_params *mgga, xc_lda_out_params *lda){
    const xc_mgga_out_params &src = *mgga;
    xc_lda_out_params &dst = *lda;

    dst.zk     = src.zk;
    dst.vrho   = src.vrho;
    dst.v2rho2 = src.v2rho2;
    dst.v3rho3 = src.v3rho3;
    dst.v4rho4 = src.v4rho4;
}
88+
89+
/*
 * Fill a GGA output-parameter struct from a meta-GGA one.
 *
 * Transfers every rho/sigma member that the GGA struct exposes, grouped
 * by derivative order (0 through 4), by plain assignment. Members of the
 * meta-GGA struct that are not assigned here are left untouched.
 * Members are copied as-is; presumably they are device-buffer pointers,
 * so no underlying array is duplicated -- TODO confirm against the
 * struct definitions.
 *
 * mgga : source struct, read only.
 * gga  : destination struct, overwritten member by member.
 */
__host__
void copy_mgga2gga(xc_mgga_out_params *mgga, xc_gga_out_params *gga){
    const xc_mgga_out_params &src = *mgga;
    xc_gga_out_params &dst = *gga;

    /* order 0 */
    dst.zk = src.zk;

    /* order 1 */
    dst.vrho   = src.vrho;
    dst.vsigma = src.vsigma;

    /* order 2 */
    dst.v2rho2     = src.v2rho2;
    dst.v2rhosigma = src.v2rhosigma;
    dst.v2sigma2   = src.v2sigma2;

    /* order 3 */
    dst.v3rho3      = src.v3rho3;
    dst.v3rho2sigma = src.v3rho2sigma;
    dst.v3rhosigma2 = src.v3rhosigma2;
    dst.v3sigma3    = src.v3sigma3;

    /* order 4 */
    dst.v4rho4       = src.v4rho4;
    dst.v4rho3sigma  = src.v4rho3sigma;
    dst.v4rho2sigma2 = src.v4rho2sigma2;
    dst.v4rhosigma3  = src.v4rhosigma3;
    dst.v4sigma4     = src.v4sigma4;
}
111+
71112
__host__
72113
void _memset_lda(xc_lda_out_params *out, int order, int np, const xc_dimensions *dim){
73114
if(order >= 0) cudaMemset(out->zk, 0, sizeof(double)*np*dim->zk);
@@ -225,20 +266,20 @@ int _xc_lda(const xc_func_type *func, int np, int order, const double *rho,
225266
__host__
226267
int _xc_gga(const xc_func_type *func, int np, int order, const double *rho, const double *sigma,
227268
xc_gga_out_params *out){
228-
269+
229270
if(func->info->gga == NULL){
230271
fprintf(stderr, "Nested xc functional is not supported\n");
231272
return 1;
232273
}
233-
274+
234275
//xc_dimensions* dim = (xc_dimensions *) malloc(sizeof(xc_dimensions));
235276
//memcpy(dim, &(func->dim), sizeof(xc_dimensions));
236277
//DEVICE_INIT(xc_dimensions, dim, &(func->dim), 1);
237278
if(order < 0) return 0;
238279
const xc_dimensions *dim = &(func->dim);
239280
_memset_gga(out, order, np, dim);
240281
//FREE(dim);
241-
282+
242283
cudaError_t err = cudaGetLastError();
243284
if (err != cudaSuccess) {
244285
fprintf(stderr, "CUDA Error of memset_gga: %s\n", cudaGetErrorString(err));
@@ -271,7 +312,7 @@ int _xc_mgga(const xc_func_type *func, int np, int order, const double *rho, con
271312
fprintf(stderr, "Nested xc functional is not supported\n");
272313
return 1;
273314
}
274-
315+
275316
//xc_dimensions* dim = (xc_dimensions *) malloc(sizeof(xc_dimensions));
276317
//memcpy(dim, &(func->dim), sizeof(xc_dimensions));
277318
//DEVICE_INIT(xc_dimensions, dim, &(func->dim), 1);
@@ -310,7 +351,7 @@ int GDFT_xc_lda(cudaStream_t stream,
310351
xc_lda_out_params *out, xc_lda_out_params *buf)
311352
{
312353
int ierr = 0;
313-
354+
314355
int order = -1;
315356
if(out->zk != NULL) order = 0;
316357
if(out->vrho != NULL) order = 1;
@@ -338,11 +379,11 @@ int GDFT_xc_lda(cudaStream_t stream,
338379

339380
dim3 threads(THREADS);
340381
dim3 blocks((np+THREADS-1)/THREADS);
341-
382+
342383
for (int ii=0; ii< n_func_aux; ii++){
343384
xc_func_type *aux = func->func_aux[ii];
344385
double coef = func->mix_coef[ii];
345-
386+
346387
/* Evaluate the functional */
347388
switch(aux->info->family){
348389
case XC_FAMILY_LDA:{
@@ -396,13 +437,15 @@ int GDFT_xc_gga(cudaStream_t stream,
396437
/* Evaluate the functional */
397438
switch(aux->info->family){
398439
case XC_FAMILY_LDA:{
399-
xc_lda_out_params *out_lda = (xc_lda_out_params *)(buf);
440+
xc_lda_out_params *out_lda = (xc_lda_out_params *)malloc(sizeof(xc_lda_out_params));
441+
copy_gga2lda(buf, out_lda);
400442
ierr = _xc_lda(aux, np, order, rho, out_lda);
401443
ADD_LDA;
444+
free(out_lda);
402445
break;
403446
}
404447
case XC_FAMILY_GGA:{
405-
xc_gga_out_params *out_gga = (xc_gga_out_params *)(buf);
448+
xc_gga_out_params *out_gga = buf;
406449
ierr = _xc_gga(aux, np, order, rho, sigma, out_gga);
407450
ADD_GGA;
408451
break;
@@ -447,27 +490,31 @@ int GDFT_xc_mgga(cudaStream_t stream,
447490

448491
dim3 threads(THREADS);
449492
dim3 blocks((np+THREADS-1)/THREADS);
450-
493+
451494
for (int ii=0; ii< n_func_aux; ii++){
452495
xc_func_type *aux = func->func_aux[ii];
453496
double coef = func->mix_coef[ii];
454-
497+
455498
/* Evaluate the functional */
456499
switch(aux->info->family){
457500
case XC_FAMILY_LDA:{
458-
xc_lda_out_params *out_lda = (xc_lda_out_params *)(buf);
501+
xc_lda_out_params *out_lda = (xc_lda_out_params *)malloc(sizeof(xc_lda_out_params));
502+
copy_mgga2lda(buf, out_lda);
459503
ierr = _xc_lda(aux, np, order, rho, out_lda);
460504
ADD_LDA;
505+
free(out_lda);
461506
break;
462507
}
463508
case XC_FAMILY_GGA:{
464-
xc_gga_out_params *out_gga = (xc_gga_out_params *)(buf);
509+
xc_gga_out_params *out_gga = (xc_gga_out_params *) malloc(sizeof(xc_gga_out_params));
510+
copy_mgga2gga(buf, out_gga);
465511
ierr = _xc_gga(aux, np, order, rho, sigma, out_gga);
466512
ADD_GGA;
513+
free(out_gga);
467514
break;
468515
}
469516
case XC_FAMILY_MGGA:{
470-
xc_mgga_out_params *out_mgga = (xc_mgga_out_params *)(buf);
517+
xc_mgga_out_params *out_mgga = buf;
471518
ierr = _xc_mgga(aux, np, order, rho, sigma, lapl, tau, out_mgga);
472519
ADD_MGGA;
473520
break;

0 commit comments

Comments
 (0)