@@ -68,6 +68,47 @@ static void _add_out(double *out, const double *buf, double coef, int np, int di
6868
6969extern " C" {
7070
/*
 * Fill an LDA output-parameter struct from a GGA one.
 *
 * Copies the five density-only members (zk, vrho, v2rho2, v3rho3, v4rho4)
 * from *gga into *lda by plain assignment. Nothing is allocated here; where
 * a member is a pointer (zk is used as one elsewhere in this file), both
 * structs refer to the same buffer afterwards.
 */
__host__
void copy_gga2lda(xc_gga_out_params *gga, xc_lda_out_params *lda)
{
/* One assignment per member, in increasing derivative order. */
#define GGA2LDA_FIELD(member) lda->member = gga->member
  GGA2LDA_FIELD(zk);
  GGA2LDA_FIELD(vrho);
  GGA2LDA_FIELD(v2rho2);
  GGA2LDA_FIELD(v3rho3);
  GGA2LDA_FIELD(v4rho4);
#undef GGA2LDA_FIELD
}
79+
/*
 * Fill an LDA output-parameter struct from a meta-GGA one.
 *
 * Copies the five density-only members (zk, vrho, v2rho2, v3rho3, v4rho4)
 * from *mgga into *lda by plain assignment. Nothing is allocated here; where
 * a member is a pointer (zk is used as one elsewhere in this file), both
 * structs refer to the same buffer afterwards.
 */
__host__
void copy_mgga2lda(xc_mgga_out_params *mgga, xc_lda_out_params *lda)
{
/* One assignment per member, in increasing derivative order. */
#define MGGA2LDA_FIELD(member) lda->member = mgga->member
  MGGA2LDA_FIELD(zk);
  MGGA2LDA_FIELD(vrho);
  MGGA2LDA_FIELD(v2rho2);
  MGGA2LDA_FIELD(v3rho3);
  MGGA2LDA_FIELD(v4rho4);
#undef MGGA2LDA_FIELD
}
88+
/*
 * Fill a GGA output-parameter struct from a meta-GGA one.
 *
 * Copies every member that involves only rho and sigma (energy plus first
 * through fourth derivatives) from *mgga into *gga by plain assignment.
 * Nothing is allocated here; where a member is a pointer (zk is used as one
 * elsewhere in this file), both structs refer to the same buffer afterwards.
 */
__host__
void copy_mgga2gga(xc_mgga_out_params *mgga, xc_gga_out_params *gga)
{
#define MGGA2GGA_FIELD(member) gga->member = mgga->member
  /* energy */
  MGGA2GGA_FIELD(zk);

  /* first derivatives */
  MGGA2GGA_FIELD(vrho);
  MGGA2GGA_FIELD(vsigma);

  /* second derivatives */
  MGGA2GGA_FIELD(v2rho2);
  MGGA2GGA_FIELD(v2rhosigma);
  MGGA2GGA_FIELD(v2sigma2);

  /* third derivatives */
  MGGA2GGA_FIELD(v3rho3);
  MGGA2GGA_FIELD(v3rho2sigma);
  MGGA2GGA_FIELD(v3rhosigma2);
  MGGA2GGA_FIELD(v3sigma3);

  /* fourth derivatives */
  MGGA2GGA_FIELD(v4rho4);
  MGGA2GGA_FIELD(v4rho3sigma);
  MGGA2GGA_FIELD(v4rho2sigma2);
  MGGA2GGA_FIELD(v4rhosigma3);
  MGGA2GGA_FIELD(v4sigma4);
#undef MGGA2GGA_FIELD
}
111+
71112__host__
72113void _memset_lda (xc_lda_out_params *out, int order, int np, const xc_dimensions *dim){
73114 if (order >= 0 ) cudaMemset (out->zk , 0 , sizeof (double )*np*dim->zk );
@@ -225,20 +266,20 @@ int _xc_lda(const xc_func_type *func, int np, int order, const double *rho,
225266__host__
226267int _xc_gga (const xc_func_type *func, int np, int order, const double *rho, const double *sigma,
227268 xc_gga_out_params *out){
228-
269+
229270 if (func->info ->gga == NULL ){
230271 fprintf (stderr, " Nested xc functional is not supported\n " );
231272 return 1 ;
232273 }
233-
274+
234275 // xc_dimensions* dim = (xc_dimensions *) malloc(sizeof(xc_dimensions));
235276 // memcpy(dim, &(func->dim), sizeof(xc_dimensions));
236277 // DEVICE_INIT(xc_dimensions, dim, &(func->dim), 1);
237278 if (order < 0 ) return 0 ;
238279 const xc_dimensions *dim = &(func->dim );
239280 _memset_gga (out, order, np, dim);
240281 // FREE(dim);
241-
282+
242283 cudaError_t err = cudaGetLastError ();
243284 if (err != cudaSuccess) {
244285 fprintf (stderr, " CUDA Error of memset_gga: %s\n " , cudaGetErrorString (err));
@@ -271,7 +312,7 @@ int _xc_mgga(const xc_func_type *func, int np, int order, const double *rho, con
271312 fprintf (stderr, " Nested xc functional is not supported\n " );
272313 return 1 ;
273314 }
274-
315+
275316 // xc_dimensions* dim = (xc_dimensions *) malloc(sizeof(xc_dimensions));
276317 // memcpy(dim, &(func->dim), sizeof(xc_dimensions));
277318 // DEVICE_INIT(xc_dimensions, dim, &(func->dim), 1);
@@ -310,7 +351,7 @@ int GDFT_xc_lda(cudaStream_t stream,
310351 xc_lda_out_params *out, xc_lda_out_params *buf)
311352{
312353 int ierr = 0 ;
313-
354+
314355 int order = -1 ;
315356 if (out->zk != NULL ) order = 0 ;
316357 if (out->vrho != NULL ) order = 1 ;
@@ -338,11 +379,11 @@ int GDFT_xc_lda(cudaStream_t stream,
338379
339380 dim3 threads (THREADS);
340381 dim3 blocks ((np+THREADS-1 )/THREADS);
341-
382+
342383 for (int ii=0 ; ii< n_func_aux; ii++){
343384 xc_func_type *aux = func->func_aux [ii];
344385 double coef = func->mix_coef [ii];
345-
386+
346387 /* Evaluate the functional */
347388 switch (aux->info ->family ){
348389 case XC_FAMILY_LDA:{
@@ -396,13 +437,15 @@ int GDFT_xc_gga(cudaStream_t stream,
396437 /* Evaluate the functional */
397438 switch (aux->info ->family ){
398439 case XC_FAMILY_LDA:{
399- xc_lda_out_params *out_lda = (xc_lda_out_params *)(buf);
440+ xc_lda_out_params *out_lda = (xc_lda_out_params *)malloc (sizeof (xc_lda_out_params));
441+ copy_gga2lda (buf, out_lda);
400442 ierr = _xc_lda (aux, np, order, rho, out_lda);
401443 ADD_LDA;
444+ free (out_lda);
402445 break ;
403446 }
404447 case XC_FAMILY_GGA:{
405- xc_gga_out_params *out_gga = (xc_gga_out_params *)( buf) ;
448+ xc_gga_out_params *out_gga = buf;
406449 ierr = _xc_gga (aux, np, order, rho, sigma, out_gga);
407450 ADD_GGA;
408451 break ;
@@ -447,27 +490,31 @@ int GDFT_xc_mgga(cudaStream_t stream,
447490
448491 dim3 threads (THREADS);
449492 dim3 blocks ((np+THREADS-1 )/THREADS);
450-
493+
451494 for (int ii=0 ; ii< n_func_aux; ii++){
452495 xc_func_type *aux = func->func_aux [ii];
453496 double coef = func->mix_coef [ii];
454-
497+
455498 /* Evaluate the functional */
456499 switch (aux->info ->family ){
457500 case XC_FAMILY_LDA:{
458- xc_lda_out_params *out_lda = (xc_lda_out_params *)(buf);
501+ xc_lda_out_params *out_lda = (xc_lda_out_params *)malloc (sizeof (xc_lda_out_params));
502+ copy_mgga2lda (buf, out_lda);
459503 ierr = _xc_lda (aux, np, order, rho, out_lda);
460504 ADD_LDA;
505+ free (out_lda);
461506 break ;
462507 }
463508 case XC_FAMILY_GGA:{
464- xc_gga_out_params *out_gga = (xc_gga_out_params *)(buf);
509+ xc_gga_out_params *out_gga = (xc_gga_out_params *) malloc (sizeof (xc_gga_out_params));
510+ copy_mgga2gga (buf, out_gga);
465511 ierr = _xc_gga (aux, np, order, rho, sigma, out_gga);
466512 ADD_GGA;
513+ free (out_gga);
467514 break ;
468515 }
469516 case XC_FAMILY_MGGA:{
470- xc_mgga_out_params *out_mgga = (xc_mgga_out_params *)( buf) ;
517+ xc_mgga_out_params *out_mgga = buf;
471518 ierr = _xc_mgga (aux, np, order, rho, sigma, lapl, tau, out_mgga);
472519 ADD_MGGA;
473520 break ;
0 commit comments