diff --git a/includeCXX/sfx_flux.h b/includeCXX/sfx_flux.h index 8dcb5f09a6e8d239c409701a181bd09e69ec89fe..418d54bdc82b21cc9be45600968b891d174052d1 100644 --- a/includeCXX/sfx_flux.h +++ b/includeCXX/sfx_flux.h @@ -2,7 +2,7 @@ #include "sfx_template_parameters.h" #include <cstddef> -template<typename T, MemType RunMem, MemType memIn> +template<typename T, MemType memIn, MemType memOut, MemType RunMem > class Flux { private: diff --git a/srcCXX/sfx_call_class_func.cpp b/srcCXX/sfx_call_class_func.cpp index 2243e0d8063a473c9044311ccd5ba19c30758516..9d852e8ca25ab45589b2a9043278cd141d71d706 100644 --- a/srcCXX/sfx_call_class_func.cpp +++ b/srcCXX/sfx_call_class_func.cpp @@ -4,12 +4,6 @@ #include "../includeCXX/sfx_flux.h" #include <vector> -#ifdef INCLUDE_CUDA - Flux<float, MemType::GPU, MemType::CPU> F; -#else - Flux<float, MemType::CPU, MemType::CPU> F; -#endif - // -------------------------------------------------------------------------- // void surf_flux_CXX (float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_, float *U_, float *dT_, float *Tsemi_, float *dQ_, float *h_, float *in_z0_m_, @@ -23,6 +17,12 @@ void surf_flux_CXX (float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_ const int maxiters_charnock, const int maxiters_convection, const int grid_size) { +#ifdef INCLUDE_CUDA + static Flux<float, MemType::CPU, MemType::CPU, MemType::GPU> F; +#else + static Flux<float, MemType::CPU, MemType::CPU, MemType::CPU> F; +#endif + F.set_params(grid_size, kappa, Pr_t_0_inv, Pr_t_inf_inv, alpha_m, alpha_h, alpha_h_fix, beta_m, beta_h, Rib_max, Re_rough_min, diff --git a/srcCXX/sfx_flux.cpp b/srcCXX/sfx_flux.cpp index 8f70e1503a7d2e921ccc8f990121120fde053081..8648c4fb78645edd4f792e2e0637fa45d810f229 100644 --- a/srcCXX/sfx_flux.cpp +++ b/srcCXX/sfx_flux.cpp @@ -9,8 +9,8 @@ #include "../includeCXX/sfx_memory_processing.h" -template<typename T, MemType RunMem, MemType memIn> -Flux<T, RunMem, memIn>::Flux() +template<typename T, MemType memIn, MemType memOut, MemType RunMem > +Flux<T, memIn, memOut, RunMem>::Flux() { kappa = 0, Pr_t_0_inv = 0, Pr_t_inf_inv = 0, alpha_m = 0, alpha_h = 0, alpha_h_fix = 0, @@ -28,8 +28,8 @@ Flux<T, RunMem, memIn>::Flux() allocated_size = 0; } -template<typename T, MemType RunMem, MemType memIn> -void Flux<T, RunMem, memIn>::set_params(const int grid_size_, const T kappa_, const T Pr_t_0_inv_, const T Pr_t_inf_inv_, +template<typename T, MemType memIn, MemType memOut, MemType RunMem > +void Flux<T, memIn, memOut, RunMem>::set_params(const int grid_size_, const T kappa_, const T Pr_t_0_inv_, const T Pr_t_inf_inv_, const T alpha_m_, const T alpha_h_, const T alpha_h_fix_, const T beta_m_, const T beta_h_, const T Rib_max_, const T Re_rough_min_, const T B1_rough_, const T B2_rough_, @@ -65,7 +65,10 @@ void Flux<T, RunMem, memIn>::set_params(const int grid_size_, const T kappa_, co memproc::realloc<RunMem>((void *&)(h), allocated_size, new_size); allocated_size = 0; memproc::realloc<RunMem>((void *&)(in_z0_m), allocated_size, new_size); - + } + if(RunMem != memOut) + { + const size_t new_size = grid_size * sizeof(T); allocated_size = 0; memproc::realloc<RunMem>((void *&)(zeta), allocated_size, new_size); allocated_size = 0; @@ -93,8 +96,8 @@ void Flux<T, RunMem, memIn>::set_params(const int grid_size_, const T kappa_, co } } -template<typename T, MemType RunMem, MemType memIn> -Flux<T, RunMem, memIn>::~Flux() +template<typename T, MemType memIn, MemType memOut, MemType RunMem > +Flux<T, memIn, memOut, RunMem>::~Flux() { kappa = 0, Pr_t_0_inv = 0, Pr_t_inf_inv = 0, alpha_m = 0, alpha_h = 0, alpha_h_fix = 0, @@ -110,31 +113,36 @@ Flux<T, RunMem, memIn>::~Flux() if(ifAllocated == true) { - memproc::dealloc<RunMem>((void*&)U); - memproc::dealloc<RunMem>((void*&)dT); - memproc::dealloc<RunMem>((void*&)Tsemi); - memproc::dealloc<RunMem>((void*&)dQ); - memproc::dealloc<RunMem>((void*&)h); - - memproc::dealloc<RunMem>((void*&)zeta); - memproc::dealloc<RunMem>((void*&)Rib); - memproc::dealloc<RunMem>((void*&)Re); - memproc::dealloc<RunMem>((void*&)Rib_conv_lim); - memproc::dealloc<RunMem>((void*&)z0_m); - memproc::dealloc<RunMem>((void*&)z0_t); - memproc::dealloc<RunMem>((void*&)B); - memproc::dealloc<RunMem>((void*&)Cm); - memproc::dealloc<RunMem>((void*&)Ct); - memproc::dealloc<RunMem>((void*&)Km); - memproc::dealloc<RunMem>((void*&)Pr_t_inv); + if(RunMem != memIn) + { + memproc::dealloc<RunMem>((void*&)U); + memproc::dealloc<RunMem>((void*&)dT); + memproc::dealloc<RunMem>((void*&)Tsemi); + memproc::dealloc<RunMem>((void*&)dQ); + memproc::dealloc<RunMem>((void*&)h); + } + if(RunMem != memOut) + { + memproc::dealloc<RunMem>((void*&)zeta); + memproc::dealloc<RunMem>((void*&)Rib); + memproc::dealloc<RunMem>((void*&)Re); + memproc::dealloc<RunMem>((void*&)Rib_conv_lim); + memproc::dealloc<RunMem>((void*&)z0_m); + memproc::dealloc<RunMem>((void*&)z0_t); + memproc::dealloc<RunMem>((void*&)B); + memproc::dealloc<RunMem>((void*&)Cm); + memproc::dealloc<RunMem>((void*&)Ct); + memproc::dealloc<RunMem>((void*&)Km); + memproc::dealloc<RunMem>((void*&)Pr_t_inv); + } ifAllocated = false; allocated_size = 0; } } -template<typename T, MemType RunMem, MemType memIn> -void Flux<T, RunMem, memIn>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, +template<typename T, MemType memIn, MemType memOut, MemType RunMem > +void Flux<T, memIn, memOut, RunMem>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_) { if(RunMem == memIn) @@ -145,18 +153,6 @@ void Flux<T, RunMem, memIn>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_ dQ = dQ_; h = h_; in_z0_m = in_z0_m_; - - zeta = zeta_; - Rib = Rib_; - Re = Re_; - Rib_conv_lim = Rib_conv_lim_; - z0_m = z0_m_; - z0_t = z0_t_; - B = B_; - Cm = Cm_; - Ct = Ct_; - Km = Km_; - Pr_t_inv = Pr_t_inv_; } else { @@ -169,10 +165,25 @@ void Flux<T, RunMem, memIn>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_ memproc::memcopy<RunMem, memIn>(h, h_, new_size); memproc::memcopy<RunMem, memIn>(in_z0_m, in_z0_m_, new_size); } + + if(RunMem == memOut) + { + zeta = zeta_; + Rib = Rib_; + Re = Re_; + Rib_conv_lim = Rib_conv_lim_; + z0_m = z0_m_; + z0_t = z0_t_; + B = B_; + Cm = Cm_; + Ct = Ct_; + Km = Km_; + Pr_t_inv = Pr_t_inv_; + } } -template<typename T, MemType RunMem, MemType memIn> -void Flux<T, RunMem, memIn>::compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, +template<typename T, MemType memIn, MemType memOut, MemType RunMem > +void Flux<T, memIn, memOut, RunMem>::compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_, const int maxiters_charnock, const int maxiters_convection) { @@ -205,33 +216,39 @@ void Flux<T, RunMem, memIn>::compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z grid_size); #endif - if(RunMem != memIn) + if(RunMem != memOut) { const size_t new_size = grid_size * sizeof(T); - memproc::memcopy<memIn, RunMem>(zeta_, zeta, new_size); - memproc::memcopy<memIn, RunMem>(Rib_, Rib, new_size); - memproc::memcopy<memIn, RunMem>(Re_, Re, new_size); - memproc::memcopy<memIn, RunMem>(Rib_conv_lim_, Rib_conv_lim, new_size); - memproc::memcopy<memIn, RunMem>(z0_m_, z0_m, new_size); - memproc::memcopy<memIn, RunMem>(z0_t_, z0_t, new_size); - memproc::memcopy<memIn, RunMem>(B_, B, new_size); - memproc::memcopy<memIn, RunMem>(Cm_, Cm, new_size); - memproc::memcopy<memIn, RunMem>(Ct_, Ct, new_size); - memproc::memcopy<memIn, RunMem>(Km_, Km, new_size); - memproc::memcopy<memIn, RunMem>(Pr_t_inv_, Pr_t_inv, new_size); + memproc::memcopy<memOut, RunMem>(zeta_, zeta, new_size); + memproc::memcopy<memOut, RunMem>(Rib_, Rib, new_size); + memproc::memcopy<memOut, RunMem>(Re_, Re, new_size); + memproc::memcopy<memOut, RunMem>(Rib_conv_lim_, Rib_conv_lim, new_size); + memproc::memcopy<memOut, RunMem>(z0_m_, z0_m, new_size); + memproc::memcopy<memOut, RunMem>(z0_t_, z0_t, new_size); + memproc::memcopy<memOut, RunMem>(B_, B, new_size); + memproc::memcopy<memOut, RunMem>(Cm_, Cm, new_size); + memproc::memcopy<memOut, RunMem>(Ct_, Ct, new_size); + memproc::memcopy<memOut, RunMem>(Km_, Km, new_size); + memproc::memcopy<memOut, RunMem>(Pr_t_inv_, Pr_t_inv, new_size); } } -template class Flux<float, MemType::CPU, MemType::CPU>; -template class Flux<double, MemType::CPU, MemType::CPU>; +template class Flux<float, MemType::CPU, MemType::CPU, MemType::CPU>; +template class Flux<double, MemType::CPU, MemType::CPU, MemType::CPU>; #ifdef INCLUDE_CUDA - template class Flux<float, MemType::GPU, MemType::GPU>; - template class Flux<float, MemType::GPU, MemType::CPU>; - template class Flux<float, MemType::CPU, MemType::GPU>; - - template class Flux<double, MemType::GPU, MemType::GPU>; - template class Flux<double, MemType::GPU, MemType::CPU>; - template class Flux<double, MemType::CPU, MemType::GPU>; + template class Flux<float, MemType::GPU, MemType::GPU, MemType::CPU>; + template class Flux<float, MemType::GPU, MemType::CPU, MemType::CPU>; + template class Flux<float, MemType::CPU, MemType::GPU, MemType::CPU>; + template class Flux<float, MemType::GPU, MemType::GPU, MemType::GPU>; + template class Flux<float, MemType::GPU, MemType::CPU, MemType::GPU>; + template class Flux<float, MemType::CPU, MemType::GPU, MemType::GPU>; + + template class Flux<double, MemType::GPU, MemType::GPU, MemType::CPU>; + template class Flux<double, MemType::GPU, MemType::CPU, MemType::CPU>; + template class Flux<double, MemType::CPU, MemType::GPU, MemType::CPU>; + template class Flux<double, MemType::GPU, MemType::GPU, MemType::GPU>; + template class Flux<double, MemType::GPU, MemType::CPU, MemType::GPU>; + template class Flux<double, MemType::CPU, MemType::GPU, MemType::GPU>; #endif \ No newline at end of file