diff --git a/srcCU/sfx_compute_esm.cu b/srcCU/sfx_compute_esm.cu index 302494a0f511f2d5205c4ab7bbd9129cc085acc8..354be330976dea575516ee81a927120262738773 100644 --- a/srcCU/sfx_compute_esm.cu +++ b/srcCU/sfx_compute_esm.cu @@ -1,8 +1,17 @@ -#include <cmath> #include <iostream> #include "../includeCU/sfx_compute_esm.cuh" #include "../includeCU/sfx_surface.cuh" +#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) +{ + if (code != cudaSuccess) + { + fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); + if (abort) exit(code); + } +} + template<typename T> __device__ void get_charnock_roughness(T &z0_m, T &u_dyn0, const T h, const T U, @@ -404,6 +413,8 @@ void compute_flux_esm_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, Pr_m, nu_air, g, maxiters_charnock, maxiters_convection, grid_size); + + gpuErrchk( cudaPeekAtLastError() ); } template void compute_flux_esm_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_, diff --git a/srcCU/sfx_compute_sheba.cu b/srcCU/sfx_compute_sheba.cu index fd80dd51e56be4f09803bf52b6516ae5ab05913a..56a93bb49c28ac039c681e6ad71ffc1aa6f33f87 100644 --- a/srcCU/sfx_compute_sheba.cu +++ b/srcCU/sfx_compute_sheba.cu @@ -1,8 +1,17 @@ -#include <cmath> #include <iostream> #include "../includeCU/sfx_compute_sheba.cuh" #include "../includeCU/sfx_surface.cuh" +#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) +{ + if (code != cudaSuccess) + { + fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); + if (abort) exit(code); + } +} + template<typename T> __device__ void get_charnock_roughness(T &z0_m, T &u_dyn0, const T h, const T U, @@ -367,17 +376,17 @@ __global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z Km = kappa * Cm * U * h / phi_m; Pr_t_inv = phi_m / phi_h; - zeta_[index] = 0.0; - Rib_[index] = 0.0; - Re_[index] = 0.0; - B_[index] = 0.0; - z0_m_[index] = 0.0; - z0_t_[index] = 0.0; + zeta_[index] = zeta; + Rib_[index] = Rib; + Re_[index] = Re; + B_[index] = B; + z0_m_[index] = z0_m; + z0_t_[index] = z0_t; Rib_conv_lim_[index] = 0.0; - Cm_[index] = 0.0; - Ct_[index] = 0.0; - Km_[index] = 0.0; - Pr_t_inv_[index] = 0.0; + Cm_[index] = Cm; + Ct_[index] = Ct; + Km_[index] = Km; + Pr_t_inv_[index] = Pr_t_inv; } } @@ -426,8 +435,8 @@ void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_ const int maxiters_charnock, const int grid_size) { - const int BlockCount = int(ceil(float(grid_size) / 1024.0)); - dim3 cuBlock = dim3(1024, 1, 1); + const int BlockCount = int(ceil(float(grid_size) / 512.0)); + dim3 cuBlock = dim3(512, 1, 1); dim3 cuGrid = dim3(BlockCount, 1, 1); kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_, @@ -444,6 +453,7 @@ void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_ Pr_m, nu_air, g, maxiters_charnock, grid_size); + gpuErrchk( cudaPeekAtLastError() ); } template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,