Skip to content
Snippets Groups Projects
Commit a77e6e9a authored by 数学の武士's avatar 数学の武士
Browse files

Reduce thread count for the SHEBA kernel

parent abff347e
Branches
Tags
No related merge requests found
#include <cmath>
#include <iostream>
#include "../includeCU/sfx_compute_esm.cuh"
#include "../includeCU/sfx_surface.cuh"
// Checks the status of a CUDA runtime call and reports the call site on failure.
// The do { ... } while (0) wrapper makes the macro expand to exactly one
// statement, so `if (c) gpuErrchk(x); else ...` parses as intended instead of
// leaving a stray `;` after a brace block.
#define gpuErrchk(ans)                            \
    do {                                          \
        gpuAssert((ans), __FILE__, __LINE__);     \
    } while (0)

// Prints a diagnostic to stderr when `code` is not cudaSuccess and, if `abort`
// is true, terminates the process with the error code as the exit status.
//   code  - status value returned by a CUDA runtime call
//   file  - source file of the call site (supplied by gpuErrchk)
//   line  - source line of the call site (supplied by gpuErrchk)
//   abort - when false the error is only reported and execution continues
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    // Nothing to report on success.
    if (code == cudaSuccess)
        return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort)
        exit(code);
}
template<typename T>
__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
const T h, const T U,
......@@ -404,6 +413,8 @@ void compute_flux_esm_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_,
Pr_m, nu_air, g,
maxiters_charnock, maxiters_convection,
grid_size);
gpuErrchk( cudaPeekAtLastError() );
}
template void compute_flux_esm_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
......
#include <cmath>
#include <iostream>
#include "../includeCU/sfx_compute_sheba.cuh"
#include "../includeCU/sfx_surface.cuh"
// Checks the status of a CUDA runtime call and reports the call site on failure.
// The do { ... } while (0) wrapper makes the macro expand to exactly one
// statement, so `if (c) gpuErrchk(x); else ...` parses as intended instead of
// leaving a stray `;` after a brace block.
#define gpuErrchk(ans)                            \
    do {                                          \
        gpuAssert((ans), __FILE__, __LINE__);     \
    } while (0)

// Prints a diagnostic to stderr when `code` is not cudaSuccess and, if `abort`
// is true, terminates the process with the error code as the exit status.
//   code  - status value returned by a CUDA runtime call
//   file  - source file of the call site (supplied by gpuErrchk)
//   line  - source line of the call site (supplied by gpuErrchk)
//   abort - when false the error is only reported and execution continues
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    // Nothing to report on success.
    if (code == cudaSuccess)
        return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort)
        exit(code);
}
template<typename T>
__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
const T h, const T U,
......@@ -367,17 +376,17 @@ __global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z
Km = kappa * Cm * U * h / phi_m;
Pr_t_inv = phi_m / phi_h;
zeta_[index] = 0.0;
Rib_[index] = 0.0;
Re_[index] = 0.0;
B_[index] = 0.0;
z0_m_[index] = 0.0;
z0_t_[index] = 0.0;
zeta_[index] = zeta;
Rib_[index] = Rib;
Re_[index] = Re;
B_[index] = B;
z0_m_[index] = z0_m;
z0_t_[index] = z0_t;
Rib_conv_lim_[index] = 0.0;
Cm_[index] = 0.0;
Ct_[index] = 0.0;
Km_[index] = 0.0;
Pr_t_inv_[index] = 0.0;
Cm_[index] = Cm;
Ct_[index] = Ct;
Km_[index] = Km;
Pr_t_inv_[index] = Pr_t_inv;
}
}
......@@ -426,8 +435,8 @@ void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_
const int maxiters_charnock,
const int grid_size)
{
const int BlockCount = int(ceil(float(grid_size) / 1024.0));
dim3 cuBlock = dim3(1024, 1, 1);
const int BlockCount = int(ceil(float(grid_size) / 512.0));
dim3 cuBlock = dim3(512, 1, 1);
dim3 cuGrid = dim3(BlockCount, 1, 1);
kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
......@@ -444,6 +453,7 @@ void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_
Pr_m, nu_air, g,
maxiters_charnock,
grid_size);
gpuErrchk( cudaPeekAtLastError() );
}
template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment