Skip to content
Snippets Groups Projects
Commit a77e6e9a authored by 数学の武士's avatar 数学の武士
Browse files

Reduce SHEBA kernel thread count per block (1024 → 512)

parent abff347e
No related branches found
No related tags found
No related merge requests found
#include <cmath>
#include <iostream> #include <iostream>
#include "../includeCU/sfx_compute_esm.cuh" #include "../includeCU/sfx_compute_esm.cuh"
#include "../includeCU/sfx_surface.cuh" #include "../includeCU/sfx_surface.cuh"
// Evaluates an expression yielding cudaError_t and forwards the status to
// gpuAssert together with the file name and line number of the call site.
// The do { ... } while (0) wrapper makes the expansion a single statement, so
// the macro followed by ';' is safe inside an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Prints a diagnostic for a failed CUDA status and optionally ends the process.
//
//   code  - status to inspect; nothing happens when it equals cudaSuccess.
//   file  - source file name echoed in the diagnostic.
//   line  - source line number echoed in the diagnostic.
//   abort - when true (the default), exit(code) is called after printing.
//
// NOTE(review): the process exit status is the raw cudaError_t value; some
// platforms may truncate exit statuses, so distinct codes could collide —
// confirm whether callers rely on the exact value.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    // Success path: nothing to report.
    if (code == cudaSuccess)
        return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) exit(code);
}
template<typename T> template<typename T>
__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0, __device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
const T h, const T U, const T h, const T U,
...@@ -404,6 +413,8 @@ void compute_flux_esm_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, ...@@ -404,6 +413,8 @@ void compute_flux_esm_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_,
Pr_m, nu_air, g, Pr_m, nu_air, g,
maxiters_charnock, maxiters_convection, maxiters_charnock, maxiters_convection,
grid_size); grid_size);
gpuErrchk( cudaPeekAtLastError() );
} }
template void compute_flux_esm_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_, template void compute_flux_esm_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
......
#include <cmath>
#include <iostream> #include <iostream>
#include "../includeCU/sfx_compute_sheba.cuh" #include "../includeCU/sfx_compute_sheba.cuh"
#include "../includeCU/sfx_surface.cuh" #include "../includeCU/sfx_surface.cuh"
// Evaluates an expression yielding cudaError_t and forwards the status to
// gpuAssert together with the file name and line number of the call site.
// The do { ... } while (0) wrapper makes the expansion a single statement, so
// the macro followed by ';' is safe inside an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Prints a diagnostic for a failed CUDA status and optionally ends the process.
//
//   code  - status to inspect; nothing happens when it equals cudaSuccess.
//   file  - source file name echoed in the diagnostic.
//   line  - source line number echoed in the diagnostic.
//   abort - when true (the default), exit(code) is called after printing.
//
// NOTE(review): the process exit status is the raw cudaError_t value; some
// platforms may truncate exit statuses, so distinct codes could collide —
// confirm whether callers rely on the exact value.
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    // Success path: nothing to report.
    if (code == cudaSuccess)
        return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) exit(code);
}
template<typename T> template<typename T>
__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0, __device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
const T h, const T U, const T h, const T U,
...@@ -367,17 +376,17 @@ __global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z ...@@ -367,17 +376,17 @@ __global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z
Km = kappa * Cm * U * h / phi_m; Km = kappa * Cm * U * h / phi_m;
Pr_t_inv = phi_m / phi_h; Pr_t_inv = phi_m / phi_h;
zeta_[index] = 0.0; zeta_[index] = zeta;
Rib_[index] = 0.0; Rib_[index] = Rib;
Re_[index] = 0.0; Re_[index] = Re;
B_[index] = 0.0; B_[index] = B;
z0_m_[index] = 0.0; z0_m_[index] = z0_m;
z0_t_[index] = 0.0; z0_t_[index] = z0_t;
Rib_conv_lim_[index] = 0.0; Rib_conv_lim_[index] = 0.0;
Cm_[index] = 0.0; Cm_[index] = Cm;
Ct_[index] = 0.0; Ct_[index] = Ct;
Km_[index] = 0.0; Km_[index] = Km;
Pr_t_inv_[index] = 0.0; Pr_t_inv_[index] = Pr_t_inv;
} }
} }
...@@ -426,8 +435,8 @@ void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_ ...@@ -426,8 +435,8 @@ void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_
const int maxiters_charnock, const int maxiters_charnock,
const int grid_size) const int grid_size)
{ {
const int BlockCount = int(ceil(float(grid_size) / 1024.0)); const int BlockCount = int(ceil(float(grid_size) / 512.0));
dim3 cuBlock = dim3(1024, 1, 1); dim3 cuBlock = dim3(512, 1, 1);
dim3 cuGrid = dim3(BlockCount, 1, 1); dim3 cuGrid = dim3(BlockCount, 1, 1);
kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_, kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
...@@ -444,6 +453,7 @@ void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_ ...@@ -444,6 +453,7 @@ void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_
Pr_m, nu_air, g, Pr_m, nu_air, g,
maxiters_charnock, maxiters_charnock,
grid_size); grid_size);
gpuErrchk( cudaPeekAtLastError() );
} }
template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_, template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment