Skip to content
Snippets Groups Projects
Commit 5c81c8e5 authored by 数学の武士's avatar 数学の武士
Browse files

CXX and CUDA hotfix

parent 9f9b0fb5
No related branches found
No related tags found
No related merge requests found
#pragma once
template<typename T>
void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const T Re_rough_min,
const T B1_rough, const T B2_rough,
const T B_max_land, const T B_max_ocean, const T B_max_lake,
const T gamma_c, const T Re_visc_min,
const T Pr_m, const T nu_air, const T g,
const int maxiters_charnock,
const int grid_size);
\ No newline at end of file
#pragma once
#include "../includeC/sfx_data.h"
#include "../includeCU/sfx_math.cuh"
template<typename T>
FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim,
const T h0_m, const T h0_t, const T B,
const sfx_esm_param& param)
{
T psi_m, psi_h, f_m, f_h, c;
c = pow(param.Pr_t_inf_inv / param.Pr_t_0_inv, 4);
zeta_lim = (2.0 * param.alpha_h - c * param.alpha_m -
sqrt( (c * param.alpha_m)*(c * param.alpha_m) + 4.0 * c * param.alpha_h * (param.alpha_h - param.alpha_m))) / (2.0 * param.alpha_h*param.alpha_h);
f_m_lim = pow(1.0 - param.alpha_m * zeta_lim, 0.25);
f_h_lim = sqrt(1.0 - param.alpha_h * zeta_lim);
f_m = zeta_lim / h0_m;
f_h = zeta_lim / h0_t;
if (fabs(B) < 1.0e-10) f_h = f_m;
f_m = pow(1.0 - param.alpha_m * f_m, 0.25);
f_h = sqrt(1.0 - param.alpha_h_fix * f_h);
psi_m = 2.0 * (atan(f_m_lim) - atan(f_m)) + log((f_m_lim - 1.0) * (f_m + 1.0)/((f_m_lim + 1.0) * (f_m - 1.0)));
psi_h = log((f_h_lim - 1.0) * (f_h + 1.0)/((f_h_lim + 1.0) * (f_h - 1.0))) / param.Pr_t_0_inv;
Rib_lim = zeta_lim * psi_h / (psi_m * psi_m);
}
template<typename T>
FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
const T Rib, const T h0_m, const T h0_t, const T B,
const sfx_esm_param& param)
{
T Rib_coeff, psi0_m, psi0_h, phi, c;
psi0_m = log(h0_m);
psi0_h = B / psi0_m;
Rib_coeff = param.beta_m * Rib;
c = (psi0_h + 1.0) / param.Pr_t_0_inv - 2.0 * Rib_coeff;
zeta = psi0_m * (sqrt(c*c + 4.0 * Rib_coeff * (1.0 - Rib_coeff)) - c) / (2.0 * param.beta_m * (1.0 - Rib_coeff));
phi = param.beta_m * zeta;
psi_m = psi0_m + phi;
psi_h = (psi0_m + B) / param.Pr_t_0_inv + phi;
}
template<typename T>
FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &zeta,
const T Rib, const T h0_m, const T h0_t, const T B,
const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim,
const sfx_esm_param& param,
const sfx_esm_numericsTypeC& numerics)
{
T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f;
p_m = 2.0 * atan(f_m_conv_lim) + log((f_m_conv_lim - 1.0) / (f_m_conv_lim + 1.0));
p_h = log((f_h_conv_lim - 1.0) / (f_h_conv_lim + 1.0));
zeta = zeta_conv_lim;
for (int i = 1; i <= numerics.maxiters_convection + 1; i++)
{
zeta0_m = zeta / h0_m;
zeta0_h = zeta / h0_t;
if (fabs(B) < 1.0e-10)
zeta0_h = zeta0_m;
f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25);
f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h);
a_m = -2.0*atan(f0_m) + log((f0_m + 1.0)/(f0_m - 1.0));
a_h = log((f0_h + 1.0)/(f0_h - 1.0));
c_lim = pow(zeta_conv_lim / zeta, 1.0 / 3.0);
f = 3.0 * (1.0 - c_lim);
psi_m = f / f_m_conv_lim + p_m + a_m;
psi_h = (f / f_h_conv_lim + p_h + a_h) / param.Pr_t_0_inv;
if (i == numerics.maxiters_convection + 1)
break;
zeta = Rib * psi_m * psi_m / psi_h;
}
}
template<typename T>
FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,
const T h0_m, const T h0_t, const T B,
const sfx_esm_param& param)
{
zeta = 0.0;
psi_m = log(h0_m);
psi_h = log(h0_t) / param.Pr_t_0_inv;
if (fabs(B) < 1.0e-10)
psi_h = psi_m / param.Pr_t_0_inv;
}
template<typename T>
FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h, T &zeta,
const T Rib, const T h0_m, const T h0_t, const T B,
const sfx_esm_param& param,
const sfx_esm_numericsTypeC& numerics)
{
T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h;
psi_m = log(h0_m);
psi_h = log(h0_t);
if (fabs(B) < 1.0e-10)
psi_h = psi_m;
zeta = Rib * param.Pr_t_0_inv * psi_m * psi_m / psi_h;
for (int i = 1; i <= numerics.maxiters_convection + 1; i++)
{
zeta0_m = zeta / h0_m;
zeta0_h = zeta / h0_t;
if (fabs(B) < 1.0e-10)
zeta0_h = zeta0_m;
f_m = pow(1.0 - param.alpha_m * zeta, 0.25e0);
f_h = sqrt(1.0 - param.alpha_h_fix * zeta);
f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25e0);
f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h);
f0_m = sfx_math::max(f0_m, T(1.000001e0));
f0_h = sfx_math::max(f0_h, T(1.000001e0));
psi_m = log((f_m - 1.0e0)*(f0_m + 1.0e0)/((f_m + 1.0e0)*(f0_m - 1.0e0))) + 2.0e0*(atan(f_m) - atan(f0_m));
psi_h = log((f_h - 1.0e0)*(f0_h + 1.0e0)/((f_h + 1.0e0)*(f0_h - 1.0e0))) / param.Pr_t_0_inv;
if (i == numerics.maxiters_convection + 1)
break;
zeta = Rib * psi_m * psi_m / psi_h;
}
}
\ No newline at end of file
......@@ -14,8 +14,8 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
// --- apply max restriction based on surface type
if (surface_type == param.surface_ocean) B = sfx_math::min(B, param.B_max_ocean);
if (surface_type == param.surface_lake) B = sfx_math::min(B, param.B_max_land);
if (surface_type == param.surface_land) B = sfx_math::min(B, param.B_max_lake);
if (surface_type == param.surface_lake) B = sfx_math::min(B, param.B_max_lake);
if (surface_type == param.surface_land) B = sfx_math::min(B, param.B_max_land);
// --- define roughness [thermal]
z0_t = z0_m / exp(B);
......@@ -23,7 +23,7 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
template<typename T>
FUCNTION_DECLARATION_SPECIFIER void get_charnock_roughness(T &z0_m, T &u_dyn0,
const T h, const T U,
const T U, const T h,
const sfx_surface_param& surface_param,
const int maxiters)
{
......
#include <iostream>
#include "../includeCU/sfx_compute_sheba.cuh"
#include "../includeCU/sfx_surface.cuh"
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
template<typename T>
__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
const T h, const T U,
const T kappa,
const T h_charnock, const T c1_charnock, const T c2_charnock,
const int maxiters)
{
T Uc, a, b, c, c_min, f;
Uc = U;
a = 0.0;
b = 25.0;
c_min = log(h_charnock) / kappa;
for (int i = 0; i < maxiters; i++)
{
f = c1_charnock - 2.0 * log(Uc);
for (int j = 0; j < maxiters; j++)
{
c = (f + 2.0 * log(b)) / kappa;
if (U <= 8.0e0)
a = log(1.0 + c2_charnock * ( pow(b / Uc, 3) ) ) / kappa;
c = max(c - a, c_min);
b = c;
}
z0_m = h_charnock * exp(-c * kappa);
z0_m = max(z0_m, T(0.000015e0));
Uc = U * log(h_charnock / z0_m) / log(h / z0_m);
}
u_dyn0 = Uc / c;
}
template __device__ void get_charnock_roughness(float &z0_m, float &u_dyn0,
const float h, const float U,
const float kappa,
const float h_charnock, const float c1_charnock, const float c2_charnock,
const int maxiters);
template __device__ void get_charnock_roughness(double &z0_m, double &u_dyn0,
const double h, const double U,
const double kappa,
const double h_charnock, const double c1_charnock, const double c2_charnock,
const int maxiters);
template<typename T>
__device__ void get_thermal_roughness(T &z0_t, T &B,
const T z0_m, const T Re,
const T Re_rough_min,
const T B1_rough, const T B2_rough, const T B3_rough, const T B4_rough,
const T B_max_ocean, const T B_max_lake, const T B_max_land,
const int surface_type)
{
// --- define B = log(z0_m / z0_t)
if (Re <= Re_rough_min)
B = B1_rough * log(B3_rough * Re) + B2_rough;
else
// *: B4 takes into account Re value at z' ~ O(10) z0
B = B4_rough * (pow(Re, B2_rough));
// --- apply max restriction based on surface type
if (surface_type == 0)
B = min(B, B_max_ocean);
else if (surface_type == 2)
B = min(B, B_max_lake);
else if (surface_type == 1)
B = min(B, B_max_land);
// --- define roughness [thermal]
z0_t = z0_m / exp(B);
}
template __device__ void get_thermal_roughness(float &z0_t, float &B,
const float z0_m, const float Re,
const float Re_rough_min,
const float B1_rough, const float B2_rough, const float B3_rough, const float B4_rough,
const float B_max_ocean, const float B_max_lake, const float B_max_land,
const int surface_type);
template __device__ void get_thermal_roughness(double &z0_t, double &B,
const double z0_m, const double Re,
const double Re_rough_min,
const double B1_rough, const double B2_rough, const double B3_rough, const double B4_rough,
const double B_max_ocean, const double B_max_lake, const double B_max_land,
const int surface_type);
template<typename T>
__device__ void get_psi_mh(T &psi_m, T &psi_h,
const T zeta_m, const T zeta_h,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h)
{
T x_m, x_h;
T q_m, q_h;
if (zeta_m >= 0.0)
{
q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
x_m = pow(1.0 + zeta_m, 1.0 / 3.0);
psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
}
else
{ x_m = pow(1.0 - alpha_m * zeta_m, 0.25);
psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
}
if (zeta_h >= 0.0)
{
q_h = sqrt(c_h * c_h - 4.0);
x_h = zeta_h;
psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
}
else
{
x_h = pow(1.0 - alpha_h * zeta_h, 0.25);
psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
}
}
template __device__ void get_psi_mh(float &psi_m, float &psi_h,
const float zeta_m, const float zeta_h,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h);
template __device__ void get_psi_mh(double &psi_m, double &psi_h,
const double zeta_m, const double zeta_h,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h);
template<typename T>
__device__ void get_psi(T &psi_m, T &psi_h,
const T zeta,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h)
{
T x_m, x_h;
T q_m, q_h;
if (zeta >= 0.0)
{
q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
q_h = sqrt(c_h * c_h - 4.0);
x_m = pow(1.0 + zeta, 1.0 / 3.0);
x_h = zeta;
psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
}
else
{
x_m = pow(1.0 - alpha_m * zeta, 0.25);
x_h = pow(1.0 - alpha_h * zeta, 0.25);
psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
}
}
template __device__ void get_psi(float &psi_m, float &psi_h,
const float zeta,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h);
template __device__ void get_psi(double &psi_m, double &psi_h,
const double zeta,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h);
template<typename T>
__device__ void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta,
const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const int maxiters)
{
T psi_m, psi_h, psi0_m, psi0_h, Linv;
const T gamma = 0.61;
Udyn = kappa * U / log(z / z0_m);
Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t);
Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t);
zeta = 0.0;
// --- no wind
if (Udyn < 1e-5)
return;
Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
zeta = z * Linv;
// --- near neutral case
if (Linv < 1e-5)
return;
for (int i = 0; i < maxiters; i++)
{
get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h,
a_m, a_h,
b_m, b_h,
c_h);
get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv,
alpha_m, alpha_h,
a_m, a_h,
b_m, b_h,
c_h);
Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m));
Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
if (Udyn < 1e-5)
break;
Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
zeta = z * Linv;
}
}
template __device__ void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta,
const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta,
const float kappa, const float Pr_t_0_inv,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h,
const int maxiters);
template __device__ void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta,
const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta,
const double kappa, const double Pr_t_0_inv,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h,
const int maxiters);
template<typename T>
__device__ void get_phi(T &phi_m, T &phi_h,
const T zeta,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h)
{
if (zeta >= 0.0)
{
phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta);
phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta);
}
else
{
phi_m = pow(1.0 - alpha_m * zeta, -0.25);
phi_h = pow(1.0 - alpha_h * zeta, -0.5);
}
}
template __device__ void get_phi(float &phi_m, float &phi_h,
const float zeta,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h);
template __device__ void get_phi(double &phi_m, double &phi_h,
const double zeta,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h);
template<typename T>
__global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const T Re_rough_min,
const T B1_rough, const T B2_rough,
const T B_max_land, const T B_max_ocean, const T B_max_lake,
const T gamma_c, const T Re_visc_min,
const T Pr_m, const T nu_air, const T g,
const int maxiters_charnock,
const int grid_size)
{
const int index = blockIdx.x * blockDim.x + threadIdx.x;
T h, U, dT, Tsemi, dQ, z0_m;
T z0_t, B, h0_m, h0_t, u_dyn0, Re,
zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
Km, Pr_t_inv, Cm, Ct;
const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8));
const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock;
int surface_type;
if(index < grid_size)
{
U = U_[index];
Tsemi = Tsemi_[index];
dT = dT_[index];
dQ = dQ_[index];
h = h_[index];
z0_m = in_z0_m_[index];
if (z0_m < 0.0) surface_type = 0;
else surface_type = 1;
if (surface_type == 0)
{
get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == 1)
{
h0_m = h / z0_m;
u_dyn0 = U * kappa / log(h0_m);
}
Re = u_dyn0 * z0_m / nu_air;
get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type);
// --- define relative height [thermal]
h0_t = h / z0_t;
// --- define Ri-bulk
Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
// --- get the fluxes
// ----------------------------------------------------------------------------
get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10);
// ----------------------------------------------------------------------------
get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h);
// ----------------------------------------------------------------------------
// --- define transfer coeff. (momentum) & (heat)
Cm = 0.0;
if (U > 0.0)
Cm = Udyn / U;
Ct = 0.0;
if (fabs(dT) > 0.0)
Ct = Tdyn / dT;
// --- define eddy viscosity & inverse Prandtl number
Km = kappa * Cm * U * h / phi_m;
Pr_t_inv = phi_m / phi_h;
zeta_[index] = zeta;
Rib_[index] = Rib;
Re_[index] = Re;
B_[index] = B;
z0_m_[index] = z0_m;
z0_t_[index] = z0_t;
Rib_conv_lim_[index] = 0.0;
Cm_[index] = Cm;
Ct_[index] = Ct;
Km_[index] = Km;
Pr_t_inv_[index] = Pr_t_inv;
}
}
template __global__ void kernel_compute_flux_sheba(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
const float kappa, const float Pr_t_0_inv,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h,
const float Re_rough_min,
const float B1_rough, const float B2_rough,
const float B_max_land, const float B_max_ocean, const float B_max_lake,
const float gamma_c, const float Re_visc_min,
const float Pr_m, const float nu_air, const float g,
const int maxiters_charnock,
const int grid_size);
template __global__ void kernel_compute_flux_sheba(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
const double kappa, const double Pr_t_0_inv,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h,
const double Re_rough_min,
const double B1_rough, const double B2_rough,
const double B_max_land, const double B_max_ocean, const double B_max_lake,
const double gamma_c, const double Re_visc_min,
const double Pr_m, const double nu_air, const double g,
const int maxiters_charnock,
const int grid_size);
template<typename T>
void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const T Re_rough_min,
const T B1_rough, const T B2_rough,
const T B_max_land, const T B_max_ocean, const T B_max_lake,
const T gamma_c, const T Re_visc_min,
const T Pr_m, const T nu_air, const T g,
const int maxiters_charnock,
const int grid_size)
{
const int BlockCount = int(ceil(float(grid_size) / 512.0));
dim3 cuBlock = dim3(512, 1, 1);
dim3 cuGrid = dim3(BlockCount, 1, 1);
kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
U_, dT_, Tsemi_, dQ_, h_, in_z0_m_,
kappa, Pr_t_0_inv,
alpha_m, alpha_h,
a_m, a_h,
b_m, b_h,
c_h,
Re_rough_min,
B1_rough, B2_rough,
B_max_land, B_max_ocean, B_max_lake,
gamma_c, Re_visc_min,
Pr_m, nu_air, g,
maxiters_charnock,
grid_size);
gpuErrchk( cudaPeekAtLastError() );
}
template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
const float kappa, const float Pr_t_0_inv,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h,
const float Re_rough_min,
const float B1_rough, const float B2_rough,
const float B_max_land, const float B_max_ocean, const float B_max_lake,
const float gamma_c, const float Re_visc_min,
const float Pr_m, const float nu_air, const float g,
const int maxiters_charnock,
const int grid_size);
template void compute_flux_sheba_gpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
const double kappa, const double Pr_t_0_inv,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h,
const double Re_rough_min,
const double B1_rough, const double B2_rough,
const double B_max_land, const double B_max_ocean, const double B_max_lake,
const double gamma_c, const double Re_visc_min,
const double Pr_m, const double nu_air, const double g,
const int maxiters_charnock,
const int grid_size);
\ No newline at end of file
......@@ -46,7 +46,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
if (surface_type == surface_param.surface_ocean)
{
get_charnock_roughness(z0_m, u_dyn0, h, U, surface_param, numerics.maxiters_charnock);
get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == surface_param.surface_land)
......
......@@ -47,7 +47,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
if (surface_type == surface_param.surface_ocean)
{
get_charnock_roughness(z0_m, u_dyn0, h, U, surface_param, numerics.maxiters_charnock);
get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == surface_param.surface_land)
......
......@@ -49,7 +49,7 @@ void FluxEsm<T, memIn, memOut, MemType::CPU>::compute_flux()
if (surface_type == surface_param.surface_ocean)
{
get_charnock_roughness(z0_m, u_dyn0, h, U, surface_param, numerics.maxiters_charnock);
get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == surface_param.surface_land)
......
......@@ -50,7 +50,7 @@ void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux()
if (surface_type == surface_param.surface_ocean)
{
get_charnock_roughness(z0_m, u_dyn0, h, U, surface_param, numerics.maxiters_charnock);
get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == surface_param.surface_land)
......
......@@ -41,12 +41,12 @@ module sfx_data
!> &details using arrays as input
type, public :: meteoDataVecType
#if defined(INCLUDE_CXX)
real, pointer :: h(:) !< constant flux layer height [m]
real, pointer :: U(:) !< abs(wind speed) at 'h' [m/s]
real, pointer :: dT(:) !< difference between potential temperature at 'h' and at surface [K]
real, pointer :: Tsemi(:) !< semi-sum of potential temperature at 'h' and at surface [K]
real, pointer :: dQ(:) !< difference between humidity at 'h' and at surface [g/g]
real, pointer :: z0_m(:) !< surface aerodynamic roughness (should be < 0 for water bodies surface)
real, pointer, contiguous :: h(:) !< constant flux layer height [m]
real, pointer, contiguous :: U(:) !< abs(wind speed) at 'h' [m/s]
real, pointer, contiguous :: dT(:) !< difference between potential temperature at 'h' and at surface [K]
real, pointer, contiguous :: Tsemi(:) !< semi-sum of potential temperature at 'h' and at surface [K]
real, pointer, contiguous :: dQ(:) !< difference between humidity at 'h' and at surface [g/g]
real, pointer, contiguous :: z0_m(:) !< surface aerodynamic roughness (should be < 0 for water bodies surface)
#else
real, allocatable :: h(:) !< constant flux layer height [m]
real, allocatable :: U(:) !< abs(wind speed) at 'h' [m/s]
......@@ -91,17 +91,17 @@ module sfx_data
!> &details using arrays as output
type, public :: sfxDataVecType
#if defined(INCLUDE_CXX)
real, pointer :: zeta(:) !< = z/L [n/d]
real, pointer :: Rib(:) !< bulk Richardson number [n/d]
real, pointer :: Re(:) !< Reynolds number [n/d]
real, pointer :: B(:) !< = log(z0_m / z0_h) [n/d]
real, pointer :: z0_m(:) !< aerodynamic roughness [m]
real, pointer :: z0_t(:) !< thermal roughness [m]
real, pointer :: Rib_conv_lim(:) !< Ri-bulk convection critical value [n/d]
real, pointer :: Cm(:) !< transfer coefficient for momentum [n/d]
real, pointer :: Ct(:) !< transfer coefficient for heat [n/d]
real, pointer :: Km(:) !< eddy viscosity coeff. at h [m^2/s]
real, pointer :: Pr_t_inv(:) !< inverse turbulent Prandtl number at h [n/d]
real, pointer, contiguous :: zeta(:) !< = z/L [n/d]
real, pointer, contiguous :: Rib(:) !< bulk Richardson number [n/d]
real, pointer, contiguous :: Re(:) !< Reynolds number [n/d]
real, pointer, contiguous :: B(:) !< = log(z0_m / z0_h) [n/d]
real, pointer, contiguous :: z0_m(:) !< aerodynamic roughness [m]
real, pointer, contiguous :: z0_t(:) !< thermal roughness [m]
real, pointer, contiguous :: Rib_conv_lim(:) !< Ri-bulk convection critical value [n/d]
real, pointer, contiguous :: Cm(:) !< transfer coefficient for momentum [n/d]
real, pointer, contiguous :: Ct(:) !< transfer coefficient for heat [n/d]
real, pointer, contiguous :: Km(:) !< eddy viscosity coeff. at h [m^2/s]
real, pointer, contiguous :: Pr_t_inv(:) !< inverse turbulent Prandtl number at h [n/d]
#else
real, allocatable :: zeta(:) !< = z/L [n/d]
real, allocatable :: Rib(:) !< bulk Richardson number [n/d]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment