Skip to content
Snippets Groups Projects
Commit ef278ad5 authored by 数学の武士's avatar 数学の武士
Browse files

Add c++/CUDA sheba non iterative impl

parent 614d73ed
No related branches found
No related tags found
No related merge requests found
......@@ -125,6 +125,32 @@ extern "C" {
int maxiters_charnock;
};
struct sfx_sheba_noit_param_C
{
float kappa;
float Pr_t_0_inv;
float Pr_t_inf_inv;
float alpha_m;
float alpha_h;
float alpha_h_fix;
float Rib_max;
float gamma;
float zeta_a;
float a_m;
float b_m;
float a_h;
float b_h;
float c_h;
};
struct sfx_sheba_noit_numericsType_C
{
int maxiters_charnock;
int maxiters_convection;
};
#ifdef __cplusplus
}
#endif
\ No newline at end of file
......@@ -2,10 +2,72 @@
#include "sfx-data.h"
#include "sfx-math.cuh"
template<typename T, class sfx_param>
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T& psi_m, T& psi_h,
const T zeta_m, const T zeta_h,
const model& param)
{
T x_m, x_h, q_m, q_h;
q_m = powf((1.0 - param.b_m) / param.b_m, 1.0 / 3.0);
x_m = powf(1.0 + zeta_m, 1.0 / 3.0);
psi_m = -3.0 * (param.a_m / param.b_m) * (x_m - 1.0) + 0.5 * (param.a_m / param.b_m) * q_m *
(2.0 * logf((x_m + q_m) / (1.0 + q_m)) -
logf((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) +
2.0 * sqrtf(3.0) * (atanf((2.0 * x_m - q_m) / (sqrtf(3.0) * q_m)) -
atanf((2.0 - q_m) / (sqrtf(3.0) * q_m))));
q_h = sqrtf(param.c_h * param.c_h - 4.0);
x_h = zeta_h;
psi_h = -0.5 * param.b_h * logf(1.0 + param.c_h * x_h + x_h * x_h) +
((-param.a_h / q_h) + ((param.b_h * param.c_h) / (2.0 * q_h))) *
(logf((2.0 * x_h + param.c_h - q_h) / (2.0 * x_h + param.c_h + q_h)) -
logf((param.c_h - q_h) / (param.c_h + q_h)));
}
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_zeta(T& zeta,
const T Rib, const T h, const T z0_m, const T z0_t,
const model& param)
{
T Ribl, C1, A1, A2, lne, lnet;
T psi_m, psi_h, psi0_m, psi0_h;
Ribl = (Rib * param.Pr_t_0_inv) * (1 - z0_t / h) / ((1 - z0_m / h)*(1 - z0_m / h));
get_psi_stable(psi_m, psi_h, param.zeta_a, param.zeta_a, param);
get_psi_stable(psi0_m, psi0_h, param.zeta_a * z0_m / h, param.zeta_a * z0_t / h, param);
lne = logf(h/z0_m);
lnet = logf(h/z0_t);
C1 = (lne*lne)/lnet;
A1 = powf(lne - psi_m + psi0_m, 2 * (param.gamma-1)) / ( powf(param.zeta_a, param.gamma-1)
* powf(lnet - (psi_h - psi0_h) * param.Pr_t_0_inv, param.gamma-1));
A2 = powf(lne - psi_m + psi0_m, 2) / (lnet - (psi_h - psi0_h) * param.Pr_t_0_inv) - C1;
zeta = C1 * Ribl + A1 * A2 * powf(Ribl, param.gamma);
}
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER T f_m_conv(const T zeta,
const model& param)
{
return powf(1.0 - param.alpha_m * zeta, 0.25);
}
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER T f_h_conv(const T zeta,
const model& param)
{
return sqrtf(1.0 - param.alpha_h * zeta);
}
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim,
const T h0_m, const T h0_t, const T B,
const sfx_param& param)
const model& param)
{
T psi_m, psi_h, f_m, f_h, c;
......@@ -13,8 +75,8 @@ FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim,
zeta_lim = (2.0 * param.alpha_h - c * param.alpha_m -
sqrtf( (c * param.alpha_m)*(c * param.alpha_m) + 4.0 * c * param.alpha_h * (param.alpha_h - param.alpha_m))) / (2.0 * param.alpha_h*param.alpha_h);
f_m_lim = powf(1.0 - param.alpha_m * zeta_lim, 0.25);
f_h_lim = sqrtf(1.0 - param.alpha_h * zeta_lim);
f_m_lim = f_m_conv(zeta_lim, param);
f_h_lim = f_h_conv(zeta_lim, param);
f_m = zeta_lim / h0_m;
f_h = zeta_lim / h0_t;
......@@ -29,10 +91,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim,
Rib_lim = zeta_lim * psi_h / (psi_m * psi_m);
}
template<typename T, class sfx_param>
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
const T Rib, const T h0_m, const T h0_t, const T B,
const sfx_param& param)
const model& param)
{
T Rib_coeff, psi0_m, psi0_h, phi, c;
......@@ -49,11 +111,11 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
psi_h = (psi0_m + B) / param.Pr_t_0_inv + phi;
}
template<typename T, class sfx_param>
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &zeta,
const T Rib, const T h0_m, const T h0_t, const T B,
const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim,
const sfx_param& param,
const model& param,
const int maxiters_convection)
{
T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f;
......@@ -89,10 +151,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &ze
}
}
template<typename T, class sfx_param>
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,
const T h0_m, const T h0_t, const T B,
const sfx_param& param)
const model& param)
{
zeta = 0.0;
psi_m = logf(h0_m);
......@@ -101,10 +163,21 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,
psi_h = psi_m / param.Pr_t_0_inv;
}
template<typename T, class sfx_param>
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h,
const T h0_m, const T h0_t, const T B,
const model& param)
{
psi_m = logf(h0_m);
psi_h = logf(h0_t) / param.Pr_t_0_inv;
// *: this looks redundant z0_t = z0_m in case |B| ~ 0
if (fabsf(B) < 1.0e-10) psi_h = psi_m / param.Pr_t_0_inv;
}
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h, T &zeta,
const T Rib, const T h0_m, const T h0_t, const T B,
const sfx_param& param,
const model& param,
const int maxiters_convection)
{
T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h;
......@@ -143,10 +216,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h,
}
}
template<typename T, class sfx_param>
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_psi(T &psi_m, T &psi_h,
const T zeta,
const sfx_param& param)
const model& param)
{
T x_m, x_h;
T q_m, q_h;
......@@ -173,10 +246,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi(T &psi_m, T &psi_h,
}
}
template<typename T, class sfx_param>
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_psi_mh(T &psi_m, T &psi_h,
const T zeta_m, const T zeta_h,
const sfx_param& param)
const model& param)
{
T x_m, x_h;
T q_m, q_h;
......@@ -209,12 +282,12 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_mh(T &psi_m, T &psi_h,
}
template<typename T, class sfx_param>
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T& zeta,
const T U, const T Tsemi, const T dT, const T dQ,
const T z, const T z0_m, const T z0_t,
const T beta,
const sfx_param& param,
const model& param,
const int maxiters)
{
const T gamma = T(0.61);
......@@ -254,10 +327,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn
}
}
template<typename T, class sfx_param>
template<typename T, class model>
FUCNTION_DECLARATION_SPECIFIER void get_phi(T &phi_m, T &phi_h,
const T zeta,
const sfx_param& param)
const model& param)
{
if (zeta >= 0.0)
{
......
......@@ -20,6 +20,14 @@ extern "C" {
const struct sfx_sheba_numericsType_C* numerics,
const struct sfx_phys_constants* constants,
const int grid_size);
void sheba_noit_compute_flux(struct sfxDataVecTypeC* sfx,
struct meteoDataVecTypeC* meteo,
const struct sfx_sheba_noit_param_C* model_param,
const struct sfx_surface_param* surface_param,
const struct sfx_sheba_noit_numericsType_C* numerics,
const struct sfx_phys_constants* constants,
const int grid_size);
#ifdef __cplusplus
}
#endif
\ No newline at end of file
......@@ -4,85 +4,55 @@
#include "sfx-data.h"
template<typename T, MemType memIn, MemType memOut, MemType RunMem >
class FluxShebaBase : public ModelBase<T, memIn, memOut, RunMem>
{
public:
using ModelBase<T, memIn, memOut, RunMem>::res_sfx;
using ModelBase<T, memIn, memOut, RunMem>::sfx;
using ModelBase<T, memIn, memOut, RunMem>::meteo;
using ModelBase<T, memIn, memOut, RunMem>::grid_size;
using ModelBase<T, memIn, memOut, RunMem>::ifAllocated;
using ModelBase<T, memIn, memOut, RunMem>::allocated_size;
sfx_surface_param surface;
sfx_phys_constants phys;
sfx_sheba_param_C model;
sfx_sheba_numericsType_C numerics;
FluxShebaBase(sfxDataVecTypeC* sfx,
meteoDataVecTypeC* meteo,
const sfx_sheba_param_C model,
const sfx_surface_param surface,
const sfx_sheba_numericsType_C numerics,
const sfx_phys_constants phys,
const int grid_size);
~FluxShebaBase();
};
template<typename T, MemType memIn, MemType memOut, MemType RunMem >
class FluxSheba : public FluxShebaBase<T, memIn, memOut, RunMem>
class FluxSheba : public ModelBase<T, memIn, memOut, RunMem>
{};
template<typename T, MemType memIn, MemType memOut >
class FluxSheba<T, memIn, memOut, MemType::CPU> : public FluxShebaBase<T, memIn, memOut, MemType::CPU>
class FluxSheba<T, memIn, memOut, MemType::CPU> : public ModelBase<T, memIn, memOut, MemType::CPU>
{
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::res_sfx;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::sfx;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::meteo;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::surface;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::phys;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::grid_size;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::ifAllocated;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::allocated_size;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::model;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::numerics;
using ModelBase<T, memIn, memOut, MemType::CPU>::res_sfx;
using ModelBase<T, memIn, memOut, MemType::CPU>::sfx;
using ModelBase<T, memIn, memOut, MemType::CPU>::meteo;
using ModelBase<T, memIn, memOut, MemType::CPU>::grid_size;
using ModelBase<T, memIn, memOut, MemType::CPU>::ifAllocated;
using ModelBase<T, memIn, memOut, MemType::CPU>::allocated_size;
public:
FluxSheba(sfxDataVecTypeC* sfx,
meteoDataVecTypeC* meteo,
const sfx_sheba_param_C model,
const int grid_size) : ModelBase<T, memIn, memOut, MemType::CPU>(sfx, meteo, grid_size) {}
~FluxSheba() = default;
void compute_flux(const sfx_sheba_param_C model,
const sfx_surface_param surface,
const sfx_sheba_numericsType_C numerics,
const sfx_phys_constants phys,
const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::CPU>(sfx, meteo, model,
surface, numerics, phys, grid_size) {}
~FluxSheba() = default;
void compute_flux();
const sfx_phys_constants phys);
void noit_compute_flux(const sfx_sheba_noit_param_C model,
const sfx_surface_param surface,
const sfx_sheba_noit_numericsType_C numerics,
const sfx_phys_constants phys);
};
#ifdef INCLUDE_CUDA
template<typename T, MemType memIn, MemType memOut >
class FluxSheba<T, memIn, memOut, MemType::GPU> : public FluxShebaBase<T, memIn, memOut, MemType::GPU>
class FluxSheba<T, memIn, memOut, MemType::GPU> : public ModelBase<T, memIn, memOut, MemType::GPU>
{
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::res_sfx;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::sfx;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::meteo;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::surface;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::phys;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::grid_size;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::ifAllocated;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::allocated_size;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::model;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::numerics;
using ModelBase<T, memIn, memOut, MemType::GPU>::res_sfx;
using ModelBase<T, memIn, memOut, MemType::GPU>::sfx;
using ModelBase<T, memIn, memOut, MemType::GPU>::meteo;
using ModelBase<T, memIn, memOut, MemType::GPU>::grid_size;
using ModelBase<T, memIn, memOut, MemType::GPU>::ifAllocated;
using ModelBase<T, memIn, memOut, MemType::GPU>::allocated_size;
public:
FluxSheba(sfxDataVecTypeC* sfx,
meteoDataVecTypeC* meteo,
const sfx_sheba_param_C model,
const int grid_size) : ModelBase<T, memIn, memOut, MemType::GPU>(sfx, meteo, grid_size) {}
~FluxSheba() = default;
void compute_flux(const sfx_sheba_param_C model,
const sfx_surface_param surface,
const sfx_sheba_numericsType_C numerics,
const sfx_phys_constants phys,
const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::GPU>(sfx, meteo, model,
surface, numerics, phys, grid_size) {}
~FluxSheba() = default;
void compute_flux();
const sfx_phys_constants phys);
void noit_compute_flux(const sfx_sheba_noit_param_C model,
const sfx_surface_param surface,
const sfx_sheba_noit_numericsType_C numerics,
const sfx_phys_constants phys);
};
#endif
\ No newline at end of file
......@@ -24,3 +24,14 @@ void c_sheba_compute_flux (struct sfxDataVecTypeC* sfx,
{
sheba_compute_flux(sfx, meteo, model_param, surface_param, numerics, constants, *grid_size);
}
void c_sheba_noit_compute_flux (struct sfxDataVecTypeC* sfx,
struct meteoDataVecTypeC* meteo,
const struct sfx_sheba_noit_param_C* model_param,
const struct sfx_surface_param* surface_param,
const struct sfx_sheba_noit_numericsType_C* numerics,
const struct sfx_phys_constants* constants,
const int *grid_size)
{
sheba_noit_compute_flux(sfx, meteo, model_param, surface_param, numerics, constants, *grid_size);
}
\ No newline at end of file
......@@ -16,6 +16,15 @@ namespace sfx_kernel
const sfx_sheba_numericsType_C numerics,
const sfx_phys_constants phys,
const int grid_size);
template<typename T>
__global__ void noit_compute_flux(sfxDataVecTypeC sfx,
meteoDataVecTypeC meteo,
const sfx_sheba_noit_param_C model,
const sfx_surface_param surface,
const sfx_sheba_noit_numericsType_C numerics,
const sfx_phys_constants phys,
const int grid_size);
}
template<typename T>
......@@ -100,8 +109,152 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
}
}
template<typename T>
__global__ void sfx_kernel::noit_compute_flux(sfxDataVecTypeC sfx,
meteoDataVecTypeC meteo,
const sfx_sheba_noit_param_C model,
const sfx_surface_param surface,
const sfx_sheba_noit_numericsType_C numerics,
const sfx_phys_constants phys,
const int grid_size)
{
const int index = blockIdx.x * blockDim.x + threadIdx.x;
T h, U, dT, Tsemi, dQ, z0_m;
T z0_t, B, h0_m, h0_t, u_dyn0, Re,
zeta, Rib, zeta_conv_lim, Rib_conv_lim,
f_m_conv_lim, f_h_conv_lim,
psi_m, psi_h,
psi0_m, psi0_h,
Udyn, Tdyn, Qdyn,
phi_m, phi_h,
Km, Pr_t_inv, Cm, Ct,
fval;
int surface_type;
if(index < grid_size)
{
U = meteo.U[index];
Tsemi = meteo.Tsemi[index];
dT = meteo.dT[index];
dQ = meteo.dQ[index];
h = meteo.h[index];
z0_m = meteo.z0_m[index];
surface_type = z0_m < 0.0 ? surface.surface_ocean : surface.surface_land;
if (surface_type == surface.surface_ocean)
{
get_charnock_roughness(z0_m, u_dyn0, U, h, surface, numerics.maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == surface.surface_land)
{
h0_m = h / z0_m;
u_dyn0 = U * model.kappa / logf(h0_m);
}
Re = u_dyn0 * z0_m / phys.nu_air;
get_thermal_roughness(z0_t, B, z0_m, Re, surface, surface_type);
// --- define relative height [thermal]
h0_t = h / z0_t;
// --- define Ri-bulk
Rib = (phys.g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
get_convection_lim(zeta_conv_lim, Rib_conv_lim, f_m_conv_lim, f_h_conv_lim, h0_m, h0_t, B, model);
// --- get the fluxes
// ----------------------------------------------------------------------------
if (Rib > 0.0)
{
// --- stable stratification block
// --- restrict bulk Ri value
// *: note that this value is written to output
// Rib = min(Rib, Rib_max)
get_zeta(zeta, Rib, h, z0_m, z0_t, model);
get_psi_stable(psi_m, psi_h, zeta, zeta, model);
get_psi_stable(psi0_m, psi0_h, zeta * z0_m / h, zeta * z0_t / h, model);
phi_m = 1.0 + (model.a_m * zeta * powf(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + model.b_m * zeta);
phi_h = 1.0 + (model.a_h * zeta + model.b_h * zeta * zeta) / (1.0 + model.c_h * zeta + zeta * zeta);
Udyn = model.kappa * U / (logf(h / z0_m) - (psi_m - psi0_m));
Tdyn = model.kappa * dT * model.Pr_t_0_inv / (logf(h / z0_t) - (psi_h - psi0_h));
}
else if (Rib < Rib_conv_lim)
{
// --- strong instability block
get_psi_convection(psi_m, psi_h, zeta, Rib,
zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, h0_m, h0_t, B, model, numerics.maxiters_convection);
fval = powf(zeta_conv_lim / zeta, 1.0/3.0);
phi_m = fval / f_m_conv_lim;
phi_h = fval / (model.Pr_t_0_inv * f_h_conv_lim);
}
else if (Rib > -0.001)
{
// --- nearly neutral [-0.001, 0] block
get_psi_neutral(psi_m, psi_h, h0_m, h0_t, B, model);
zeta = 0.0;
phi_m = 1.0;
phi_h = 1.0 / model.Pr_t_0_inv;
}
else
{
// --- weak & semistrong instability block
get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model, numerics.maxiters_convection);
phi_m = powf(1.0 - model.alpha_m * zeta, -0.25);
phi_h = 1.0 / (model.Pr_t_0_inv * sqrtf(1.0 - model.alpha_h_fix * zeta));
}
// ----------------------------------------------------------------------------
// --- define transfer coeff. (momentum) & (heat)
if(Rib > 0)
{
Cm = 0.0;
if (U > 0.0)
Cm = Udyn / U;
Ct = 0.0;
if (fabs(dT) > 0.0)
Ct = Tdyn / dT;
}
else
{
Cm = model.kappa / psi_m;
Ct = model.kappa / psi_h;
}
// --- define eddy viscosity & inverse Prandtl number
Km = model.kappa * Cm * U * h / phi_m;
Pr_t_inv = phi_m / phi_h;
sfx.zeta[index] = zeta;
sfx.Rib[index] = Rib;
sfx.Re[index] = Re;
sfx.B[index] = B;
sfx.z0_m[index] = z0_m;
sfx.z0_t[index] = z0_t;
sfx.Rib_conv_lim[index] = Rib_conv_lim;
sfx.Cm[index] = Cm;
sfx.Ct[index] = Ct;
sfx.Km[index] = Km;
sfx.Pr_t_inv[index] = Pr_t_inv;
}
}
template<typename T, MemType memIn, MemType memOut >
void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux()
void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux(const sfx_sheba_param_C model,
const sfx_surface_param surface,
const sfx_sheba_numericsType_C numerics,
const sfx_phys_constants phys)
{
const int BlockCount = int(ceil(float(grid_size) / 1024.0));
dim3 cuBlock = dim3(1024, 1, 1);
......@@ -127,13 +280,35 @@ void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux()
}
}
template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>;
template<typename T, MemType memIn, MemType memOut >
void FluxSheba<T, memIn, memOut, MemType::GPU>::noit_compute_flux(const sfx_sheba_noit_param_C model,
const sfx_surface_param surface,
const sfx_sheba_noit_numericsType_C numerics,
const sfx_phys_constants phys)
{
const int BlockCount = int(ceil(float(grid_size) / 1024.0));
dim3 cuBlock = dim3(1024, 1, 1);
dim3 cuGrid = dim3(BlockCount, 1, 1);
sfx_kernel::noit_compute_flux<T><<<cuGrid, cuBlock>>>(sfx, meteo, model,
surface, numerics, phys, grid_size);
if(MemType::GPU != memOut)
{
const size_t new_size = grid_size * sizeof(T);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size);
}
}
template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>;
template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>;
......
......@@ -32,10 +32,27 @@ void sheba_compute_flux (sfxDataVecTypeC* sfx,
const int grid_size)
{
#ifdef INCLUDE_CUDA
static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU> F(sfx, meteo, *model_param, *surface_param, *numerics, *constants, grid_size);
F.compute_flux();
static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU> F(sfx, meteo, grid_size);
F.compute_flux(*model_param, *surface_param, *numerics, *constants);
#else
static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU> F(sfx, meteo, *model_param, *surface_param, *numerics, *constants, grid_size);
F.compute_flux();
static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU> F(sfx, meteo, grid_size);
F.compute_flux(*model_param, *surface_param, *numerics, *constants);
#endif
}
void sheba_noit_compute_flux (sfxDataVecTypeC* sfx,
meteoDataVecTypeC* meteo,
const sfx_sheba_noit_param_C* model_param,
const sfx_surface_param* surface_param,
const sfx_sheba_noit_numericsType_C* numerics,
const sfx_phys_constants* constants,
const int grid_size)
{
#ifdef INCLUDE_CUDA
static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU> F(sfx, meteo, grid_size);
F.noit_compute_flux(*model_param, *surface_param, *numerics, *constants);
#else
static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU> F(sfx, meteo, grid_size);
F.noit_compute_flux(*model_param, *surface_param, *numerics, *constants);
#endif
}
\ No newline at end of file
......@@ -10,26 +10,11 @@
#include "sfx-surface.cuh"
#include "sfx-model-compute-subfunc.cuh"
template<typename T, MemType memIn, MemType memOut, MemType RunMem >
FluxShebaBase<T, memIn, memOut, RunMem>::FluxShebaBase(sfxDataVecTypeC* sfx_in,
meteoDataVecTypeC* meteo_in,
const sfx_sheba_param_C model_param_in,
const sfx_surface_param surface_param_in,
const sfx_sheba_numericsType_C numerics_in,
const sfx_phys_constants phys_constants_in,
const int grid_size_in) : ModelBase<T, memIn, memOut, RunMem>(sfx_in, meteo_in, grid_size_in)
{
surface = surface_param_in;
phys = phys_constants_in;
model = model_param_in;
numerics = numerics_in;
}
template<typename T, MemType memIn, MemType memOut, MemType RunMem >
FluxShebaBase<T, memIn, memOut, RunMem>::~FluxShebaBase() {}
template<typename T, MemType memIn, MemType memOut >
void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux()
void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux(const sfx_sheba_param_C model,
const sfx_surface_param surface,
const sfx_sheba_numericsType_C numerics,
const sfx_phys_constants phys)
{
T h, U, dT, Tsemi, dQ, z0_m;
T z0_t, B, h0_m, h0_t, u_dyn0, Re,
......@@ -101,6 +86,158 @@ void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux()
sfx.Km[step] = Km;
sfx.Pr_t_inv[step] = Pr_t_inv;
}
if(MemType::CPU != memOut)
{
const size_t new_size = grid_size * sizeof(T);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size);
memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size);
}
}
template<typename T, MemType memIn, MemType memOut >
void FluxSheba<T, memIn, memOut, MemType::CPU>::noit_compute_flux(const sfx_sheba_noit_param_C model,
const sfx_surface_param surface,
const sfx_sheba_noit_numericsType_C numerics,
const sfx_phys_constants phys)
{
T h, U, dT, Tsemi, dQ, z0_m;
T z0_t, B, h0_m, h0_t, u_dyn0, Re,
zeta, Rib, zeta_conv_lim, Rib_conv_lim,
f_m_conv_lim, f_h_conv_lim,
psi_m, psi_h,
psi0_m, psi0_h,
Udyn, Tdyn, Qdyn,
phi_m, phi_h,
Km, Pr_t_inv, Cm, Ct,
fval;
int surface_type;
for (int step = 0; step < grid_size; step++)
{
U = meteo.U[step];
Tsemi = meteo.Tsemi[step];
dT = meteo.dT[step];
dQ = meteo.dQ[step];
h = meteo.h[step];
z0_m = meteo.z0_m[step];
surface_type = z0_m < 0.0 ? surface.surface_ocean : surface.surface_land;
if (surface_type == surface.surface_ocean)
{
get_charnock_roughness(z0_m, u_dyn0, U, h, surface, numerics.maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == surface.surface_land)
{
h0_m = h / z0_m;
u_dyn0 = U * model.kappa / logf(h0_m);
}
Re = u_dyn0 * z0_m / phys.nu_air;
get_thermal_roughness(z0_t, B, z0_m, Re, surface, surface_type);
// --- define relative height [thermal]
h0_t = h / z0_t;
// --- define Ri-bulk
Rib = (phys.g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
get_convection_lim(zeta_conv_lim, Rib_conv_lim, f_m_conv_lim, f_h_conv_lim, h0_m, h0_t, B, model);
// --- get the fluxes
// ----------------------------------------------------------------------------
if (Rib > 0.0)
{
// --- stable stratification block
// --- restrict bulk Ri value
// *: note that this value is written to output
// Rib = min(Rib, Rib_max)
get_zeta(zeta, Rib, h, z0_m, z0_t, model);
get_psi_stable(psi_m, psi_h, zeta, zeta, model);
get_psi_stable(psi0_m, psi0_h, zeta * z0_m / h, zeta * z0_t / h, model);
phi_m = 1.0 + (model.a_m * zeta * powf(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + model.b_m * zeta);
phi_h = 1.0 + (model.a_h * zeta + model.b_h * zeta * zeta) / (1.0 + model.c_h * zeta + zeta * zeta);
Udyn = model.kappa * U / (logf(h / z0_m) - (psi_m - psi0_m));
Tdyn = model.kappa * dT * model.Pr_t_0_inv / (logf(h / z0_t) - (psi_h - psi0_h));
}
else if (Rib < Rib_conv_lim)
{
// --- strong instability block
get_psi_convection(psi_m, psi_h, zeta, Rib,
zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, h0_m, h0_t, B, model, numerics.maxiters_convection);
fval = powf(zeta_conv_lim / zeta, 1.0/3.0);
phi_m = fval / f_m_conv_lim;
phi_h = fval / (model.Pr_t_0_inv * f_h_conv_lim);
}
else if (Rib > -0.001)
{
// --- nearly neutral [-0.001, 0] block
get_psi_neutral(psi_m, psi_h, h0_m, h0_t, B, model);
zeta = 0.0;
phi_m = 1.0;
phi_h = 1.0 / model.Pr_t_0_inv;
}
else
{
// --- weak & semistrong instability block
get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model, numerics.maxiters_convection);
phi_m = powf(1.0 - model.alpha_m * zeta, -0.25);
phi_h = 1.0 / (model.Pr_t_0_inv * sqrtf(1.0 - model.alpha_h_fix * zeta));
}
// ----------------------------------------------------------------------------
// --- define transfer coeff. (momentum) & (heat)
if(Rib > 0)
{
Cm = 0.0;
if (U > 0.0)
Cm = Udyn / U;
Ct = 0.0;
if (fabs(dT) > 0.0)
Ct = Tdyn / dT;
}
else
{
Cm = model.kappa / psi_m;
Ct = model.kappa / psi_h;
}
// --- define eddy viscosity & inverse Prandtl number
Km = model.kappa * Cm * U * h / phi_m;
Pr_t_inv = phi_m / phi_h;
sfx.zeta[step] = zeta;
sfx.Rib[step] = Rib;
sfx.Re[step] = Re;
sfx.B[step] = B;
sfx.z0_m[step] = z0_m;
sfx.z0_t[step] = z0_t;
sfx.Rib_conv_lim[step] = Rib_conv_lim;
sfx.Cm[step] = Cm;
sfx.Ct[step] = Ct;
sfx.Km[step] = Km;
sfx.Pr_t_inv[step] = Pr_t_inv;
}
if(MemType::CPU != memOut)
{
......@@ -120,16 +257,8 @@ void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux()
}
template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::CPU>;
#ifdef INCLUDE_CUDA
template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>;
template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>;
template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>;
......
......@@ -41,8 +41,7 @@ module sfx_sheba
#if defined(INCLUDE_CXX)
type, BIND(C), public :: sfx_sheba_param_C
real(C_FLOAT) :: kappa
real(C_FLOAT) ::
Pr_t_0_inv
real(C_FLOAT) :: Pr_t_0_inv
real(C_FLOAT) :: alpha_m
real(C_FLOAT) :: alpha_h
......
......@@ -58,6 +58,7 @@ module sfx_sheba_noniterative
type, BIND(C), public :: sfx_sheba_noit_numericsType_C
integer(C_INT) :: maxiters_charnock
integer(C_INT) :: maxiters_convection
end type
INTERFACE
......@@ -71,7 +72,7 @@ module sfx_sheba_noniterative
type(C_PTR), value :: sfx
type(C_PTR), value :: meteo
type(sfx_sheba_noit_param_C) :: model_param
type(sfx_surface_noit_param) :: surface_param
type(sfx_surface_param) :: surface_param
type(sfx_sheba_noit_numericsType_C) :: numerics
type(sfx_phys_constants) :: constants
END SUBROUTINE c_sheba_noit_compute_flux
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment