Skip to content
Snippets Groups Projects
Commit 113fb8f3 authored by Evgeny Mortikov's avatar Evgeny Mortikov
Browse files
parents a4dc4109 3d5285ba
No related branches found
No related tags found
No related merge requests found
Showing
with 576 additions and 933 deletions
......@@ -32,8 +32,6 @@ set(CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/modules)
if(USE_CONFIG_PARSER)
add_subdirectory(config-parser/)
add_definitions(-DUSE_CONFIG_PARSER)
# list(APPEND RUN_MACRO -DUSE_CONFIG_PARSER)
set(USE_CXX ON)
endif(USE_CONFIG_PARSER)
if(INCLUDE_CUDA)
......@@ -55,7 +53,6 @@ if(USE_CXX)
enable_language(CXX)
set(CMAKE_CXX_STANDARD 11)
add_definitions(-DINCLUDE_CXX)
# list(APPEND RUN_MACRO -DINCLUDE_CXX)
endif(USE_CXX)
set(SOURCES_F
......@@ -90,34 +87,35 @@ if(USE_CXX)
set(HEADERS_C
includeC/sfx_data.h
)
list(APPEND HEADERS_DIRS includeC/)
set(SOURCES_CXX
srcCXX/model_base.cpp
srcCXX/sfx_esm.cpp
srcCXX/sfx_sheba.cpp
srcCXX/sfx_compute_sheba.cpp
srcCXX/sfx_call_class_func.cpp
)
set(HEADERS_CXX
includeCU/sfx_surface.cuh
includeCU/sfx_math.cuh
includeCU/sfx_esm_compute_subfunc.cuh
includeCU/sfx_model_compute_subfunc.cuh
includeCXX/model_base.h
includeCXX/sfx_esm.h
includeCXX/sfx_sheba.h
includeCXX/sfx_compute_sheba.h
includeCXX/sfx_call_class_func.h
)
list(APPEND HEADERS_DIRS includeCU/)
list(APPEND HEADERS_DIRS includeCXX/)
endif(USE_CXX)
if(INCLUDE_CUDA)
set(SOURCES_CU
srcCU/sfx_esm.cu
srcCU/sfx_compute_sheba.cu
srcCU/sfx_sheba.cu
)
set(HEADERS_CU
includeCU/sfx_compute_sheba.cuh
)
endif(INCLUDE_CUDA)
......@@ -142,23 +140,29 @@ endif(USE_CXX OR INCLUDE_CUDA)
set(SOURCES ${MEMPROC_HEADERS_CU} ${MEMPROC_SOURCES_CU} ${MEMPROC_HEADERS_CXX} ${MEMPROC_SOURCES_CXX} ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_C} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${HEADERS_F} ${SOURCES_F})
set(CMAKE_Fortran_FLAGS " -g -fbacktrace -ffpe-trap=zero,overflow,underflow -cpp ")
if(USE_CXX OR INCLUDE_CUDA)
set(CMAKE_CXX_FLAGS " -g -Wunused-variable ")
set(CMAKE_C_FLAGS " -g ")
set(CMAKE_CUDA_FLAGS " -g ")
endif(USE_CXX OR INCLUDE_CUDA)
set(CMAKE_Fortran_FLAGS " -cpp ")
add_executable(sfx ${SOURCES})
set_property(TARGET sfx PROPERTY LINKER_LANGUAGE Fortran)
target_include_directories(sfx PUBLIC ${CMAKE_BINARY_DIR}/modules/)
target_include_directories(sfx PUBLIC ${HEADERS_DIRS})
if(USE_CONFIG_PARSER)
target_link_libraries(sfx config_parser_F config_parser_CXX)
endif(USE_CONFIG_PARSER)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
target_compile_options(sfx
PUBLIC $<$<COMPILE_LANGUAGE:C>: -g >)
target_compile_options(sfx
PUBLIC $<$<COMPILE_LANGUAGE:CXX>: -g >)
target_compile_options(sfx
PUBLIC $<$<COMPILE_LANGUAGE:CUDA>: -g >)
target_compile_options(sfx
PUBLIC $<$<COMPILE_LANGUAGE:Fortran>: -g -fbacktrace -ffpe-trap=zero,overflow,underflow >)
endif()
# copy data & configs on post build
add_custom_command(
TARGET sfx POST_BUILD
......
......@@ -34,6 +34,34 @@ Option `COMPILER` enables set of compiler-specific options. Most common are `gnu
./sfx.exe
```
## Building with CMake
Currently there is an implementation of Cmake-based toolchain which allows to build the model quicker and with less hustle (minimum required cmake version is 3.19). Clone the project and load appropriate modules. Create a build directory outside of cloned projects:
```bash
mkdir build && cd build
```
The code supports fluxes computations on both CPU and GPU and the CPU implementation has two versions: Fortran and C++ implementation.
1. Building the Fortran CPU version:
```bash
cmake ..
```
2. Building the C++ CPU version:
```bash
cmake -DUSE_CXX=ON ..
```
3. Building the C++ GPU version:
```bash
cmake -DINCLUDE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=<N> ..
```
where `N` is the architecture to generate device code for.
## Running test cases
We have prepared several dataseta to make the test calculations. Data is prepared on the basis of measurements.
......
......@@ -60,6 +60,7 @@ extern "C" {
int surface_land;
int surface_lake;
float kappa;
float gamma_c;
float Re_visc_min;
float h_charnock;
......
#pragma once
template<typename T>
void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const T Re_rough_min,
const T B1_rough, const T B2_rough,
const T B_max_land, const T B_max_ocean, const T B_max_lake,
const T gamma_c, const T Re_visc_min,
const T Pr_m, const T nu_air, const T g,
const int maxiters_charnock,
const int grid_size);
\ No newline at end of file
#pragma once
#include "../includeCXX/sfx_template_parameters.h"
#include "sfx_template_parameters.h"
#include <cstddef>
namespace memproc
......
#pragma once
#include "../includeC/sfx_data.h"
#include "../includeCU/sfx_math.cuh"
#include "sfx_data.h"
#include "sfx_math.cuh"
template<typename T>
template<typename T, class sfx_param>
FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim,
const T h0_m, const T h0_t, const T B,
const sfx_esm_param& param)
const sfx_param& param)
{
T psi_m, psi_h, f_m, f_h, c;
......@@ -29,10 +29,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim,
Rib_lim = zeta_lim * psi_h / (psi_m * psi_m);
}
template<typename T>
template<typename T, class sfx_param>
FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
const T Rib, const T h0_m, const T h0_t, const T B,
const sfx_esm_param& param)
const sfx_param& param)
{
T Rib_coeff, psi0_m, psi0_h, phi, c;
......@@ -49,12 +49,12 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
psi_h = (psi0_m + B) / param.Pr_t_0_inv + phi;
}
template<typename T>
template<typename T, class sfx_param>
FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &zeta,
const T Rib, const T h0_m, const T h0_t, const T B,
const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim,
const sfx_esm_param& param,
const sfx_esm_numericsTypeC& numerics)
const sfx_param& param,
const int maxiters_convection)
{
T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f;
......@@ -63,7 +63,7 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &ze
zeta = zeta_conv_lim;
for (int i = 1; i <= numerics.maxiters_convection + 1; i++)
for (int i = 1; i <= maxiters_convection + 1; i++)
{
zeta0_m = zeta / h0_m;
zeta0_h = zeta / h0_t;
......@@ -82,17 +82,17 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &ze
psi_m = f / f_m_conv_lim + p_m + a_m;
psi_h = (f / f_h_conv_lim + p_h + a_h) / param.Pr_t_0_inv;
if (i == numerics.maxiters_convection + 1)
if (i == maxiters_convection + 1)
break;
zeta = Rib * psi_m * psi_m / psi_h;
}
}
template<typename T>
template<typename T, class sfx_param>
FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,
const T h0_m, const T h0_t, const T B,
const sfx_esm_param& param)
const sfx_param& param)
{
zeta = 0.0;
psi_m = log(h0_m);
......@@ -101,11 +101,11 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,
psi_h = psi_m / param.Pr_t_0_inv;
}
template<typename T>
template<typename T, class sfx_param>
FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h, T &zeta,
const T Rib, const T h0_m, const T h0_t, const T B,
const sfx_esm_param& param,
const sfx_esm_numericsTypeC& numerics)
const sfx_param& param,
const int maxiters_convection)
{
T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h;
......@@ -117,7 +117,7 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h,
zeta = Rib * param.Pr_t_0_inv * psi_m * psi_m / psi_h;
for (int i = 1; i <= numerics.maxiters_convection + 1; i++)
for (int i = 1; i <= maxiters_convection + 1; i++)
{
zeta0_m = zeta / h0_m;
zeta0_h = zeta / h0_t;
......@@ -136,9 +136,137 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h,
psi_m = log((f_m - 1.0e0)*(f0_m + 1.0e0)/((f_m + 1.0e0)*(f0_m - 1.0e0))) + 2.0e0*(atan(f_m) - atan(f0_m));
psi_h = log((f_h - 1.0e0)*(f0_h + 1.0e0)/((f_h + 1.0e0)*(f0_h - 1.0e0))) / param.Pr_t_0_inv;
if (i == numerics.maxiters_convection + 1)
if (i == maxiters_convection + 1)
break;
zeta = Rib * psi_m * psi_m / psi_h;
}
}
\ No newline at end of file
}
template<typename T, class sfx_param>
FUCNTION_DECLARATION_SPECIFIER void get_psi(T &psi_m, T &psi_h,
const T zeta,
const sfx_param& param)
{
T x_m, x_h;
T q_m, q_h;
if (zeta >= 0.0)
{
q_m = pow((1.0 - param.b_m) / param.b_m, 1.0 / 3.0);
q_h = sqrt(param.c_h * param.c_h - 4.0);
x_m = pow(1.0 + zeta, 1.0 / 3.0);
x_h = zeta;
psi_m = -3.0 * (param.a_m / param.b_m) * (x_m - 1.0) + 0.5 * (param.a_m / param.b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
psi_h = -0.5 * param.b_h * log(1.0 + param.c_h * x_h + x_h * x_h) + ((-param.a_h / q_h) + ((param.b_h * param.c_h) / (2.0 * q_h))) * (log((2.0 * x_h + param.c_h - q_h) / (2.0 * x_h + param.c_h + q_h)) - log((param.c_h - q_h) / (param.c_h + q_h)));
}
else
{
x_m = pow(1.0 - param.alpha_m * zeta, 0.25);
x_h = pow(1.0 - param.alpha_h * zeta, 0.25);
psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
}
}
template<typename T, class sfx_param>
FUCNTION_DECLARATION_SPECIFIER void get_psi_mh(T &psi_m, T &psi_h,
const T zeta_m, const T zeta_h,
const sfx_param& param)
{
T x_m, x_h;
T q_m, q_h;
if (zeta_m >= 0.0)
{
q_m = pow((1.0 - param.b_m) / param.b_m, 1.0 / 3.0);
x_m = pow(1.0 + zeta_m, 1.0 / 3.0);
psi_m = -3.0 * (param.a_m / param.b_m) * (x_m - 1.0) + 0.5 * (param.a_m / param.b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
}
else
{
x_m = pow(1.0 - param.alpha_m * zeta_m, 0.25);
psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
}
if (zeta_h >= 0.0)
{
q_h = sqrt(param.c_h * param.c_h - 4.0);
x_h = zeta_h;
psi_h = -0.5 * param.b_h * log(1.0 + param.c_h * x_h + x_h * x_h) + ((-param.a_h / q_h) + ((param.b_h * param.c_h) / (2.0 * q_h))) * (log((2.0 * x_h + param.c_h - q_h) / (2.0 * x_h + param.c_h + q_h)) - log((param.c_h - q_h) / (param.c_h + q_h)));
}
else
{
x_h = pow(1.0 - param.alpha_h * zeta_h, 0.25);
psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
}
}
template<typename T, class sfx_param>
FUCNTION_DECLARATION_SPECIFIER void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T& zeta,
const T U, const T Tsemi, const T dT, const T dQ,
const T z, const T z0_m, const T z0_t,
const T beta,
const sfx_param& param,
const int maxiters)
{
const T gamma = T(0.61);
T psi_m, psi_h, psi0_m, psi0_h, Linv;
Udyn = param.kappa * U / log(z / z0_m);
Tdyn = param.kappa * dT * param.Pr_t_0_inv / log(z / z0_t);
Qdyn = param.kappa * dQ * param.Pr_t_0_inv / log(z / z0_t);
zeta = 0.0;
// --- no wind
if (Udyn < 1e-5)
return;
Linv = param.kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
zeta = z * Linv;
// --- near neutral case
if (Linv < 1e-5)
return;
for (int i = 0; i < maxiters; i++)
{
get_psi(psi_m, psi_h, zeta, param);
get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv,
param);
Udyn = param.kappa * U / (log(z / z0_m) - (psi_m - psi0_m));
Tdyn = param.kappa * dT * param.Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
Qdyn = param.kappa * dQ * param.Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
if (Udyn < 1e-5)
break;
Linv = param.kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
zeta = z * Linv;
}
}
template<typename T, class sfx_param>
FUCNTION_DECLARATION_SPECIFIER void get_phi(T &phi_m, T &phi_h,
const T zeta,
const sfx_param& param)
{
if (zeta >= 0.0)
{
phi_m = 1.0 + (param.a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + param.b_m * zeta);
phi_h = 1.0 + (param.a_h * zeta + param.b_h * zeta * zeta) / (1.0 + param.c_h * zeta + zeta * zeta);
}
else
{
phi_m = pow(1.0 - param.alpha_m * zeta, -0.25);
phi_h = pow(1.0 - param.alpha_h * zeta, -0.5);
}
}
#pragma once
#include "../includeC/sfx_data.h"
#include "../includeCU/sfx_math.cuh"
#include "sfx_data.h"
#include "sfx_math.cuh"
template<typename T>
FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
......@@ -14,8 +14,8 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
// --- apply max restriction based on surface type
if (surface_type == param.surface_ocean) B = sfx_math::min(B, param.B_max_ocean);
if (surface_type == param.surface_lake) B = sfx_math::min(B, param.B_max_land);
if (surface_type == param.surface_land) B = sfx_math::min(B, param.B_max_lake);
if (surface_type == param.surface_lake) B = sfx_math::min(B, param.B_max_lake);
if (surface_type == param.surface_land) B = sfx_math::min(B, param.B_max_land);
// --- define roughness [thermal]
z0_t = z0_m / exp(B);
......@@ -23,30 +23,29 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
template<typename T>
FUCNTION_DECLARATION_SPECIFIER void get_charnock_roughness(T &z0_m, T &u_dyn0,
const T h, const T U,
const sfx_esm_param& model_param,
const T U, const T h,
const sfx_surface_param& surface_param,
const sfx_esm_numericsTypeC& numerics)
const int maxiters)
{
T Uc, a, b, c, c_min, f;
Uc = U;
a = 0.0;
b = 25.0;
c_min = log(surface_param.h_charnock) / model_param.kappa;
c_min = log(surface_param.h_charnock) / surface_param.kappa;
for (int i = 0; i < numerics.maxiters_charnock; i++)
for (int i = 0; i < maxiters; i++)
{
f = surface_param.c1_charnock - 2.0 * log(Uc);
for (int j = 0; j < numerics.maxiters_charnock; j++)
for (int j = 0; j < maxiters; j++)
{
c = (f + 2.0 * log(b)) / model_param.kappa;
c = (f + 2.0 * log(b)) / surface_param.kappa;
if (U <= 8.0e0)
a = log(1.0 + surface_param.c2_charnock * ( pow(b / Uc, 3) ) ) / model_param.kappa;
a = log(1.0 + surface_param.c2_charnock * ( pow(b / Uc, 3) ) ) / surface_param.kappa;
c = sfx_math::max(c - a, c_min);
b = c;
}
z0_m = surface_param.h_charnock * exp(-c * model_param.kappa);
z0_m = surface_param.h_charnock * exp(-c * surface_param.kappa);
z0_m = sfx_math::max(z0_m, T(0.000015e0));
Uc = U * log(surface_param.h_charnock / z0_m) / log(h / z0_m);
}
......
#pragma once
#include <cstddef>
#include "sfx_template_parameters.h"
#include "../includeC/sfx_data.h"
#include "sfx_data.h"
template<typename T, MemType memIn, MemType memOut, MemType RunMem >
class ModelBase
......
#pragma once
#include "../includeC/sfx_data.h"
#include "sfx_data.h"
#ifdef __cplusplus
extern "C" {
......
#pragma once
template<typename T>
void compute_flux_sheba_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const T Re_rough_min,
const T B1_rough, const T B2_rough,
const T B_max_land, const T B_max_ocean, const T B_max_lake,
const T gamma_c, const T Re_visc_min,
const T Pr_m, const T nu_air, const T g,
const int maxiters_charnock,
const int grid_size);
\ No newline at end of file
#pragma once
#include "sfx_template_parameters.h"
#include "../includeCXX/model_base.h"
#include "../includeC/sfx_data.h"
#include "model_base.h"
#include "sfx_data.h"
template<typename T, MemType memIn, MemType memOut, MemType RunMem >
class FluxEsmBase : public ModelBase<T, memIn, memOut, RunMem>
......
#pragma once
#include "sfx_template_parameters.h"
#include <cstddef>
#include "model_base.h"
#include "sfx_data.h"
template<typename T, MemType memIn, MemType memOut, MemType RunMem >
class FluxSheba
class FluxShebaBase : public ModelBase<T, memIn, memOut, RunMem>
{
private:
T *U, *dT, *Tsemi, *dQ, *h, *in_z0_m;
T *zeta, *Rib, *Re, *B, *z0_m, *z0_t, *Rib_conv_lim, *Cm, *Ct, *Km, *Pr_t_inv;
public:
using ModelBase<T, memIn, memOut, RunMem>::res_sfx;
using ModelBase<T, memIn, memOut, RunMem>::sfx;
using ModelBase<T, memIn, memOut, RunMem>::meteo;
using ModelBase<T, memIn, memOut, RunMem>::grid_size;
using ModelBase<T, memIn, memOut, RunMem>::ifAllocated;
using ModelBase<T, memIn, memOut, RunMem>::allocated_size;
T kappa, Pr_t_0_inv,
alpha_m, alpha_h,
a_m, a_h,
b_m, b_h,
c_h,
Re_rough_min,
B1_rough, B2_rough,
B_max_land, B_max_ocean, B_max_lake,
gamma_c,
Re_visc_min,
Pr_m, nu_air, g;
sfx_surface_param surface_param;
sfx_phys_constants phys_constants;
sfx_sheba_param model_param;
sfx_sheba_numericsTypeC numerics;
int grid_size;
bool ifAllocated;
size_t allocated_size;
public:
FluxSheba();
void set_params(const int grid_size, const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h,
const T Re_rough_min,
const T B1_rough, const T B2_rough,
const T B_max_land, const T B_max_ocean, const T B_max_lake,
const T gamma_c, const T Re_visc_min,
const T Pr_m, const T nu_air, const T g);
~FluxSheba();
FluxShebaBase(sfxDataVecTypeC* sfx,
meteoDataVecTypeC* meteo,
const sfx_sheba_param model_param,
const sfx_surface_param surface_param,
const sfx_sheba_numericsTypeC numerics,
const sfx_phys_constants phys_constants,
const int grid_size);
~FluxShebaBase();
};
template<typename T, MemType memIn, MemType memOut, MemType RunMem >
class FluxSheba : public FluxShebaBase<T, memIn, memOut, RunMem>
{};
void compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_,
const int maxiters_charnock);
template<typename T, MemType memIn, MemType memOut >
class FluxSheba<T, memIn, memOut, MemType::CPU> : public FluxShebaBase<T, memIn, memOut, MemType::CPU>
{
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::res_sfx;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::sfx;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::meteo;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::surface_param;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::phys_constants;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::grid_size;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::ifAllocated;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::allocated_size;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::model_param;
using FluxShebaBase<T, memIn, memOut, MemType::CPU>::numerics;
public:
FluxSheba(sfxDataVecTypeC* sfx,
meteoDataVecTypeC* meteo,
const sfx_sheba_param model_param,
const sfx_surface_param surface_param,
const sfx_sheba_numericsTypeC numerics,
const sfx_phys_constants phys_constants,
const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::CPU>(sfx, meteo, model_param,
surface_param, numerics, phys_constants, grid_size) {}
~FluxSheba() = default;
void compute_flux();
};
private:
void set_data( T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_);
};
\ No newline at end of file
#ifdef INCLUDE_CUDA
template<typename T, MemType memIn, MemType memOut >
class FluxSheba<T, memIn, memOut, MemType::GPU> : public FluxShebaBase<T, memIn, memOut, MemType::GPU>
{
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::res_sfx;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::sfx;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::meteo;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::surface_param;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::phys_constants;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::grid_size;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::ifAllocated;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::allocated_size;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::model_param;
using FluxShebaBase<T, memIn, memOut, MemType::GPU>::numerics;
public:
FluxSheba(sfxDataVecTypeC* sfx,
meteoDataVecTypeC* meteo,
const sfx_sheba_param model_param,
const sfx_surface_param surface_param,
const sfx_sheba_numericsTypeC numerics,
const sfx_phys_constants phys_constants,
const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::GPU>(sfx, meteo, model_param,
surface_param, numerics, phys_constants, grid_size) {}
~FluxSheba() = default;
void compute_flux();
};
#endif
\ No newline at end of file
#include <stdlib.h>
#include <stdio.h>
#include "../includeCXX/sfx_call_class_func.h"
#include "sfx_call_class_func.h"
// #include "../includeC/sfx_call_cxx.h"
// -------------------------------------------------------------------------- //
......
#include <iostream>
#include "../includeCU/sfx_compute_sheba.cuh"
#include "../includeCU/sfx_surface.cuh"
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
template<typename T>
__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
const T h, const T U,
const T kappa,
const T h_charnock, const T c1_charnock, const T c2_charnock,
const int maxiters)
{
T Uc, a, b, c, c_min, f;
Uc = U;
a = 0.0;
b = 25.0;
c_min = log(h_charnock) / kappa;
for (int i = 0; i < maxiters; i++)
{
f = c1_charnock - 2.0 * log(Uc);
for (int j = 0; j < maxiters; j++)
{
c = (f + 2.0 * log(b)) / kappa;
if (U <= 8.0e0)
a = log(1.0 + c2_charnock * ( pow(b / Uc, 3) ) ) / kappa;
c = max(c - a, c_min);
b = c;
}
z0_m = h_charnock * exp(-c * kappa);
z0_m = max(z0_m, T(0.000015e0));
Uc = U * log(h_charnock / z0_m) / log(h / z0_m);
}
u_dyn0 = Uc / c;
}
template __device__ void get_charnock_roughness(float &z0_m, float &u_dyn0,
const float h, const float U,
const float kappa,
const float h_charnock, const float c1_charnock, const float c2_charnock,
const int maxiters);
template __device__ void get_charnock_roughness(double &z0_m, double &u_dyn0,
const double h, const double U,
const double kappa,
const double h_charnock, const double c1_charnock, const double c2_charnock,
const int maxiters);
template<typename T>
__device__ void get_thermal_roughness(T &z0_t, T &B,
const T z0_m, const T Re,
const T Re_rough_min,
const T B1_rough, const T B2_rough, const T B3_rough, const T B4_rough,
const T B_max_ocean, const T B_max_lake, const T B_max_land,
const int surface_type)
{
// --- define B = log(z0_m / z0_t)
if (Re <= Re_rough_min)
B = B1_rough * log(B3_rough * Re) + B2_rough;
else
// *: B4 takes into account Re value at z' ~ O(10) z0
B = B4_rough * (pow(Re, B2_rough));
// --- apply max restriction based on surface type
if (surface_type == 0)
B = min(B, B_max_ocean);
else if (surface_type == 2)
B = min(B, B_max_lake);
else if (surface_type == 1)
B = min(B, B_max_land);
// --- define roughness [thermal]
z0_t = z0_m / exp(B);
}
template __device__ void get_thermal_roughness(float &z0_t, float &B,
const float z0_m, const float Re,
const float Re_rough_min,
const float B1_rough, const float B2_rough, const float B3_rough, const float B4_rough,
const float B_max_ocean, const float B_max_lake, const float B_max_land,
const int surface_type);
template __device__ void get_thermal_roughness(double &z0_t, double &B,
const double z0_m, const double Re,
const double Re_rough_min,
const double B1_rough, const double B2_rough, const double B3_rough, const double B4_rough,
const double B_max_ocean, const double B_max_lake, const double B_max_land,
const int surface_type);
template<typename T>
__device__ void get_psi_mh(T &psi_m, T &psi_h,
const T zeta_m, const T zeta_h,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h)
{
T x_m, x_h;
T q_m, q_h;
if (zeta_m >= 0.0)
{
q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
x_m = pow(1.0 + zeta_m, 1.0 / 3.0);
psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
}
else
{ x_m = pow(1.0 - alpha_m * zeta_m, 0.25);
psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
}
if (zeta_h >= 0.0)
{
q_h = sqrt(c_h * c_h - 4.0);
x_h = zeta_h;
psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
}
else
{
x_h = pow(1.0 - alpha_h * zeta_h, 0.25);
psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
}
}
template __device__ void get_psi_mh(float &psi_m, float &psi_h,
const float zeta_m, const float zeta_h,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h);
template __device__ void get_psi_mh(double &psi_m, double &psi_h,
const double zeta_m, const double zeta_h,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h);
template<typename T>
__device__ void get_psi(T &psi_m, T &psi_h,
const T zeta,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h)
{
T x_m, x_h;
T q_m, q_h;
if (zeta >= 0.0)
{
q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
q_h = sqrt(c_h * c_h - 4.0);
x_m = pow(1.0 + zeta, 1.0 / 3.0);
x_h = zeta;
psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
}
else
{
x_m = pow(1.0 - alpha_m * zeta, 0.25);
x_h = pow(1.0 - alpha_h * zeta, 0.25);
psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
}
}
template __device__ void get_psi(float &psi_m, float &psi_h,
const float zeta,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h);
template __device__ void get_psi(double &psi_m, double &psi_h,
const double zeta,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h);
template<typename T>
__device__ void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta,
const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const int maxiters)
{
T psi_m, psi_h, psi0_m, psi0_h, Linv;
const T gamma = 0.61;
Udyn = kappa * U / log(z / z0_m);
Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t);
Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t);
zeta = 0.0;
// --- no wind
if (Udyn < 1e-5)
return;
Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
zeta = z * Linv;
// --- near neutral case
if (Linv < 1e-5)
return;
for (int i = 0; i < maxiters; i++)
{
get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h,
a_m, a_h,
b_m, b_h,
c_h);
get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv,
alpha_m, alpha_h,
a_m, a_h,
b_m, b_h,
c_h);
Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m));
Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
if (Udyn < 1e-5)
break;
Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
zeta = z * Linv;
}
}
template __device__ void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta,
const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta,
const float kappa, const float Pr_t_0_inv,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h,
const int maxiters);
template __device__ void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta,
const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta,
const double kappa, const double Pr_t_0_inv,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h,
const int maxiters);
template<typename T>
__device__ void get_phi(T &phi_m, T &phi_h,
const T zeta,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h)
{
if (zeta >= 0.0)
{
phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta);
phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta);
}
else
{
phi_m = pow(1.0 - alpha_m * zeta, -0.25);
phi_h = pow(1.0 - alpha_h * zeta, -0.5);
}
}
template __device__ void get_phi(float &phi_m, float &phi_h,
const float zeta,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h);
template __device__ void get_phi(double &phi_m, double &phi_h,
const double zeta,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h);
template<typename T>
__global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const T Re_rough_min,
const T B1_rough, const T B2_rough,
const T B_max_land, const T B_max_ocean, const T B_max_lake,
const T gamma_c, const T Re_visc_min,
const T Pr_m, const T nu_air, const T g,
const int maxiters_charnock,
const int grid_size)
{
const int index = blockIdx.x * blockDim.x + threadIdx.x;
T h, U, dT, Tsemi, dQ, z0_m;
T z0_t, B, h0_m, h0_t, u_dyn0, Re,
zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
Km, Pr_t_inv, Cm, Ct;
const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8));
const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock;
int surface_type;
if(index < grid_size)
{
U = U_[index];
Tsemi = Tsemi_[index];
dT = dT_[index];
dQ = dQ_[index];
h = h_[index];
z0_m = in_z0_m_[index];
if (z0_m < 0.0) surface_type = 0;
else surface_type = 1;
if (surface_type == 0)
{
get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == 1)
{
h0_m = h / z0_m;
u_dyn0 = U * kappa / log(h0_m);
}
Re = u_dyn0 * z0_m / nu_air;
get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type);
// --- define relative height [thermal]
h0_t = h / z0_t;
// --- define Ri-bulk
Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
// --- get the fluxes
// ----------------------------------------------------------------------------
get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10);
// ----------------------------------------------------------------------------
get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h);
// ----------------------------------------------------------------------------
// --- define transfer coeff. (momentum) & (heat)
Cm = 0.0;
if (U > 0.0)
Cm = Udyn / U;
Ct = 0.0;
if (fabs(dT) > 0.0)
Ct = Tdyn / dT;
// --- define eddy viscosity & inverse Prandtl number
Km = kappa * Cm * U * h / phi_m;
Pr_t_inv = phi_m / phi_h;
zeta_[index] = zeta;
Rib_[index] = Rib;
Re_[index] = Re;
B_[index] = B;
z0_m_[index] = z0_m;
z0_t_[index] = z0_t;
Rib_conv_lim_[index] = 0.0;
Cm_[index] = Cm;
Ct_[index] = Ct;
Km_[index] = Km;
Pr_t_inv_[index] = Pr_t_inv;
}
}
template __global__ void kernel_compute_flux_sheba(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
const float kappa, const float Pr_t_0_inv,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h,
const float Re_rough_min,
const float B1_rough, const float B2_rough,
const float B_max_land, const float B_max_ocean, const float B_max_lake,
const float gamma_c, const float Re_visc_min,
const float Pr_m, const float nu_air, const float g,
const int maxiters_charnock,
const int grid_size);
template __global__ void kernel_compute_flux_sheba(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
const double kappa, const double Pr_t_0_inv,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h,
const double Re_rough_min,
const double B1_rough, const double B2_rough,
const double B_max_land, const double B_max_ocean, const double B_max_lake,
const double gamma_c, const double Re_visc_min,
const double Pr_m, const double nu_air, const double g,
const int maxiters_charnock,
const int grid_size);
template<typename T>
void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const T Re_rough_min,
const T B1_rough, const T B2_rough,
const T B_max_land, const T B_max_ocean, const T B_max_lake,
const T gamma_c, const T Re_visc_min,
const T Pr_m, const T nu_air, const T g,
const int maxiters_charnock,
const int grid_size)
{
const int BlockCount = int(ceil(float(grid_size) / 512.0));
dim3 cuBlock = dim3(512, 1, 1);
dim3 cuGrid = dim3(BlockCount, 1, 1);
kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
U_, dT_, Tsemi_, dQ_, h_, in_z0_m_,
kappa, Pr_t_0_inv,
alpha_m, alpha_h,
a_m, a_h,
b_m, b_h,
c_h,
Re_rough_min,
B1_rough, B2_rough,
B_max_land, B_max_ocean, B_max_lake,
gamma_c, Re_visc_min,
Pr_m, nu_air, g,
maxiters_charnock,
grid_size);
gpuErrchk( cudaPeekAtLastError() );
}
template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
const float kappa, const float Pr_t_0_inv,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h,
const float Re_rough_min,
const float B1_rough, const float B2_rough,
const float B_max_land, const float B_max_ocean, const float B_max_lake,
const float gamma_c, const float Re_visc_min,
const float Pr_m, const float nu_air, const float g,
const int maxiters_charnock,
const int grid_size);
template void compute_flux_sheba_gpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
const double kappa, const double Pr_t_0_inv,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h,
const double Re_rough_min,
const double B1_rough, const double B2_rough,
const double B_max_land, const double B_max_ocean, const double B_max_lake,
const double gamma_c, const double Re_visc_min,
const double Pr_m, const double nu_air, const double g,
const int maxiters_charnock,
const int grid_size);
\ No newline at end of file
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "../includeCXX/sfx_esm.h"
#include "../includeCU/sfx_esm_compute_subfunc.cuh"
#include "../includeCU/sfx_surface.cuh"
#include "../includeCU/sfx_memory_processing.cuh"
#include "sfx_esm.h"
#include "sfx_model_compute_subfunc.cuh"
#include "sfx_surface.cuh"
#include "sfx_memory_processing.cuh"
namespace sfx_kernel
{
......@@ -46,7 +46,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
if (surface_type == surface_param.surface_ocean)
{
get_charnock_roughness(z0_m, u_dyn0, h, U, model_param, surface_param, numerics);
get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == surface_param.surface_land)
......@@ -77,7 +77,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
}
else if (Rib < Rib_conv_lim)
{
get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics);
get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics.maxiters_convection);
fval = pow(zeta_conv_lim / zeta, 1.0/3.0);
phi_m = fval / f_m_conv_lim;
......@@ -92,7 +92,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
}
else
{
get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics);
get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics.maxiters_convection);
phi_m = pow(1.0 - model_param.alpha_m * zeta, -0.25);
phi_h = 1.0 / (model_param.Pr_t_0_inv * sqrt(1.0 - model_param.alpha_h_fix * zeta));
......
#include "../includeCU/sfx_memory_processing.cuh"
#include "sfx_memory_processing.cuh"
#include <cuda.h>
#include <cuda_runtime_api.h>
......
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "sfx_sheba.h"
#include "sfx_model_compute_subfunc.cuh"
#include "sfx_surface.cuh"
#include "sfx_memory_processing.cuh"
namespace sfx_kernel
{
template<typename T>
__global__ void compute_flux(sfxDataVecTypeC sfx,
meteoDataVecTypeC meteo,
const sfx_sheba_param model_param,
const sfx_surface_param surface_param,
const sfx_sheba_numericsTypeC numerics,
const sfx_phys_constants phys_constants,
const int grid_size);
}
template<typename T>
__global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
meteoDataVecTypeC meteo,
const sfx_sheba_param model_param,
const sfx_surface_param surface_param,
const sfx_sheba_numericsTypeC numerics,
const sfx_phys_constants phys_constants,
const int grid_size)
{
const int index = blockIdx.x * blockDim.x + threadIdx.x;
T h, U, dT, Tsemi, dQ, z0_m;
T z0_t, B, h0_m, h0_t, u_dyn0, Re,
zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
Km, Pr_t_inv, Cm, Ct;
int surface_type;
if(index < grid_size)
{
U = meteo.U[index];
Tsemi = meteo.Tsemi[index];
dT = meteo.dT[index];
dQ = meteo.dQ[index];
h = meteo.h[index];
z0_m = meteo.z0_m[index];
surface_type = z0_m < 0.0 ? surface_param.surface_ocean : surface_param.surface_land;
if (surface_type == surface_param.surface_ocean)
{
get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
h0_m = h / z0_m;
}
if (surface_type == surface_param.surface_land)
{
h0_m = h / z0_m;
u_dyn0 = U * model_param.kappa / log(h0_m);
}
Re = u_dyn0 * z0_m / phys_constants.nu_air;
get_thermal_roughness(z0_t, B, z0_m, Re, surface_param, surface_type);
// --- define relative height [thermal]
h0_t = h / z0_t;
// --- define Ri-bulk
Rib = (phys_constants.g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
// --- get the fluxes
// ----------------------------------------------------------------------------
get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta,
U, Tsemi, dT, dQ, h, z0_m, z0_t, (phys_constants.g / Tsemi), model_param, 10);
// ----------------------------------------------------------------------------
get_phi(phi_m, phi_h, zeta, model_param);
// ----------------------------------------------------------------------------
// --- define transfer coeff. (momentum) & (heat)
Cm = 0.0;
if (U > 0.0)
Cm = Udyn / U;
Ct = 0.0;
if (fabs(dT) > 0.0)
Ct = Tdyn / dT;
// --- define eddy viscosity & inverse Prandtl number
Km = model_param.kappa * Cm * U * h / phi_m;
Pr_t_inv = phi_m / phi_h;
sfx.zeta[index] = zeta;
sfx.Rib[index] = Rib;
sfx.Re[index] = Re;
sfx.B[index] = B;
sfx.z0_m[index] = z0_m;
sfx.z0_t[index] = z0_t;
sfx.Rib_conv_lim[index] = T(0.0);
sfx.Cm[index] = Cm;
sfx.Ct[index] = Ct;
sfx.Km[index] = Km;
sfx.Pr_t_inv[index] = Pr_t_inv;
}
}
template<typename T, MemType memIn, MemType memOut >
void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux()
{
const int BlockCount = int(ceil(float(grid_size) / 1024.0));
dim3 cuBlock = dim3(1024, 1, 1);
dim3 cuGrid = dim3(BlockCount, 1, 1);
sfx_kernel::compute_flux<T><<<cuGrid, cuBlock>>>(sfx, meteo, model_param,
surface_param, numerics, phys_constants, grid_size);
if(MemType::GPU != memOut)
{
const size_t new_size = grid_size * sizeof(T);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size);
memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size);
}
}
template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>;
template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>;
template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>;
template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::GPU>;
template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::GPU>;
template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU>;
template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::CPU>;
template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::CPU>;
\ No newline at end of file
#include "../includeCXX/model_base.h"
#include "model_base.h"
#ifdef INCLUDE_CUDA
#include "../includeCU/sfx_memory_processing.cuh"
#include "sfx_memory_processing.cuh"
#endif
#include "../includeCXX/sfx_memory_processing.h"
#include "sfx_memory_processing.h"
template<typename T, MemType memIn, MemType memOut, MemType RunMem >
ModelBase<T, memIn, memOut, RunMem>::ModelBase(sfxDataVecTypeC* sfx_in,
......
#include <stdlib.h>
#include <stdio.h>
#include "../includeCXX/sfx_call_class_func.h"
#include "../includeCXX/sfx_esm.h"
#include "../includeCXX/sfx_sheba.h"
#include "sfx_call_class_func.h"
#include "sfx_esm.h"
#include "sfx_sheba.h"
#include <vector>
// -------------------------------------------------------------------------- //
......
#include <cmath>
#include <iostream>
#include "../includeCXX/sfx_compute_sheba.h"
// #include "../includeCXX/sfx_surface.h"
template<typename T>
void get_psi_mh(T &psi_m, T &psi_h,
const T zeta_m, const T zeta_h,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h)
{
T x_m, x_h;
T q_m, q_h;
if (zeta_m >= 0.0)
{
q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
x_m = pow(1.0 + zeta_m, 1.0 / 3.0);
psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
}
else
{ x_m = pow(1.0 - alpha_m * zeta_m, 0.25);
psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
}
if (zeta_h >= 0.0)
{
q_h = sqrt(c_h * c_h - 4.0);
x_h = zeta_h;
psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
}
else
{
x_h = pow(1.0 - alpha_h * zeta_h, 0.25);
psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
}
}
template void get_psi_mh(float &psi_m, float &psi_h,
const float zeta_m, const float zeta_h,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h);
template void get_psi_mh(double &psi_m, double &psi_h,
const double zeta_m, const double zeta_h,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h);
template<typename T>
void get_psi(T &psi_m, T &psi_h,
const T zeta,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h)
{
T x_m, x_h;
T q_m, q_h;
if (zeta >= 0.0)
{
q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
q_h = sqrt(c_h * c_h - 4.0);
x_m = pow(1.0 + zeta, 1.0 / 3.0);
x_h = zeta;
psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
}
else
{
x_m = pow(1.0 - alpha_m * zeta, 0.25);
x_h = pow(1.0 - alpha_h * zeta, 0.25);
psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
}
}
template void get_psi(float &psi_m, float &psi_h,
const float zeta,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h);
template void get_psi(double &psi_m, double &psi_h,
const double zeta,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h);
template<typename T>
void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta,
const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const int maxiters)
{
T psi_m, psi_h, psi0_m, psi0_h, Linv;
const T gamma = 0.61;
Udyn = kappa * U / log(z / z0_m);
Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t);
Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t);
zeta = 0.0;
// --- no wind
if (Udyn < 1e-5)
return;
Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
zeta = z * Linv;
// --- near neutral case
if (Linv < 1e-5)
return;
for (int i = 0; i < maxiters; i++)
{
get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h,
a_m, a_h,
b_m, b_h,
c_h);
get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv,
alpha_m, alpha_h,
a_m, a_h,
b_m, b_h,
c_h);
Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m));
Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
if (Udyn < 1e-5)
break;
Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
zeta = z * Linv;
}
}
template void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta,
const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta,
const float kappa, const float Pr_t_0_inv,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h,
const int maxiters);
template void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta,
const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta,
const double kappa, const double Pr_t_0_inv,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h,
const int maxiters);
template<typename T>
void get_phi(T &phi_m, T &phi_h,
const T zeta,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h)
{
if (zeta >= 0.0)
{
phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta);
phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta);
}
else
{
phi_m = pow(1.0 - alpha_m * zeta, -0.25);
phi_h = pow(1.0 - alpha_h * zeta, -0.5);
}
}
template void get_phi(float &phi_m, float &phi_h,
const float zeta,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h);
template void get_phi(double &phi_m, double &phi_h,
const double zeta,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h);
template<typename T>
void compute_flux_sheba_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
const T kappa, const T Pr_t_0_inv,
const T alpha_m, const T alpha_h,
const T a_m, const T a_h,
const T b_m, const T b_h,
const T c_h,
const T Re_rough_min,
const T B1_rough, const T B2_rough,
const T B_max_land, const T B_max_ocean, const T B_max_lake,
const T gamma_c, const T Re_visc_min,
const T Pr_m, const T nu_air, const T g,
const int maxiters_charnock,
const int grid_size)
{
// T h, U, dT, Tsemi, dQ, z0_m;
// T z0_t, B, h0_m, h0_t, u_dyn0, Re,
// zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
// Km, Pr_t_inv, Cm, Ct;
// const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8));
// const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock;
// int surface_type;
// for (int step = 0; step < grid_size; step++)
// {
// U = U_[step];
// Tsemi = Tsemi_[step];
// dT = dT_[step];
// dQ = dQ_[step];
// h = h_[step];
// z0_m = in_z0_m_[step];
// if (z0_m < 0.0) surface_type = 0;
// else surface_type = 1;
// if (surface_type == 0)
// {
// get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock);
// h0_m = h / z0_m;
// }
// if (surface_type == 1)
// {
// h0_m = h / z0_m;
// u_dyn0 = U * kappa / log(h0_m);
// }
// Re = u_dyn0 * z0_m / nu_air;
// get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type);
// // --- define relative height [thermal]
// h0_t = h / z0_t;
// // --- define Ri-bulk
// Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
// // --- get the fluxes
// // ----------------------------------------------------------------------------
// get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10);
// // ----------------------------------------------------------------------------
// get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h);
// // ----------------------------------------------------------------------------
// // --- define transfer coeff. (momentum) & (heat)
// Cm = 0.0;
// if (U > 0.0)
// Cm = Udyn / U;
// Ct = 0.0;
// if (fabs(dT) > 0.0)
// Ct = Tdyn / dT;
// // --- define eddy viscosity & inverse Prandtl number
// Km = kappa * Cm * U * h / phi_m;
// Pr_t_inv = phi_m / phi_h;
// zeta_[step] = zeta;
// Rib_[step] = Rib;
// Re_[step] = Re;
// B_[step] = B;
// z0_m_[step] = z0_m;
// z0_t_[step] = z0_t;
// Rib_conv_lim_[step] = 0.0;
// Cm_[step] = Cm;
// Ct_[step] = Ct;
// Km_[step] = Km;
// Pr_t_inv_[step] = Pr_t_inv;
// }
}
template void compute_flux_sheba_cpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
const float *U, const float *dt, const float *T_semi, const float *dq, const float *H, const float *in_z0_m,
const float kappa, const float Pr_t_0_inv,
const float alpha_m, const float alpha_h,
const float a_m, const float a_h,
const float b_m, const float b_h,
const float c_h,
const float Re_rough_min,
const float B1_rough, const float B2_rough,
const float B_max_land, const float B_max_ocean, const float B_max_lake,
const float gamma_c, const float Re_visc_min,
const float Pr_m, const float nu_air, const float g,
const int maxiters_charnock,
const int grid_size);
template void compute_flux_sheba_cpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
const double *U, const double *dt, const double *T_semi, const double *dq, const double *H, const double *in_z0_m,
const double kappa, const double Pr_t_0_inv,
const double alpha_m, const double alpha_h,
const double a_m, const double a_h,
const double b_m, const double b_h,
const double c_h,
const double Re_rough_min,
const double B1_rough, const double B2_rough,
const double B_max_land, const double B_max_ocean, const double B_max_lake,
const double gamma_c, const double Re_visc_min,
const double Pr_m, const double nu_air, const double g,
const int maxiters_charnock,
const int grid_size);
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment