diff --git a/CMakeLists.txt b/CMakeLists.txt index df461be3016ff213e48b8ba55601d301bb048d0e..492fe28ef1824427466a0ac32323835754271a37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,8 +32,6 @@ set(CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/modules) if(USE_CONFIG_PARSER) add_subdirectory(config-parser/) add_definitions(-DUSE_CONFIG_PARSER) - # list(APPEND RUN_MACRO -DUSE_CONFIG_PARSER) - set(USE_CXX ON) endif(USE_CONFIG_PARSER) if(INCLUDE_CUDA) @@ -55,7 +53,6 @@ if(USE_CXX) enable_language(CXX) set(CMAKE_CXX_STANDARD 11) add_definitions(-DINCLUDE_CXX) - # list(APPEND RUN_MACRO -DINCLUDE_CXX) endif(USE_CXX) set(SOURCES_F @@ -90,34 +87,35 @@ if(USE_CXX) set(HEADERS_C includeC/sfx_data.h ) + list(APPEND HEADERS_DIRS includeC/) set(SOURCES_CXX srcCXX/model_base.cpp srcCXX/sfx_esm.cpp srcCXX/sfx_sheba.cpp - srcCXX/sfx_compute_sheba.cpp srcCXX/sfx_call_class_func.cpp ) set(HEADERS_CXX includeCU/sfx_surface.cuh includeCU/sfx_math.cuh - includeCU/sfx_esm_compute_subfunc.cuh + includeCU/sfx_model_compute_subfunc.cuh includeCXX/model_base.h includeCXX/sfx_esm.h includeCXX/sfx_sheba.h - includeCXX/sfx_compute_sheba.h includeCXX/sfx_call_class_func.h ) + list(APPEND HEADERS_DIRS includeCU/) + list(APPEND HEADERS_DIRS includeCXX/) endif(USE_CXX) if(INCLUDE_CUDA) set(SOURCES_CU srcCU/sfx_esm.cu - srcCU/sfx_compute_sheba.cu + srcCU/sfx_sheba.cu ) set(HEADERS_CU - includeCU/sfx_compute_sheba.cuh + ) endif(INCLUDE_CUDA) @@ -142,23 +140,29 @@ endif(USE_CXX OR INCLUDE_CUDA) set(SOURCES ${MEMPROC_HEADERS_CU} ${MEMPROC_SOURCES_CU} ${MEMPROC_HEADERS_CXX} ${MEMPROC_SOURCES_CXX} ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_C} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${HEADERS_F} ${SOURCES_F}) -set(CMAKE_Fortran_FLAGS " -g -fbacktrace -ffpe-trap=zero,overflow,underflow -cpp ") - -if(USE_CXX OR INCLUDE_CUDA) - set(CMAKE_CXX_FLAGS " -g -Wunused-variable ") - set(CMAKE_C_FLAGS " -g ") - - set(CMAKE_CUDA_FLAGS " -g ") -endif(USE_CXX OR INCLUDE_CUDA) +set(CMAKE_Fortran_FLAGS " -cpp ") add_executable(sfx ${SOURCES}) set_property(TARGET sfx PROPERTY LINKER_LANGUAGE Fortran) target_include_directories(sfx PUBLIC ${CMAKE_BINARY_DIR}/modules/) +target_include_directories(sfx PUBLIC ${HEADERS_DIRS}) if(USE_CONFIG_PARSER) target_link_libraries(sfx config_parser_F config_parser_CXX) endif(USE_CONFIG_PARSER) +if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + target_compile_options(sfx + PUBLIC $<$<COMPILE_LANGUAGE:C>: -g >) + target_compile_options(sfx + PUBLIC $<$<COMPILE_LANGUAGE:CXX>: -g >) + target_compile_options(sfx + PUBLIC $<$<COMPILE_LANGUAGE:CUDA>: -g >) + target_compile_options(sfx + PUBLIC $<$<COMPILE_LANGUAGE:Fortran>: -g -fbacktrace -ffpe-trap=zero,overflow,underflow >) +endif() + + # copy data & configs on post build add_custom_command( TARGET sfx POST_BUILD diff --git a/README.md b/README.md index ae3d0d2f263628c2951d62ed68e08946b30ed933..1a25762ec162bd7c23e5d78c17436e973caf997a 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,34 @@ Option `COMPILER` enables set of compiler-specific options. Most common are `gnu ./sfx.exe ``` +## Building with CMake + +Currently there is an implementation of Cmake-based toolchain which allows to build the model quicker and with less hustle (minimum required cmake version is 3.19). Clone the project and load appropriate modules. Create a build directory outside of cloned projects: + +```bash +mkdir build && cd build +``` + +The code supports fluxes computations on both CPU and GPU and the CPU implementation has two versions: Fortran and C++ implementation. + +1. Building the Fortran CPU version: +```bash +cmake .. +``` + +2. Building the C++ CPU version: +```bash +cmake -DUSE_CXX=ON .. +``` + +3. Building the C++ GPU version: +```bash +cmake -DINCLUDE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=<N> .. +``` +where `N` is the architecture to generate device code for. + + + ## Running test cases We have prepared several dataseta to make the test calculations. Data is prepared on the basis of measurements. diff --git a/includeC/sfx_data.h b/includeC/sfx_data.h index d1bf113c04df7116ac93176c39702f86037fc048..dba42c80adea9bbcfc6fe5292abf65f153396e01 100644 --- a/includeC/sfx_data.h +++ b/includeC/sfx_data.h @@ -60,6 +60,7 @@ extern "C" { int surface_land; int surface_lake; + float kappa; float gamma_c; float Re_visc_min; float h_charnock; diff --git a/includeCU/sfx_compute_sheba.cuh b/includeCU/sfx_compute_sheba.cuh deleted file mode 100644 index acd67d6ec11ad91aeef3f7291e4c283a2c9e9a2d..0000000000000000000000000000000000000000 --- a/includeCU/sfx_compute_sheba.cuh +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -template<typename T> -void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, - const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_, - const T kappa, const T Pr_t_0_inv, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h, - const T Re_rough_min, - const T B1_rough, const T B2_rough, - const T B_max_land, const T B_max_ocean, const T B_max_lake, - const T gamma_c, const T Re_visc_min, - const T Pr_m, const T nu_air, const T g, - const int maxiters_charnock, - const int grid_size); \ No newline at end of file diff --git a/includeCU/sfx_esm_compute_subfunc.cuh b/includeCU/sfx_esm_compute_subfunc.cuh deleted file mode 100644 index 46812fa9320532355569ec82040a6be9f3a900f5..0000000000000000000000000000000000000000 --- a/includeCU/sfx_esm_compute_subfunc.cuh +++ /dev/null @@ -1,144 +0,0 @@ -#pragma once -#include "../includeC/sfx_data.h" -#include "../includeCU/sfx_math.cuh" - -template<typename T> -FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim, - const T h0_m, const T h0_t, const T B, - const sfx_esm_param& param) -{ - T psi_m, psi_h, f_m, f_h, c; - - c = pow(param.Pr_t_inf_inv / param.Pr_t_0_inv, 4); - zeta_lim = (2.0 * param.alpha_h - c * param.alpha_m - - sqrt( (c * param.alpha_m)*(c * param.alpha_m) + 4.0 * c * param.alpha_h * (param.alpha_h - param.alpha_m))) / (2.0 * param.alpha_h*param.alpha_h); - - f_m_lim = pow(1.0 - param.alpha_m * zeta_lim, 0.25); - f_h_lim = sqrt(1.0 - param.alpha_h * zeta_lim); - - f_m = zeta_lim / h0_m; - f_h = zeta_lim / h0_t; - if (fabs(B) < 1.0e-10) f_h = f_m; - - f_m = pow(1.0 - param.alpha_m * f_m, 0.25); - f_h = sqrt(1.0 - param.alpha_h_fix * f_h); - - psi_m = 2.0 * (atan(f_m_lim) - atan(f_m)) + log((f_m_lim - 1.0) * (f_m + 1.0)/((f_m_lim + 1.0) * (f_m - 1.0))); - psi_h = log((f_h_lim - 1.0) * (f_h + 1.0)/((f_h_lim + 1.0) * (f_h - 1.0))) / param.Pr_t_0_inv; - - Rib_lim = zeta_lim * psi_h / (psi_m * psi_m); -} - -template<typename T> -FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta, - const T Rib, const T h0_m, const T h0_t, const T B, - const sfx_esm_param& param) -{ - T Rib_coeff, psi0_m, psi0_h, phi, c; - - psi0_m = log(h0_m); - psi0_h = B / psi0_m; - - Rib_coeff = param.beta_m * Rib; - c = (psi0_h + 1.0) / param.Pr_t_0_inv - 2.0 * Rib_coeff; - zeta = psi0_m * (sqrt(c*c + 4.0 * Rib_coeff * (1.0 - Rib_coeff)) - c) / (2.0 * param.beta_m * (1.0 - Rib_coeff)); - - phi = param.beta_m * zeta; - - psi_m = psi0_m + phi; - psi_h = (psi0_m + B) / param.Pr_t_0_inv + phi; -} - -template<typename T> -FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &zeta, - const T Rib, const T h0_m, const T h0_t, const T B, - const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim, - const sfx_esm_param& param, - const sfx_esm_numericsTypeC& numerics) -{ - T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f; - - p_m = 2.0 * atan(f_m_conv_lim) + log((f_m_conv_lim - 1.0) / (f_m_conv_lim + 1.0)); - p_h = log((f_h_conv_lim - 1.0) / (f_h_conv_lim + 1.0)); - - zeta = zeta_conv_lim; - - for (int i = 1; i <= numerics.maxiters_convection + 1; i++) - { - zeta0_m = zeta / h0_m; - zeta0_h = zeta / h0_t; - if (fabs(B) < 1.0e-10) - zeta0_h = zeta0_m; - - f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25); - f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h); - - a_m = -2.0*atan(f0_m) + log((f0_m + 1.0)/(f0_m - 1.0)); - a_h = log((f0_h + 1.0)/(f0_h - 1.0)); - - c_lim = pow(zeta_conv_lim / zeta, 1.0 / 3.0); - f = 3.0 * (1.0 - c_lim); - - psi_m = f / f_m_conv_lim + p_m + a_m; - psi_h = (f / f_h_conv_lim + p_h + a_h) / param.Pr_t_0_inv; - - if (i == numerics.maxiters_convection + 1) - break; - - zeta = Rib * psi_m * psi_m / psi_h; - } -} - -template<typename T> -FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta, - const T h0_m, const T h0_t, const T B, - const sfx_esm_param& param) -{ - zeta = 0.0; - psi_m = log(h0_m); - psi_h = log(h0_t) / param.Pr_t_0_inv; - if (fabs(B) < 1.0e-10) - psi_h = psi_m / param.Pr_t_0_inv; -} - -template<typename T> -FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h, T &zeta, - const T Rib, const T h0_m, const T h0_t, const T B, - const sfx_esm_param& param, - const sfx_esm_numericsTypeC& numerics) -{ - T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h; - - psi_m = log(h0_m); - psi_h = log(h0_t); - - if (fabs(B) < 1.0e-10) - psi_h = psi_m; - - zeta = Rib * param.Pr_t_0_inv * psi_m * psi_m / psi_h; - - for (int i = 1; i <= numerics.maxiters_convection + 1; i++) - { - zeta0_m = zeta / h0_m; - zeta0_h = zeta / h0_t; - if (fabs(B) < 1.0e-10) - zeta0_h = zeta0_m; - - f_m = pow(1.0 - param.alpha_m * zeta, 0.25e0); - f_h = sqrt(1.0 - param.alpha_h_fix * zeta); - - f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25e0); - f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h); - - f0_m = sfx_math::max(f0_m, T(1.000001e0)); - f0_h = sfx_math::max(f0_h, T(1.000001e0)); - - psi_m = log((f_m - 1.0e0)*(f0_m + 1.0e0)/((f_m + 1.0e0)*(f0_m - 1.0e0))) + 2.0e0*(atan(f_m) - atan(f0_m)); - psi_h = log((f_h - 1.0e0)*(f0_h + 1.0e0)/((f_h + 1.0e0)*(f0_h - 1.0e0))) / param.Pr_t_0_inv; - - if (i == numerics.maxiters_convection + 1) - break; - - zeta = Rib * psi_m * psi_m / psi_h; - } -} \ No newline at end of file diff --git a/includeCU/sfx_memory_processing.cuh b/includeCU/sfx_memory_processing.cuh index c68c858db0764633b37d9cfce0ee0f92bd1d1d69..2e68b1402dcfd009ef694a22730946e0728d4729 100644 --- a/includeCU/sfx_memory_processing.cuh +++ b/includeCU/sfx_memory_processing.cuh @@ -1,5 +1,5 @@ #pragma once -#include "../includeCXX/sfx_template_parameters.h" +#include "sfx_template_parameters.h" #include <cstddef> namespace memproc diff --git a/includeCU/sfx_model_compute_subfunc.cuh b/includeCU/sfx_model_compute_subfunc.cuh new file mode 100644 index 0000000000000000000000000000000000000000..ecc53f20a1889bfe7024ce652b070c610090cfd0 --- /dev/null +++ b/includeCU/sfx_model_compute_subfunc.cuh @@ -0,0 +1,272 @@ +#pragma once +#include "sfx_data.h" +#include "sfx_math.cuh" + +template<typename T, class sfx_param> +FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim, + const T h0_m, const T h0_t, const T B, + const sfx_param& param) +{ + T psi_m, psi_h, f_m, f_h, c; + + c = pow(param.Pr_t_inf_inv / param.Pr_t_0_inv, 4); + zeta_lim = (2.0 * param.alpha_h - c * param.alpha_m - + sqrt( (c * param.alpha_m)*(c * param.alpha_m) + 4.0 * c * param.alpha_h * (param.alpha_h - param.alpha_m))) / (2.0 * param.alpha_h*param.alpha_h); + + f_m_lim = pow(1.0 - param.alpha_m * zeta_lim, 0.25); + f_h_lim = sqrt(1.0 - param.alpha_h * zeta_lim); + + f_m = zeta_lim / h0_m; + f_h = zeta_lim / h0_t; + if (fabs(B) < 1.0e-10) f_h = f_m; + + f_m = pow(1.0 - param.alpha_m * f_m, 0.25); + f_h = sqrt(1.0 - param.alpha_h_fix * f_h); + + psi_m = 2.0 * (atan(f_m_lim) - atan(f_m)) + log((f_m_lim - 1.0) * (f_m + 1.0)/((f_m_lim + 1.0) * (f_m - 1.0))); + psi_h = log((f_h_lim - 1.0) * (f_h + 1.0)/((f_h_lim + 1.0) * (f_h - 1.0))) / param.Pr_t_0_inv; + + Rib_lim = zeta_lim * psi_h / (psi_m * psi_m); +} + +template<typename T, class sfx_param> +FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta, + const T Rib, const T h0_m, const T h0_t, const T B, + const sfx_param& param) +{ + T Rib_coeff, psi0_m, psi0_h, phi, c; + + psi0_m = log(h0_m); + psi0_h = B / psi0_m; + + Rib_coeff = param.beta_m * Rib; + c = (psi0_h + 1.0) / param.Pr_t_0_inv - 2.0 * Rib_coeff; + zeta = psi0_m * (sqrt(c*c + 4.0 * Rib_coeff * (1.0 - Rib_coeff)) - c) / (2.0 * param.beta_m * (1.0 - Rib_coeff)); + + phi = param.beta_m * zeta; + + psi_m = psi0_m + phi; + psi_h = (psi0_m + B) / param.Pr_t_0_inv + phi; +} + +template<typename T, class sfx_param> +FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &zeta, + const T Rib, const T h0_m, const T h0_t, const T B, + const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim, + const sfx_param& param, + const int maxiters_convection) +{ + T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f; + + p_m = 2.0 * atan(f_m_conv_lim) + log((f_m_conv_lim - 1.0) / (f_m_conv_lim + 1.0)); + p_h = log((f_h_conv_lim - 1.0) / (f_h_conv_lim + 1.0)); + + zeta = zeta_conv_lim; + + for (int i = 1; i <= maxiters_convection + 1; i++) + { + zeta0_m = zeta / h0_m; + zeta0_h = zeta / h0_t; + if (fabs(B) < 1.0e-10) + zeta0_h = zeta0_m; + + f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25); + f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h); + + a_m = -2.0*atan(f0_m) + log((f0_m + 1.0)/(f0_m - 1.0)); + a_h = log((f0_h + 1.0)/(f0_h - 1.0)); + + c_lim = pow(zeta_conv_lim / zeta, 1.0 / 3.0); + f = 3.0 * (1.0 - c_lim); + + psi_m = f / f_m_conv_lim + p_m + a_m; + psi_h = (f / f_h_conv_lim + p_h + a_h) / param.Pr_t_0_inv; + + if (i == maxiters_convection + 1) + break; + + zeta = Rib * psi_m * psi_m / psi_h; + } +} + +template<typename T, class sfx_param> +FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta, + const T h0_m, const T h0_t, const T B, + const sfx_param& param) +{ + zeta = 0.0; + psi_m = log(h0_m); + psi_h = log(h0_t) / param.Pr_t_0_inv; + if (fabs(B) < 1.0e-10) + psi_h = psi_m / param.Pr_t_0_inv; +} + +template<typename T, class sfx_param> +FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h, T &zeta, + const T Rib, const T h0_m, const T h0_t, const T B, + const sfx_param& param, + const int maxiters_convection) +{ + T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h; + + psi_m = log(h0_m); + psi_h = log(h0_t); + + if (fabs(B) < 1.0e-10) + psi_h = psi_m; + + zeta = Rib * param.Pr_t_0_inv * psi_m * psi_m / psi_h; + + for (int i = 1; i <= maxiters_convection + 1; i++) + { + zeta0_m = zeta / h0_m; + zeta0_h = zeta / h0_t; + if (fabs(B) < 1.0e-10) + zeta0_h = zeta0_m; + + f_m = pow(1.0 - param.alpha_m * zeta, 0.25e0); + f_h = sqrt(1.0 - param.alpha_h_fix * zeta); + + f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25e0); + f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h); + + f0_m = sfx_math::max(f0_m, T(1.000001e0)); + f0_h = sfx_math::max(f0_h, T(1.000001e0)); + + psi_m = log((f_m - 1.0e0)*(f0_m + 1.0e0)/((f_m + 1.0e0)*(f0_m - 1.0e0))) + 2.0e0*(atan(f_m) - atan(f0_m)); + psi_h = log((f_h - 1.0e0)*(f0_h + 1.0e0)/((f_h + 1.0e0)*(f0_h - 1.0e0))) / param.Pr_t_0_inv; + + if (i == maxiters_convection + 1) + break; + + zeta = Rib * psi_m * psi_m / psi_h; + } +} + +template<typename T, class sfx_param> +FUCNTION_DECLARATION_SPECIFIER void get_psi(T &psi_m, T &psi_h, + const T zeta, + const sfx_param& param) +{ + T x_m, x_h; + T q_m, q_h; + + if (zeta >= 0.0) + { + q_m = pow((1.0 - param.b_m) / param.b_m, 1.0 / 3.0); + q_h = sqrt(param.c_h * param.c_h - 4.0); + + x_m = pow(1.0 + zeta, 1.0 / 3.0); + x_h = zeta; + + psi_m = -3.0 * (param.a_m / param.b_m) * (x_m - 1.0) + 0.5 * (param.a_m / param.b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m)))); + + psi_h = -0.5 * param.b_h * log(1.0 + param.c_h * x_h + x_h * x_h) + ((-param.a_h / q_h) + ((param.b_h * param.c_h) / (2.0 * q_h))) * (log((2.0 * x_h + param.c_h - q_h) / (2.0 * x_h + param.c_h + q_h)) - log((param.c_h - q_h) / (param.c_h + q_h))); + } + else + { + x_m = pow(1.0 - param.alpha_m * zeta, 0.25); + x_h = pow(1.0 - param.alpha_h * zeta, 0.25); + + psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m); + psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h)); + } +} + +template<typename T, class sfx_param> +FUCNTION_DECLARATION_SPECIFIER void get_psi_mh(T &psi_m, T &psi_h, + const T zeta_m, const T zeta_h, + const sfx_param& param) +{ + T x_m, x_h; + T q_m, q_h; + + if (zeta_m >= 0.0) + { + q_m = pow((1.0 - param.b_m) / param.b_m, 1.0 / 3.0); + x_m = pow(1.0 + zeta_m, 1.0 / 3.0); + + psi_m = -3.0 * (param.a_m / param.b_m) * (x_m - 1.0) + 0.5 * (param.a_m / param.b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m)))); + } + else + { + x_m = pow(1.0 - param.alpha_m * zeta_m, 0.25); + psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m); + } + + if (zeta_h >= 0.0) + { + q_h = sqrt(param.c_h * param.c_h - 4.0); + x_h = zeta_h; + + psi_h = -0.5 * param.b_h * log(1.0 + param.c_h * x_h + x_h * x_h) + ((-param.a_h / q_h) + ((param.b_h * param.c_h) / (2.0 * q_h))) * (log((2.0 * x_h + param.c_h - q_h) / (2.0 * x_h + param.c_h + q_h)) - log((param.c_h - q_h) / (param.c_h + q_h))); + } + else + { + x_h = pow(1.0 - param.alpha_h * zeta_h, 0.25); + psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h)); + } +} + + +template<typename T, class sfx_param> +FUCNTION_DECLARATION_SPECIFIER void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T& zeta, + const T U, const T Tsemi, const T dT, const T dQ, + const T z, const T z0_m, const T z0_t, + const T beta, + const sfx_param& param, + const int maxiters) +{ + const T gamma = T(0.61); + T psi_m, psi_h, psi0_m, psi0_h, Linv; + + Udyn = param.kappa * U / log(z / z0_m); + Tdyn = param.kappa * dT * param.Pr_t_0_inv / log(z / z0_t); + Qdyn = param.kappa * dQ * param.Pr_t_0_inv / log(z / z0_t); + zeta = 0.0; + + // --- no wind + if (Udyn < 1e-5) + return; + + Linv = param.kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn); + zeta = z * Linv; + + // --- near neutral case + if (Linv < 1e-5) + return; + + for (int i = 0; i < maxiters; i++) + { + get_psi(psi_m, psi_h, zeta, param); + get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv, + param); + + Udyn = param.kappa * U / (log(z / z0_m) - (psi_m - psi0_m)); + Tdyn = param.kappa * dT * param.Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); + Qdyn = param.kappa * dQ * param.Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); + + if (Udyn < 1e-5) + break; + + Linv = param.kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn); + zeta = z * Linv; + } +} + +template<typename T, class sfx_param> +FUCNTION_DECLARATION_SPECIFIER void get_phi(T &phi_m, T &phi_h, + const T zeta, + const sfx_param& param) +{ + if (zeta >= 0.0) + { + phi_m = 1.0 + (param.a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + param.b_m * zeta); + phi_h = 1.0 + (param.a_h * zeta + param.b_h * zeta * zeta) / (1.0 + param.c_h * zeta + zeta * zeta); + } + else + { + phi_m = pow(1.0 - param.alpha_m * zeta, -0.25); + phi_h = pow(1.0 - param.alpha_h * zeta, -0.5); + } +} diff --git a/includeCU/sfx_surface.cuh b/includeCU/sfx_surface.cuh index 2344f41c42c0c6de0b8805863a1972833b59f954..c2d6ebe54abd4d45fafbeddcea558c3800f0e236 100644 --- a/includeCU/sfx_surface.cuh +++ b/includeCU/sfx_surface.cuh @@ -1,6 +1,6 @@ #pragma once -#include "../includeC/sfx_data.h" -#include "../includeCU/sfx_math.cuh" +#include "sfx_data.h" +#include "sfx_math.cuh" template<typename T> FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B, @@ -14,8 +14,8 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B, // --- apply max restriction based on surface type if (surface_type == param.surface_ocean) B = sfx_math::min(B, param.B_max_ocean); - if (surface_type == param.surface_lake) B = sfx_math::min(B, param.B_max_land); - if (surface_type == param.surface_land) B = sfx_math::min(B, param.B_max_lake); + if (surface_type == param.surface_lake) B = sfx_math::min(B, param.B_max_lake); + if (surface_type == param.surface_land) B = sfx_math::min(B, param.B_max_land); // --- define roughness [thermal] z0_t = z0_m / exp(B); @@ -23,30 +23,29 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B, template<typename T> FUCNTION_DECLARATION_SPECIFIER void get_charnock_roughness(T &z0_m, T &u_dyn0, - const T h, const T U, - const sfx_esm_param& model_param, + const T U, const T h, const sfx_surface_param& surface_param, - const sfx_esm_numericsTypeC& numerics) + const int maxiters) { T Uc, a, b, c, c_min, f; Uc = U; a = 0.0; b = 25.0; - c_min = log(surface_param.h_charnock) / model_param.kappa; + c_min = log(surface_param.h_charnock) / surface_param.kappa; - for (int i = 0; i < numerics.maxiters_charnock; i++) + for (int i = 0; i < maxiters; i++) { f = surface_param.c1_charnock - 2.0 * log(Uc); - for (int j = 0; j < numerics.maxiters_charnock; j++) + for (int j = 0; j < maxiters; j++) { - c = (f + 2.0 * log(b)) / model_param.kappa; + c = (f + 2.0 * log(b)) / surface_param.kappa; if (U <= 8.0e0) - a = log(1.0 + surface_param.c2_charnock * ( pow(b / Uc, 3) ) ) / model_param.kappa; + a = log(1.0 + surface_param.c2_charnock * ( pow(b / Uc, 3) ) ) / surface_param.kappa; c = sfx_math::max(c - a, c_min); b = c; } - z0_m = surface_param.h_charnock * exp(-c * model_param.kappa); + z0_m = surface_param.h_charnock * exp(-c * surface_param.kappa); z0_m = sfx_math::max(z0_m, T(0.000015e0)); Uc = U * log(surface_param.h_charnock / z0_m) / log(h / z0_m); } diff --git a/includeCXX/model_base.h b/includeCXX/model_base.h index f51c7e7b4aa08d2864626f847a1008835a6f8165..2a3bb893d0d9eef0bcb2f5bb10fa2a2f920e4f52 100644 --- a/includeCXX/model_base.h +++ b/includeCXX/model_base.h @@ -1,7 +1,7 @@ #pragma once #include <cstddef> #include "sfx_template_parameters.h" -#include "../includeC/sfx_data.h" +#include "sfx_data.h" template<typename T, MemType memIn, MemType memOut, MemType RunMem > class ModelBase diff --git a/includeCXX/sfx_call_class_func.h b/includeCXX/sfx_call_class_func.h index 0d79140b7adb6b27f81f4e03a655fcb976f5afed..8cdb0a77e3e57a1e9f64fccb2dcb1e67a71cb850 100644 --- a/includeCXX/sfx_call_class_func.h +++ b/includeCXX/sfx_call_class_func.h @@ -1,5 +1,5 @@ #pragma once -#include "../includeC/sfx_data.h" +#include "sfx_data.h" #ifdef __cplusplus extern "C" { diff --git a/includeCXX/sfx_compute_sheba.h b/includeCXX/sfx_compute_sheba.h deleted file mode 100644 index d13bf591f340e95fe5257c03b6796eebb10387fc..0000000000000000000000000000000000000000 --- a/includeCXX/sfx_compute_sheba.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -template<typename T> -void compute_flux_sheba_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, - const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_, - const T kappa, const T Pr_t_0_inv, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h, - const T Re_rough_min, - const T B1_rough, const T B2_rough, - const T B_max_land, const T B_max_ocean, const T B_max_lake, - const T gamma_c, const T Re_visc_min, - const T Pr_m, const T nu_air, const T g, - const int maxiters_charnock, - const int grid_size); \ No newline at end of file diff --git a/includeCXX/sfx_esm.h b/includeCXX/sfx_esm.h index c1cc836a5c721f349b722a73002ba3128ceae292..d4040389d69569e6ba7799606906abe7f13c8db8 100644 --- a/includeCXX/sfx_esm.h +++ b/includeCXX/sfx_esm.h @@ -1,7 +1,7 @@ #pragma once #include "sfx_template_parameters.h" -#include "../includeCXX/model_base.h" -#include "../includeC/sfx_data.h" +#include "model_base.h" +#include "sfx_data.h" template<typename T, MemType memIn, MemType memOut, MemType RunMem > class FluxEsmBase : public ModelBase<T, memIn, memOut, RunMem> diff --git a/includeCXX/sfx_sheba.h b/includeCXX/sfx_sheba.h index 676980f1fe273a040481b2a3dc414ab84c7daba8..33c93aa76be928524577774d0831e41b83546c76 100644 --- a/includeCXX/sfx_sheba.h +++ b/includeCXX/sfx_sheba.h @@ -1,48 +1,88 @@ #pragma once #include "sfx_template_parameters.h" -#include <cstddef> +#include "model_base.h" +#include "sfx_data.h" template<typename T, MemType memIn, MemType memOut, MemType RunMem > -class FluxSheba +class FluxShebaBase : public ModelBase<T, memIn, memOut, RunMem> { -private: - T *U, *dT, *Tsemi, *dQ, *h, *in_z0_m; - T *zeta, *Rib, *Re, *B, *z0_m, *z0_t, *Rib_conv_lim, *Cm, *Ct, *Km, *Pr_t_inv; +public: + using ModelBase<T, memIn, memOut, RunMem>::res_sfx; + using ModelBase<T, memIn, memOut, RunMem>::sfx; + using ModelBase<T, memIn, memOut, RunMem>::meteo; + using ModelBase<T, memIn, memOut, RunMem>::grid_size; + using ModelBase<T, memIn, memOut, RunMem>::ifAllocated; + using ModelBase<T, memIn, memOut, RunMem>::allocated_size; - T kappa, Pr_t_0_inv, - alpha_m, alpha_h, - a_m, a_h, - b_m, b_h, - c_h, - Re_rough_min, - B1_rough, B2_rough, - B_max_land, B_max_ocean, B_max_lake, - gamma_c, - Re_visc_min, - Pr_m, nu_air, g; + sfx_surface_param surface_param; + sfx_phys_constants phys_constants; + sfx_sheba_param model_param; + sfx_sheba_numericsTypeC numerics; - int grid_size; - bool ifAllocated; - size_t allocated_size; -public: - FluxSheba(); - void set_params(const int grid_size, const T kappa, const T Pr_t_0_inv, - const T alpha_m, const T alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h, - const T Re_rough_min, - const T B1_rough, const T B2_rough, - const T B_max_land, const T B_max_ocean, const T B_max_lake, - const T gamma_c, const T Re_visc_min, - const T Pr_m, const T nu_air, const T g); - ~FluxSheba(); + FluxShebaBase(sfxDataVecTypeC* sfx, + meteoDataVecTypeC* meteo, + const sfx_sheba_param model_param, + const sfx_surface_param surface_param, + const sfx_sheba_numericsTypeC numerics, + const sfx_phys_constants phys_constants, + const int grid_size); + ~FluxShebaBase(); +}; + +template<typename T, MemType memIn, MemType memOut, MemType RunMem > +class FluxSheba : public FluxShebaBase<T, memIn, memOut, RunMem> +{}; - void compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, - T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_, - const int maxiters_charnock); +template<typename T, MemType memIn, MemType memOut > +class FluxSheba<T, memIn, memOut, MemType::CPU> : public FluxShebaBase<T, memIn, memOut, MemType::CPU> +{ + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::res_sfx; + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::sfx; + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::meteo; + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::surface_param; + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::phys_constants; + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::grid_size; + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::ifAllocated; + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::allocated_size; + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::model_param; + using FluxShebaBase<T, memIn, memOut, MemType::CPU>::numerics; +public: + FluxSheba(sfxDataVecTypeC* sfx, + meteoDataVecTypeC* meteo, + const sfx_sheba_param model_param, + const sfx_surface_param surface_param, + const sfx_sheba_numericsTypeC numerics, + const sfx_phys_constants phys_constants, + const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::CPU>(sfx, meteo, model_param, + surface_param, numerics, phys_constants, grid_size) {} + ~FluxSheba() = default; + void compute_flux(); +}; -private: - void set_data( T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, - T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_); -}; \ No newline at end of file +#ifdef INCLUDE_CUDA +template<typename T, MemType memIn, MemType memOut > +class FluxSheba<T, memIn, memOut, MemType::GPU> : public FluxShebaBase<T, memIn, memOut, MemType::GPU> +{ + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::res_sfx; + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::sfx; + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::meteo; + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::surface_param; + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::phys_constants; + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::grid_size; + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::ifAllocated; + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::allocated_size; + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::model_param; + using FluxShebaBase<T, memIn, memOut, MemType::GPU>::numerics; +public: + FluxSheba(sfxDataVecTypeC* sfx, + meteoDataVecTypeC* meteo, + const sfx_sheba_param model_param, + const sfx_surface_param surface_param, + const sfx_sheba_numericsTypeC numerics, + const sfx_phys_constants phys_constants, + const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::GPU>(sfx, meteo, model_param, + surface_param, numerics, phys_constants, grid_size) {} + ~FluxSheba() = default; + void compute_flux(); +}; +#endif \ No newline at end of file diff --git a/srcC/sfx_call_cxx.c b/srcC/sfx_call_cxx.c index 506e4646936355e7512ff74b3f76ecc36dfb064b..208a8ed67505caacbadf5cc8794d5ebb12dfc65a 100644 --- a/srcC/sfx_call_cxx.c +++ b/srcC/sfx_call_cxx.c @@ -1,6 +1,6 @@ #include <stdlib.h> #include <stdio.h> -#include "../includeCXX/sfx_call_class_func.h" +#include "sfx_call_class_func.h" // #include "../includeC/sfx_call_cxx.h" // -------------------------------------------------------------------------- // diff --git a/srcCU/sfx_compute_sheba.cu b/srcCU/sfx_compute_sheba.cu deleted file mode 100644 index 56a93bb49c28ac039c681e6ad71ffc1aa6f33f87..0000000000000000000000000000000000000000 --- a/srcCU/sfx_compute_sheba.cu +++ /dev/null @@ -1,486 +0,0 @@ -#include <iostream> -#include "../includeCU/sfx_compute_sheba.cuh" -#include "../includeCU/sfx_surface.cuh" - -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true) -{ - if (code != cudaSuccess) - { - fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); - } -} - -template<typename T> -__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0, - const T h, const T U, - const T kappa, - const T h_charnock, const T c1_charnock, const T c2_charnock, - const int maxiters) -{ - T Uc, a, b, c, c_min, f; - - Uc = U; - a = 0.0; - b = 25.0; - c_min = log(h_charnock) / kappa; - - for (int i = 0; i < maxiters; i++) - { - f = c1_charnock - 2.0 * log(Uc); - for (int j = 0; j < maxiters; j++) - { - c = (f + 2.0 * log(b)) / kappa; - if (U <= 8.0e0) - a = log(1.0 + c2_charnock * ( pow(b / Uc, 3) ) ) / kappa; - c = max(c - a, c_min); - b = c; - } - z0_m = h_charnock * exp(-c * kappa); - z0_m = max(z0_m, T(0.000015e0)); - Uc = U * log(h_charnock / z0_m) / log(h / z0_m); - } - - u_dyn0 = Uc / c; -} - -template __device__ void get_charnock_roughness(float &z0_m, float &u_dyn0, - const float h, const float U, - const float kappa, - const float h_charnock, const float c1_charnock, const float c2_charnock, - const int maxiters); -template __device__ void get_charnock_roughness(double &z0_m, double &u_dyn0, - const double h, const double U, - const double kappa, - const double h_charnock, const double c1_charnock, const double c2_charnock, - const int maxiters); - -template<typename T> -__device__ void get_thermal_roughness(T &z0_t, T &B, - const T z0_m, const T Re, - const T Re_rough_min, - const T B1_rough, const T B2_rough, const T B3_rough, const T B4_rough, - const T B_max_ocean, const T B_max_lake, const T B_max_land, - const int surface_type) -{ - // --- define B = log(z0_m / z0_t) - if (Re <= Re_rough_min) - B = B1_rough * log(B3_rough * Re) + B2_rough; - else - // *: B4 takes into account Re value at z' ~ O(10) z0 - B = B4_rough * (pow(Re, B2_rough)); - - // --- apply max restriction based on surface type - if (surface_type == 0) - B = min(B, B_max_ocean); - else if (surface_type == 2) - B = min(B, B_max_lake); - else if (surface_type == 1) - B = min(B, B_max_land); - - // --- define roughness [thermal] - z0_t = z0_m / exp(B); -} - -template __device__ void get_thermal_roughness(float &z0_t, float &B, - const float z0_m, const float Re, - const float Re_rough_min, - const float B1_rough, const float B2_rough, const float B3_rough, const float B4_rough, - const float B_max_ocean, const float B_max_lake, const float B_max_land, - const int surface_type); -template __device__ void get_thermal_roughness(double &z0_t, double &B, - const double z0_m, const double Re, - const double Re_rough_min, - const double B1_rough, const double B2_rough, const double B3_rough, const double B4_rough, - const double B_max_ocean, const double B_max_lake, const double B_max_land, - const int surface_type); - -template<typename T> -__device__ void get_psi_mh(T &psi_m, T &psi_h, - const T zeta_m, const T zeta_h, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h) -{ - T x_m, x_h; - T q_m, q_h; - - if (zeta_m >= 0.0) - { - q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0); - x_m = pow(1.0 + zeta_m, 1.0 / 3.0); - - psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m)))); - } - else - { x_m = pow(1.0 - alpha_m * zeta_m, 0.25); - psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m); - } - - if (zeta_h >= 0.0) - { - q_h = sqrt(c_h * c_h - 4.0); - x_h = zeta_h; - - psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h))); - } - else - { - x_h = pow(1.0 - alpha_h * zeta_h, 0.25); - psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h)); - } -} - -template __device__ void get_psi_mh(float &psi_m, float &psi_h, - const float zeta_m, const float zeta_h, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h); -template __device__ void get_psi_mh(double &psi_m, double &psi_h, - const double zeta_m, const double zeta_h, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h); - -template<typename T> -__device__ void get_psi(T &psi_m, T &psi_h, - const T zeta, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h) -{ - T x_m, x_h; - T q_m, q_h; - - if (zeta >= 0.0) - { - q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0); - q_h = sqrt(c_h * c_h - 4.0); - - x_m = pow(1.0 + zeta, 1.0 / 3.0); - x_h = zeta; - - psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m)))); - - psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h))); - } - else - { - x_m = pow(1.0 - alpha_m * zeta, 0.25); - x_h = pow(1.0 - alpha_h * zeta, 0.25); - - psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m); - psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h)); - } -} - -template __device__ void get_psi(float &psi_m, float &psi_h, - const float zeta, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h); -template __device__ void get_psi(double &psi_m, double &psi_h, - const double zeta, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h); - -template<typename T> -__device__ void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta, - const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta, - const T kappa, const T Pr_t_0_inv, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h, - const int maxiters) -{ - T psi_m, psi_h, psi0_m, psi0_h, Linv; - const T gamma = 0.61; - - Udyn = kappa * U / log(z / z0_m); - Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t); - Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t); - zeta = 0.0; - - // --- no wind - if (Udyn < 1e-5) - return; - - Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn); - zeta = z * Linv; - - // --- near neutral case - if (Linv < 1e-5) - return; - - for (int i = 0; i < maxiters; i++) - { - get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h, - a_m, a_h, - b_m, b_h, - c_h); - - get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv, - alpha_m, alpha_h, - a_m, a_h, - b_m, b_h, - c_h); - - Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m)); - Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); - Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); - - if (Udyn < 1e-5) - break; - - Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn); - zeta = z * Linv; - } -} - -template __device__ void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta, - const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta, - const float kappa, const float Pr_t_0_inv, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h, - const int maxiters); -template __device__ void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta, - const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta, - const double kappa, const double Pr_t_0_inv, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h, - const int maxiters); - -template<typename T> -__device__ void get_phi(T &phi_m, T &phi_h, - const T zeta, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h) -{ - if (zeta >= 0.0) - { - phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta); - phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta); - } - else - { - phi_m = pow(1.0 - alpha_m * zeta, -0.25); - phi_h = pow(1.0 - alpha_h * zeta, -0.5); - } -} - -template __device__ void get_phi(float &phi_m, float &phi_h, - const float zeta, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h); -template __device__ void get_phi(double &phi_m, double &phi_h, - const double zeta, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h); - -template<typename T> -__global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, - const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_, - const T kappa, const T Pr_t_0_inv, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h, - const T Re_rough_min, - const T B1_rough, const T B2_rough, - const T B_max_land, const T B_max_ocean, const T B_max_lake, - const T gamma_c, const T Re_visc_min, - const T Pr_m, const T nu_air, const T g, - const int maxiters_charnock, - const int grid_size) -{ - const int index = blockIdx.x * blockDim.x + threadIdx.x; - - T h, U, dT, Tsemi, dQ, z0_m; - T z0_t, B, h0_m, h0_t, u_dyn0, Re, - zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h, - Km, Pr_t_inv, Cm, Ct; - - const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8)); - const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock; - - int surface_type; - - if(index < grid_size) - { - U = U_[index]; - Tsemi = Tsemi_[index]; - dT = dT_[index]; - dQ = dQ_[index]; - h = h_[index]; - z0_m = in_z0_m_[index]; - - if (z0_m < 0.0) surface_type = 0; - else surface_type = 1; - - if (surface_type == 0) - { - get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock); - h0_m = h / z0_m; - } - if (surface_type == 1) - { - h0_m = h / z0_m; - u_dyn0 = U * kappa / log(h0_m); - } - - Re = u_dyn0 * z0_m / nu_air; - get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type); - - // --- define relative height [thermal] - h0_t = h / z0_t; - - // --- define Ri-bulk - Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U); - - // --- get the fluxes - // ---------------------------------------------------------------------------- - get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10); - // ---------------------------------------------------------------------------- - - get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h); - // ---------------------------------------------------------------------------- - - // --- define transfer coeff. (momentum) & (heat) - Cm = 0.0; - if (U > 0.0) - Cm = Udyn / U; - Ct = 0.0; - if (fabs(dT) > 0.0) - Ct = Tdyn / dT; - - // --- define eddy viscosity & inverse Prandtl number - Km = kappa * Cm * U * h / phi_m; - Pr_t_inv = phi_m / phi_h; - - zeta_[index] = zeta; - Rib_[index] = Rib; - Re_[index] = Re; - B_[index] = B; - z0_m_[index] = z0_m; - z0_t_[index] = z0_t; - Rib_conv_lim_[index] = 0.0; - Cm_[index] = Cm; - Ct_[index] = Ct; - Km_[index] = Km; - Pr_t_inv_[index] = Pr_t_inv; - } -} - -template __global__ void kernel_compute_flux_sheba(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_, - const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_, - const float kappa, const float Pr_t_0_inv, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h, - const float Re_rough_min, - const float B1_rough, const float B2_rough, - const float B_max_land, const float B_max_ocean, const float B_max_lake, - const float gamma_c, const float Re_visc_min, - const float Pr_m, const float nu_air, const float g, - const int maxiters_charnock, - const int grid_size); -template __global__ void kernel_compute_flux_sheba(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_, - const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_, - const double kappa, const double Pr_t_0_inv, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h, - const double Re_rough_min, - const double B1_rough, const double B2_rough, - const double B_max_land, const double B_max_ocean, const double B_max_lake, - const double gamma_c, const double Re_visc_min, - const double Pr_m, const double nu_air, const double g, - const int maxiters_charnock, - const int grid_size); - -template<typename T> -void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, - const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_, - const T kappa, const T Pr_t_0_inv, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h, - const T Re_rough_min, - const T B1_rough, const T B2_rough, - const T B_max_land, const T B_max_ocean, const T B_max_lake, - const T gamma_c, const T Re_visc_min, - const T Pr_m, const T nu_air, const T g, - const int maxiters_charnock, - const int grid_size) -{ - const int BlockCount = int(ceil(float(grid_size) / 512.0)); - dim3 cuBlock = dim3(512, 1, 1); - dim3 cuGrid = dim3(BlockCount, 1, 1); - - kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_, - U_, dT_, Tsemi_, dQ_, h_, in_z0_m_, - kappa, Pr_t_0_inv, - alpha_m, alpha_h, - a_m, a_h, - b_m, b_h, - c_h, - Re_rough_min, - B1_rough, B2_rough, - B_max_land, B_max_ocean, B_max_lake, - gamma_c, Re_visc_min, - Pr_m, nu_air, g, - maxiters_charnock, - grid_size); - gpuErrchk( cudaPeekAtLastError() ); -} - -template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_, - const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_, - const float kappa, const float Pr_t_0_inv, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h, - const float Re_rough_min, - const float B1_rough, const float B2_rough, - const float B_max_land, const float B_max_ocean, const float B_max_lake, - const float gamma_c, const float Re_visc_min, - const float Pr_m, const float nu_air, const float g, - const int maxiters_charnock, - const int grid_size); -template void compute_flux_sheba_gpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_, - const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_, - const double kappa, const double Pr_t_0_inv, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h, - const double Re_rough_min, - const double B1_rough, const double B2_rough, - const double B_max_land, const double B_max_ocean, const double B_max_lake, - const double gamma_c, const double Re_visc_min, - const double Pr_m, const double nu_air, const double g, - const int maxiters_charnock, - const int grid_size); \ No newline at end of file diff --git a/srcCU/sfx_esm.cu b/srcCU/sfx_esm.cu index c2d7aef58647901e46f277a699ce3dea8573a478..f8028d63a82ee44d920518671ce728799a2030e6 100644 --- a/srcCU/sfx_esm.cu +++ b/srcCU/sfx_esm.cu @@ -1,10 +1,10 @@ #include <cuda.h> #include <cuda_runtime_api.h> -#include "../includeCXX/sfx_esm.h" -#include "../includeCU/sfx_esm_compute_subfunc.cuh" -#include "../includeCU/sfx_surface.cuh" -#include "../includeCU/sfx_memory_processing.cuh" +#include "sfx_esm.h" +#include "sfx_model_compute_subfunc.cuh" +#include "sfx_surface.cuh" +#include "sfx_memory_processing.cuh" namespace sfx_kernel { @@ -46,7 +46,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx, if (surface_type == surface_param.surface_ocean) { - get_charnock_roughness(z0_m, u_dyn0, h, U, model_param, surface_param, numerics); + get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock); h0_m = h / z0_m; } if (surface_type == surface_param.surface_land) @@ -77,7 +77,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx, } else if (Rib < Rib_conv_lim) { - get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics); + get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics.maxiters_convection); fval = pow(zeta_conv_lim / zeta, 1.0/3.0); phi_m = fval / f_m_conv_lim; @@ -92,7 +92,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx, } else { - get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics); + get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics.maxiters_convection); phi_m = pow(1.0 - model_param.alpha_m * zeta, -0.25); phi_h = 1.0 / (model_param.Pr_t_0_inv * sqrt(1.0 - model_param.alpha_h_fix * zeta)); diff --git a/srcCU/sfx_memory_processing.cu b/srcCU/sfx_memory_processing.cu index 7dcc6cb5aedd07261cbe7d38ae1dcb5630859e73..8d622c1962312c9bb57f59832b099ed103c22e66 100644 --- a/srcCU/sfx_memory_processing.cu +++ b/srcCU/sfx_memory_processing.cu @@ -1,4 +1,4 @@ -#include "../includeCU/sfx_memory_processing.cuh" +#include "sfx_memory_processing.cuh" #include <cuda.h> #include <cuda_runtime_api.h> diff --git a/srcCU/sfx_sheba.cu b/srcCU/sfx_sheba.cu new file mode 100644 index 0000000000000000000000000000000000000000..2161b18798a531a8c81135c9a7e4c4058ff8bb45 --- /dev/null +++ b/srcCU/sfx_sheba.cu @@ -0,0 +1,144 @@ +#include <cuda.h> +#include <cuda_runtime_api.h> + +#include "sfx_sheba.h" +#include "sfx_model_compute_subfunc.cuh" +#include "sfx_surface.cuh" +#include "sfx_memory_processing.cuh" + +namespace sfx_kernel +{ + template<typename T> + __global__ void compute_flux(sfxDataVecTypeC sfx, + meteoDataVecTypeC meteo, + const sfx_sheba_param model_param, + const sfx_surface_param surface_param, + const sfx_sheba_numericsTypeC numerics, + const sfx_phys_constants phys_constants, + const int grid_size); +} + +template<typename T> +__global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx, + meteoDataVecTypeC meteo, + const sfx_sheba_param model_param, + const sfx_surface_param surface_param, + const sfx_sheba_numericsTypeC numerics, + const sfx_phys_constants phys_constants, + const int grid_size) +{ + const int index = blockIdx.x * blockDim.x + threadIdx.x; + T h, U, dT, Tsemi, dQ, z0_m; + T z0_t, B, h0_m, h0_t, u_dyn0, Re, + zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h, + Km, Pr_t_inv, Cm, Ct; + int surface_type; + + if(index < grid_size) + { + U = meteo.U[index]; + Tsemi = meteo.Tsemi[index]; + dT = meteo.dT[index]; + dQ = meteo.dQ[index]; + h = meteo.h[index]; + z0_m = meteo.z0_m[index]; + + surface_type = z0_m < 0.0 ? surface_param.surface_ocean : surface_param.surface_land; + + if (surface_type == surface_param.surface_ocean) + { + get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock); + h0_m = h / z0_m; + } + if (surface_type == surface_param.surface_land) + { + h0_m = h / z0_m; + u_dyn0 = U * model_param.kappa / log(h0_m); + } + + Re = u_dyn0 * z0_m / phys_constants.nu_air; + get_thermal_roughness(z0_t, B, z0_m, Re, surface_param, surface_type); + + // --- define relative height [thermal] + h0_t = h / z0_t; + + // --- define Ri-bulk + Rib = (phys_constants.g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U); + + // --- get the fluxes + // ---------------------------------------------------------------------------- + get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, + U, Tsemi, dT, dQ, h, z0_m, z0_t, (phys_constants.g / Tsemi), model_param, 10); + // ---------------------------------------------------------------------------- + + get_phi(phi_m, phi_h, zeta, model_param); + // ---------------------------------------------------------------------------- + + // --- define transfer coeff. (momentum) & (heat) + Cm = 0.0; + if (U > 0.0) + Cm = Udyn / U; + Ct = 0.0; + if (fabs(dT) > 0.0) + Ct = Tdyn / dT; + + // --- define eddy viscosity & inverse Prandtl number + Km = model_param.kappa * Cm * U * h / phi_m; + Pr_t_inv = phi_m / phi_h; + + sfx.zeta[index] = zeta; + sfx.Rib[index] = Rib; + sfx.Re[index] = Re; + sfx.B[index] = B; + sfx.z0_m[index] = z0_m; + sfx.z0_t[index] = z0_t; + sfx.Rib_conv_lim[index] = T(0.0); + sfx.Cm[index] = Cm; + sfx.Ct[index] = Ct; + sfx.Km[index] = Km; + sfx.Pr_t_inv[index] = Pr_t_inv; + } +} + +template<typename T, MemType memIn, MemType memOut > +void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux() +{ + const int BlockCount = int(ceil(float(grid_size) / 1024.0)); + dim3 cuBlock = dim3(1024, 1, 1); + dim3 cuGrid = dim3(BlockCount, 1, 1); + + sfx_kernel::compute_flux<T><<<cuGrid, cuBlock>>>(sfx, meteo, model_param, + surface_param, numerics, phys_constants, grid_size); + + if(MemType::GPU != memOut) + { + const size_t new_size = grid_size * sizeof(T); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size); + memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size); + } +} + +template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>; +template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>; +template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>; +template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>; +template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>; +template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>; +template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>; + +template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>; +template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>; +template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::GPU>; +template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::GPU>; +template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU>; +template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::CPU>; +template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::CPU>; \ No newline at end of file diff --git a/srcCXX/model_base.cpp b/srcCXX/model_base.cpp index ca1bcdb48c8633d6b6e6e21f37d7fab9d5a0c1b4..a72d47d5973035b4a7dd639c004f566802273a8b 100644 --- a/srcCXX/model_base.cpp +++ b/srcCXX/model_base.cpp @@ -1,9 +1,9 @@ -#include "../includeCXX/model_base.h" +#include "model_base.h" #ifdef INCLUDE_CUDA - #include "../includeCU/sfx_memory_processing.cuh" + #include "sfx_memory_processing.cuh" #endif -#include "../includeCXX/sfx_memory_processing.h" +#include "sfx_memory_processing.h" template<typename T, MemType memIn, MemType memOut, MemType RunMem > ModelBase<T, memIn, memOut, RunMem>::ModelBase(sfxDataVecTypeC* sfx_in, diff --git a/srcCXX/sfx_call_class_func.cpp b/srcCXX/sfx_call_class_func.cpp index 1cc665d1c8da5d3ae8c5f89e14c35dfb946811cc..9659c17b691ecb848a596bbf94de4accd181506f 100644 --- a/srcCXX/sfx_call_class_func.cpp +++ b/srcCXX/sfx_call_class_func.cpp @@ -1,8 +1,8 @@ #include <stdlib.h> #include <stdio.h> -#include "../includeCXX/sfx_call_class_func.h" -#include "../includeCXX/sfx_esm.h" -#include "../includeCXX/sfx_sheba.h" +#include "sfx_call_class_func.h" +#include "sfx_esm.h" +#include "sfx_sheba.h" #include <vector> // -------------------------------------------------------------------------- // diff --git a/srcCXX/sfx_compute_sheba.cpp b/srcCXX/sfx_compute_sheba.cpp deleted file mode 100644 index 167fcbb65fad8863ca7837721e61ac23ff82c747..0000000000000000000000000000000000000000 --- a/srcCXX/sfx_compute_sheba.cpp +++ /dev/null @@ -1,325 +0,0 @@ -#include <cmath> -#include <iostream> -#include "../includeCXX/sfx_compute_sheba.h" -// #include "../includeCXX/sfx_surface.h" - -template<typename T> -void get_psi_mh(T &psi_m, T &psi_h, - const T zeta_m, const T zeta_h, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h) -{ - T x_m, x_h; - T q_m, q_h; - - if (zeta_m >= 0.0) - { - q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0); - x_m = pow(1.0 + zeta_m, 1.0 / 3.0); - - psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m)))); - } - else - { x_m = pow(1.0 - alpha_m * zeta_m, 0.25); - psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m); - } - - if (zeta_h >= 0.0) - { - q_h = sqrt(c_h * c_h - 4.0); - x_h = zeta_h; - - psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h))); - } - else - { - x_h = pow(1.0 - alpha_h * zeta_h, 0.25); - psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h)); - } -} - -template void get_psi_mh(float &psi_m, float &psi_h, - const float zeta_m, const float zeta_h, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h); -template void get_psi_mh(double &psi_m, double &psi_h, - const double zeta_m, const double zeta_h, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h); - -template<typename T> -void get_psi(T &psi_m, T &psi_h, - const T zeta, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h) -{ - T x_m, x_h; - T q_m, q_h; - - if (zeta >= 0.0) - { - q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0); - q_h = sqrt(c_h * c_h - 4.0); - - x_m = pow(1.0 + zeta, 1.0 / 3.0); - x_h = zeta; - - psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m)))); - - psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h))); - } - else - { - x_m = pow(1.0 - alpha_m * zeta, 0.25); - x_h = pow(1.0 - alpha_h * zeta, 0.25); - - psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m); - psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h)); - } -} - -template void get_psi(float &psi_m, float &psi_h, - const float zeta, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h); -template void get_psi(double &psi_m, double &psi_h, - const double zeta, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h); - -template<typename T> -void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta, - const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta, - const T kappa, const T Pr_t_0_inv, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h, - const int maxiters) -{ - T psi_m, psi_h, psi0_m, psi0_h, Linv; - const T gamma = 0.61; - - Udyn = kappa * U / log(z / z0_m); - Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t); - Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t); - zeta = 0.0; - - // --- no wind - if (Udyn < 1e-5) - return; - - Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn); - zeta = z * Linv; - - // --- near neutral case - if (Linv < 1e-5) - return; - - for (int i = 0; i < maxiters; i++) - { - get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h, - a_m, a_h, - b_m, b_h, - c_h); - - get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv, - alpha_m, alpha_h, - a_m, a_h, - b_m, b_h, - c_h); - - Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m)); - Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); - Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); - - if (Udyn < 1e-5) - break; - - Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn); - zeta = z * Linv; - } -} - -template void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta, - const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta, - const float kappa, const float Pr_t_0_inv, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h, - const int maxiters); -template void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta, - const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta, - const double kappa, const double Pr_t_0_inv, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h, - const int maxiters); - -template<typename T> -void get_phi(T &phi_m, T &phi_h, - const T zeta, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h) -{ - if (zeta >= 0.0) - { - phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta); - phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta); - } - else - { - phi_m = pow(1.0 - alpha_m * zeta, -0.25); - phi_h = pow(1.0 - alpha_h * zeta, -0.5); - } -} - -template void get_phi(float &phi_m, float &phi_h, - const float zeta, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h); -template void get_phi(double &phi_m, double &phi_h, - const double zeta, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h); - -template<typename T> -void compute_flux_sheba_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, - const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_, - const T kappa, const T Pr_t_0_inv, - const T alpha_m, const T alpha_h, - const T a_m, const T a_h, - const T b_m, const T b_h, - const T c_h, - const T Re_rough_min, - const T B1_rough, const T B2_rough, - const T B_max_land, const T B_max_ocean, const T B_max_lake, - const T gamma_c, const T Re_visc_min, - const T Pr_m, const T nu_air, const T g, - const int maxiters_charnock, - const int grid_size) -{ - // T h, U, dT, Tsemi, dQ, z0_m; - // T z0_t, B, h0_m, h0_t, u_dyn0, Re, - // zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h, - // Km, Pr_t_inv, Cm, Ct; - - // const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8)); - // const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock; - - // int surface_type; - - // for (int step = 0; step < grid_size; step++) - // { - // U = U_[step]; - // Tsemi = Tsemi_[step]; - // dT = dT_[step]; - // dQ = dQ_[step]; - // h = h_[step]; - // z0_m = in_z0_m_[step]; - - // if (z0_m < 0.0) surface_type = 0; - // else surface_type = 1; - - // if (surface_type == 0) - // { - // get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock); - // h0_m = h / z0_m; - // } - // if (surface_type == 1) - // { - // h0_m = h / z0_m; - // u_dyn0 = U * kappa / log(h0_m); - // } - - // Re = u_dyn0 * z0_m / nu_air; - // get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type); - - // // --- define relative height [thermal] - // h0_t = h / z0_t; - - // // --- define Ri-bulk - // Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U); - - // // --- get the fluxes - // // ---------------------------------------------------------------------------- - // get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10); - // // ---------------------------------------------------------------------------- - - // get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h); - // // ---------------------------------------------------------------------------- - - // // --- define transfer coeff. (momentum) & (heat) - // Cm = 0.0; - // if (U > 0.0) - // Cm = Udyn / U; - // Ct = 0.0; - // if (fabs(dT) > 0.0) - // Ct = Tdyn / dT; - - // // --- define eddy viscosity & inverse Prandtl number - // Km = kappa * Cm * U * h / phi_m; - // Pr_t_inv = phi_m / phi_h; - - // zeta_[step] = zeta; - // Rib_[step] = Rib; - // Re_[step] = Re; - // B_[step] = B; - // z0_m_[step] = z0_m; - // z0_t_[step] = z0_t; - // Rib_conv_lim_[step] = 0.0; - // Cm_[step] = Cm; - // Ct_[step] = Ct; - // Km_[step] = Km; - // Pr_t_inv_[step] = Pr_t_inv; - // } -} - -template void compute_flux_sheba_cpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_, - const float *U, const float *dt, const float *T_semi, const float *dq, const float *H, const float *in_z0_m, - const float kappa, const float Pr_t_0_inv, - const float alpha_m, const float alpha_h, - const float a_m, const float a_h, - const float b_m, const float b_h, - const float c_h, - const float Re_rough_min, - const float B1_rough, const float B2_rough, - const float B_max_land, const float B_max_ocean, const float B_max_lake, - const float gamma_c, const float Re_visc_min, - const float Pr_m, const float nu_air, const float g, - const int maxiters_charnock, - const int grid_size); -template void compute_flux_sheba_cpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_, - const double *U, const double *dt, const double *T_semi, const double *dq, const double *H, const double *in_z0_m, - const double kappa, const double Pr_t_0_inv, - const double alpha_m, const double alpha_h, - const double a_m, const double a_h, - const double b_m, const double b_h, - const double c_h, - const double Re_rough_min, - const double B1_rough, const double B2_rough, - const double B_max_land, const double B_max_ocean, const double B_max_lake, - const double gamma_c, const double Re_visc_min, - const double Pr_m, const double nu_air, const double g, - const int maxiters_charnock, - const int grid_size); \ No newline at end of file diff --git a/srcCXX/sfx_esm.cpp b/srcCXX/sfx_esm.cpp index ad77a702428a957d5c18d27f3e227d2f35e0b72b..c324e80f1fa91fd1fa26e7c5acfaa7dbd1af1713 100644 --- a/srcCXX/sfx_esm.cpp +++ b/srcCXX/sfx_esm.cpp @@ -1,14 +1,14 @@ #include <iostream> #include <cmath> -#include "../includeCXX/sfx_esm.h" +#include "sfx_esm.h" #ifdef INCLUDE_CUDA - #include "../includeCU/sfx_memory_processing.cuh" + #include "sfx_memory_processing.cuh" #endif -#include "../includeCXX/sfx_memory_processing.h" -#include "../includeCU/sfx_surface.cuh" -#include "../includeCU/sfx_esm_compute_subfunc.cuh" +#include "sfx_memory_processing.h" +#include "sfx_surface.cuh" +#include "sfx_model_compute_subfunc.cuh" template<typename T, MemType memIn, MemType memOut, MemType RunMem > FluxEsmBase<T, memIn, memOut, RunMem>::FluxEsmBase(sfxDataVecTypeC* sfx_in, @@ -49,7 +49,7 @@ void FluxEsm<T, memIn, memOut, MemType::CPU>::compute_flux() if (surface_type == surface_param.surface_ocean) { - get_charnock_roughness(z0_m, u_dyn0, h, U, model_param, surface_param, numerics); + get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock); h0_m = h / z0_m; } if (surface_type == surface_param.surface_land) @@ -80,7 +80,7 @@ void FluxEsm<T, memIn, memOut, MemType::CPU>::compute_flux() } else if (Rib < Rib_conv_lim) { - get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics); + get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics.maxiters_convection); fval = pow(zeta_conv_lim / zeta, 1.0/3.0); phi_m = fval / f_m_conv_lim; @@ -95,7 +95,7 @@ void FluxEsm<T, memIn, memOut, MemType::CPU>::compute_flux() } else { - get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics); + get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics.maxiters_convection); phi_m = pow(1.0 - model_param.alpha_m * zeta, -0.25); phi_h = 1.0 / (model_param.Pr_t_0_inv * sqrt(1.0 - model_param.alpha_h_fix * zeta)); diff --git a/srcCXX/sfx_memory_processing.cpp b/srcCXX/sfx_memory_processing.cpp index 1d5805a4716eb7c1068a5665c432a8caa95441ba..6b13745da36f58e8cf909206678e6d3583a11da4 100644 --- a/srcCXX/sfx_memory_processing.cpp +++ b/srcCXX/sfx_memory_processing.cpp @@ -1,4 +1,4 @@ -#include "../includeCXX/sfx_memory_processing.h" +#include "sfx_memory_processing.h" #include <cstdlib> #include <cstring> diff --git a/srcCXX/sfx_sheba.cpp b/srcCXX/sfx_sheba.cpp index af52cc2c78fcc2578815b2b9a75e02a96ff04c7e..abd48c71e3c01b939e5b517525cb4963b8a441c4 100644 --- a/srcCXX/sfx_sheba.cpp +++ b/srcCXX/sfx_sheba.cpp @@ -1,281 +1,136 @@ #include <iostream> +#include <cmath> -#include "../includeCXX/sfx_sheba.h" -#include "../includeCXX/sfx_compute_sheba.h" +#include "sfx_sheba.h" #ifdef INCLUDE_CUDA - #include "../includeCU/sfx_compute_sheba.cuh" - #include "../includeCU/sfx_memory_processing.cuh" + #include "sfx_memory_processing.cuh" #endif -#include "../includeCXX/sfx_memory_processing.h" +#include "sfx_memory_processing.h" +#include "sfx_surface.cuh" +#include "sfx_model_compute_subfunc.cuh" template<typename T, MemType memIn, MemType memOut, MemType RunMem > -FluxSheba<T, memIn, memOut, RunMem>::FluxSheba() +FluxShebaBase<T, memIn, memOut, RunMem>::FluxShebaBase(sfxDataVecTypeC* sfx_in, + meteoDataVecTypeC* meteo_in, + const sfx_sheba_param model_param_in, + const sfx_surface_param surface_param_in, + const sfx_sheba_numericsTypeC numerics_in, + const sfx_phys_constants phys_constants_in, + const int grid_size_in) : ModelBase<T, memIn, memOut, RunMem>(sfx_in, meteo_in, grid_size_in) { - kappa = 0, Pr_t_0_inv = 0, - alpha_m = 0, alpha_h = 0, - a_m = 0, a_h = 0, - b_m = 0, b_h = 0, - c_h = 0, - Re_rough_min = 0, - B1_rough = 0, B2_rough = 0, - B_max_land = 0, B_max_ocean = 0, B_max_lake = 0, - gamma_c = 0, - Re_visc_min = 0, - Pr_m = 0, nu_air = 0, g = 0; - - grid_size = 0; - - ifAllocated = false; - allocated_size = 0; + surface_param = surface_param_in; + phys_constants = phys_constants_in; + model_param = model_param_in; + numerics = numerics_in; } template<typename T, MemType memIn, MemType memOut, MemType RunMem > -void FluxSheba<T, memIn, memOut, RunMem>::set_params(const int grid_size_, const T kappa_, const T Pr_t_0_inv_, - const T alpha_m_, const T alpha_h_, - const float a_m_, const float a_h_, - const float b_m_, const float b_h_, - const float c_h_, - const T Re_rough_min_, - const T B1_rough_, const T B2_rough_, - const T B_max_land_, const T B_max_ocean_, const T B_max_lake_, - const T gamma_c_, const T Re_visc_min_, - const T Pr_m_, const T nu_air_, const T g_) -{ - kappa = kappa_, Pr_t_0_inv = Pr_t_0_inv_, - alpha_m = alpha_m_, alpha_h = alpha_h_, - a_m = a_m_, a_h = a_h_, - b_m = b_m_, b_h = b_h_, - c_h = c_h_, - Re_rough_min = Re_rough_min_, B_max_lake = B_max_lake_, - B1_rough = B1_rough_, B2_rough = B2_rough_, - B_max_land = B_max_land_, B_max_ocean = B_max_ocean_, B_max_lake = B_max_lake_, - gamma_c = gamma_c_, - Re_visc_min = Re_visc_min_, - Pr_m = Pr_m_, nu_air = nu_air_, g = g_; - - grid_size = grid_size_; - - if(RunMem != memIn) - { - const size_t new_size = grid_size * sizeof(T); +FluxShebaBase<T, memIn, memOut, RunMem>::~FluxShebaBase() {} - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(U), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(dT), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(Tsemi), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(dQ), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(h), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(in_z0_m), allocated_size, new_size); +template<typename T, MemType memIn, MemType memOut > +void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux() +{ + T h, U, dT, Tsemi, dQ, z0_m; + T z0_t, B, h0_m, h0_t, u_dyn0, Re, + zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h, + Km, Pr_t_inv, Cm, Ct; + int surface_type; - ifAllocated = true; - } - if(RunMem != memOut) + for (int step = 0; step < grid_size; step++) { - const size_t new_size = grid_size * sizeof(T); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(zeta), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(Rib), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(Re), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(Rib_conv_lim), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(z0_m), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(z0_t), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(B), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(Cm), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(Ct), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(Km), allocated_size, new_size); - allocated_size = 0; - memproc::realloc<RunMem>((void *&)(Pr_t_inv), allocated_size, new_size); - - // T* del = new T[grid_size]; - // for (int i = 0; i < grid_size; i++) - // del[i] = i; - - // memproc::memcopy<RunMem, memOut>(Cm, del, new_size); - - // for (int i = 0; i < grid_size; i++) - // del[i] = 0; + U = meteo.U[step]; + Tsemi = meteo.Tsemi[step]; + dT = meteo.dT[step]; + dQ = meteo.dQ[step]; + h = meteo.h[step]; + z0_m = meteo.z0_m[step]; - // printf("before\n"); - // for (int i = 0; i < 10; i++) - // printf("%f\n", del[i]); + surface_type = z0_m < 0.0 ? surface_param.surface_ocean : surface_param.surface_land; - // memproc::memcopy<memOut, RunMem>(del, Cm, new_size); - - // for (int i = 0; i < grid_size; i++) - // printf("%f\n", del[i]); - - // delete[] del; - - ifAllocated = true; - } -} - -template<typename T, MemType memIn, MemType memOut, MemType RunMem > -FluxSheba<T, memIn, memOut, RunMem>::~FluxSheba() -{ - kappa = 0, Pr_t_0_inv = 0, - alpha_m = 0, alpha_h = 0, - a_m = 0, a_h = 0, - b_m = 0, b_h = 0, - c_h = 0, - Re_rough_min = 0, - B1_rough = 0, B2_rough = 0, - B_max_land = 0, B_max_ocean = 0, B_max_lake = 0, - gamma_c = 0, - Re_visc_min = 0, - Pr_m = 0, nu_air = 0, g = 0; - - grid_size = 0; - - if(ifAllocated == true) - { - if(RunMem != memIn) - { - memproc::dealloc<RunMem>((void*&)U); - memproc::dealloc<RunMem>((void*&)dT); - memproc::dealloc<RunMem>((void*&)Tsemi); - memproc::dealloc<RunMem>((void*&)dQ); - memproc::dealloc<RunMem>((void*&)h); + if (surface_type == surface_param.surface_ocean) + { + get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock); + h0_m = h / z0_m; } - if(RunMem != memOut) + if (surface_type == surface_param.surface_land) { - memproc::dealloc<RunMem>((void*&)zeta); - memproc::dealloc<RunMem>((void*&)Rib); - memproc::dealloc<RunMem>((void*&)Re); - memproc::dealloc<RunMem>((void*&)Rib_conv_lim); - memproc::dealloc<RunMem>((void*&)z0_m); - memproc::dealloc<RunMem>((void*&)z0_t); - memproc::dealloc<RunMem>((void*&)B); - memproc::dealloc<RunMem>((void*&)Cm); - memproc::dealloc<RunMem>((void*&)Ct); - memproc::dealloc<RunMem>((void*&)Km); - memproc::dealloc<RunMem>((void*&)Pr_t_inv); + h0_m = h / z0_m; + u_dyn0 = U * model_param.kappa / log(h0_m); } - ifAllocated = false; - allocated_size = 0; + Re = u_dyn0 * z0_m / phys_constants.nu_air; + get_thermal_roughness(z0_t, B, z0_m, Re, surface_param, surface_type); + + // --- define relative height [thermal] + h0_t = h / z0_t; + + // --- define Ri-bulk + Rib = (phys_constants.g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U); + + // --- get the fluxes + // ---------------------------------------------------------------------------- + get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, + U, Tsemi, dT, dQ, h, z0_m, z0_t, (phys_constants.g / Tsemi), model_param, 10); + // ---------------------------------------------------------------------------- + + get_phi(phi_m, phi_h, zeta, model_param); + // ---------------------------------------------------------------------------- + + // --- define transfer coeff. (momentum) & (heat) + Cm = 0.0; + if (U > 0.0) + Cm = Udyn / U; + Ct = 0.0; + if (fabs(dT) > 0.0) + Ct = Tdyn / dT; + + // --- define eddy viscosity & inverse Prandtl number + Km = model_param.kappa * Cm * U * h / phi_m; + Pr_t_inv = phi_m / phi_h; + + sfx.zeta[step] = zeta; + sfx.Rib[step] = Rib; + sfx.Re[step] = Re; + sfx.B[step] = B; + sfx.z0_m[step] = z0_m; + sfx.z0_t[step] = z0_t; + sfx.Rib_conv_lim[step] = T(0.0); + sfx.Cm[step] = Cm; + sfx.Ct[step] = Ct; + sfx.Km[step] = Km; + sfx.Pr_t_inv[step] = Pr_t_inv; } -} - -template<typename T, MemType memIn, MemType memOut, MemType RunMem > -void FluxSheba<T, memIn, memOut, RunMem>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, - T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_) -{ - if(RunMem == memIn) - { - U = U_; - dT = dT_; - Tsemi = Tsemi_; - dQ = dQ_; - h = h_; - in_z0_m = in_z0_m_; - } - else - { - const size_t new_size = grid_size * sizeof(T); - - memproc::memcopy<RunMem, memIn>(U, U_, new_size); - memproc::memcopy<RunMem, memIn>(dT, dT_, new_size); - memproc::memcopy<RunMem, memIn>(Tsemi, Tsemi_, new_size); - memproc::memcopy<RunMem, memIn>(dQ, dQ_, new_size); - memproc::memcopy<RunMem, memIn>(h, h_, new_size); - memproc::memcopy<RunMem, memIn>(in_z0_m, in_z0_m_, new_size); - } - - if(RunMem == memOut) - { - zeta = zeta_; - Rib = Rib_; - Re = Re_; - Rib_conv_lim = Rib_conv_lim_; - z0_m = z0_m_; - z0_t = z0_t_; - B = B_; - Cm = Cm_; - Ct = Ct_; - Km = Km_; - Pr_t_inv = Pr_t_inv_; - } -} - -template<typename T, MemType memIn, MemType memOut, MemType RunMem > -void FluxSheba<T, memIn, memOut, RunMem>::compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, - T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_, - const int maxiters_charnock) -{ - set_data(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_, - U_, dT_, Tsemi_, dQ_, h_, in_z0_m_); - - if(RunMem == MemType::CPU) - compute_flux_sheba_cpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv, - U, dT, Tsemi, dQ, h, in_z0_m, - kappa, Pr_t_0_inv, - alpha_m, alpha_h, - a_m, a_h, - b_m, b_h, - c_h, - Re_rough_min, - B1_rough, B2_rough, - B_max_land, B_max_ocean, B_max_lake, - gamma_c, Re_visc_min, - Pr_m, nu_air, g, - maxiters_charnock, - grid_size); -#ifdef INCLUDE_CUDA - else compute_flux_sheba_gpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv, - U, dT, Tsemi, dQ, h, in_z0_m, - kappa, Pr_t_0_inv, - alpha_m, alpha_h, - a_m, a_h, - b_m, b_h, - c_h, - Re_rough_min, - B1_rough, B2_rough, - B_max_land, B_max_ocean, B_max_lake, - gamma_c, Re_visc_min, - Pr_m, nu_air, g, - maxiters_charnock, - grid_size); -#endif - - if(RunMem != memOut) + if(MemType::CPU != memOut) { const size_t new_size = grid_size * sizeof(T); - - memproc::memcopy<memOut, RunMem>(zeta_, zeta, new_size); - memproc::memcopy<memOut, RunMem>(Rib_, Rib, new_size); - memproc::memcopy<memOut, RunMem>(Re_, Re, new_size); - memproc::memcopy<memOut, RunMem>(Rib_conv_lim_, Rib_conv_lim, new_size); - memproc::memcopy<memOut, RunMem>(z0_m_, z0_m, new_size); - memproc::memcopy<memOut, RunMem>(z0_t_, z0_t, new_size); - memproc::memcopy<memOut, RunMem>(B_, B, new_size); - memproc::memcopy<memOut, RunMem>(Cm_, Cm, new_size); - memproc::memcopy<memOut, RunMem>(Ct_, Ct, new_size); - memproc::memcopy<memOut, RunMem>(Km_, Km, new_size); - memproc::memcopy<memOut, RunMem>(Pr_t_inv_, Pr_t_inv, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size); + memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size); } } template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU>; -template class FluxSheba<double, MemType::CPU, MemType::CPU, MemType::CPU>; +template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::CPU>; #ifdef INCLUDE_CUDA + template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>; + template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>; + template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>; + template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>; + template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>; + template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>; + template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>; + template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>; template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>; template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::GPU>; @@ -283,12 +138,4 @@ template class FluxSheba<double, MemType::CPU, MemType::CPU, MemType::CPU>; template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU>; template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::CPU>; template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::CPU>; - - template class FluxSheba<double, MemType::GPU, MemType::GPU, MemType::GPU>; - template class FluxSheba<double, MemType::GPU, MemType::GPU, MemType::CPU>; - template class FluxSheba<double, MemType::GPU, MemType::CPU, MemType::GPU>; - template class FluxSheba<double, MemType::CPU, MemType::GPU, MemType::GPU>; - template class FluxSheba<double, MemType::CPU, MemType::CPU, MemType::GPU>; - template class FluxSheba<double, MemType::CPU, MemType::GPU, MemType::CPU>; - template class FluxSheba<double, MemType::GPU, MemType::CPU, MemType::CPU>; #endif \ No newline at end of file diff --git a/srcF/sfx_data.f90 b/srcF/sfx_data.f90 index 4d6ceaa686a6ba2f73fa41158c002b6013b5f05a..1cdc8ebfa22ed16266362de6c48577070900fad3 100644 --- a/srcF/sfx_data.f90 +++ b/srcF/sfx_data.f90 @@ -41,12 +41,12 @@ module sfx_data !> &details using arrays as input type, public :: meteoDataVecType #if defined(INCLUDE_CXX) - real, pointer :: h(:) !< constant flux layer height [m] - real, pointer :: U(:) !< abs(wind speed) at 'h' [m/s] - real, pointer :: dT(:) !< difference between potential temperature at 'h' and at surface [K] - real, pointer :: Tsemi(:) !< semi-sum of potential temperature at 'h' and at surface [K] - real, pointer :: dQ(:) !< difference between humidity at 'h' and at surface [g/g] - real, pointer :: z0_m(:) !< surface aerodynamic roughness (should be < 0 for water bodies surface) + real, pointer, contiguous :: h(:) !< constant flux layer height [m] + real, pointer, contiguous :: U(:) !< abs(wind speed) at 'h' [m/s] + real, pointer, contiguous :: dT(:) !< difference between potential temperature at 'h' and at surface [K] + real, pointer, contiguous :: Tsemi(:) !< semi-sum of potential temperature at 'h' and at surface [K] + real, pointer, contiguous :: dQ(:) !< difference between humidity at 'h' and at surface [g/g] + real, pointer, contiguous :: z0_m(:) !< surface aerodynamic roughness (should be < 0 for water bodies surface) #else real, allocatable :: h(:) !< constant flux layer height [m] real, allocatable :: U(:) !< abs(wind speed) at 'h' [m/s] @@ -91,17 +91,17 @@ module sfx_data !> &details using arrays as output type, public :: sfxDataVecType #if defined(INCLUDE_CXX) - real, pointer :: zeta(:) !< = z/L [n/d] - real, pointer :: Rib(:) !< bulk Richardson number [n/d] - real, pointer :: Re(:) !< Reynolds number [n/d] - real, pointer :: B(:) !< = log(z0_m / z0_h) [n/d] - real, pointer :: z0_m(:) !< aerodynamic roughness [m] - real, pointer :: z0_t(:) !< thermal roughness [m] - real, pointer :: Rib_conv_lim(:) !< Ri-bulk convection critical value [n/d] - real, pointer :: Cm(:) !< transfer coefficient for momentum [n/d] - real, pointer :: Ct(:) !< transfer coefficient for heat [n/d] - real, pointer :: Km(:) !< eddy viscosity coeff. at h [m^2/s] - real, pointer :: Pr_t_inv(:) !< inverse turbulent Prandtl number at h [n/d] + real, pointer, contiguous :: zeta(:) !< = z/L [n/d] + real, pointer, contiguous :: Rib(:) !< bulk Richardson number [n/d] + real, pointer, contiguous :: Re(:) !< Reynolds number [n/d] + real, pointer, contiguous :: B(:) !< = log(z0_m / z0_h) [n/d] + real, pointer, contiguous :: z0_m(:) !< aerodynamic roughness [m] + real, pointer, contiguous :: z0_t(:) !< thermal roughness [m] + real, pointer, contiguous :: Rib_conv_lim(:) !< Ri-bulk convection critical value [n/d] + real, pointer, contiguous :: Cm(:) !< transfer coefficient for momentum [n/d] + real, pointer, contiguous :: Ct(:) !< transfer coefficient for heat [n/d] + real, pointer, contiguous :: Km(:) !< eddy viscosity coeff. at h [m^2/s] + real, pointer, contiguous :: Pr_t_inv(:) !< inverse turbulent Prandtl number at h [n/d] #else real, allocatable :: zeta(:) !< = z/L [n/d] real, allocatable :: Rib(:) !< bulk Richardson number [n/d] @@ -137,6 +137,7 @@ module sfx_data integer(C_INT) :: surface_land integer(C_INT) :: surface_lake + real(C_FLOAT) :: kappa; real(C_FLOAT) :: gamma_c; real(C_FLOAT) :: Re_visc_min; real(C_FLOAT) :: h_charnock; diff --git a/srcF/sfx_fc_wrapper.F90 b/srcF/sfx_fc_wrapper.F90 index de16ec388125675daba6c2719445c832623174d5..0e84bb3f56d8e6b0134a4e40ea53118d72ff13bc 100644 --- a/srcF/sfx_fc_wrapper.F90 +++ b/srcF/sfx_fc_wrapper.F90 @@ -32,30 +32,6 @@ module C_FUNC contains - subroutine set_c_struct_sfx_surface_param_values(surface_param) - use sfx_data - use sfx_surface - implicit none - type (sfx_surface_param), intent(inout) :: surface_param - surface_param%surface_ocean = surface_ocean - surface_param%surface_land = surface_land - surface_param%surface_lake = surface_lake - - surface_param%gamma_c = gamma_c - surface_param%Re_visc_min = Re_visc_min - surface_param%h_charnock = h_charnock - surface_param%c1_charnock = c1_charnock - surface_param%c2_charnock = c2_charnock - surface_param%Re_rough_min = Re_rough_min - surface_param%B1_rough = B1_rough - surface_param%B2_rough = B2_rough - surface_param%B3_rough = B3_rough - surface_param%B4_rough = B4_rough - surface_param%B_max_lake = B_max_lake - surface_param%B_max_ocean = B_max_ocean - surface_param%B_max_land = B_max_land - end subroutine set_c_struct_sfx_surface_param_values - subroutine set_c_struct_sfx_phys_constants_values(constants) use sfx_data use sfx_phys_const diff --git a/srcF/sfx_sheba.f90 b/srcF/sfx_sheba.f90 index e0ae6868c86c8df86afa77b65135040de957bca7..ffa5ae399323d619259a9d845596fad262475432 100644 --- a/srcF/sfx_sheba.f90 +++ b/srcF/sfx_sheba.f90 @@ -90,7 +90,7 @@ contains call set_meteo_vec_c(meteo, meteo_c) call set_sfx_vec_c(sfx, sfx_c) - ! call get_surface_fluxes_sheba(c_loc(sfx_c), c_loc(meteo_c), model_param, surface_param, numerics_c, phys_constants, n) + call get_surface_fluxes_sheba(c_loc(sfx_c), c_loc(meteo_c), model_param, surface_param, numerics_c, phys_constants, n) deallocate(meteo_c) deallocate(sfx_c) diff --git a/srcF/sfx_surface.f90 b/srcF/sfx_surface.f90 index d5e369902c2992bfec5620cf08e1e4ea95637fb9..79ebca981a43d1dc6b1bfe91a939f78aacc26b59 100644 --- a/srcF/sfx_surface.f90 +++ b/srcF/sfx_surface.f90 @@ -15,6 +15,9 @@ module sfx_surface ! public interface ! -------------------------------------------------------------------------------- +#if defined(INCLUDE_CXX) + public :: set_c_struct_sfx_surface_param_values +#endif public :: get_charnock_roughness public :: get_thermal_roughness ! -------------------------------------------------------------------------------- @@ -104,6 +107,31 @@ contains end function +#if defined(INCLUDE_CXX) + subroutine set_c_struct_sfx_surface_param_values(surface_param) + use sfx_data + implicit none + type (sfx_surface_param), intent(inout) :: surface_param + surface_param%surface_ocean = surface_ocean + surface_param%surface_land = surface_land + surface_param%surface_lake = surface_lake + + surface_param%kappa = kappa + surface_param%gamma_c = gamma_c + surface_param%Re_visc_min = Re_visc_min + surface_param%h_charnock = h_charnock + surface_param%c1_charnock = c1_charnock + surface_param%c2_charnock = c2_charnock + surface_param%Re_rough_min = Re_rough_min + surface_param%B1_rough = B1_rough + surface_param%B2_rough = B2_rough + surface_param%B3_rough = B3_rough + surface_param%B4_rough = B4_rough + surface_param%B_max_lake = B_max_lake + surface_param%B_max_ocean = B_max_ocean + surface_param%B_max_land = B_max_land + end subroutine set_c_struct_sfx_surface_param_values +#endif ! charnock roughness definition ! -------------------------------------------------------------------------------- subroutine get_charnock_roughness(z0_m, u_dyn0, U, h, maxiters)