diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f9868dd5a07b1a79628b250d1d661c31b3c546f..3a846f67f2da724bca3dd54cf5219ca537954208 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,10 +93,12 @@ if(INCLUDE_CUDA) set(SOURCES_CU srcCU/sfx_surface.cu srcCU/sfx_compute_esm.cu + srcCU/sfx_compute_sheba.cu ) set(HEADERS_CU includeCU/sfx_surface.cuh includeCU/sfx_compute_esm.cuh + includeCU/sfx_compute_sheba.cuh ) endif(INCLUDE_CUDA) @@ -127,7 +129,7 @@ if(INCLUDE_CXX OR INCLUDE_CUDA) set(CMAKE_CXX_FLAGS " -g -Wunused-variable ") set(CMAKE_C_FLAGS " -g ") - # set(CMAKE_CUDA_FLAGS " --device-c ") + set(CMAKE_CUDA_FLAGS " -g ") endif(INCLUDE_CXX OR INCLUDE_CUDA) add_executable(drag ${SOURCES}) diff --git a/includeCXX/sfx_sheba.h b/includeCXX/sfx_sheba.h index ce911e0f978528b89fe0a2a35cb5312a2690dfb1..676980f1fe273a040481b2a3dc414ab84c7daba8 100644 --- a/includeCXX/sfx_sheba.h +++ b/includeCXX/sfx_sheba.h @@ -9,12 +9,12 @@ private: T *U, *dT, *Tsemi, *dQ, *h, *in_z0_m; T *zeta, *Rib, *Re, *B, *z0_m, *z0_t, *Rib_conv_lim, *Cm, *Ct, *Km, *Pr_t_inv; - T kappa, Pr_t_0_inv, Pr_t_inf_inv, + T kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, - Rib_max, Re_rough_min, + Re_rough_min, B1_rough, B2_rough, B_max_land, B_max_ocean, B_max_lake, gamma_c, diff --git a/srcCU/sfx_compute_sheba.cu b/srcCU/sfx_compute_sheba.cu new file mode 100644 index 0000000000000000000000000000000000000000..fd80dd51e56be4f09803bf52b6516ae5ab05913a --- /dev/null +++ b/srcCU/sfx_compute_sheba.cu @@ -0,0 +1,476 @@ +#include <cmath> +#include <iostream> +#include "../includeCU/sfx_compute_sheba.cuh" +#include "../includeCU/sfx_surface.cuh" + +template<typename T> +__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0, + const T h, const T U, + const T kappa, + const T h_charnock, const T c1_charnock, const T c2_charnock, + const int maxiters) +{ + T Uc, a, b, c, c_min, f; + + Uc = U; + a = 0.0; + b = 25.0; + c_min = log(h_charnock) / kappa; + + for (int i = 0; i < maxiters; i++) + { + f = c1_charnock - 2.0 * log(Uc); + for (int j = 0; j < maxiters; j++) + { + c = (f + 2.0 * log(b)) / kappa; + if (U <= 8.0e0) + a = log(1.0 + c2_charnock * ( pow(b / Uc, 3) ) ) / kappa; + c = max(c - a, c_min); + b = c; + } + z0_m = h_charnock * exp(-c * kappa); + z0_m = max(z0_m, T(0.000015e0)); + Uc = U * log(h_charnock / z0_m) / log(h / z0_m); + } + + u_dyn0 = Uc / c; +} + +template __device__ void get_charnock_roughness(float &z0_m, float &u_dyn0, + const float h, const float U, + const float kappa, + const float h_charnock, const float c1_charnock, const float c2_charnock, + const int maxiters); +template __device__ void get_charnock_roughness(double &z0_m, double &u_dyn0, + const double h, const double U, + const double kappa, + const double h_charnock, const double c1_charnock, const double c2_charnock, + const int maxiters); + +template<typename T> +__device__ void get_thermal_roughness(T &z0_t, T &B, + const T z0_m, const T Re, + const T Re_rough_min, + const T B1_rough, const T B2_rough, const T B3_rough, const T B4_rough, + const T B_max_ocean, const T B_max_lake, const T B_max_land, + const int surface_type) +{ + // --- define B = log(z0_m / z0_t) + if (Re <= Re_rough_min) + B = B1_rough * log(B3_rough * Re) + B2_rough; + else + // *: B4 takes into account Re value at z' ~ O(10) z0 + B = B4_rough * (pow(Re, B2_rough)); + + // --- apply max restriction based on surface type + if (surface_type == 0) + B = min(B, B_max_ocean); + else if (surface_type == 2) + B = min(B, B_max_lake); + else if (surface_type == 1) + B = min(B, B_max_land); + + // --- define roughness [thermal] + z0_t = z0_m / exp(B); +} + +template __device__ void get_thermal_roughness(float &z0_t, float &B, + const float z0_m, const float Re, + const float Re_rough_min, + const float B1_rough, const float B2_rough, const float B3_rough, const float B4_rough, + const float B_max_ocean, const float B_max_lake, const float B_max_land, + const int surface_type); +template __device__ void get_thermal_roughness(double &z0_t, double &B, + const double z0_m, const double Re, + const double Re_rough_min, + const double B1_rough, const double B2_rough, const double B3_rough, const double B4_rough, + const double B_max_ocean, const double B_max_lake, const double B_max_land, + const int surface_type); + +template<typename T> +__device__ void get_psi_mh(T &psi_m, T &psi_h, + const T zeta_m, const T zeta_h, + const T alpha_m, const T alpha_h, + const T a_m, const T a_h, + const T b_m, const T b_h, + const T c_h) +{ + T x_m, x_h; + T q_m, q_h; + + if (zeta_m >= 0.0) + { + q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0); + x_m = pow(1.0 + zeta_m, 1.0 / 3.0); + + psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m)))); + } + else + { x_m = pow(1.0 - alpha_m * zeta_m, 0.25); + psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m); + } + + if (zeta_h >= 0.0) + { + q_h = sqrt(c_h * c_h - 4.0); + x_h = zeta_h; + + psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h))); + } + else + { + x_h = pow(1.0 - alpha_h * zeta_h, 0.25); + psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h)); + } +} + +template __device__ void get_psi_mh(float &psi_m, float &psi_h, + const float zeta_m, const float zeta_h, + const float alpha_m, const float alpha_h, + const float a_m, const float a_h, + const float b_m, const float b_h, + const float c_h); +template __device__ void get_psi_mh(double &psi_m, double &psi_h, + const double zeta_m, const double zeta_h, + const double alpha_m, const double alpha_h, + const double a_m, const double a_h, + const double b_m, const double b_h, + const double c_h); + +template<typename T> +__device__ void get_psi(T &psi_m, T &psi_h, + const T zeta, + const T alpha_m, const T alpha_h, + const T a_m, const T a_h, + const T b_m, const T b_h, + const T c_h) +{ + T x_m, x_h; + T q_m, q_h; + + if (zeta >= 0.0) + { + q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0); + q_h = sqrt(c_h * c_h - 4.0); + + x_m = pow(1.0 + zeta, 1.0 / 3.0); + x_h = zeta; + + psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m)))); + + psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h))); + } + else + { + x_m = pow(1.0 - alpha_m * zeta, 0.25); + x_h = pow(1.0 - alpha_h * zeta, 0.25); + + psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m); + psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h)); + } +} + +template __device__ void get_psi(float &psi_m, float &psi_h, + const float zeta, + const float alpha_m, const float alpha_h, + const float a_m, const float a_h, + const float b_m, const float b_h, + const float c_h); +template __device__ void get_psi(double &psi_m, double &psi_h, + const double zeta, + const double alpha_m, const double alpha_h, + const double a_m, const double a_h, + const double b_m, const double b_h, + const double c_h); + +template<typename T> +__device__ void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta, + const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta, + const T kappa, const T Pr_t_0_inv, + const T alpha_m, const T alpha_h, + const T a_m, const T a_h, + const T b_m, const T b_h, + const T c_h, + const int maxiters) +{ + T psi_m, psi_h, psi0_m, psi0_h, Linv; + const T gamma = 0.61; + + Udyn = kappa * U / log(z / z0_m); + Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t); + Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t); + zeta = 0.0; + + // --- no wind + if (Udyn < 1e-5) + return; + + Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn); + zeta = z * Linv; + + // --- near neutral case + if (Linv < 1e-5) + return; + + for (int i = 0; i < maxiters; i++) + { + get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h, + a_m, a_h, + b_m, b_h, + c_h); + + get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv, + alpha_m, alpha_h, + a_m, a_h, + b_m, b_h, + c_h); + + Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m)); + Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); + Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); + + if (Udyn < 1e-5) + break; + + Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn); + zeta = z * Linv; + } +} + +template __device__ void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta, + const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta, + const float kappa, const float Pr_t_0_inv, + const float alpha_m, const float alpha_h, + const float a_m, const float a_h, + const float b_m, const float b_h, + const float c_h, + const int maxiters); +template __device__ void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta, + const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta, + const double kappa, const double Pr_t_0_inv, + const double alpha_m, const double alpha_h, + const double a_m, const double a_h, + const double b_m, const double b_h, + const double c_h, + const int maxiters); + +template<typename T> +__device__ void get_phi(T &phi_m, T &phi_h, + const T zeta, + const T alpha_m, const T alpha_h, + const T a_m, const T a_h, + const T b_m, const T b_h, + const T c_h) +{ + if (zeta >= 0.0) + { + phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta); + phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta); + } + else + { + phi_m = pow(1.0 - alpha_m * zeta, -0.25); + phi_h = pow(1.0 - alpha_h * zeta, -0.5); + } +} + +template __device__ void get_phi(float &phi_m, float &phi_h, + const float zeta, + const float alpha_m, const float alpha_h, + const float a_m, const float a_h, + const float b_m, const float b_h, + const float c_h); +template __device__ void get_phi(double &phi_m, double &phi_h, + const double zeta, + const double alpha_m, const double alpha_h, + const double a_m, const double a_h, + const double b_m, const double b_h, + const double c_h); + +template<typename T> +__global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, + const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_, + const T kappa, const T Pr_t_0_inv, + const T alpha_m, const T alpha_h, + const T a_m, const T a_h, + const T b_m, const T b_h, + const T c_h, + const T Re_rough_min, + const T B1_rough, const T B2_rough, + const T B_max_land, const T B_max_ocean, const T B_max_lake, + const T gamma_c, const T Re_visc_min, + const T Pr_m, const T nu_air, const T g, + const int maxiters_charnock, + const int grid_size) +{ + const int index = blockIdx.x * blockDim.x + threadIdx.x; + + T h, U, dT, Tsemi, dQ, z0_m; + T z0_t, B, h0_m, h0_t, u_dyn0, Re, + zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h, + Km, Pr_t_inv, Cm, Ct; + + const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8)); + const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock; + + int surface_type; + + if(index < grid_size) + { + U = U_[index]; + Tsemi = Tsemi_[index]; + dT = dT_[index]; + dQ = dQ_[index]; + h = h_[index]; + z0_m = in_z0_m_[index]; + + if (z0_m < 0.0) surface_type = 0; + else surface_type = 1; + + if (surface_type == 0) + { + get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock); + h0_m = h / z0_m; + } + if (surface_type == 1) + { + h0_m = h / z0_m; + u_dyn0 = U * kappa / log(h0_m); + } + + Re = u_dyn0 * z0_m / nu_air; + get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type); + + // --- define relative height [thermal] + h0_t = h / z0_t; + + // --- define Ri-bulk + Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U); + + // --- get the fluxes + // ---------------------------------------------------------------------------- + get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10); + // ---------------------------------------------------------------------------- + + get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h); + // ---------------------------------------------------------------------------- + + // --- define transfer coeff. (momentum) & (heat) + Cm = 0.0; + if (U > 0.0) + Cm = Udyn / U; + Ct = 0.0; + if (fabs(dT) > 0.0) + Ct = Tdyn / dT; + + // --- define eddy viscosity & inverse Prandtl number + Km = kappa * Cm * U * h / phi_m; + Pr_t_inv = phi_m / phi_h; + + zeta_[index] = 0.0; + Rib_[index] = 0.0; + Re_[index] = 0.0; + B_[index] = 0.0; + z0_m_[index] = 0.0; + z0_t_[index] = 0.0; + Rib_conv_lim_[index] = 0.0; + Cm_[index] = 0.0; + Ct_[index] = 0.0; + Km_[index] = 0.0; + Pr_t_inv_[index] = 0.0; + } +} + +template __global__ void kernel_compute_flux_sheba(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_, + const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_, + const float kappa, const float Pr_t_0_inv, + const float alpha_m, const float alpha_h, + const float a_m, const float a_h, + const float b_m, const float b_h, + const float c_h, + const float Re_rough_min, + const float B1_rough, const float B2_rough, + const float B_max_land, const float B_max_ocean, const float B_max_lake, + const float gamma_c, const float Re_visc_min, + const float Pr_m, const float nu_air, const float g, + const int maxiters_charnock, + const int grid_size); +template __global__ void kernel_compute_flux_sheba(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_, + const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_, + const double kappa, const double Pr_t_0_inv, + const double alpha_m, const double alpha_h, + const double a_m, const double a_h, + const double b_m, const double b_h, + const double c_h, + const double Re_rough_min, + const double B1_rough, const double B2_rough, + const double B_max_land, const double B_max_ocean, const double B_max_lake, + const double gamma_c, const double Re_visc_min, + const double Pr_m, const double nu_air, const double g, + const int maxiters_charnock, + const int grid_size); + +template<typename T> +void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, + const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_, + const T kappa, const T Pr_t_0_inv, + const T alpha_m, const T alpha_h, + const T a_m, const T a_h, + const T b_m, const T b_h, + const T c_h, + const T Re_rough_min, + const T B1_rough, const T B2_rough, + const T B_max_land, const T B_max_ocean, const T B_max_lake, + const T gamma_c, const T Re_visc_min, + const T Pr_m, const T nu_air, const T g, + const int maxiters_charnock, + const int grid_size) +{ + const int BlockCount = int(ceil(float(grid_size) / 1024.0)); + dim3 cuBlock = dim3(1024, 1, 1); + dim3 cuGrid = dim3(BlockCount, 1, 1); + + kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_, + U_, dT_, Tsemi_, dQ_, h_, in_z0_m_, + kappa, Pr_t_0_inv, + alpha_m, alpha_h, + a_m, a_h, + b_m, b_h, + c_h, + Re_rough_min, + B1_rough, B2_rough, + B_max_land, B_max_ocean, B_max_lake, + gamma_c, Re_visc_min, + Pr_m, nu_air, g, + maxiters_charnock, + grid_size); +} + +template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_, + const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_, + const float kappa, const float Pr_t_0_inv, + const float alpha_m, const float alpha_h, + const float a_m, const float a_h, + const float b_m, const float b_h, + const float c_h, + const float Re_rough_min, + const float B1_rough, const float B2_rough, + const float B_max_land, const float B_max_ocean, const float B_max_lake, + const float gamma_c, const float Re_visc_min, + const float Pr_m, const float nu_air, const float g, + const int maxiters_charnock, + const int grid_size); +template void compute_flux_sheba_gpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_, + const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_, + const double kappa, const double Pr_t_0_inv, + const double alpha_m, const double alpha_h, + const double a_m, const double a_h, + const double b_m, const double b_h, + const double c_h, + const double Re_rough_min, + const double B1_rough, const double B2_rough, + const double B_max_land, const double B_max_ocean, const double B_max_lake, + const double gamma_c, const double Re_visc_min, + const double Pr_m, const double nu_air, const double g, + const int maxiters_charnock, + const int grid_size); \ No newline at end of file diff --git a/srcCXX/sfx_call_class_func.cpp b/srcCXX/sfx_call_class_func.cpp index 9bc7ac49ada2ae569f714814dade1bbb4df7800b..7b307665527d325f3dfab54dedc3d6ed69c1f25e 100644 --- a/srcCXX/sfx_call_class_func.cpp +++ b/srcCXX/sfx_call_class_func.cpp @@ -57,7 +57,6 @@ void surf_flux_sheba_CXX (float *zeta_, float *Rib_, float *Re_, float *B_, floa #else static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU> F; #endif - F.set_params(grid_size, kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, diff --git a/srcCXX/sfx_compute_sheba.cpp b/srcCXX/sfx_compute_sheba.cpp index 1aa725234f7e3b98e7ad87381d1485b2472817d5..48463f1a71afdaccde27146f98392f972da6c410 100644 --- a/srcCXX/sfx_compute_sheba.cpp +++ b/srcCXX/sfx_compute_sheba.cpp @@ -145,7 +145,8 @@ void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta, Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h)); - if (Udyn < 1e-5) break; + if (Udyn < 1e-5) + break; Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn); zeta = z * Linv; diff --git a/srcCXX/sfx_esm.cpp b/srcCXX/sfx_esm.cpp index 5d0a9dec3a0c56ca98117a212f69e37674a11ec1..4e1825a17c1f276aea363c5b8977f6dbd00756da 100644 --- a/srcCXX/sfx_esm.cpp +++ b/srcCXX/sfx_esm.cpp @@ -65,6 +65,8 @@ void FluxEsm<T, memIn, memOut, RunMem>::set_params(const int grid_size_, const T memproc::realloc<RunMem>((void *&)(h), allocated_size, new_size); allocated_size = 0; memproc::realloc<RunMem>((void *&)(in_z0_m), allocated_size, new_size); + + ifAllocated = true; } if(RunMem != memOut) { diff --git a/srcCXX/sfx_sheba.cpp b/srcCXX/sfx_sheba.cpp index 66d9a8118ca7028afd283e8d3d69e28cb68f7f84..af52cc2c78fcc2578815b2b9a75e02a96ff04c7e 100644 --- a/srcCXX/sfx_sheba.cpp +++ b/srcCXX/sfx_sheba.cpp @@ -49,7 +49,7 @@ void FluxSheba<T, memIn, memOut, RunMem>::set_params(const int grid_size_, const c_h = c_h_, Re_rough_min = Re_rough_min_, B_max_lake = B_max_lake_, B1_rough = B1_rough_, B2_rough = B2_rough_, - B_max_land = B_max_land_, B_max_ocean = B_max_ocean_, + B_max_land = B_max_land_, B_max_ocean = B_max_ocean_, B_max_lake = B_max_lake_, gamma_c = gamma_c_, Re_visc_min = Re_visc_min_, Pr_m = Pr_m_, nu_air = nu_air_, g = g_; @@ -72,6 +72,8 @@ void FluxSheba<T, memIn, memOut, RunMem>::set_params(const int grid_size_, const memproc::realloc<RunMem>((void *&)(h), allocated_size, new_size); allocated_size = 0; memproc::realloc<RunMem>((void *&)(in_z0_m), allocated_size, new_size); + + ifAllocated = true; } if(RunMem != memOut) { @@ -99,6 +101,26 @@ void FluxSheba<T, memIn, memOut, RunMem>::set_params(const int grid_size_, const allocated_size = 0; memproc::realloc<RunMem>((void *&)(Pr_t_inv), allocated_size, new_size); + // T* del = new T[grid_size]; + // for (int i = 0; i < grid_size; i++) + // del[i] = i; + + // memproc::memcopy<RunMem, memOut>(Cm, del, new_size); + + // for (int i = 0; i < grid_size; i++) + // del[i] = 0; + + // printf("before\n"); + // for (int i = 0; i < 10; i++) + // printf("%f\n", del[i]); + + // memproc::memcopy<memOut, RunMem>(del, Cm, new_size); + + // for (int i = 0; i < grid_size; i++) + // printf("%f\n", del[i]); + + // delete[] del; + ifAllocated = true; } } @@ -199,36 +221,37 @@ void FluxSheba<T, memIn, memOut, RunMem>::compute_flux_sheba(T *zeta_, T *Rib_, set_data(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_, U_, dT_, Tsemi_, dQ_, h_, in_z0_m_); - if(RunMem == MemType::CPU) compute_flux_sheba_cpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv, - U, dT, Tsemi, dQ, h, in_z0_m, - kappa, Pr_t_0_inv, - alpha_m, alpha_h, - a_m, a_h, - b_m, b_h, - c_h, - Re_rough_min, - B1_rough, B2_rough, - B_max_land, B_max_ocean, B_max_lake, - gamma_c, Re_visc_min, - Pr_m, nu_air, g, - maxiters_charnock, - grid_size); + if(RunMem == MemType::CPU) + compute_flux_sheba_cpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv, + U, dT, Tsemi, dQ, h, in_z0_m, + kappa, Pr_t_0_inv, + alpha_m, alpha_h, + a_m, a_h, + b_m, b_h, + c_h, + Re_rough_min, + B1_rough, B2_rough, + B_max_land, B_max_ocean, B_max_lake, + gamma_c, Re_visc_min, + Pr_m, nu_air, g, + maxiters_charnock, + grid_size); #ifdef INCLUDE_CUDA - // else compute_flux_sheba_gpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv, - // U, dT, Tsemi, dQ, h, in_z0_m, - // kappa, Pr_t_0_inv, - // alpha_m, alpha_h, - // a_m, a_h, - // b_m, b_h, - // c_h, - // Re_rough_min, - // B1_rough, B2_rough, - // B_max_land, B_max_ocean, B_max_lake, - // gamma_c, Re_visc_min, - // Pr_m, nu_air, g, - // maxiters_charnock, - // grid_size); + else compute_flux_sheba_gpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv, + U, dT, Tsemi, dQ, h, in_z0_m, + kappa, Pr_t_0_inv, + alpha_m, alpha_h, + a_m, a_h, + b_m, b_h, + c_h, + Re_rough_min, + B1_rough, B2_rough, + B_max_land, B_max_ocean, B_max_lake, + gamma_c, Re_visc_min, + Pr_m, nu_air, g, + maxiters_charnock, + grid_size); #endif if(RunMem != memOut) diff --git a/srcCXX/sfx_surface.cpp b/srcCXX/sfx_surface.cpp index 2bb012c4d345132757e487890081050acaa2f5fa..676055865f5fa86936b59b45ad588321e865f621 100644 --- a/srcCXX/sfx_surface.cpp +++ b/srcCXX/sfx_surface.cpp @@ -64,9 +64,9 @@ void get_thermal_roughness(T &z0_t, T &B, // --- apply max restriction based on surface type if (surface_type == 0) B = std::min(B, B_max_ocean); - else if (surface_type == 1) + else if (surface_type == 2) B = std::min(B, B_max_lake); - else if (surface_type == 2) + else if (surface_type == 1) B = std::min(B, B_max_land); // --- define roughness [thermal] diff --git a/srcF/sfx_sheba.f90 b/srcF/sfx_sheba.f90 index 26231a763c69ec0c2f216c92278ce71268a12c71..8befa12a579c8bd0c48c574ff7a55028d4555497 100644 --- a/srcF/sfx_sheba.f90 +++ b/srcF/sfx_sheba.f90 @@ -11,6 +11,10 @@ module sfx_sheba use sfx_data use sfx_surface use sfx_sheba_param + +#if defined(INCLUDE_CUDA) || defined(INCLUDE_CXX) + use C_FUNC +#endif ! -------------------------------------------------------------------------------- ! directives list @@ -50,7 +54,23 @@ contains type (sfxDataType) sfx_cell integer i ! ---------------------------------------------------------------------------- - +#if defined(INCLUDE_CUDA) || defined(INCLUDE_CXX) + call get_surface_fluxes_sheba(sfx%zeta, sfx%Rib, sfx%Re, sfx%B, sfx%z0_m, sfx%z0_t, & + sfx%Rib_conv_lim, sfx%Cm, sfx%Ct, sfx%Km, sfx%Pr_t_inv, & + meteo%U, meteo%dT, meteo%Tsemi, meteo%dQ, meteo%h, meteo%z0_m, & + kappa, Pr_t_0_inv, & + alpha_m, alpha_h, & + a_m, a_h, & + b_m, b_h, & + c_h, & + Re_rough_min, & + B1_rough, B2_rough, & + B_max_land, B_max_ocean, B_max_lake, & + gamma_c, Re_visc_min, & + Pr_m, nu_air, g, & + numerics%maxiters_charnock, & + n) +#else do i = 1, n meteo_cell = meteoDataType(& h = meteo%h(i), & @@ -61,7 +81,7 @@ contains call push_sfx_data(sfx, sfx_cell, i) end do - +#endif end subroutine get_surface_fluxes_vec ! --------------------------------------------------------------------------------