#include <cstdio>

#include <cuda.h>
#include <cuda_runtime_api.h>

#include "sfx_sheba.h"
#include "sfx_model_compute_subfunc.cuh"
#include "sfx_surface.cuh"
#include "sfx_memory_processing.cuh"

namespace sfx_kernel
{
    // Per-point surface flux kernel; see the definition below for details.
    template<typename T>
    __global__ void compute_flux(sfxDataVecTypeC sfx,
        meteoDataVecTypeC meteo,
        const sfx_sheba_param model_param,
        const sfx_surface_param surface_param,
        const sfx_sheba_numericsTypeC numerics,
        const sfx_phys_constants phys_constants,
        const int grid_size);
}

// Computes the surface-flux diagnostics for every point of a 1D grid.
//
// Launch layout: 1D grid of 1D blocks, one thread per grid point. Threads
// whose flat index is >= grid_size do nothing, so the launch may be padded.
// No shared memory and no block-level synchronization are used.
//
// Parameters:
//   sfx            - output arrays (zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim,
//                    Cm, Ct, Km, Pr_t_inv); element [index] of each is written.
//   meteo          - input arrays (U, Tsemi, dT, dQ, h, z0_m); element [index]
//                    of each is read.
//   model_param    - model constants (kappa is read here; the structure is also
//                    forwarded to get_dynamic_scales and get_phi).
//   surface_param  - surface type identifiers and roughness parameters.
//   numerics       - iteration limits (maxiters_charnock is read here).
//   phys_constants - physical constants (g and nu_air are read here).
//   grid_size      - number of valid points in the arrays.
//
// All arrays are dereferenced in device code, so they must be accessible
// from the GPU.
template<typename T>
__global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
    meteoDataVecTypeC meteo,
    const sfx_sheba_param model_param,
    const sfx_surface_param surface_param,
    const sfx_sheba_numericsTypeC numerics,
    const sfx_phys_constants phys_constants,
    const int grid_size)
{
    const int index = blockIdx.x * blockDim.x + threadIdx.x;

    // tail threads of the last block have no grid point to process
    if (index >= grid_size)
        return;

    // iteration count handed to get_dynamic_scales
    // NOTE(review): hard-coded; confirm whether `numerics` is meant to supply it
    const int dynamic_scales_iters = 10;

    // --- input data for this grid point
    T U = meteo.U[index];
    T Tsemi = meteo.Tsemi[index];
    T dT = meteo.dT[index];
    T dQ = meteo.dQ[index];
    T h = meteo.h[index];
    T z0_m = meteo.z0_m[index];

    // --- values produced by the helper routines
    T z0_t, B, u_dyn0, zeta, Udyn, Tdyn, Qdyn, phi_m, phi_h;

    // --- a negative input roughness marks the point as ocean
    const int surface_type = (z0_m < T(0)) ?
        surface_param.surface_ocean : surface_param.surface_land;

    if (surface_type == surface_param.surface_ocean)
    {
        // z0_m and u_dyn0 are supplied by the helper for ocean points
        get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
    }
    if (surface_type == surface_param.surface_land)
    {
        // --- define relative height [dynamic]
        const T h0_m = h / z0_m;
        u_dyn0 = U * model_param.kappa / log(h0_m);
    }

    const T Re = u_dyn0 * z0_m / phys_constants.nu_air;
    get_thermal_roughness(z0_t, B, z0_m, Re, surface_param, surface_type);

    // --- define Ri-bulk
    // NOTE(review): not guarded against U == 0, unlike Cm below
    const T Rib = (phys_constants.g / Tsemi) * h * (dT + T(0.61) * Tsemi * dQ) / (U * U);

    // --- get the fluxes
    // ----------------------------------------------------------------------------
    get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta,
        U, Tsemi, dT, dQ, h, z0_m, z0_t, (phys_constants.g / Tsemi), model_param, dynamic_scales_iters);
    // ----------------------------------------------------------------------------

    get_phi(phi_m, phi_h, zeta, model_param);
    // ----------------------------------------------------------------------------

    // --- define transfer coeff. (momentum) & (heat)
    //     both stay zero when the corresponding denominator is zero
    const T Cm = (U > T(0)) ? Udyn / U : T(0);
    const T Ct = (fabs(dT) > T(0)) ? Tdyn / dT : T(0);

    // --- define eddy viscosity & inverse Prandtl number
    const T Km = model_param.kappa * Cm * U * h / phi_m;
    const T Pr_t_inv = phi_m / phi_h;

    // --- store the results for this grid point
    sfx.zeta[index]         = zeta;
    sfx.Rib[index]          = Rib;
    sfx.Re[index]           = Re;
    sfx.B[index]            = B;
    sfx.z0_m[index]         = z0_m;
    sfx.z0_t[index]         = z0_t;
    sfx.Rib_conv_lim[index] = T(0.0);
    sfx.Cm[index]           = Cm;
    sfx.Ct[index]           = Ct;
    sfx.Km[index]           = Km;
    sfx.Pr_t_inv[index]     = Pr_t_inv;
}

// Launches the flux kernel over grid_size points and, when the output memory
// type is not GPU, copies every result array from `sfx` into `res_sfx`.
//
// The kernel is launched with 1024 threads per block and just enough blocks
// to cover grid_size. A launch failure is reported on stderr; the CUDA error
// state is left untouched so that callers can still query it.
template<typename T, MemType memIn, MemType memOut >
void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux()
{
    // an empty grid would request a launch with zero blocks, which is an
    // invalid configuration; the copies below would be zero bytes as well
    if (grid_size <= 0)
        return;

    const int threads_per_block = 1024;

    // integer ceil-division: exact for every positive int and free of the
    // overflow that (grid_size + threads_per_block - 1) could hit
    const int BlockCount = (grid_size - 1) / threads_per_block + 1;

    const dim3 cuBlock(threads_per_block, 1, 1);
    const dim3 cuGrid(BlockCount, 1, 1);

    sfx_kernel::compute_flux<T><<<cuGrid, cuBlock>>>(sfx, meteo, model_param,
        surface_param, numerics, phys_constants, grid_size);

    // peek (not get) so the error is reported here without being cleared
    const cudaError_t launch_status = cudaPeekAtLastError();
    if (launch_status != cudaSuccess)
    {
        std::fprintf(stderr, "FluxSheba::compute_flux: CUDA error after kernel launch: %s\n",
            cudaGetErrorString(launch_status));
    }

    if (MemType::GPU != memOut)
    {
        const size_t new_size = grid_size * sizeof(T);

        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size);
        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size);
    }
}

// Explicit instantiations for float, covering the memory-type combinations
// used with the GPU implementation.
template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>;
template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>;
template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>;

template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>;
template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>;
template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::GPU>;
template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::GPU>;
template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU>;
template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::CPU>;
template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::CPU>;