Skip to content
Snippets Groups Projects
sfx_compute_sheba.cu 17.9 KiB
Newer Older
  • Learn to ignore specific revisions
  • 数学の武士's avatar
    .
    数学の武士 committed
    #include <iostream>
    #include "../includeCU/sfx_compute_sheba.cuh"
    #include "../includeCU/sfx_surface.cuh"
    
    
    #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
    inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
    {
       if (code != cudaSuccess) 
       {
          fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
          if (abort) exit(code);
       }
    }
    
    
    数学の武士's avatar
    .
    数学の武士 committed
    template<typename T>
    __device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
        const T h, const T U,
        const T kappa, 
        const T h_charnock, const T c1_charnock, const T c2_charnock, 
        const int maxiters)
    {
        T Uc, a, b, c, c_min, f;
    
        Uc = U;
        a = 0.0;
        b = 25.0;
        c_min = log(h_charnock) / kappa;
    
        for (int i = 0; i < maxiters; i++)
        {
            f = c1_charnock - 2.0 * log(Uc);
            for (int j = 0; j < maxiters; j++)
            {
                c = (f + 2.0 * log(b)) / kappa;
                if (U <= 8.0e0) 
                    a = log(1.0 + c2_charnock * ( pow(b / Uc, 3) ) ) / kappa;
                c = max(c - a, c_min);
                b = c;
            }
            z0_m = h_charnock * exp(-c * kappa);
            z0_m = max(z0_m, T(0.000015e0));
            Uc = U * log(h_charnock / z0_m) / log(h / z0_m);
        }
        
        u_dyn0 = Uc / c;
    }
    
    template __device__ void get_charnock_roughness(float &z0_m, float &u_dyn0,
        const float h, const float U,
        const float kappa, 
        const float h_charnock, const float c1_charnock, const float c2_charnock, 
        const int maxiters);
    template __device__ void get_charnock_roughness(double &z0_m, double &u_dyn0, 
        const double h, const double U,
        const double kappa, 
        const double h_charnock, const double c1_charnock, const double c2_charnock,
        const int maxiters);
    
    template<typename T>
    __device__ void get_thermal_roughness(T &z0_t, T &B,
        const T z0_m, const T Re, 
        const T Re_rough_min, 
        const T B1_rough, const T B2_rough, const T B3_rough, const T B4_rough, 
        const T B_max_ocean, const T B_max_lake, const T B_max_land,
        const int surface_type)
    {
        // --- define B = log(z0_m / z0_t)
        if (Re <= Re_rough_min) 
            B = B1_rough * log(B3_rough * Re) + B2_rough;
        else
            // *: B4 takes into account Re value at z' ~ O(10) z0
            B = B4_rough * (pow(Re, B2_rough));
    
        //   --- apply max restriction based on surface type
        if (surface_type == 0) 
            B = min(B, B_max_ocean);
        else if (surface_type == 2) 
            B = min(B, B_max_lake);
        else if (surface_type == 1)
            B = min(B, B_max_land);
    
        // --- define roughness [thermal]
        z0_t = z0_m / exp(B);
    }
    
    template __device__ void get_thermal_roughness(float &z0_t, float &B,
        const float z0_m, const float Re, 
        const float Re_rough_min, 
        const float B1_rough, const float B2_rough, const float B3_rough, const float B4_rough, 
        const float B_max_ocean, const float B_max_lake, const float B_max_land,
        const int surface_type);
    template __device__ void get_thermal_roughness(double &z0_t, double &B,
        const double z0_m, const double Re, 
        const double Re_rough_min, 
        const double B1_rough, const double B2_rough, const double B3_rough, const double B4_rough, 
        const double B_max_ocean, const double B_max_lake, const double B_max_land,
        const int surface_type);
    
    template<typename T>
    __device__ void get_psi_mh(T &psi_m, T &psi_h,
        const T zeta_m, const T zeta_h,
        const T alpha_m, const T alpha_h,
        const T a_m, const T a_h, 
        const T b_m, const T b_h,
        const T c_h)
    {
        T x_m, x_h;
        T q_m, q_h;
    
        if (zeta_m >= 0.0) 
        {
            q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
            x_m = pow(1.0 + zeta_m, 1.0 / 3.0);
    
            psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
        }                                
        else
        {    x_m = pow(1.0 - alpha_m * zeta_m, 0.25);
            psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
        }
    
        if (zeta_h >= 0.0)
        {    
            q_h = sqrt(c_h * c_h - 4.0);
            x_h = zeta_h;
    
            psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
        }
        else
        {
            x_h = pow(1.0 - alpha_h * zeta_h, 0.25);
            psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
        }
    }
    
    template __device__ void get_psi_mh(float &psi_m, float &psi_h,
        const float zeta_m, const float zeta_h,
        const float alpha_m, const float alpha_h,
        const float a_m, const float a_h, 
        const float b_m, const float b_h,
        const float c_h);
    template __device__ void get_psi_mh(double &psi_m, double &psi_h,
        const double zeta_m, const double zeta_h,
        const double alpha_m, const double alpha_h,
        const double a_m, const double a_h, 
        const double b_m, const double b_h,
        const double c_h);
    
    template<typename T>
    __device__ void get_psi(T &psi_m, T &psi_h,
        const T zeta,
        const T alpha_m, const T alpha_h,
        const T a_m, const T a_h, 
        const T b_m, const T b_h,
        const T c_h)
    {
        T x_m, x_h;
        T q_m, q_h;
    
        if (zeta >= 0.0) 
        {
            q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
            q_h = sqrt(c_h * c_h - 4.0);
    
            x_m = pow(1.0 + zeta, 1.0 / 3.0);
            x_h = zeta;
    
            psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
    
            psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
        }
        else
        {
            x_m = pow(1.0 - alpha_m * zeta, 0.25);
            x_h = pow(1.0 - alpha_h * zeta, 0.25); 
    
            psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
            psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
        }
    }
    
    template __device__ void get_psi(float &psi_m, float &psi_h,
        const float zeta,
        const float alpha_m, const float alpha_h,
        const float a_m, const float a_h, 
        const float b_m, const float b_h,
        const float c_h);
    template __device__ void get_psi(double &psi_m, double &psi_h,
        const double zeta,
        const double alpha_m, const double alpha_h,
        const double a_m, const double a_h, 
        const double b_m, const double b_h,
        const double c_h);
    
    template<typename T>
    __device__ void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta,
        const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta,
        const T kappa, const T Pr_t_0_inv,
        const T alpha_m, const T alpha_h,
        const T a_m, const T a_h, 
        const T b_m, const T b_h,
        const T c_h,
        const int maxiters)
    {
        T psi_m, psi_h, psi0_m, psi0_h, Linv;
        const T gamma = 0.61;
    
        Udyn = kappa * U / log(z / z0_m);
        Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t);
        Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t);
        zeta = 0.0;
    
        // --- no wind
        if (Udyn < 1e-5) 
            return;
    
        Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
        zeta = z * Linv;
    
        // --- near neutral case
        if (Linv < 1e-5) 
            return;
    
        for (int i = 0; i < maxiters; i++)
        {
            get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h,
            a_m, a_h, 
            b_m, b_h,
            c_h);
            
            get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv, 
            alpha_m, alpha_h,
            a_m, a_h, 
            b_m, b_h,
            c_h);
    
            Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m));
            Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
            Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
    
            if (Udyn < 1e-5) 
                break;
    
            Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
            zeta = z * Linv;
        }
    }
    
    template __device__ void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta,
        const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta,
        const float kappa, const float Pr_t_0_inv,
        const float alpha_m, const float alpha_h,
        const float a_m, const float a_h, 
        const float b_m, const float b_h,
        const float c_h,
        const int maxiters);
    template __device__ void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta,
        const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta,
        const double kappa, const double Pr_t_0_inv,
        const double alpha_m, const double alpha_h,
        const double a_m, const double a_h, 
        const double b_m, const double b_h,
        const double c_h,
        const int maxiters);
    
    template<typename T>
    __device__ void get_phi(T &phi_m, T &phi_h,
        const T zeta, 
        const T alpha_m, const T alpha_h,
        const T a_m, const T a_h, 
        const T b_m, const T b_h,
        const T c_h)
    {
        if (zeta >= 0.0) 
        {
            phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta);
            phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta);
        }
        else
        {
            phi_m = pow(1.0 - alpha_m * zeta, -0.25);
            phi_h = pow(1.0 - alpha_h * zeta, -0.5);
        }
    }
    
    template __device__ void get_phi(float &phi_m, float &phi_h,
        const float zeta, 
        const float alpha_m, const float alpha_h,
        const float a_m, const float a_h, 
        const float b_m, const float b_h,
        const float c_h);
    template __device__ void get_phi(double &phi_m, double &phi_h,
        const double zeta, 
        const double alpha_m, const double alpha_h,
        const double a_m, const double a_h, 
        const double b_m, const double b_h,
        const double c_h);
    
    template<typename T>
    __global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
        const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
        const T kappa, const T Pr_t_0_inv,
        const T alpha_m, const T alpha_h, 
        const T a_m, const T a_h, 
        const T b_m, const T b_h,
        const T c_h,
        const T Re_rough_min, 
        const T B1_rough, const T B2_rough,
        const T B_max_land, const T B_max_ocean, const T B_max_lake,
        const T gamma_c, const T Re_visc_min,
        const T Pr_m, const T nu_air, const T g, 
        const int maxiters_charnock,
        const int grid_size)
    {
        const int index = blockIdx.x * blockDim.x + threadIdx.x;
    
        T h, U, dT, Tsemi, dQ, z0_m;
        T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
        zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
        Km, Pr_t_inv, Cm, Ct;
    
        const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8));
        const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock;
    
        int surface_type;
    
        if(index < grid_size)
        {
            U = U_[index];
            Tsemi = Tsemi_[index];
            dT = dT_[index];
            dQ = dQ_[index];
            h = h_[index];
            z0_m = in_z0_m_[index];
    
            if (z0_m < 0.0) surface_type = 0;
            else            surface_type = 1;
    
            if (surface_type == 0) 
            {
                get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock);
                h0_m = h / z0_m;
            }
            if (surface_type == 1) 
            {
                h0_m = h / z0_m;
                u_dyn0 = U * kappa / log(h0_m);
            }
    
            Re = u_dyn0 * z0_m / nu_air;
            get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type);
    
            // --- define relative height [thermal]
            h0_t = h / z0_t;
    
            // --- define Ri-bulk
            Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
    
            // --- get the fluxes
            // ----------------------------------------------------------------------------
            get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10);
            // ----------------------------------------------------------------------------
    
            get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h);
            // ----------------------------------------------------------------------------
    
            // --- define transfer coeff. (momentum) & (heat)
            Cm = 0.0;
            if (U > 0.0)
                Cm = Udyn / U;
            Ct = 0.0;
            if (fabs(dT) > 0.0) 
                Ct = Tdyn / dT;
    
            // --- define eddy viscosity & inverse Prandtl number
            Km = kappa * Cm * U * h / phi_m;
            Pr_t_inv = phi_m / phi_h;
    
    
            zeta_[index]         = zeta;
            Rib_[index]          = Rib;
            Re_[index]           = Re;
            B_[index]            = B;
            z0_m_[index]         = z0_m;
            z0_t_[index]         = z0_t;
    
    数学の武士's avatar
    .
    数学の武士 committed
            Rib_conv_lim_[index] = 0.0;
    
            Cm_[index]           = Cm;
            Ct_[index]           = Ct;
            Km_[index]           = Km;
            Pr_t_inv_[index]     = Pr_t_inv;
    
    数学の武士's avatar
    .
    数学の武士 committed
        }
    }
    
    template __global__ void kernel_compute_flux_sheba(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
        const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
        const float kappa, const float Pr_t_0_inv,
        const float alpha_m, const float alpha_h, 
        const float a_m, const float a_h, 
        const float b_m, const float b_h,
        const float c_h,
        const float Re_rough_min, 
        const float B1_rough, const float B2_rough,
        const float B_max_land, const float B_max_ocean, const float B_max_lake,
        const float gamma_c, const float Re_visc_min,
        const float Pr_m, const float nu_air, const float g, 
        const int maxiters_charnock,
        const int grid_size);
    template __global__ void kernel_compute_flux_sheba(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
        const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
        const double kappa, const double Pr_t_0_inv,
        const double alpha_m, const double alpha_h, 
        const double a_m, const double a_h, 
        const double b_m, const double b_h,
        const double c_h,
        const double Re_rough_min, 
        const double B1_rough, const double B2_rough,
        const double B_max_land, const double B_max_ocean, const double B_max_lake,
        const double gamma_c, const double Re_visc_min,
        const double Pr_m, const double nu_air, const double g, 
        const int maxiters_charnock,
        const int grid_size);
    
    template<typename T>
    void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
        const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
        const T kappa, const T Pr_t_0_inv,
        const T alpha_m, const T alpha_h, 
        const T a_m, const T a_h, 
        const T b_m, const T b_h,
        const T c_h,
        const T Re_rough_min, 
        const T B1_rough, const T B2_rough,
        const T B_max_land, const T B_max_ocean, const T B_max_lake,
        const T gamma_c, const T Re_visc_min,
        const T Pr_m, const T nu_air, const T g, 
        const int maxiters_charnock,
        const int grid_size)
    {
    
        const int BlockCount = int(ceil(float(grid_size) / 512.0));
        dim3 cuBlock = dim3(512, 1, 1);
    
    数学の武士's avatar
    .
    数学の武士 committed
    	dim3 cuGrid = dim3(BlockCount, 1, 1);
    
        kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
        U_, dT_, Tsemi_, dQ_, h_, in_z0_m_,
        kappa, Pr_t_0_inv, 
        alpha_m, alpha_h, 
        a_m, a_h, 
        b_m, b_h,
        c_h,
        Re_rough_min, 
        B1_rough, B2_rough,
        B_max_land, B_max_ocean, B_max_lake,
        gamma_c, Re_visc_min,
        Pr_m, nu_air, g, 
        maxiters_charnock, 
        grid_size);
    
        gpuErrchk( cudaPeekAtLastError() );
    
    数学の武士's avatar
    .
    数学の武士 committed
    }
    
    template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
        const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
        const float kappa, const float Pr_t_0_inv,
        const float alpha_m, const float alpha_h, 
        const float a_m, const float a_h, 
        const float b_m, const float b_h,
        const float c_h,
        const float Re_rough_min, 
        const float B1_rough, const float B2_rough,
        const float B_max_land, const float B_max_ocean, const float B_max_lake,
        const float gamma_c, const float Re_visc_min,
        const float Pr_m, const float nu_air, const float g, 
        const int maxiters_charnock,
        const int grid_size);
    template void compute_flux_sheba_gpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
        const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
        const double kappa, const double Pr_t_0_inv,
        const double alpha_m, const double alpha_h, 
        const double a_m, const double a_h, 
        const double b_m, const double b_h,
        const double c_h,
        const double Re_rough_min, 
        const double B1_rough, const double B2_rough,
        const double B_max_land, const double B_max_ocean, const double B_max_lake,
        const double gamma_c, const double Re_visc_min,
        const double Pr_m, const double nu_air, const double g, 
        const int maxiters_charnock,
        const int grid_size);