diff --git a/includeCU/sfx_compute_sheba.cuh b/includeCU/sfx_compute_sheba.cuh
deleted file mode 100644
index acd67d6ec11ad91aeef3f7291e4c283a2c9e9a2d..0000000000000000000000000000000000000000
--- a/includeCU/sfx_compute_sheba.cuh
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once 
-
-template<typename T>
-void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h, 
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough,
-    const T B_max_land, const T B_max_ocean, const T B_max_lake,
-    const T gamma_c, const T Re_visc_min,
-    const T Pr_m, const T nu_air, const T g, 
-    const int maxiters_charnock,
-    const int grid_size);
\ No newline at end of file
diff --git a/includeCU/sfx_esm_compute_subfunc.cuh b/includeCU/sfx_esm_compute_subfunc.cuh
deleted file mode 100644
index 46812fa9320532355569ec82040a6be9f3a900f5..0000000000000000000000000000000000000000
--- a/includeCU/sfx_esm_compute_subfunc.cuh
+++ /dev/null
@@ -1,144 +0,0 @@
-#pragma once 
-#include "../includeC/sfx_data.h"
-#include "../includeCU/sfx_math.cuh"
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim,
-    const T h0_m, const T h0_t, const T B,
-    const sfx_esm_param& param)
-{
-    T psi_m, psi_h, f_m, f_h, c;
-
-    c = pow(param.Pr_t_inf_inv / param.Pr_t_0_inv, 4);
-    zeta_lim = (2.0 * param.alpha_h - c * param.alpha_m - 
-                sqrt( (c * param.alpha_m)*(c * param.alpha_m) + 4.0 * c * param.alpha_h * (param.alpha_h - param.alpha_m))) / (2.0 * param.alpha_h*param.alpha_h);
-
-    f_m_lim = pow(1.0 - param.alpha_m * zeta_lim, 0.25);
-    f_h_lim = sqrt(1.0 - param.alpha_h * zeta_lim);
-
-    f_m = zeta_lim / h0_m;
-    f_h = zeta_lim / h0_t;
-    if (fabs(B) < 1.0e-10) f_h = f_m;
-
-    f_m = pow(1.0 - param.alpha_m * f_m, 0.25);
-    f_h = sqrt(1.0 - param.alpha_h_fix * f_h);
-
-    psi_m = 2.0 * (atan(f_m_lim) - atan(f_m)) + log((f_m_lim - 1.0) * (f_m + 1.0)/((f_m_lim + 1.0) * (f_m - 1.0)));
-    psi_h = log((f_h_lim - 1.0) * (f_h + 1.0)/((f_h_lim + 1.0) * (f_h - 1.0))) / param.Pr_t_0_inv;
-
-    Rib_lim = zeta_lim * psi_h / (psi_m * psi_m);
-}
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
-    const T Rib, const T h0_m, const T h0_t, const T B,
-    const sfx_esm_param& param)
-{
-    T Rib_coeff, psi0_m, psi0_h, phi, c;
-    
-    psi0_m = log(h0_m);
-    psi0_h = B / psi0_m;
-
-    Rib_coeff = param.beta_m * Rib;
-    c = (psi0_h + 1.0) / param.Pr_t_0_inv - 2.0 * Rib_coeff;
-    zeta = psi0_m * (sqrt(c*c + 4.0 * Rib_coeff * (1.0 - Rib_coeff)) - c) / (2.0 * param.beta_m * (1.0 - Rib_coeff));
-
-    phi = param.beta_m * zeta;
-
-    psi_m = psi0_m + phi;
-    psi_h = (psi0_m + B) / param.Pr_t_0_inv + phi;
-}
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &zeta, 
-    const T Rib, const T h0_m, const T h0_t, const T B, 
-    const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim,
-    const sfx_esm_param& param,
-    const sfx_esm_numericsTypeC& numerics)
-{
-    T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f;
-
-    p_m = 2.0 * atan(f_m_conv_lim) + log((f_m_conv_lim - 1.0) / (f_m_conv_lim + 1.0));
-    p_h = log((f_h_conv_lim - 1.0) / (f_h_conv_lim + 1.0));
-
-    zeta = zeta_conv_lim;
-
-    for (int i = 1; i <= numerics.maxiters_convection + 1; i++)
-    {
-        zeta0_m = zeta / h0_m;
-        zeta0_h = zeta / h0_t;
-        if (fabs(B) < 1.0e-10) 
-            zeta0_h = zeta0_m;
-
-        f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25);
-        f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h);
-
-        a_m = -2.0*atan(f0_m) + log((f0_m + 1.0)/(f0_m - 1.0));
-        a_h = log((f0_h + 1.0)/(f0_h - 1.0));
-
-        c_lim = pow(zeta_conv_lim / zeta, 1.0 / 3.0);
-        f = 3.0 * (1.0 - c_lim);
-
-        psi_m = f / f_m_conv_lim + p_m + a_m;
-        psi_h = (f / f_h_conv_lim + p_h + a_h) / param.Pr_t_0_inv;
-
-        if (i == numerics.maxiters_convection + 1) 
-            break;
-
-        zeta = Rib * psi_m * psi_m / psi_h;
-    }
-}
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,   
-    const T h0_m, const T h0_t, const T B,
-    const sfx_esm_param& param)
-{
-    zeta = 0.0;
-    psi_m = log(h0_m);
-    psi_h = log(h0_t) / param.Pr_t_0_inv;
-    if (fabs(B) < 1.0e-10) 
-        psi_h = psi_m / param.Pr_t_0_inv;
-}
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h, T &zeta,
-    const T Rib, const T h0_m, const T h0_t, const T B, 
-    const sfx_esm_param& param,
-    const sfx_esm_numericsTypeC& numerics)
-{
-    T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h;
-
-    psi_m = log(h0_m);
-    psi_h = log(h0_t);
-
-    if (fabs(B) < 1.0e-10) 
-        psi_h = psi_m;
-
-    zeta = Rib * param.Pr_t_0_inv * psi_m * psi_m / psi_h;
-
-    for (int i = 1; i <= numerics.maxiters_convection + 1; i++)
-    {
-        zeta0_m = zeta / h0_m;
-        zeta0_h = zeta / h0_t;
-        if (fabs(B) < 1.0e-10) 
-            zeta0_h = zeta0_m;
-
-        f_m = pow(1.0 - param.alpha_m * zeta, 0.25e0);
-        f_h = sqrt(1.0 - param.alpha_h_fix * zeta);
-
-        f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25e0);
-        f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h);
-
-        f0_m = sfx_math::max(f0_m, T(1.000001e0));
-        f0_h = sfx_math::max(f0_h, T(1.000001e0));
-
-        psi_m = log((f_m - 1.0e0)*(f0_m + 1.0e0)/((f_m + 1.0e0)*(f0_m - 1.0e0))) + 2.0e0*(atan(f_m) - atan(f0_m));
-        psi_h = log((f_h - 1.0e0)*(f0_h + 1.0e0)/((f_h + 1.0e0)*(f0_h - 1.0e0))) / param.Pr_t_0_inv;
-
-        if (i == numerics.maxiters_convection + 1) 
-            break;
-
-        zeta = Rib * psi_m * psi_m / psi_h;
-    }
-}
\ No newline at end of file
diff --git a/includeCU/sfx_surface.cuh b/includeCU/sfx_surface.cuh
index 60a0aeba91e6542a9a5766634071f455d58c0513..9cd656933065a6347e47eb4c340384c3315b1ed1 100644
--- a/includeCU/sfx_surface.cuh
+++ b/includeCU/sfx_surface.cuh
@@ -14,8 +14,8 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
 
     //   --- apply max restriction based on surface type
     if (surface_type == param.surface_ocean)  B = sfx_math::min(B, param.B_max_ocean);
-    if (surface_type == param.surface_lake)   B = sfx_math::min(B, param.B_max_land);
-    if (surface_type == param.surface_land)   B = sfx_math::min(B, param.B_max_lake);
+    if (surface_type == param.surface_lake)   B = sfx_math::min(B, param.B_max_lake);
+    if (surface_type == param.surface_land)   B = sfx_math::min(B, param.B_max_land);
 
     // --- define roughness [thermal]
     z0_t = z0_m / exp(B);
@@ -23,7 +23,7 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
 
 template<typename T>
 FUCNTION_DECLARATION_SPECIFIER void get_charnock_roughness(T &z0_m, T &u_dyn0,
-    const T h, const T U,
+    const T U, const T h,
     const sfx_surface_param& surface_param,
     const int maxiters)
 {
diff --git a/srcCU/sfx_compute_sheba.cu b/srcCU/sfx_compute_sheba.cu
deleted file mode 100644
index 56a93bb49c28ac039c681e6ad71ffc1aa6f33f87..0000000000000000000000000000000000000000
--- a/srcCU/sfx_compute_sheba.cu
+++ /dev/null
@@ -1,486 +0,0 @@
-#include <iostream>
-#include "../includeCU/sfx_compute_sheba.cuh"
-#include "../includeCU/sfx_surface.cuh"
-
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
-{
-   if (code != cudaSuccess) 
-   {
-      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
-      if (abort) exit(code);
-   }
-}
-
-template<typename T>
-__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
-    const T h, const T U,
-    const T kappa, 
-    const T h_charnock, const T c1_charnock, const T c2_charnock, 
-    const int maxiters)
-{
-    T Uc, a, b, c, c_min, f;
-
-    Uc = U;
-    a = 0.0;
-    b = 25.0;
-    c_min = log(h_charnock) / kappa;
-
-    for (int i = 0; i < maxiters; i++)
-    {
-        f = c1_charnock - 2.0 * log(Uc);
-        for (int j = 0; j < maxiters; j++)
-        {
-            c = (f + 2.0 * log(b)) / kappa;
-            if (U <= 8.0e0) 
-                a = log(1.0 + c2_charnock * ( pow(b / Uc, 3) ) ) / kappa;
-            c = max(c - a, c_min);
-            b = c;
-        }
-        z0_m = h_charnock * exp(-c * kappa);
-        z0_m = max(z0_m, T(0.000015e0));
-        Uc = U * log(h_charnock / z0_m) / log(h / z0_m);
-    }
-    
-    u_dyn0 = Uc / c;
-}
-
-template __device__ void get_charnock_roughness(float &z0_m, float &u_dyn0,
-    const float h, const float U,
-    const float kappa, 
-    const float h_charnock, const float c1_charnock, const float c2_charnock, 
-    const int maxiters);
-template __device__ void get_charnock_roughness(double &z0_m, double &u_dyn0, 
-    const double h, const double U,
-    const double kappa, 
-    const double h_charnock, const double c1_charnock, const double c2_charnock,
-    const int maxiters);
-
-template<typename T>
-__device__ void get_thermal_roughness(T &z0_t, T &B,
-    const T z0_m, const T Re, 
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough, const T B3_rough, const T B4_rough, 
-    const T B_max_ocean, const T B_max_lake, const T B_max_land,
-    const int surface_type)
-{
-    // --- define B = log(z0_m / z0_t)
-    if (Re <= Re_rough_min) 
-        B = B1_rough * log(B3_rough * Re) + B2_rough;
-    else
-        // *: B4 takes into account Re value at z' ~ O(10) z0
-        B = B4_rough * (pow(Re, B2_rough));
-
-    //   --- apply max restriction based on surface type
-    if (surface_type == 0) 
-        B = min(B, B_max_ocean);
-    else if (surface_type == 2) 
-        B = min(B, B_max_lake);
-    else if (surface_type == 1)
-        B = min(B, B_max_land);
-
-    // --- define roughness [thermal]
-    z0_t = z0_m / exp(B);
-}
-
-template __device__ void get_thermal_roughness(float &z0_t, float &B,
-    const float z0_m, const float Re, 
-    const float Re_rough_min, 
-    const float B1_rough, const float B2_rough, const float B3_rough, const float B4_rough, 
-    const float B_max_ocean, const float B_max_lake, const float B_max_land,
-    const int surface_type);
-template __device__ void get_thermal_roughness(double &z0_t, double &B,
-    const double z0_m, const double Re, 
-    const double Re_rough_min, 
-    const double B1_rough, const double B2_rough, const double B3_rough, const double B4_rough, 
-    const double B_max_ocean, const double B_max_lake, const double B_max_land,
-    const int surface_type);
-
-template<typename T>
-__device__ void get_psi_mh(T &psi_m, T &psi_h,
-    const T zeta_m, const T zeta_h,
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h)
-{
-    T x_m, x_h;
-    T q_m, q_h;
-
-    if (zeta_m >= 0.0) 
-    {
-        q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
-        x_m = pow(1.0 + zeta_m, 1.0 / 3.0);
-
-        psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
-    }                                
-    else
-    {    x_m = pow(1.0 - alpha_m * zeta_m, 0.25);
-        psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
-    }
-
-    if (zeta_h >= 0.0)
-    {    
-        q_h = sqrt(c_h * c_h - 4.0);
-        x_h = zeta_h;
-
-        psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
-    }
-    else
-    {
-        x_h = pow(1.0 - alpha_h * zeta_h, 0.25);
-        psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
-    }
-}
-
-template __device__ void get_psi_mh(float &psi_m, float &psi_h,
-    const float zeta_m, const float zeta_h,
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h);
-template __device__ void get_psi_mh(double &psi_m, double &psi_h,
-    const double zeta_m, const double zeta_h,
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h);
-
-template<typename T>
-__device__ void get_psi(T &psi_m, T &psi_h,
-    const T zeta,
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h)
-{
-    T x_m, x_h;
-    T q_m, q_h;
-
-    if (zeta >= 0.0) 
-    {
-        q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
-        q_h = sqrt(c_h * c_h - 4.0);
-
-        x_m = pow(1.0 + zeta, 1.0 / 3.0);
-        x_h = zeta;
-
-        psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
-
-        psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
-    }
-    else
-    {
-        x_m = pow(1.0 - alpha_m * zeta, 0.25);
-        x_h = pow(1.0 - alpha_h * zeta, 0.25); 
-
-        psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
-        psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
-    }
-}
-
-template __device__ void get_psi(float &psi_m, float &psi_h,
-    const float zeta,
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h);
-template __device__ void get_psi(double &psi_m, double &psi_h,
-    const double zeta,
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h);
-
-template<typename T>
-__device__ void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta,
-    const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const int maxiters)
-{
-    T psi_m, psi_h, psi0_m, psi0_h, Linv;
-    const T gamma = 0.61;
-
-    Udyn = kappa * U / log(z / z0_m);
-    Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t);
-    Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t);
-    zeta = 0.0;
-
-    // --- no wind
-    if (Udyn < 1e-5) 
-        return;
-
-    Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
-    zeta = z * Linv;
-
-    // --- near neutral case
-    if (Linv < 1e-5) 
-        return;
-
-    for (int i = 0; i < maxiters; i++)
-    {
-        get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h,
-        a_m, a_h, 
-        b_m, b_h,
-        c_h);
-        
-        get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv, 
-        alpha_m, alpha_h,
-        a_m, a_h, 
-        b_m, b_h,
-        c_h);
-
-        Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m));
-        Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
-        Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
-
-        if (Udyn < 1e-5) 
-            break;
-
-        Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
-        zeta = z * Linv;
-    }
-}
-
-template __device__ void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta,
-    const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta,
-    const float kappa, const float Pr_t_0_inv,
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h,
-    const int maxiters);
-template __device__ void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta,
-    const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta,
-    const double kappa, const double Pr_t_0_inv,
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h,
-    const int maxiters);
-
-template<typename T>
-__device__ void get_phi(T &phi_m, T &phi_h,
-    const T zeta, 
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h)
-{
-    if (zeta >= 0.0) 
-    {
-        phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta);
-        phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta);
-    }
-    else
-    {
-        phi_m = pow(1.0 - alpha_m * zeta, -0.25);
-        phi_h = pow(1.0 - alpha_h * zeta, -0.5);
-    }
-}
-
-template __device__ void get_phi(float &phi_m, float &phi_h,
-    const float zeta, 
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h);
-template __device__ void get_phi(double &phi_m, double &phi_h,
-    const double zeta, 
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h);
-
-template<typename T>
-__global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h, 
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough,
-    const T B_max_land, const T B_max_ocean, const T B_max_lake,
-    const T gamma_c, const T Re_visc_min,
-    const T Pr_m, const T nu_air, const T g, 
-    const int maxiters_charnock,
-    const int grid_size)
-{
-    const int index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    T h, U, dT, Tsemi, dQ, z0_m;
-    T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
-    zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
-    Km, Pr_t_inv, Cm, Ct;
-
-    const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8));
-    const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock;
-
-    int surface_type;
-
-    if(index < grid_size)
-    {
-        U = U_[index];
-        Tsemi = Tsemi_[index];
-        dT = dT_[index];
-        dQ = dQ_[index];
-        h = h_[index];
-        z0_m = in_z0_m_[index];
-
-        if (z0_m < 0.0) surface_type = 0;
-        else            surface_type = 1;
-
-        if (surface_type == 0) 
-        {
-            get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock);
-            h0_m = h / z0_m;
-        }
-        if (surface_type == 1) 
-        {
-            h0_m = h / z0_m;
-            u_dyn0 = U * kappa / log(h0_m);
-        }
-
-        Re = u_dyn0 * z0_m / nu_air;
-        get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type);
-
-        // --- define relative height [thermal]
-        h0_t = h / z0_t;
-
-        // --- define Ri-bulk
-        Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
-
-        // --- get the fluxes
-        // ----------------------------------------------------------------------------
-        get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10);
-        // ----------------------------------------------------------------------------
-
-        get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h);
-        // ----------------------------------------------------------------------------
-
-        // --- define transfer coeff. (momentum) & (heat)
-        Cm = 0.0;
-        if (U > 0.0)
-            Cm = Udyn / U;
-        Ct = 0.0;
-        if (fabs(dT) > 0.0) 
-            Ct = Tdyn / dT;
-
-        // --- define eddy viscosity & inverse Prandtl number
-        Km = kappa * Cm * U * h / phi_m;
-        Pr_t_inv = phi_m / phi_h;
-
-        zeta_[index]         = zeta;
-        Rib_[index]          = Rib;
-        Re_[index]           = Re;
-        B_[index]            = B;
-        z0_m_[index]         = z0_m;
-        z0_t_[index]         = z0_t;
-        Rib_conv_lim_[index] = 0.0;
-        Cm_[index]           = Cm;
-        Ct_[index]           = Ct;
-        Km_[index]           = Km;
-        Pr_t_inv_[index]     = Pr_t_inv;
-    }
-}
-
-template __global__ void kernel_compute_flux_sheba(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
-    const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
-    const float kappa, const float Pr_t_0_inv,
-    const float alpha_m, const float alpha_h, 
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h,
-    const float Re_rough_min, 
-    const float B1_rough, const float B2_rough,
-    const float B_max_land, const float B_max_ocean, const float B_max_lake,
-    const float gamma_c, const float Re_visc_min,
-    const float Pr_m, const float nu_air, const float g, 
-    const int maxiters_charnock,
-    const int grid_size);
-template __global__ void kernel_compute_flux_sheba(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
-    const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
-    const double kappa, const double Pr_t_0_inv,
-    const double alpha_m, const double alpha_h, 
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h,
-    const double Re_rough_min, 
-    const double B1_rough, const double B2_rough,
-    const double B_max_land, const double B_max_ocean, const double B_max_lake,
-    const double gamma_c, const double Re_visc_min,
-    const double Pr_m, const double nu_air, const double g, 
-    const int maxiters_charnock,
-    const int grid_size);
-
-template<typename T>
-void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h, 
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough,
-    const T B_max_land, const T B_max_ocean, const T B_max_lake,
-    const T gamma_c, const T Re_visc_min,
-    const T Pr_m, const T nu_air, const T g, 
-    const int maxiters_charnock,
-    const int grid_size)
-{
-    const int BlockCount = int(ceil(float(grid_size) / 512.0));
-    dim3 cuBlock = dim3(512, 1, 1);
-	dim3 cuGrid = dim3(BlockCount, 1, 1);
-
-    kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
-    U_, dT_, Tsemi_, dQ_, h_, in_z0_m_,
-    kappa, Pr_t_0_inv, 
-    alpha_m, alpha_h, 
-    a_m, a_h, 
-    b_m, b_h,
-    c_h,
-    Re_rough_min, 
-    B1_rough, B2_rough,
-    B_max_land, B_max_ocean, B_max_lake,
-    gamma_c, Re_visc_min,
-    Pr_m, nu_air, g, 
-    maxiters_charnock, 
-    grid_size);
-    gpuErrchk( cudaPeekAtLastError() );
-}
-
-template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
-    const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
-    const float kappa, const float Pr_t_0_inv,
-    const float alpha_m, const float alpha_h, 
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h,
-    const float Re_rough_min, 
-    const float B1_rough, const float B2_rough,
-    const float B_max_land, const float B_max_ocean, const float B_max_lake,
-    const float gamma_c, const float Re_visc_min,
-    const float Pr_m, const float nu_air, const float g, 
-    const int maxiters_charnock,
-    const int grid_size);
-template void compute_flux_sheba_gpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
-    const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
-    const double kappa, const double Pr_t_0_inv,
-    const double alpha_m, const double alpha_h, 
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h,
-    const double Re_rough_min, 
-    const double B1_rough, const double B2_rough,
-    const double B_max_land, const double B_max_ocean, const double B_max_lake,
-    const double gamma_c, const double Re_visc_min,
-    const double Pr_m, const double nu_air, const double g, 
-    const int maxiters_charnock,
-    const int grid_size);
\ No newline at end of file
diff --git a/srcCU/sfx_esm.cu b/srcCU/sfx_esm.cu
index 49595c249c59ea23e40151477c95c772edacf37c..424383ece427624670c4377f72901db6d195c1b2 100644
--- a/srcCU/sfx_esm.cu
+++ b/srcCU/sfx_esm.cu
@@ -46,7 +46,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
 
         if (surface_type == surface_param.surface_ocean)
         {
-            get_charnock_roughness(z0_m, u_dyn0, h, U, surface_param, numerics.maxiters_charnock);
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
             h0_m = h / z0_m;
         }
         if (surface_type == surface_param.surface_land) 
diff --git a/srcCU/sfx_sheba.cu b/srcCU/sfx_sheba.cu
index c5c33f806eda335662b1c3d1e399d15bbe7ffd53..c44a4bc145edcc2acfbe4ae59261e30524d77cd6 100644
--- a/srcCU/sfx_sheba.cu
+++ b/srcCU/sfx_sheba.cu
@@ -47,7 +47,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
 
         if (surface_type == surface_param.surface_ocean) 
         {
-            get_charnock_roughness(z0_m, u_dyn0, h, U, surface_param, numerics.maxiters_charnock);
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
             h0_m = h / z0_m;
         }
         if (surface_type == surface_param.surface_land) 
diff --git a/srcCXX/sfx_esm.cpp b/srcCXX/sfx_esm.cpp
index 18e1c7fb2635000ff1f912d8457fb5afbb1c7178..2d8979c29787cd468711217753da41b2c2f85eba 100644
--- a/srcCXX/sfx_esm.cpp
+++ b/srcCXX/sfx_esm.cpp
@@ -49,7 +49,7 @@ void FluxEsm<T, memIn, memOut, MemType::CPU>::compute_flux()
 
         if (surface_type == surface_param.surface_ocean)
         {
-            get_charnock_roughness(z0_m, u_dyn0, h, U, surface_param, numerics.maxiters_charnock);
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
             h0_m = h / z0_m;
         }
         if (surface_type == surface_param.surface_land) 
diff --git a/srcCXX/sfx_sheba.cpp b/srcCXX/sfx_sheba.cpp
index 8417854ff63b0a35d7d2ff7cde63325941fb497e..5063ebbc10431834973a9d6fddfefda032400240 100644
--- a/srcCXX/sfx_sheba.cpp
+++ b/srcCXX/sfx_sheba.cpp
@@ -50,7 +50,7 @@ void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux()
 
         if (surface_type == surface_param.surface_ocean) 
         {
-            get_charnock_roughness(z0_m, u_dyn0, h, U, surface_param, numerics.maxiters_charnock);
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
             h0_m = h / z0_m;
         }
         if (surface_type == surface_param.surface_land) 
diff --git a/srcF/sfx_data.f90 b/srcF/sfx_data.f90
index a3c8a2215aaa489a8189bd00bb3ce9896a1a43cb..1cdc8ebfa22ed16266362de6c48577070900fad3 100644
--- a/srcF/sfx_data.f90
+++ b/srcF/sfx_data.f90
@@ -41,12 +41,12 @@ module sfx_data
     !> &details using arrays as input
     type, public :: meteoDataVecType
 #if defined(INCLUDE_CXX)
-        real, pointer :: h(:)       !< constant flux layer height [m]
-        real, pointer :: U(:)       !< abs(wind speed) at 'h' [m/s]
-        real, pointer :: dT(:)      !< difference between potential temperature at 'h' and at surface [K]
-        real, pointer :: Tsemi(:)   !< semi-sum of potential temperature at 'h' and at surface [K]
-        real, pointer :: dQ(:)      !< difference between humidity at 'h' and at surface [g/g]
-        real, pointer :: z0_m(:)    !< surface aerodynamic roughness (should be < 0 for water bodies surface)
+        real, pointer, contiguous :: h(:)       !< constant flux layer height [m]
+        real, pointer, contiguous :: U(:)       !< abs(wind speed) at 'h' [m/s]
+        real, pointer, contiguous :: dT(:)      !< difference between potential temperature at 'h' and at surface [K]
+        real, pointer, contiguous :: Tsemi(:)   !< semi-sum of potential temperature at 'h' and at surface [K]
+        real, pointer, contiguous :: dQ(:)      !< difference between humidity at 'h' and at surface [g/g]
+        real, pointer, contiguous :: z0_m(:)    !< surface aerodynamic roughness (should be < 0 for water bodies surface)
 #else
         real, allocatable :: h(:)       !< constant flux layer height [m]
         real, allocatable :: U(:)       !< abs(wind speed) at 'h' [m/s]
@@ -91,17 +91,17 @@ module sfx_data
     !> &details using arrays as output
     type, public :: sfxDataVecType
 #if defined(INCLUDE_CXX)
-        real, pointer :: zeta(:)            !< = z/L [n/d]
-        real, pointer :: Rib(:)             !< bulk Richardson number [n/d]
-        real, pointer :: Re(:)              !< Reynolds number [n/d]
-        real, pointer :: B(:)               !< = log(z0_m / z0_h) [n/d]
-        real, pointer :: z0_m(:)            !< aerodynamic roughness [m]
-        real, pointer :: z0_t(:)            !< thermal roughness [m]
-        real, pointer :: Rib_conv_lim(:)    !< Ri-bulk convection critical value [n/d]
-        real, pointer :: Cm(:)              !< transfer coefficient for momentum [n/d]
-        real, pointer :: Ct(:)              !< transfer coefficient for heat [n/d]
-        real, pointer :: Km(:)              !< eddy viscosity coeff. at h [m^2/s]
-        real, pointer :: Pr_t_inv(:)        !< inverse turbulent Prandtl number at h [n/d]
+        real, pointer, contiguous :: zeta(:)            !< = z/L [n/d]
+        real, pointer, contiguous :: Rib(:)             !< bulk Richardson number [n/d]
+        real, pointer, contiguous :: Re(:)              !< Reynolds number [n/d]
+        real, pointer, contiguous :: B(:)               !< = log(z0_m / z0_h) [n/d]
+        real, pointer, contiguous :: z0_m(:)            !< aerodynamic roughness [m]
+        real, pointer, contiguous :: z0_t(:)            !< thermal roughness [m]
+        real, pointer, contiguous :: Rib_conv_lim(:)    !< Ri-bulk convection critical value [n/d]
+        real, pointer, contiguous :: Cm(:)              !< transfer coefficient for momentum [n/d]
+        real, pointer, contiguous :: Ct(:)              !< transfer coefficient for heat [n/d]
+        real, pointer, contiguous :: Km(:)              !< eddy viscosity coeff. at h [m^2/s]
+        real, pointer, contiguous :: Pr_t_inv(:)        !< inverse turbulent Prandtl number at h [n/d]
 #else
         real, allocatable :: zeta(:)            !< = z/L [n/d]
         real, allocatable :: Rib(:)             !< bulk Richardson number [n/d]