diff --git a/includeC/sfx-data.h b/includeC/sfx-data.h
index bd14366599263bd14dea299dd94dbf22f7e7ddbb..86475a1f0d20ff3415d2b67521051c985c8c1d19 100644
--- a/includeC/sfx-data.h
+++ b/includeC/sfx-data.h
@@ -124,6 +124,32 @@ extern "C" {
     {
         int maxiters_charnock;
     };
+
+    struct sfx_sheba_noit_param_C
+    {
+        float kappa;
+        float Pr_t_0_inv;
+        float Pr_t_inf_inv;
+
+        float alpha_m;
+        float alpha_h;
+        float alpha_h_fix;
+        float Rib_max;
+        float gamma;
+        float zeta_a;
+
+        float a_m;
+        float b_m;
+        float a_h;
+        float b_h;
+        float c_h;
+    };
+
+    struct sfx_sheba_noit_numericsType_C
+    {
+        int maxiters_charnock;
+        int maxiters_convection;
+    };
     
 #ifdef __cplusplus
 }
diff --git a/includeCU/sfx-model-compute-subfunc.cuh b/includeCU/sfx-model-compute-subfunc.cuh
index 9bf794ec00539fe61087934a21d384d5b18af151..748bafd91ef4e84c5272a730ee97978465b51bb6 100644
--- a/includeCU/sfx-model-compute-subfunc.cuh
+++ b/includeCU/sfx-model-compute-subfunc.cuh
@@ -2,10 +2,72 @@
 #include "sfx-data.h"
 #include "sfx-math.cuh"
 
-template<typename T, class sfx_param>
+template<typename T, class model>
+FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T& psi_m, T& psi_h,
+    const T zeta_m, const T zeta_h,
+    const model& param)
+{
+    T x_m, x_h, q_m, q_h;
+
+    q_m = powf((1.0 - param.b_m) / param.b_m, 1.0 / 3.0);
+    x_m = powf(1.0 + zeta_m, 1.0 / 3.0);
+
+    psi_m = -3.0 * (param.a_m / param.b_m) * (x_m - 1.0) + 0.5 * (param.a_m / param.b_m) * q_m * 
+            (2.0 * logf((x_m + q_m) / (1.0 + q_m)) - 
+            logf((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 
+            2.0 * sqrtf(3.0) * (atanf((2.0 * x_m - q_m) / (sqrtf(3.0) * q_m)) - 
+            atanf((2.0 - q_m) / (sqrtf(3.0) * q_m))));
+
+    q_h = sqrtf(param.c_h * param.c_h - 4.0);
+    x_h = zeta_h;
+
+    psi_h = -0.5 * param.b_h * logf(1.0 + param.c_h * x_h + x_h * x_h) + 
+            ((-param.a_h / q_h) + ((param.b_h * param.c_h) / (2.0 * q_h))) * 
+            (logf((2.0 * x_h + param.c_h - q_h) / (2.0 * x_h + param.c_h + q_h)) - 
+            logf((param.c_h - q_h) / (param.c_h + q_h)));
+}
+
+template<typename T, class model>
+FUCNTION_DECLARATION_SPECIFIER void get_zeta(T& zeta,
+    const T Rib, const T h, const T z0_m, const T z0_t,
+    const model& param)
+{
+    T Ribl, C1, A1, A2, lne, lnet;
+    T psi_m, psi_h, psi0_m, psi0_h;
+
+    Ribl = (Rib * param.Pr_t_0_inv) * (1 - z0_t / h) / ((1 - z0_m / h)*(1 - z0_m / h));
+
+    get_psi_stable(psi_m, psi_h, param.zeta_a, param.zeta_a, param);
+    get_psi_stable(psi0_m, psi0_h, param.zeta_a * z0_m / h,  param.zeta_a * z0_t / h, param);
+
+    lne = logf(h/z0_m);
+    lnet = logf(h/z0_t);
+    C1 = (lne*lne)/lnet;
+    A1 = powf(lne - psi_m + psi0_m, 2 * (param.gamma-1)) / ( powf(param.zeta_a, param.gamma-1) 
+        * powf(lnet - (psi_h - psi0_h) * param.Pr_t_0_inv, param.gamma-1));
+    A2 = powf(lne - psi_m + psi0_m, 2) / (lnet - (psi_h - psi0_h) * param.Pr_t_0_inv) - C1;
+
+    zeta = C1 * Ribl + A1 * A2 * powf(Ribl, param.gamma);
+}
+
+template<typename T, class model>
+FUCNTION_DECLARATION_SPECIFIER T f_m_conv(const T zeta,
+    const model& param)
+{
+    return powf(1.0 - param.alpha_m * zeta, 0.25);
+}
+
+template<typename T, class model>
+FUCNTION_DECLARATION_SPECIFIER T f_h_conv(const T zeta,
+    const model& param)
+{
+    return sqrtf(1.0 - param.alpha_h * zeta);
+}
+
+template<typename T, class model>
 FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim,
     const T h0_m, const T h0_t, const T B,
-    const sfx_param& param)
+    const model& param)
 {
     T psi_m, psi_h, f_m, f_h, c;
 
@@ -13,9 +75,9 @@ FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim,
     zeta_lim = (2.0 * param.alpha_h - c * param.alpha_m - 
                 sqrtf( (c * param.alpha_m)*(c * param.alpha_m) + 4.0 * c * param.alpha_h * (param.alpha_h - param.alpha_m))) / (2.0 * param.alpha_h*param.alpha_h);
 
-    f_m_lim = powf(1.0 - param.alpha_m * zeta_lim, 0.25);
-    f_h_lim = sqrtf(1.0 - param.alpha_h * zeta_lim);
-
+    f_m_lim = f_m_conv(zeta_lim, param);
+    f_h_lim = f_h_conv(zeta_lim, param);
+    
     f_m = zeta_lim / h0_m;
     f_h = zeta_lim / h0_t;
     if (fabsf(B) < 1.0e-10) f_h = f_m;
@@ -29,10 +91,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim,
     Rib_lim = zeta_lim * psi_h / (psi_m * psi_m);
 }
 
-template<typename T, class sfx_param>
+template<typename T, class model>
 FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
     const T Rib, const T h0_m, const T h0_t, const T B,
-    const sfx_param& param)
+    const model& param)
 {
     T Rib_coeff, psi0_m, psi0_h, phi, c;
     
@@ -49,11 +111,11 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
     psi_h = (psi0_m + B) / param.Pr_t_0_inv + phi;
 }
 
-template<typename T, class sfx_param>
+template<typename T, class model>
 FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &zeta, 
     const T Rib, const T h0_m, const T h0_t, const T B, 
     const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim,
-    const sfx_param& param,
+    const model& param,
     const int maxiters_convection)
 {
     T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f;
@@ -89,10 +151,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &ze
     }
 }
 
-template<typename T, class sfx_param>
+template<typename T, class model>
 FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,   
     const T h0_m, const T h0_t, const T B,
-    const sfx_param& param)
+    const model& param)
 {
     zeta = 0.0;
     psi_m = logf(h0_m);
@@ -101,10 +163,21 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,
         psi_h = psi_m / param.Pr_t_0_inv;
 }
 
-template<typename T, class sfx_param>
+template<typename T, class model>
+FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h,
+    const T h0_m, const T h0_t, const T B,
+    const model& param)
+{
+    psi_m = logf(h0_m);
+    psi_h = logf(h0_t) / param.Pr_t_0_inv;
+    // *: this looks redundant z0_t = z0_m in case |B| ~ 0
+    if (fabsf(B) < 1.0e-10) psi_h = psi_m / param.Pr_t_0_inv;
+}
+
+template<typename T, class model>
 FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h, T &zeta,
     const T Rib, const T h0_m, const T h0_t, const T B, 
-    const sfx_param& param,
+    const model& param,
     const int maxiters_convection)
 {
     T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h;
@@ -143,10 +216,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h,
     }
 }
 
-template<typename T, class sfx_param>
+template<typename T, class model>
 FUCNTION_DECLARATION_SPECIFIER void get_psi(T &psi_m, T &psi_h,
     const T zeta,
-    const sfx_param& param)
+    const model& param)
 {
     T x_m, x_h;
     T q_m, q_h;
@@ -173,10 +246,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi(T &psi_m, T &psi_h,
     }
 }
 
-template<typename T, class sfx_param>
+template<typename T, class model>
 FUCNTION_DECLARATION_SPECIFIER void get_psi_mh(T &psi_m, T &psi_h,
     const T zeta_m, const T zeta_h,
-    const sfx_param& param)
+    const model& param)
 {
     T x_m, x_h;
     T q_m, q_h;
@@ -209,12 +282,12 @@ FUCNTION_DECLARATION_SPECIFIER void get_psi_mh(T &psi_m, T &psi_h,
 }
 
 
-template<typename T, class sfx_param>
+template<typename T, class model>
 FUCNTION_DECLARATION_SPECIFIER void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T& zeta,
     const T U, const T Tsemi, const T dT, const T dQ, 
     const T z, const T z0_m, const T z0_t,
     const T beta,
-    const sfx_param& param,
+    const model& param,
     const int maxiters)
 {
     const T gamma = T(0.61);
@@ -254,10 +327,10 @@ FUCNTION_DECLARATION_SPECIFIER void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn
     }
 }
 
-template<typename T, class sfx_param>
+template<typename T, class model>
 FUCNTION_DECLARATION_SPECIFIER void get_phi(T &phi_m, T &phi_h,
     const T zeta, 
-    const sfx_param& param)
+    const model& param)
 {
     if (zeta >= 0.0) 
     {
diff --git a/includeCXX/cxx-sfx-model-compute-flux.h b/includeCXX/cxx-sfx-model-compute-flux.h
index 44adc935d41df0edcf4e415876e3a029b458df04..0e1bfe1709c730fac50c79827db1927e618b348b 100644
--- a/includeCXX/cxx-sfx-model-compute-flux.h
+++ b/includeCXX/cxx-sfx-model-compute-flux.h
@@ -20,6 +20,14 @@ extern "C" {
                            const struct sfx_sheba_numericsType_C* numerics,
                            const struct sfx_phys_constants* constants,
                            const int grid_size);
+
+    void sheba_noit_compute_flux(struct sfxDataVecTypeC* sfx,
+                           struct meteoDataVecTypeC* meteo,
+                           const struct sfx_sheba_noit_param_C* model_param, 
+                           const struct sfx_surface_param* surface_param,
+                           const struct sfx_sheba_noit_numericsType_C* numerics,
+                           const struct sfx_phys_constants* constants,
+                           const int grid_size);
 #ifdef __cplusplus
 }
 #endif
\ No newline at end of file
diff --git a/includeCXX/sfx-sheba.h b/includeCXX/sfx-sheba.h
index a6623703a4d292165f6ab73a67b19a4dc30444a2..0143d799c9eb9fe0644d3e81eaa73b6c5a19431c 100644
--- a/includeCXX/sfx-sheba.h
+++ b/includeCXX/sfx-sheba.h
@@ -4,85 +4,55 @@
 #include "sfx-data.h"
 
 template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-class FluxShebaBase : public ModelBase<T, memIn, memOut, RunMem>
-{
-public:
-    using ModelBase<T, memIn, memOut, RunMem>::res_sfx;
-    using ModelBase<T, memIn, memOut, RunMem>::sfx;
-    using ModelBase<T, memIn, memOut, RunMem>::meteo;
-    using ModelBase<T, memIn, memOut, RunMem>::grid_size;
-    using ModelBase<T, memIn, memOut, RunMem>::ifAllocated;
-    using ModelBase<T, memIn, memOut, RunMem>::allocated_size;
-
-    sfx_surface_param surface;
-    sfx_phys_constants phys;
-    sfx_sheba_param_C model;
-    sfx_sheba_numericsType_C numerics;
-
-    FluxShebaBase(sfxDataVecTypeC* sfx,
-                meteoDataVecTypeC* meteo,
-                const sfx_sheba_param_C model, 
-                const sfx_surface_param surface,
-                const sfx_sheba_numericsType_C numerics,
-                const sfx_phys_constants phys,
-                const int grid_size);
-    ~FluxShebaBase();
-};
-
-template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-class FluxSheba : public FluxShebaBase<T, memIn, memOut, RunMem>
+class FluxSheba : public ModelBase<T, memIn, memOut, RunMem>
 {};
 
 template<typename T, MemType memIn, MemType memOut >
-class FluxSheba<T, memIn, memOut, MemType::CPU> : public FluxShebaBase<T, memIn, memOut, MemType::CPU>
+class FluxSheba<T, memIn, memOut, MemType::CPU> : public ModelBase<T, memIn, memOut, MemType::CPU>
 {
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::res_sfx;
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::sfx;
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::meteo;
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::surface;
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::phys;
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::grid_size;
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::ifAllocated;
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::allocated_size;
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::model;
-    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::numerics;
+    using ModelBase<T, memIn, memOut, MemType::CPU>::res_sfx;
+    using ModelBase<T, memIn, memOut, MemType::CPU>::sfx;
+    using ModelBase<T, memIn, memOut, MemType::CPU>::meteo;
+    using ModelBase<T, memIn, memOut, MemType::CPU>::grid_size;
+    using ModelBase<T, memIn, memOut, MemType::CPU>::ifAllocated;
+    using ModelBase<T, memIn, memOut, MemType::CPU>::allocated_size;
 public:
     FluxSheba(sfxDataVecTypeC* sfx,
                 meteoDataVecTypeC* meteo,
-                const sfx_sheba_param_C model, 
+                const int grid_size) : ModelBase<T, memIn, memOut, MemType::CPU>(sfx, meteo, grid_size) {}
+    ~FluxSheba() = default;
+    void compute_flux(const sfx_sheba_param_C model, 
                 const sfx_surface_param surface,
                 const sfx_sheba_numericsType_C numerics,
-                const sfx_phys_constants phys,
-                const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::CPU>(sfx, meteo, model, 
-                                       surface, numerics, phys, grid_size) {}
-    ~FluxSheba() = default;
-    void compute_flux();
+                const sfx_phys_constants phys);
+    void noit_compute_flux(const sfx_sheba_noit_param_C model, 
+                const sfx_surface_param surface,
+                const sfx_sheba_noit_numericsType_C numerics,
+                const sfx_phys_constants phys);
 };
 
 #ifdef INCLUDE_CUDA
 template<typename T, MemType memIn, MemType memOut >
-class FluxSheba<T, memIn, memOut, MemType::GPU> : public FluxShebaBase<T, memIn, memOut, MemType::GPU>
+class FluxSheba<T, memIn, memOut, MemType::GPU> : public ModelBase<T, memIn, memOut, MemType::GPU>
 {
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::res_sfx;
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::sfx;
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::meteo;
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::surface;
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::phys;
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::grid_size;
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::ifAllocated;
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::allocated_size;
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::model;
-    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::numerics;
+    using ModelBase<T, memIn, memOut, MemType::GPU>::res_sfx;
+    using ModelBase<T, memIn, memOut, MemType::GPU>::sfx;
+    using ModelBase<T, memIn, memOut, MemType::GPU>::meteo;
+    using ModelBase<T, memIn, memOut, MemType::GPU>::grid_size;
+    using ModelBase<T, memIn, memOut, MemType::GPU>::ifAllocated;
+    using ModelBase<T, memIn, memOut, MemType::GPU>::allocated_size;
 public:
     FluxSheba(sfxDataVecTypeC* sfx,
                 meteoDataVecTypeC* meteo,
-                const sfx_sheba_param_C model, 
+                const int grid_size) : ModelBase<T, memIn, memOut, MemType::GPU>(sfx, meteo, grid_size) {}
+    ~FluxSheba() = default;
+    void compute_flux(const sfx_sheba_param_C model, 
                 const sfx_surface_param surface,
                 const sfx_sheba_numericsType_C numerics,
-                const sfx_phys_constants phys,
-                const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::GPU>(sfx, meteo, model, 
-                                       surface, numerics, phys, grid_size) {}
-    ~FluxSheba() = default;
-    void compute_flux();
+                const sfx_phys_constants phys);
+    void noit_compute_flux(const sfx_sheba_noit_param_C model, 
+                const sfx_surface_param surface,
+                const sfx_sheba_noit_numericsType_C numerics,
+                const sfx_phys_constants phys);
 };
 #endif
\ No newline at end of file
diff --git a/srcC/c-sfx-model-compute-flux.c b/srcC/c-sfx-model-compute-flux.c
index 60a2a18032713338167d7ba850dcaff3efd52272..5726aedbb008a3bfd1d46ab40bf80a6acccf89b2 100644
--- a/srcC/c-sfx-model-compute-flux.c
+++ b/srcC/c-sfx-model-compute-flux.c
@@ -23,4 +23,15 @@ void c_sheba_compute_flux (struct sfxDataVecTypeC* sfx,
                                  const int *grid_size)
 {
   sheba_compute_flux(sfx, meteo, model_param, surface_param, numerics, constants, *grid_size); 
+}
+
+void c_sheba_noit_compute_flux (struct sfxDataVecTypeC* sfx,
+                                 struct meteoDataVecTypeC* meteo,
+                                 const struct sfx_sheba_noit_param_C* model_param, 
+                                 const struct sfx_surface_param* surface_param,
+                                 const struct sfx_sheba_noit_numericsType_C* numerics,
+                                 const struct sfx_phys_constants* constants,
+                                 const int *grid_size)
+{
+  sheba_noit_compute_flux(sfx, meteo, model_param, surface_param, numerics, constants, *grid_size); 
 }
\ No newline at end of file
diff --git a/srcCU/sfx-sheba.cu b/srcCU/sfx-sheba.cu
index 3fff68a238eec7025427dc06adbb9402e216f7e5..de9a828c06801e5c0d3bf8d51f2e5aaf6191cf95 100644
--- a/srcCU/sfx-sheba.cu
+++ b/srcCU/sfx-sheba.cu
@@ -16,6 +16,15 @@ namespace sfx_kernel
         const sfx_sheba_numericsType_C numerics,
         const sfx_phys_constants phys,
         const int grid_size);
+
+    template<typename T>
+    __global__ void noit_compute_flux(sfxDataVecTypeC sfx,
+        meteoDataVecTypeC meteo,
+        const sfx_sheba_noit_param_C model, 
+        const sfx_surface_param surface,
+        const sfx_sheba_noit_numericsType_C numerics,
+        const sfx_phys_constants phys,
+        const int grid_size);
 }
 
 template<typename T>
@@ -100,8 +109,152 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
     }
 }
 
+template<typename T>
+__global__ void sfx_kernel::noit_compute_flux(sfxDataVecTypeC sfx,
+    meteoDataVecTypeC meteo,
+    const sfx_sheba_noit_param_C model, 
+    const sfx_surface_param surface,
+    const sfx_sheba_noit_numericsType_C numerics,
+    const sfx_phys_constants phys,
+    const int grid_size)
+{
+    const int index = blockIdx.x * blockDim.x + threadIdx.x;
+    T h, U, dT, Tsemi, dQ, z0_m;
+    T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
+    zeta, Rib, zeta_conv_lim, Rib_conv_lim, 
+    f_m_conv_lim, f_h_conv_lim,
+    psi_m, psi_h, 
+    psi0_m, psi0_h,
+    Udyn, Tdyn, Qdyn, 
+    phi_m, phi_h,
+    Km, Pr_t_inv, Cm, Ct,
+    fval;
+    int surface_type;
+
+    if(index < grid_size)
+    {
+        U = meteo.U[index];
+        Tsemi = meteo.Tsemi[index];
+        dT = meteo.dT[index];
+        dQ = meteo.dQ[index];
+        h = meteo.h[index];
+        z0_m = meteo.z0_m[index];
+
+        surface_type = z0_m < 0.0 ? surface.surface_ocean : surface.surface_land;
+
+        if (surface_type == surface.surface_ocean) 
+        {
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface, numerics.maxiters_charnock);
+            h0_m = h / z0_m;
+        }
+        if (surface_type == surface.surface_land) 
+        {
+            h0_m = h / z0_m;
+            u_dyn0 = U * model.kappa / logf(h0_m);
+        }
+
+        Re = u_dyn0 * z0_m / phys.nu_air;
+        get_thermal_roughness(z0_t, B, z0_m, Re, surface, surface_type);
+
+        // --- define relative height [thermal]
+        h0_t = h / z0_t;
+
+        // --- define Ri-bulk
+        Rib = (phys.g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
+
+        get_convection_lim(zeta_conv_lim, Rib_conv_lim, f_m_conv_lim, f_h_conv_lim, h0_m, h0_t, B, model);
+
+        // --- get the fluxes
+        // ----------------------------------------------------------------------------
+        if (Rib > 0.0)
+        {
+            // --- stable stratification block
+
+            // --- restrict bulk Ri value
+            // *: note that this value is written to output
+            // Rib = min(Rib, Rib_max)
+            get_zeta(zeta, Rib, h, z0_m, z0_t, model);
+            get_psi_stable(psi_m, psi_h, zeta, zeta, model);
+            get_psi_stable(psi0_m, psi0_h, zeta * z0_m / h, zeta * z0_t / h, model);
+
+            phi_m = 1.0 + (model.a_m * zeta * powf(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + model.b_m * zeta);
+            phi_h = 1.0 + (model.a_h * zeta + model.b_h * zeta * zeta) / (1.0 + model.c_h * zeta + zeta * zeta);
+
+            Udyn = model.kappa * U / (logf(h / z0_m) - (psi_m - psi0_m));
+            Tdyn = model.kappa * dT * model.Pr_t_0_inv / (logf(h / z0_t) - (psi_h - psi0_h));
+        }
+        else if (Rib < Rib_conv_lim) 
+        {    
+            // --- strong instability block
+
+            get_psi_convection(psi_m, psi_h, zeta, Rib,
+                zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, h0_m, h0_t, B, model, numerics.maxiters_convection);
+
+            fval = powf(zeta_conv_lim / zeta, 1.0/3.0);
+            phi_m = fval / f_m_conv_lim;
+            phi_h = fval / (model.Pr_t_0_inv * f_h_conv_lim);
+        }
+        else if (Rib > -0.001)
+        { 
+            // --- nearly neutral [-0.001, 0] block
+
+            get_psi_neutral(psi_m, psi_h, h0_m, h0_t, B, model);
+
+            zeta = 0.0;
+            phi_m = 1.0;
+            phi_h = 1.0 / model.Pr_t_0_inv;
+        }
+        else
+        {
+            // --- weak & semistrong instability block
+
+            get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model, numerics.maxiters_convection);
+
+            phi_m = powf(1.0 - model.alpha_m * zeta, -0.25);
+            phi_h = 1.0 / (model.Pr_t_0_inv * sqrtf(1.0 - model.alpha_h_fix * zeta));
+        } 
+        // ----------------------------------------------------------------------------
+
+        // --- define transfer coeff. (momentum) & (heat)
+        if(Rib > 0)
+        {    
+            Cm = 0.0;
+            if (U > 0.0)
+                Cm = Udyn / U;
+        
+            Ct = 0.0;
+            if (fabs(dT) > 0.0)
+                Ct = Tdyn / dT;
+        }
+        else
+        { 
+            Cm = model.kappa / psi_m;
+            Ct = model.kappa / psi_h;
+        }
+
+        // --- define eddy viscosity & inverse Prandtl number
+        Km = model.kappa * Cm * U * h / phi_m;
+        Pr_t_inv = phi_m / phi_h;
+
+        sfx.zeta[index]         = zeta;
+        sfx.Rib[index]          = Rib;
+        sfx.Re[index]           = Re;
+        sfx.B[index]            = B;
+        sfx.z0_m[index]         = z0_m;
+        sfx.z0_t[index]         = z0_t;
+        sfx.Rib_conv_lim[index] = Rib_conv_lim;
+        sfx.Cm[index]           = Cm;
+        sfx.Ct[index]           = Ct;
+        sfx.Km[index]           = Km;
+        sfx.Pr_t_inv[index]     = Pr_t_inv;
+    }
+}
+
 template<typename T, MemType memIn, MemType memOut >
-void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux()
+void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux(const sfx_sheba_param_C model, 
+    const sfx_surface_param surface,
+    const sfx_sheba_numericsType_C numerics,
+    const sfx_phys_constants phys)
 {
     const int BlockCount = int(ceil(float(grid_size) / 1024.0));
     dim3 cuBlock = dim3(1024, 1, 1);
@@ -127,13 +280,35 @@ void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux()
     }
 }
 
-template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>;
-template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>;
-template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>;
-template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>;
-template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>;
-template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>;
-template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>;
+template<typename T, MemType memIn, MemType memOut >
+void FluxSheba<T, memIn, memOut, MemType::GPU>::noit_compute_flux(const sfx_sheba_noit_param_C model, 
+    const sfx_surface_param surface,
+    const sfx_sheba_noit_numericsType_C numerics,
+    const sfx_phys_constants phys)
+{
+    const int BlockCount = int(ceil(float(grid_size) / 1024.0));
+    dim3 cuBlock = dim3(1024, 1, 1);
+	dim3 cuGrid = dim3(BlockCount, 1, 1);
+
+    sfx_kernel::noit_compute_flux<T><<<cuGrid, cuBlock>>>(sfx, meteo, model, 
+                                                    surface, numerics, phys, grid_size);
+
+    if(MemType::GPU != memOut)
+    {
+        const size_t new_size = grid_size * sizeof(T);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size);
+    }
+}
 
 template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>;
 template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>;
diff --git a/srcCXX/cxx-sfx-model-compute-flux.cpp b/srcCXX/cxx-sfx-model-compute-flux.cpp
index 493f4ca0c77bdf2ada9d64df84652f36a61733ca..40873cd31661d001f858b14603d8387810523231 100644
--- a/srcCXX/cxx-sfx-model-compute-flux.cpp
+++ b/srcCXX/cxx-sfx-model-compute-flux.cpp
@@ -32,10 +32,27 @@ void sheba_compute_flux (sfxDataVecTypeC* sfx,
                            const int grid_size)
 {
 #ifdef INCLUDE_CUDA
-    static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU> F(sfx, meteo, *model_param, *surface_param, *numerics, *constants, grid_size);
-    F.compute_flux();
+    static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU> F(sfx, meteo, grid_size);
+    F.compute_flux(*model_param, *surface_param, *numerics, *constants);
 #else
-    static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU> F(sfx, meteo, *model_param, *surface_param, *numerics, *constants, grid_size);
-    F.compute_flux();
+    static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU> F(sfx, meteo, grid_size);
+    F.compute_flux(*model_param, *surface_param, *numerics, *constants);
+#endif
+}
+
+void sheba_noit_compute_flux (sfxDataVecTypeC* sfx,
+                           meteoDataVecTypeC* meteo,
+                           const sfx_sheba_noit_param_C* model_param, 
+                           const sfx_surface_param* surface_param,
+                           const sfx_sheba_noit_numericsType_C* numerics,
+                           const sfx_phys_constants* constants,
+                           const int grid_size)
+{
+#ifdef INCLUDE_CUDA
+    static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU> F(sfx, meteo, grid_size);
+    F.noit_compute_flux(*model_param, *surface_param, *numerics, *constants);
+#else
+    static FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU> F(sfx, meteo, grid_size);
+    F.noit_compute_flux(*model_param, *surface_param, *numerics, *constants);
 #endif
 }
\ No newline at end of file
diff --git a/srcCXX/sfx-sheba.cpp b/srcCXX/sfx-sheba.cpp
index bf389dc2066e816d1cf51c8059f7bf8eb6ad12ad..6d905f9ee71b9cef6ab5f077e0e4846139cd50f9 100644
--- a/srcCXX/sfx-sheba.cpp
+++ b/srcCXX/sfx-sheba.cpp
@@ -10,26 +10,11 @@
 #include "sfx-surface.cuh"
 #include "sfx-model-compute-subfunc.cuh"
 
-template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-FluxShebaBase<T, memIn, memOut, RunMem>::FluxShebaBase(sfxDataVecTypeC* sfx_in,
-                meteoDataVecTypeC* meteo_in,
-                const sfx_sheba_param_C model_param_in, 
-                const sfx_surface_param surface_param_in,
-                const sfx_sheba_numericsType_C numerics_in,
-                const sfx_phys_constants phys_constants_in,
-                const int grid_size_in) : ModelBase<T, memIn, memOut, RunMem>(sfx_in, meteo_in, grid_size_in)
-{
-    surface = surface_param_in;
-    phys = phys_constants_in;
-    model = model_param_in;
-    numerics = numerics_in;
-}
-
-template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-FluxShebaBase<T, memIn, memOut, RunMem>::~FluxShebaBase() {}
-
 template<typename T, MemType memIn, MemType memOut >
-void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux()
+void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux(const sfx_sheba_param_C model, 
+                const sfx_surface_param surface,
+                const sfx_sheba_numericsType_C numerics,
+                const sfx_phys_constants phys)
 {
     T h, U, dT, Tsemi, dQ, z0_m;
     T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
@@ -101,6 +86,158 @@ void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux()
         sfx.Km[step]           = Km;
         sfx.Pr_t_inv[step]     = Pr_t_inv;
     }
+    if(MemType::CPU != memOut)
+    {
+        const size_t new_size = grid_size * sizeof(T);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size);
+    }
+}
+
+template<typename T, MemType memIn, MemType memOut >
+void FluxSheba<T, memIn, memOut, MemType::CPU>::noit_compute_flux(const sfx_sheba_noit_param_C model, 
+                const sfx_surface_param surface,
+                const sfx_sheba_noit_numericsType_C numerics,
+                const sfx_phys_constants phys)
+{
+    T h, U, dT, Tsemi, dQ, z0_m;
+    T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
+    zeta, Rib, zeta_conv_lim, Rib_conv_lim, 
+    f_m_conv_lim, f_h_conv_lim,
+    psi_m, psi_h, 
+    psi0_m, psi0_h,
+    Udyn, Tdyn, Qdyn, 
+    phi_m, phi_h,
+    Km, Pr_t_inv, Cm, Ct,
+    fval;
+    int surface_type;
+
+    for (int step = 0; step < grid_size; step++)
+    {
+        U = meteo.U[step];
+        Tsemi = meteo.Tsemi[step];
+        dT = meteo.dT[step];
+        dQ = meteo.dQ[step];
+        h = meteo.h[step];
+        z0_m = meteo.z0_m[step];
+
+        surface_type = z0_m < 0.0 ? surface.surface_ocean : surface.surface_land;
+
+        if (surface_type == surface.surface_ocean) 
+        {
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface, numerics.maxiters_charnock);
+            h0_m = h / z0_m;
+        }
+        if (surface_type == surface.surface_land) 
+        {
+            h0_m = h / z0_m;
+            u_dyn0 = U * model.kappa / logf(h0_m);
+        }
+
+        Re = u_dyn0 * z0_m / phys.nu_air;
+        get_thermal_roughness(z0_t, B, z0_m, Re, surface, surface_type);
+
+        // --- define relative height [thermal]
+        h0_t = h / z0_t;
+
+        // --- define Ri-bulk
+        Rib = (phys.g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
+
+        get_convection_lim(zeta_conv_lim, Rib_conv_lim, f_m_conv_lim, f_h_conv_lim, h0_m, h0_t, B, model);
+
+        // --- get the fluxes
+        // ----------------------------------------------------------------------------
+        if (Rib > 0.0)
+        {
+            // --- stable stratification block
+
+            // --- restrict bulk Ri value
+            // *: note that this value is written to output
+            // Rib = min(Rib, Rib_max)
+            get_zeta(zeta, Rib, h, z0_m, z0_t, model);
+            get_psi_stable(psi_m, psi_h, zeta, zeta, model);
+            get_psi_stable(psi0_m, psi0_h, zeta * z0_m / h, zeta * z0_t / h, model);
+
+            phi_m = 1.0 + (model.a_m * zeta * powf(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + model.b_m * zeta);
+            phi_h = 1.0 + (model.a_h * zeta + model.b_h * zeta * zeta) / (1.0 + model.c_h * zeta + zeta * zeta);
+
+            Udyn = model.kappa * U / (logf(h / z0_m) - (psi_m - psi0_m));
+            Tdyn = model.kappa * dT * model.Pr_t_0_inv / (logf(h / z0_t) - (psi_h - psi0_h));
+        }
+        else if (Rib < Rib_conv_lim) 
+        {    
+            // --- strong instability block
+
+            get_psi_convection(psi_m, psi_h, zeta, Rib,
+                zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, h0_m, h0_t, B, model, numerics.maxiters_convection);
+
+            fval = powf(zeta_conv_lim / zeta, 1.0/3.0);
+            phi_m = fval / f_m_conv_lim;
+            phi_h = fval / (model.Pr_t_0_inv * f_h_conv_lim);
+        }
+        else if (Rib > -0.001)
+        { 
+            // --- nearly neutral [-0.001, 0] block
+
+            get_psi_neutral(psi_m, psi_h, h0_m, h0_t, B, model);
+
+            zeta = 0.0;
+            phi_m = 1.0;
+            phi_h = 1.0 / model.Pr_t_0_inv;
+        }
+        else
+        {
+            // --- weak & semistrong instability block
+
+            get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model, numerics.maxiters_convection);
+
+            phi_m = powf(1.0 - model.alpha_m * zeta, -0.25);
+            phi_h = 1.0 / (model.Pr_t_0_inv * sqrtf(1.0 - model.alpha_h_fix * zeta));
+        } 
+        // ----------------------------------------------------------------------------
+
+        // --- define transfer coeff. (momentum) & (heat)
+        if(Rib > 0)
+        {    
+            Cm = 0.0;
+            if (U > 0.0)
+                Cm = Udyn / U;
+        
+            Ct = 0.0;
+            if (fabs(dT) > 0.0)
+                Ct = Tdyn / dT;
+        }
+        else
+        { 
+            Cm = model.kappa / psi_m;
+            Ct = model.kappa / psi_h;
+        }
+
+        // --- define eddy viscosity & inverse Prandtl number
+        Km = model.kappa * Cm * U * h / phi_m;
+        Pr_t_inv = phi_m / phi_h;
+
+        sfx.zeta[step]         = zeta;
+        sfx.Rib[step]          = Rib;
+        sfx.Re[step]           = Re;
+        sfx.B[step]            = B;
+        sfx.z0_m[step]         = z0_m;
+        sfx.z0_t[step]         = z0_t;
+        sfx.Rib_conv_lim[step] = Rib_conv_lim;
+        sfx.Cm[step]           = Cm;
+        sfx.Ct[step]           = Ct;
+        sfx.Km[step]           = Km;
+        sfx.Pr_t_inv[step]     = Pr_t_inv;
+    }
 
     if(MemType::CPU != memOut)
     {
@@ -120,16 +257,8 @@ void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux()
 }
 
 template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU>;
-template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::CPU>;
 
 #ifdef INCLUDE_CUDA
-    template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>;
-    template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>;
-    template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>;
-    template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>;
-    template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>;
-    template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>;
-    template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>;
 
     template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>;
     template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>;
diff --git a/srcF/sfx_sheba.f90 b/srcF/sfx_sheba.f90
index 4fd6142bb7ddd9a16e553d54e5b075a6e43d6f36..f6160dd1af53e6be98bf42204602cfba142579ff 100644
--- a/srcF/sfx_sheba.f90
+++ b/srcF/sfx_sheba.f90
@@ -41,8 +41,7 @@ module sfx_sheba
 #if defined(INCLUDE_CXX)
     type, BIND(C), public :: sfx_sheba_param_C
         real(C_FLOAT) :: kappa           
-        real(C_FLOAT) :: 
-        Pr_t_0_inv
+        real(C_FLOAT) :: Pr_t_0_inv
 
         real(C_FLOAT) :: alpha_m           
         real(C_FLOAT) :: alpha_h
diff --git a/srcF/sfx_sheba_noniterative.f90 b/srcF/sfx_sheba_noniterative.f90
index 178d89ef707274e185ebb6f601233e69581d0e93..72e1304271addd0b2658a65a6c93c58f2ff7d7d4 100644
--- a/srcF/sfx_sheba_noniterative.f90
+++ b/srcF/sfx_sheba_noniterative.f90
@@ -58,6 +58,7 @@ module sfx_sheba_noniterative
 
     type, BIND(C), public :: sfx_sheba_noit_numericsType_C 
         integer(C_INT) :: maxiters_charnock 
+        integer(C_INT) :: maxiters_convection
     end type
 
     INTERFACE
@@ -71,7 +72,7 @@ module sfx_sheba_noniterative
             type(C_PTR), value :: sfx
             type(C_PTR), value :: meteo
             type(sfx_sheba_noit_param_C) :: model_param
-            type(sfx_surface_noit_param) :: surface_param
+            type(sfx_surface_param) :: surface_param
             type(sfx_sheba_noit_numericsType_C) :: numerics
             type(sfx_phys_constants) :: constants
         END SUBROUTINE c_sheba_noit_compute_flux