diff --git a/CMakeLists.txt b/CMakeLists.txt
index df461be3016ff213e48b8ba55601d301bb048d0e..492fe28ef1824427466a0ac32323835754271a37 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,8 +32,6 @@ set(CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/modules)
 if(USE_CONFIG_PARSER)
     add_subdirectory(config-parser/)
     add_definitions(-DUSE_CONFIG_PARSER)
-    # list(APPEND RUN_MACRO -DUSE_CONFIG_PARSER)
-    set(USE_CXX ON)
 endif(USE_CONFIG_PARSER)
 
 if(INCLUDE_CUDA)
@@ -55,7 +53,6 @@ if(USE_CXX)
     enable_language(CXX)
     set(CMAKE_CXX_STANDARD 11)
     add_definitions(-DINCLUDE_CXX)
-    # list(APPEND RUN_MACRO -DINCLUDE_CXX)
 endif(USE_CXX)
 
 set(SOURCES_F 
@@ -90,34 +87,35 @@ if(USE_CXX)
     set(HEADERS_C
         includeC/sfx_data.h
     )
+    list(APPEND HEADERS_DIRS includeC/)
 
     set(SOURCES_CXX 
             srcCXX/model_base.cpp
             srcCXX/sfx_esm.cpp
             srcCXX/sfx_sheba.cpp
-            srcCXX/sfx_compute_sheba.cpp
             srcCXX/sfx_call_class_func.cpp
     )
     set(HEADERS_CXX 
             includeCU/sfx_surface.cuh
             includeCU/sfx_math.cuh
-            includeCU/sfx_esm_compute_subfunc.cuh
+            includeCU/sfx_model_compute_subfunc.cuh
 
             includeCXX/model_base.h
             includeCXX/sfx_esm.h
             includeCXX/sfx_sheba.h
-            includeCXX/sfx_compute_sheba.h
             includeCXX/sfx_call_class_func.h
         )
+        list(APPEND HEADERS_DIRS includeCU/)
+        list(APPEND HEADERS_DIRS includeCXX/)
 endif(USE_CXX)
 
 if(INCLUDE_CUDA)
     set(SOURCES_CU 
         srcCU/sfx_esm.cu
-        srcCU/sfx_compute_sheba.cu
+        srcCU/sfx_sheba.cu
     )
     set(HEADERS_CU
-        includeCU/sfx_compute_sheba.cuh
+    
     )
 endif(INCLUDE_CUDA)
 
@@ -142,23 +140,29 @@ endif(USE_CXX OR INCLUDE_CUDA)
 
 set(SOURCES ${MEMPROC_HEADERS_CU} ${MEMPROC_SOURCES_CU} ${MEMPROC_HEADERS_CXX} ${MEMPROC_SOURCES_CXX} ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_C} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${HEADERS_F} ${SOURCES_F})
 
-set(CMAKE_Fortran_FLAGS " -g -fbacktrace -ffpe-trap=zero,overflow,underflow -cpp ")
-
-if(USE_CXX OR INCLUDE_CUDA)
-    set(CMAKE_CXX_FLAGS  " -g -Wunused-variable ")
-    set(CMAKE_C_FLAGS " -g ")
-
-    set(CMAKE_CUDA_FLAGS " -g ")
-endif(USE_CXX OR INCLUDE_CUDA)
+set(CMAKE_Fortran_FLAGS " -cpp ")
 
 add_executable(sfx ${SOURCES})
 set_property(TARGET sfx PROPERTY LINKER_LANGUAGE Fortran)
 target_include_directories(sfx PUBLIC ${CMAKE_BINARY_DIR}/modules/)
+target_include_directories(sfx PUBLIC ${HEADERS_DIRS})
 
 if(USE_CONFIG_PARSER)
     target_link_libraries(sfx config_parser_F config_parser_CXX)
 endif(USE_CONFIG_PARSER)
 
+if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+    target_compile_options(sfx
+        PUBLIC $<$<COMPILE_LANGUAGE:C>: -g >)
+	target_compile_options(sfx
+        PUBLIC $<$<COMPILE_LANGUAGE:CXX>: -g >)
+    target_compile_options(sfx
+        PUBLIC $<$<COMPILE_LANGUAGE:CUDA>: -g >)
+    target_compile_options(sfx
+        PUBLIC $<$<COMPILE_LANGUAGE:Fortran>: -g -fbacktrace -ffpe-trap=zero,overflow,underflow >)
+endif()
+
+
 # copy data & configs on post build
 add_custom_command(
         TARGET sfx POST_BUILD
diff --git a/README.md b/README.md
index ae3d0d2f263628c2951d62ed68e08946b30ed933..1a25762ec162bd7c23e5d78c17436e973caf997a 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,34 @@ Option `COMPILER` enables set of compiler-specific options. Most common are `gnu
 ./sfx.exe
 ```
 
+## Building with CMake
+
+Currently there is an implementation of Cmake-based toolchain which allows to build the model quicker and with less hustle (minimum required cmake version is 3.19). Clone the project and load appropriate modules. Create a build directory outside of cloned projects:
+
+```bash
+mkdir build && cd build
+```
+
+The code supports fluxes computations on both CPU and GPU and the CPU implementation has two versions: Fortran and C++ implementation. 
+
+1. Building the Fortran CPU version:
+```bash
+cmake ..
+```
+
+2. Building the C++ CPU version:
+```bash
+cmake -DUSE_CXX=ON ..
+```
+
+3. Building the C++ GPU version:
+```bash
+cmake -DINCLUDE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=<N> ..
+```
+where `N` is the architecture to generate device code for.
+
+
+
 ## Running test cases
 We have prepared several dataseta to make the test calculations. Data is prepared on the basis of measurements.
 
diff --git a/includeC/sfx_data.h b/includeC/sfx_data.h
index d1bf113c04df7116ac93176c39702f86037fc048..dba42c80adea9bbcfc6fe5292abf65f153396e01 100644
--- a/includeC/sfx_data.h
+++ b/includeC/sfx_data.h
@@ -60,6 +60,7 @@ extern "C" {
         int surface_land;
         int surface_lake;
 
+        float kappa;
         float gamma_c;
         float Re_visc_min;
         float h_charnock;
diff --git a/includeCU/sfx_compute_sheba.cuh b/includeCU/sfx_compute_sheba.cuh
deleted file mode 100644
index acd67d6ec11ad91aeef3f7291e4c283a2c9e9a2d..0000000000000000000000000000000000000000
--- a/includeCU/sfx_compute_sheba.cuh
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once 
-
-template<typename T>
-void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h, 
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough,
-    const T B_max_land, const T B_max_ocean, const T B_max_lake,
-    const T gamma_c, const T Re_visc_min,
-    const T Pr_m, const T nu_air, const T g, 
-    const int maxiters_charnock,
-    const int grid_size);
\ No newline at end of file
diff --git a/includeCU/sfx_esm_compute_subfunc.cuh b/includeCU/sfx_esm_compute_subfunc.cuh
deleted file mode 100644
index 46812fa9320532355569ec82040a6be9f3a900f5..0000000000000000000000000000000000000000
--- a/includeCU/sfx_esm_compute_subfunc.cuh
+++ /dev/null
@@ -1,144 +0,0 @@
-#pragma once 
-#include "../includeC/sfx_data.h"
-#include "../includeCU/sfx_math.cuh"
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim,
-    const T h0_m, const T h0_t, const T B,
-    const sfx_esm_param& param)
-{
-    T psi_m, psi_h, f_m, f_h, c;
-
-    c = pow(param.Pr_t_inf_inv / param.Pr_t_0_inv, 4);
-    zeta_lim = (2.0 * param.alpha_h - c * param.alpha_m - 
-                sqrt( (c * param.alpha_m)*(c * param.alpha_m) + 4.0 * c * param.alpha_h * (param.alpha_h - param.alpha_m))) / (2.0 * param.alpha_h*param.alpha_h);
-
-    f_m_lim = pow(1.0 - param.alpha_m * zeta_lim, 0.25);
-    f_h_lim = sqrt(1.0 - param.alpha_h * zeta_lim);
-
-    f_m = zeta_lim / h0_m;
-    f_h = zeta_lim / h0_t;
-    if (fabs(B) < 1.0e-10) f_h = f_m;
-
-    f_m = pow(1.0 - param.alpha_m * f_m, 0.25);
-    f_h = sqrt(1.0 - param.alpha_h_fix * f_h);
-
-    psi_m = 2.0 * (atan(f_m_lim) - atan(f_m)) + log((f_m_lim - 1.0) * (f_m + 1.0)/((f_m_lim + 1.0) * (f_m - 1.0)));
-    psi_h = log((f_h_lim - 1.0) * (f_h + 1.0)/((f_h_lim + 1.0) * (f_h - 1.0))) / param.Pr_t_0_inv;
-
-    Rib_lim = zeta_lim * psi_h / (psi_m * psi_m);
-}
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
-    const T Rib, const T h0_m, const T h0_t, const T B,
-    const sfx_esm_param& param)
-{
-    T Rib_coeff, psi0_m, psi0_h, phi, c;
-    
-    psi0_m = log(h0_m);
-    psi0_h = B / psi0_m;
-
-    Rib_coeff = param.beta_m * Rib;
-    c = (psi0_h + 1.0) / param.Pr_t_0_inv - 2.0 * Rib_coeff;
-    zeta = psi0_m * (sqrt(c*c + 4.0 * Rib_coeff * (1.0 - Rib_coeff)) - c) / (2.0 * param.beta_m * (1.0 - Rib_coeff));
-
-    phi = param.beta_m * zeta;
-
-    psi_m = psi0_m + phi;
-    psi_h = (psi0_m + B) / param.Pr_t_0_inv + phi;
-}
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &zeta, 
-    const T Rib, const T h0_m, const T h0_t, const T B, 
-    const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim,
-    const sfx_esm_param& param,
-    const sfx_esm_numericsTypeC& numerics)
-{
-    T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f;
-
-    p_m = 2.0 * atan(f_m_conv_lim) + log((f_m_conv_lim - 1.0) / (f_m_conv_lim + 1.0));
-    p_h = log((f_h_conv_lim - 1.0) / (f_h_conv_lim + 1.0));
-
-    zeta = zeta_conv_lim;
-
-    for (int i = 1; i <= numerics.maxiters_convection + 1; i++)
-    {
-        zeta0_m = zeta / h0_m;
-        zeta0_h = zeta / h0_t;
-        if (fabs(B) < 1.0e-10) 
-            zeta0_h = zeta0_m;
-
-        f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25);
-        f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h);
-
-        a_m = -2.0*atan(f0_m) + log((f0_m + 1.0)/(f0_m - 1.0));
-        a_h = log((f0_h + 1.0)/(f0_h - 1.0));
-
-        c_lim = pow(zeta_conv_lim / zeta, 1.0 / 3.0);
-        f = 3.0 * (1.0 - c_lim);
-
-        psi_m = f / f_m_conv_lim + p_m + a_m;
-        psi_h = (f / f_h_conv_lim + p_h + a_h) / param.Pr_t_0_inv;
-
-        if (i == numerics.maxiters_convection + 1) 
-            break;
-
-        zeta = Rib * psi_m * psi_m / psi_h;
-    }
-}
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,   
-    const T h0_m, const T h0_t, const T B,
-    const sfx_esm_param& param)
-{
-    zeta = 0.0;
-    psi_m = log(h0_m);
-    psi_h = log(h0_t) / param.Pr_t_0_inv;
-    if (fabs(B) < 1.0e-10) 
-        psi_h = psi_m / param.Pr_t_0_inv;
-}
-
-template<typename T>
-FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h, T &zeta,
-    const T Rib, const T h0_m, const T h0_t, const T B, 
-    const sfx_esm_param& param,
-    const sfx_esm_numericsTypeC& numerics)
-{
-    T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h;
-
-    psi_m = log(h0_m);
-    psi_h = log(h0_t);
-
-    if (fabs(B) < 1.0e-10) 
-        psi_h = psi_m;
-
-    zeta = Rib * param.Pr_t_0_inv * psi_m * psi_m / psi_h;
-
-    for (int i = 1; i <= numerics.maxiters_convection + 1; i++)
-    {
-        zeta0_m = zeta / h0_m;
-        zeta0_h = zeta / h0_t;
-        if (fabs(B) < 1.0e-10) 
-            zeta0_h = zeta0_m;
-
-        f_m = pow(1.0 - param.alpha_m * zeta, 0.25e0);
-        f_h = sqrt(1.0 - param.alpha_h_fix * zeta);
-
-        f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25e0);
-        f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h);
-
-        f0_m = sfx_math::max(f0_m, T(1.000001e0));
-        f0_h = sfx_math::max(f0_h, T(1.000001e0));
-
-        psi_m = log((f_m - 1.0e0)*(f0_m + 1.0e0)/((f_m + 1.0e0)*(f0_m - 1.0e0))) + 2.0e0*(atan(f_m) - atan(f0_m));
-        psi_h = log((f_h - 1.0e0)*(f0_h + 1.0e0)/((f_h + 1.0e0)*(f0_h - 1.0e0))) / param.Pr_t_0_inv;
-
-        if (i == numerics.maxiters_convection + 1) 
-            break;
-
-        zeta = Rib * psi_m * psi_m / psi_h;
-    }
-}
\ No newline at end of file
diff --git a/includeCU/sfx_memory_processing.cuh b/includeCU/sfx_memory_processing.cuh
index c68c858db0764633b37d9cfce0ee0f92bd1d1d69..2e68b1402dcfd009ef694a22730946e0728d4729 100644
--- a/includeCU/sfx_memory_processing.cuh
+++ b/includeCU/sfx_memory_processing.cuh
@@ -1,5 +1,5 @@
 #pragma once
-#include "../includeCXX/sfx_template_parameters.h"
+#include "sfx_template_parameters.h"
 #include <cstddef>
 
 namespace memproc
diff --git a/includeCU/sfx_model_compute_subfunc.cuh b/includeCU/sfx_model_compute_subfunc.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..ecc53f20a1889bfe7024ce652b070c610090cfd0
--- /dev/null
+++ b/includeCU/sfx_model_compute_subfunc.cuh
@@ -0,0 +1,272 @@
+#pragma once 
+#include "sfx_data.h"
+#include "sfx_math.cuh"
+
+template<typename T, class sfx_param>
+FUCNTION_DECLARATION_SPECIFIER void get_convection_lim(T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim,
+    const T h0_m, const T h0_t, const T B,
+    const sfx_param& param)
+{
+    T psi_m, psi_h, f_m, f_h, c;
+
+    c = pow(param.Pr_t_inf_inv / param.Pr_t_0_inv, 4);
+    zeta_lim = (2.0 * param.alpha_h - c * param.alpha_m - 
+                sqrt( (c * param.alpha_m)*(c * param.alpha_m) + 4.0 * c * param.alpha_h * (param.alpha_h - param.alpha_m))) / (2.0 * param.alpha_h*param.alpha_h);
+
+    f_m_lim = pow(1.0 - param.alpha_m * zeta_lim, 0.25);
+    f_h_lim = sqrt(1.0 - param.alpha_h * zeta_lim);
+
+    f_m = zeta_lim / h0_m;
+    f_h = zeta_lim / h0_t;
+    if (fabs(B) < 1.0e-10) f_h = f_m;
+
+    f_m = pow(1.0 - param.alpha_m * f_m, 0.25);
+    f_h = sqrt(1.0 - param.alpha_h_fix * f_h);
+
+    psi_m = 2.0 * (atan(f_m_lim) - atan(f_m)) + log((f_m_lim - 1.0) * (f_m + 1.0)/((f_m_lim + 1.0) * (f_m - 1.0)));
+    psi_h = log((f_h_lim - 1.0) * (f_h + 1.0)/((f_h_lim + 1.0) * (f_h - 1.0))) / param.Pr_t_0_inv;
+
+    Rib_lim = zeta_lim * psi_h / (psi_m * psi_m);
+}
+
+template<typename T, class sfx_param>
+FUCNTION_DECLARATION_SPECIFIER void get_psi_stable(T &psi_m, T &psi_h, T &zeta,
+    const T Rib, const T h0_m, const T h0_t, const T B,
+    const sfx_param& param)
+{
+    T Rib_coeff, psi0_m, psi0_h, phi, c;
+    
+    psi0_m = log(h0_m);
+    psi0_h = B / psi0_m;
+
+    Rib_coeff = param.beta_m * Rib;
+    c = (psi0_h + 1.0) / param.Pr_t_0_inv - 2.0 * Rib_coeff;
+    zeta = psi0_m * (sqrt(c*c + 4.0 * Rib_coeff * (1.0 - Rib_coeff)) - c) / (2.0 * param.beta_m * (1.0 - Rib_coeff));
+
+    phi = param.beta_m * zeta;
+
+    psi_m = psi0_m + phi;
+    psi_h = (psi0_m + B) / param.Pr_t_0_inv + phi;
+}
+
+template<typename T, class sfx_param>
+FUCNTION_DECLARATION_SPECIFIER void get_psi_convection(T &psi_m, T &psi_h, T &zeta, 
+    const T Rib, const T h0_m, const T h0_t, const T B, 
+    const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim,
+    const sfx_param& param,
+    const int maxiters_convection)
+{
+    T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f;
+
+    p_m = 2.0 * atan(f_m_conv_lim) + log((f_m_conv_lim - 1.0) / (f_m_conv_lim + 1.0));
+    p_h = log((f_h_conv_lim - 1.0) / (f_h_conv_lim + 1.0));
+
+    zeta = zeta_conv_lim;
+
+    for (int i = 1; i <= maxiters_convection + 1; i++)
+    {
+        zeta0_m = zeta / h0_m;
+        zeta0_h = zeta / h0_t;
+        if (fabs(B) < 1.0e-10) 
+            zeta0_h = zeta0_m;
+
+        f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25);
+        f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h);
+
+        a_m = -2.0*atan(f0_m) + log((f0_m + 1.0)/(f0_m - 1.0));
+        a_h = log((f0_h + 1.0)/(f0_h - 1.0));
+
+        c_lim = pow(zeta_conv_lim / zeta, 1.0 / 3.0);
+        f = 3.0 * (1.0 - c_lim);
+
+        psi_m = f / f_m_conv_lim + p_m + a_m;
+        psi_h = (f / f_h_conv_lim + p_h + a_h) / param.Pr_t_0_inv;
+
+        if (i == maxiters_convection + 1) 
+            break;
+
+        zeta = Rib * psi_m * psi_m / psi_h;
+    }
+}
+
+template<typename T, class sfx_param>
+FUCNTION_DECLARATION_SPECIFIER void get_psi_neutral(T &psi_m, T &psi_h, T &zeta,   
+    const T h0_m, const T h0_t, const T B,
+    const sfx_param& param)
+{
+    zeta = 0.0;
+    psi_m = log(h0_m);
+    psi_h = log(h0_t) / param.Pr_t_0_inv;
+    if (fabs(B) < 1.0e-10) 
+        psi_h = psi_m / param.Pr_t_0_inv;
+}
+
+template<typename T, class sfx_param>
+FUCNTION_DECLARATION_SPECIFIER void get_psi_semi_convection(T &psi_m, T &psi_h, T &zeta,
+    const T Rib, const T h0_m, const T h0_t, const T B, 
+    const sfx_param& param,
+    const int maxiters_convection)
+{
+    T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h;
+
+    psi_m = log(h0_m);
+    psi_h = log(h0_t);
+
+    if (fabs(B) < 1.0e-10) 
+        psi_h = psi_m;
+
+    zeta = Rib * param.Pr_t_0_inv * psi_m * psi_m / psi_h;
+
+    for (int i = 1; i <= maxiters_convection + 1; i++)
+    {
+        zeta0_m = zeta / h0_m;
+        zeta0_h = zeta / h0_t;
+        if (fabs(B) < 1.0e-10) 
+            zeta0_h = zeta0_m;
+
+        f_m = pow(1.0 - param.alpha_m * zeta, 0.25e0);
+        f_h = sqrt(1.0 - param.alpha_h_fix * zeta);
+
+        f0_m = pow(1.0 - param.alpha_m * zeta0_m, 0.25e0);
+        f0_h = sqrt(1.0 - param.alpha_h_fix * zeta0_h);
+
+        f0_m = sfx_math::max(f0_m, T(1.000001e0));
+        f0_h = sfx_math::max(f0_h, T(1.000001e0));
+
+        psi_m = log((f_m - 1.0e0)*(f0_m + 1.0e0)/((f_m + 1.0e0)*(f0_m - 1.0e0))) + 2.0e0*(atan(f_m) - atan(f0_m));
+        psi_h = log((f_h - 1.0e0)*(f0_h + 1.0e0)/((f_h + 1.0e0)*(f0_h - 1.0e0))) / param.Pr_t_0_inv;
+
+        if (i == maxiters_convection + 1) 
+            break;
+
+        zeta = Rib * psi_m * psi_m / psi_h;
+    }
+}
+
+template<typename T, class sfx_param>
+FUCNTION_DECLARATION_SPECIFIER void get_psi(T &psi_m, T &psi_h,
+    const T zeta,
+    const sfx_param& param)
+{
+    T x_m, x_h;
+    T q_m, q_h;
+
+    if (zeta >= 0.0) 
+    {
+        q_m = pow((1.0 - param.b_m) / param.b_m, 1.0 / 3.0);
+        q_h = sqrt(param.c_h * param.c_h - 4.0);
+
+        x_m = pow(1.0 + zeta, 1.0 / 3.0);
+        x_h = zeta;
+
+        psi_m = -3.0 * (param.a_m / param.b_m) * (x_m - 1.0) + 0.5 * (param.a_m / param.b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
+
+        psi_h = -0.5 * param.b_h * log(1.0 + param.c_h * x_h + x_h * x_h) + ((-param.a_h / q_h) + ((param.b_h * param.c_h) / (2.0 * q_h))) * (log((2.0 * x_h + param.c_h - q_h) / (2.0 * x_h + param.c_h + q_h)) - log((param.c_h - q_h) / (param.c_h + q_h)));
+    }
+    else
+    {
+        x_m = pow(1.0 - param.alpha_m * zeta, 0.25);
+        x_h = pow(1.0 - param.alpha_h * zeta, 0.25); 
+
+        psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
+        psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
+    }
+}
+
+template<typename T, class sfx_param>
+FUCNTION_DECLARATION_SPECIFIER void get_psi_mh(T &psi_m, T &psi_h,
+    const T zeta_m, const T zeta_h,
+    const sfx_param& param)
+{
+    T x_m, x_h;
+    T q_m, q_h;
+
+    if (zeta_m >= 0.0) 
+    {
+        q_m = pow((1.0 - param.b_m) / param.b_m, 1.0 / 3.0);
+        x_m = pow(1.0 + zeta_m, 1.0 / 3.0);
+
+        psi_m = -3.0 * (param.a_m / param.b_m) * (x_m - 1.0) + 0.5 * (param.a_m / param.b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
+    }                                
+    else
+    {    
+        x_m = pow(1.0 - param.alpha_m * zeta_m, 0.25);
+        psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
+    }
+
+    if (zeta_h >= 0.0)
+    {    
+        q_h = sqrt(param.c_h * param.c_h - 4.0);
+        x_h = zeta_h;
+
+        psi_h = -0.5 * param.b_h * log(1.0 + param.c_h * x_h + x_h * x_h) + ((-param.a_h / q_h) + ((param.b_h * param.c_h) / (2.0 * q_h))) * (log((2.0 * x_h + param.c_h - q_h) / (2.0 * x_h + param.c_h + q_h)) - log((param.c_h - q_h) / (param.c_h + q_h)));
+    }
+    else
+    {
+        x_h = pow(1.0 - param.alpha_h * zeta_h, 0.25);
+        psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
+    }
+}
+
+
+template<typename T, class sfx_param>
+FUCNTION_DECLARATION_SPECIFIER void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T& zeta,
+    const T U, const T Tsemi, const T dT, const T dQ, 
+    const T z, const T z0_m, const T z0_t,
+    const T beta,
+    const sfx_param& param,
+    const int maxiters)
+{
+    const T gamma = T(0.61);
+    T psi_m, psi_h, psi0_m, psi0_h, Linv;
+
+    Udyn = param.kappa * U / log(z / z0_m);
+    Tdyn = param.kappa * dT * param.Pr_t_0_inv / log(z / z0_t);
+    Qdyn = param.kappa * dQ * param.Pr_t_0_inv / log(z / z0_t);
+    zeta = 0.0;
+
+    // --- no wind
+    if (Udyn < 1e-5) 
+        return;
+
+    Linv = param.kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
+    zeta = z * Linv;
+
+    // --- near neutral case
+    if (Linv < 1e-5) 
+        return;
+
+    for (int i = 0; i < maxiters; i++)
+    {
+        get_psi(psi_m, psi_h, zeta, param);
+        get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv, 
+            param);
+
+        Udyn = param.kappa * U / (log(z / z0_m) - (psi_m - psi0_m));
+        Tdyn = param.kappa * dT * param.Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
+        Qdyn = param.kappa * dQ * param.Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
+
+        if (Udyn < 1e-5) 
+            break;
+
+        Linv = param.kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
+        zeta = z * Linv;
+    }
+}
+
+template<typename T, class sfx_param>
+FUCNTION_DECLARATION_SPECIFIER void get_phi(T &phi_m, T &phi_h,
+    const T zeta, 
+    const sfx_param& param)
+{
+    if (zeta >= 0.0) 
+    {
+        phi_m = 1.0 + (param.a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + param.b_m * zeta);
+        phi_h = 1.0 + (param.a_h * zeta + param.b_h * zeta * zeta) / (1.0 + param.c_h * zeta + zeta * zeta);
+    }
+    else
+    {
+        phi_m = pow(1.0 - param.alpha_m * zeta, -0.25);
+        phi_h = pow(1.0 - param.alpha_h * zeta, -0.5);
+    }
+}
diff --git a/includeCU/sfx_surface.cuh b/includeCU/sfx_surface.cuh
index 2344f41c42c0c6de0b8805863a1972833b59f954..c2d6ebe54abd4d45fafbeddcea558c3800f0e236 100644
--- a/includeCU/sfx_surface.cuh
+++ b/includeCU/sfx_surface.cuh
@@ -1,6 +1,6 @@
 #pragma once
-#include "../includeC/sfx_data.h"
-#include "../includeCU/sfx_math.cuh"
+#include "sfx_data.h"
+#include "sfx_math.cuh"
 
 template<typename T>
 FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
@@ -14,8 +14,8 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
 
     //   --- apply max restriction based on surface type
     if (surface_type == param.surface_ocean)  B = sfx_math::min(B, param.B_max_ocean);
-    if (surface_type == param.surface_lake)   B = sfx_math::min(B, param.B_max_land);
-    if (surface_type == param.surface_land)   B = sfx_math::min(B, param.B_max_lake);
+    if (surface_type == param.surface_lake)   B = sfx_math::min(B, param.B_max_lake);
+    if (surface_type == param.surface_land)   B = sfx_math::min(B, param.B_max_land);
 
     // --- define roughness [thermal]
     z0_t = z0_m / exp(B);
@@ -23,30 +23,29 @@ FUCNTION_DECLARATION_SPECIFIER void get_thermal_roughness(T &z0_t, T &B,
 
 template<typename T>
 FUCNTION_DECLARATION_SPECIFIER void get_charnock_roughness(T &z0_m, T &u_dyn0,
-    const T h, const T U,
-    const sfx_esm_param& model_param,
+    const T U, const T h,
     const sfx_surface_param& surface_param,
-    const sfx_esm_numericsTypeC& numerics)
+    const int maxiters)
 {
     T Uc, a, b, c, c_min, f;
 
     Uc = U;
     a = 0.0;
     b = 25.0;
-    c_min = log(surface_param.h_charnock) / model_param.kappa;
+    c_min = log(surface_param.h_charnock) / surface_param.kappa;
 
-    for (int i = 0; i < numerics.maxiters_charnock; i++)
+    for (int i = 0; i < maxiters; i++)
     {
         f = surface_param.c1_charnock - 2.0 * log(Uc);
-        for (int j = 0; j < numerics.maxiters_charnock; j++)
+        for (int j = 0; j < maxiters; j++)
         {
-            c = (f + 2.0 * log(b)) / model_param.kappa;
+            c = (f + 2.0 * log(b)) / surface_param.kappa;
             if (U <= 8.0e0) 
-                a = log(1.0 + surface_param.c2_charnock * ( pow(b / Uc, 3) ) ) / model_param.kappa;
+                a = log(1.0 + surface_param.c2_charnock * ( pow(b / Uc, 3) ) ) / surface_param.kappa;
             c = sfx_math::max(c - a, c_min);
             b = c;
         }
-        z0_m = surface_param.h_charnock * exp(-c * model_param.kappa);
+        z0_m = surface_param.h_charnock * exp(-c * surface_param.kappa);
         z0_m = sfx_math::max(z0_m, T(0.000015e0));
         Uc = U * log(surface_param.h_charnock / z0_m) / log(h / z0_m);
     }
diff --git a/includeCXX/model_base.h b/includeCXX/model_base.h
index f51c7e7b4aa08d2864626f847a1008835a6f8165..2a3bb893d0d9eef0bcb2f5bb10fa2a2f920e4f52 100644
--- a/includeCXX/model_base.h
+++ b/includeCXX/model_base.h
@@ -1,7 +1,7 @@
 #pragma once
 #include <cstddef>
 #include "sfx_template_parameters.h"
-#include "../includeC/sfx_data.h"
+#include "sfx_data.h"
 
 template<typename T, MemType memIn, MemType memOut, MemType RunMem >
 class ModelBase
diff --git a/includeCXX/sfx_call_class_func.h b/includeCXX/sfx_call_class_func.h
index 0d79140b7adb6b27f81f4e03a655fcb976f5afed..8cdb0a77e3e57a1e9f64fccb2dcb1e67a71cb850 100644
--- a/includeCXX/sfx_call_class_func.h
+++ b/includeCXX/sfx_call_class_func.h
@@ -1,5 +1,5 @@
 #pragma once
-#include "../includeC/sfx_data.h"
+#include "sfx_data.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/includeCXX/sfx_compute_sheba.h b/includeCXX/sfx_compute_sheba.h
deleted file mode 100644
index d13bf591f340e95fe5257c03b6796eebb10387fc..0000000000000000000000000000000000000000
--- a/includeCXX/sfx_compute_sheba.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#pragma once
-
-template<typename T>
-void compute_flux_sheba_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
-    const T kappa, const T Pr_t_0_inv, 
-    const T alpha_m, const T alpha_h, 
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough,
-    const T B_max_land, const T B_max_ocean, const T B_max_lake,
-    const T gamma_c, const T Re_visc_min,
-    const T Pr_m, const T nu_air, const T g, 
-    const int maxiters_charnock,
-    const int grid_size);
\ No newline at end of file
diff --git a/includeCXX/sfx_esm.h b/includeCXX/sfx_esm.h
index c1cc836a5c721f349b722a73002ba3128ceae292..d4040389d69569e6ba7799606906abe7f13c8db8 100644
--- a/includeCXX/sfx_esm.h
+++ b/includeCXX/sfx_esm.h
@@ -1,7 +1,7 @@
 #pragma once
 #include "sfx_template_parameters.h"
-#include "../includeCXX/model_base.h"
-#include "../includeC/sfx_data.h"
+#include "model_base.h"
+#include "sfx_data.h"
 
 template<typename T, MemType memIn, MemType memOut, MemType RunMem >
 class FluxEsmBase : public ModelBase<T, memIn, memOut, RunMem>
diff --git a/includeCXX/sfx_sheba.h b/includeCXX/sfx_sheba.h
index 676980f1fe273a040481b2a3dc414ab84c7daba8..33c93aa76be928524577774d0831e41b83546c76 100644
--- a/includeCXX/sfx_sheba.h
+++ b/includeCXX/sfx_sheba.h
@@ -1,48 +1,88 @@
 #pragma once
 #include "sfx_template_parameters.h"
-#include <cstddef>
+#include "model_base.h"
+#include "sfx_data.h"
 
 template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-class FluxSheba
+class FluxShebaBase : public ModelBase<T, memIn, memOut, RunMem>
 {
-private:
-    T *U, *dT, *Tsemi, *dQ, *h, *in_z0_m;
-    T *zeta, *Rib, *Re, *B, *z0_m, *z0_t, *Rib_conv_lim, *Cm, *Ct, *Km, *Pr_t_inv;
+public:
+    using ModelBase<T, memIn, memOut, RunMem>::res_sfx;
+    using ModelBase<T, memIn, memOut, RunMem>::sfx;
+    using ModelBase<T, memIn, memOut, RunMem>::meteo;
+    using ModelBase<T, memIn, memOut, RunMem>::grid_size;
+    using ModelBase<T, memIn, memOut, RunMem>::ifAllocated;
+    using ModelBase<T, memIn, memOut, RunMem>::allocated_size;
 
-    T kappa, Pr_t_0_inv, 
-    alpha_m, alpha_h,
-    a_m, a_h, 
-    b_m, b_h,
-    c_h,
-    Re_rough_min, 
-    B1_rough, B2_rough, 
-    B_max_land, B_max_ocean, B_max_lake,
-    gamma_c,  
-    Re_visc_min,
-    Pr_m, nu_air, g;
+    sfx_surface_param surface_param;
+    sfx_phys_constants phys_constants;
+    sfx_sheba_param model_param;
+    sfx_sheba_numericsTypeC numerics;
 
-    int grid_size;
-    bool ifAllocated;
-    size_t allocated_size;
-public:
-    FluxSheba();
-    void set_params(const int grid_size, const T kappa, const T Pr_t_0_inv, 
-    const T alpha_m, const T alpha_h, 
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h,
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough,
-    const T B_max_land, const T B_max_ocean, const T B_max_lake,
-    const T gamma_c, const T Re_visc_min,
-    const T Pr_m, const T nu_air, const T g);
-    ~FluxSheba();
+    FluxShebaBase(sfxDataVecTypeC* sfx,
+                meteoDataVecTypeC* meteo,
+                const sfx_sheba_param model_param, 
+                const sfx_surface_param surface_param,
+                const sfx_sheba_numericsTypeC numerics,
+                const sfx_phys_constants phys_constants,
+                const int grid_size);
+    ~FluxShebaBase();
+};
+
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+class FluxSheba : public FluxShebaBase<T, memIn, memOut, RunMem>
+{};
 
-    void compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_, 
-    const int maxiters_charnock);
+template<typename T, MemType memIn, MemType memOut >
+class FluxSheba<T, memIn, memOut, MemType::CPU> : public FluxShebaBase<T, memIn, memOut, MemType::CPU>
+{
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::res_sfx;
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::sfx;
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::meteo;
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::surface_param;
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::phys_constants;
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::grid_size;
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::ifAllocated;
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::allocated_size;
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::model_param;
+    using FluxShebaBase<T, memIn, memOut, MemType::CPU>::numerics;
+public:
+    FluxSheba(sfxDataVecTypeC* sfx,
+                meteoDataVecTypeC* meteo,
+                const sfx_sheba_param model_param, 
+                const sfx_surface_param surface_param,
+                const sfx_sheba_numericsTypeC numerics,
+                const sfx_phys_constants phys_constants,
+                const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::CPU>(sfx, meteo, model_param, 
+                                       surface_param, numerics, phys_constants, grid_size) {}
+    ~FluxSheba() = default;
+    void compute_flux();
+};
 
-private:
-    void set_data( T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_);
-};
\ No newline at end of file
+#ifdef INCLUDE_CUDA
+template<typename T, MemType memIn, MemType memOut >
+class FluxSheba<T, memIn, memOut, MemType::GPU> : public FluxShebaBase<T, memIn, memOut, MemType::GPU>
+{
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::res_sfx;
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::sfx;
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::meteo;
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::surface_param;
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::phys_constants;
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::grid_size;
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::ifAllocated;
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::allocated_size;
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::model_param;
+    using FluxShebaBase<T, memIn, memOut, MemType::GPU>::numerics;
+public:
+    FluxSheba(sfxDataVecTypeC* sfx,
+                meteoDataVecTypeC* meteo,
+                const sfx_sheba_param model_param, 
+                const sfx_surface_param surface_param,
+                const sfx_sheba_numericsTypeC numerics,
+                const sfx_phys_constants phys_constants,
+                const int grid_size) : FluxShebaBase<T, memIn, memOut, MemType::GPU>(sfx, meteo, model_param, 
+                                       surface_param, numerics, phys_constants, grid_size) {}
+    ~FluxSheba() = default;
+    void compute_flux();
+};
+#endif
\ No newline at end of file
diff --git a/srcC/sfx_call_cxx.c b/srcC/sfx_call_cxx.c
index 506e4646936355e7512ff74b3f76ecc36dfb064b..208a8ed67505caacbadf5cc8794d5ebb12dfc65a 100644
--- a/srcC/sfx_call_cxx.c
+++ b/srcC/sfx_call_cxx.c
@@ -1,6 +1,6 @@
 #include <stdlib.h>
 #include <stdio.h>
-#include "../includeCXX/sfx_call_class_func.h"
+#include "sfx_call_class_func.h"
 // #include "../includeC/sfx_call_cxx.h"
 // -------------------------------------------------------------------------- //
 
diff --git a/srcCU/sfx_compute_sheba.cu b/srcCU/sfx_compute_sheba.cu
deleted file mode 100644
index 56a93bb49c28ac039c681e6ad71ffc1aa6f33f87..0000000000000000000000000000000000000000
--- a/srcCU/sfx_compute_sheba.cu
+++ /dev/null
@@ -1,486 +0,0 @@
-#include <iostream>
-#include "../includeCU/sfx_compute_sheba.cuh"
-#include "../includeCU/sfx_surface.cuh"
-
-#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
-inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
-{
-   if (code != cudaSuccess) 
-   {
-      fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
-      if (abort) exit(code);
-   }
-}
-
-template<typename T>
-__device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
-    const T h, const T U,
-    const T kappa, 
-    const T h_charnock, const T c1_charnock, const T c2_charnock, 
-    const int maxiters)
-{
-    T Uc, a, b, c, c_min, f;
-
-    Uc = U;
-    a = 0.0;
-    b = 25.0;
-    c_min = log(h_charnock) / kappa;
-
-    for (int i = 0; i < maxiters; i++)
-    {
-        f = c1_charnock - 2.0 * log(Uc);
-        for (int j = 0; j < maxiters; j++)
-        {
-            c = (f + 2.0 * log(b)) / kappa;
-            if (U <= 8.0e0) 
-                a = log(1.0 + c2_charnock * ( pow(b / Uc, 3) ) ) / kappa;
-            c = max(c - a, c_min);
-            b = c;
-        }
-        z0_m = h_charnock * exp(-c * kappa);
-        z0_m = max(z0_m, T(0.000015e0));
-        Uc = U * log(h_charnock / z0_m) / log(h / z0_m);
-    }
-    
-    u_dyn0 = Uc / c;
-}
-
-template __device__ void get_charnock_roughness(float &z0_m, float &u_dyn0,
-    const float h, const float U,
-    const float kappa, 
-    const float h_charnock, const float c1_charnock, const float c2_charnock, 
-    const int maxiters);
-template __device__ void get_charnock_roughness(double &z0_m, double &u_dyn0, 
-    const double h, const double U,
-    const double kappa, 
-    const double h_charnock, const double c1_charnock, const double c2_charnock,
-    const int maxiters);
-
-template<typename T>
-__device__ void get_thermal_roughness(T &z0_t, T &B,
-    const T z0_m, const T Re, 
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough, const T B3_rough, const T B4_rough, 
-    const T B_max_ocean, const T B_max_lake, const T B_max_land,
-    const int surface_type)
-{
-    // --- define B = log(z0_m / z0_t)
-    if (Re <= Re_rough_min) 
-        B = B1_rough * log(B3_rough * Re) + B2_rough;
-    else
-        // *: B4 takes into account Re value at z' ~ O(10) z0
-        B = B4_rough * (pow(Re, B2_rough));
-
-    //   --- apply max restriction based on surface type
-    if (surface_type == 0) 
-        B = min(B, B_max_ocean);
-    else if (surface_type == 2) 
-        B = min(B, B_max_lake);
-    else if (surface_type == 1)
-        B = min(B, B_max_land);
-
-    // --- define roughness [thermal]
-    z0_t = z0_m / exp(B);
-}
-
-template __device__ void get_thermal_roughness(float &z0_t, float &B,
-    const float z0_m, const float Re, 
-    const float Re_rough_min, 
-    const float B1_rough, const float B2_rough, const float B3_rough, const float B4_rough, 
-    const float B_max_ocean, const float B_max_lake, const float B_max_land,
-    const int surface_type);
-template __device__ void get_thermal_roughness(double &z0_t, double &B,
-    const double z0_m, const double Re, 
-    const double Re_rough_min, 
-    const double B1_rough, const double B2_rough, const double B3_rough, const double B4_rough, 
-    const double B_max_ocean, const double B_max_lake, const double B_max_land,
-    const int surface_type);
-
-template<typename T>
-__device__ void get_psi_mh(T &psi_m, T &psi_h,
-    const T zeta_m, const T zeta_h,
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h)
-{
-    T x_m, x_h;
-    T q_m, q_h;
-
-    if (zeta_m >= 0.0) 
-    {
-        q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
-        x_m = pow(1.0 + zeta_m, 1.0 / 3.0);
-
-        psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
-    }                                
-    else
-    {    x_m = pow(1.0 - alpha_m * zeta_m, 0.25);
-        psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
-    }
-
-    if (zeta_h >= 0.0)
-    {    
-        q_h = sqrt(c_h * c_h - 4.0);
-        x_h = zeta_h;
-
-        psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
-    }
-    else
-    {
-        x_h = pow(1.0 - alpha_h * zeta_h, 0.25);
-        psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
-    }
-}
-
-template __device__ void get_psi_mh(float &psi_m, float &psi_h,
-    const float zeta_m, const float zeta_h,
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h);
-template __device__ void get_psi_mh(double &psi_m, double &psi_h,
-    const double zeta_m, const double zeta_h,
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h);
-
-template<typename T>
-__device__ void get_psi(T &psi_m, T &psi_h,
-    const T zeta,
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h)
-{
-    T x_m, x_h;
-    T q_m, q_h;
-
-    if (zeta >= 0.0) 
-    {
-        q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
-        q_h = sqrt(c_h * c_h - 4.0);
-
-        x_m = pow(1.0 + zeta, 1.0 / 3.0);
-        x_h = zeta;
-
-        psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
-
-        psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
-    }
-    else
-    {
-        x_m = pow(1.0 - alpha_m * zeta, 0.25);
-        x_h = pow(1.0 - alpha_h * zeta, 0.25); 
-
-        psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
-        psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
-    }
-}
-
-template __device__ void get_psi(float &psi_m, float &psi_h,
-    const float zeta,
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h);
-template __device__ void get_psi(double &psi_m, double &psi_h,
-    const double zeta,
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h);
-
-template<typename T>
-__device__ void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta,
-    const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const int maxiters)
-{
-    T psi_m, psi_h, psi0_m, psi0_h, Linv;
-    const T gamma = 0.61;
-
-    Udyn = kappa * U / log(z / z0_m);
-    Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t);
-    Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t);
-    zeta = 0.0;
-
-    // --- no wind
-    if (Udyn < 1e-5) 
-        return;
-
-    Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
-    zeta = z * Linv;
-
-    // --- near neutral case
-    if (Linv < 1e-5) 
-        return;
-
-    for (int i = 0; i < maxiters; i++)
-    {
-        get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h,
-        a_m, a_h, 
-        b_m, b_h,
-        c_h);
-        
-        get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv, 
-        alpha_m, alpha_h,
-        a_m, a_h, 
-        b_m, b_h,
-        c_h);
-
-        Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m));
-        Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
-        Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
-
-        if (Udyn < 1e-5) 
-            break;
-
-        Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
-        zeta = z * Linv;
-    }
-}
-
-template __device__ void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta,
-    const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta,
-    const float kappa, const float Pr_t_0_inv,
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h,
-    const int maxiters);
-template __device__ void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta,
-    const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta,
-    const double kappa, const double Pr_t_0_inv,
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h,
-    const int maxiters);
-
-template<typename T>
-__device__ void get_phi(T &phi_m, T &phi_h,
-    const T zeta, 
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h)
-{
-    if (zeta >= 0.0) 
-    {
-        phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta);
-        phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta);
-    }
-    else
-    {
-        phi_m = pow(1.0 - alpha_m * zeta, -0.25);
-        phi_h = pow(1.0 - alpha_h * zeta, -0.5);
-    }
-}
-
-template __device__ void get_phi(float &phi_m, float &phi_h,
-    const float zeta, 
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h);
-template __device__ void get_phi(double &phi_m, double &phi_h,
-    const double zeta, 
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h);
-
-template<typename T>
-__global__ void kernel_compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h, 
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough,
-    const T B_max_land, const T B_max_ocean, const T B_max_lake,
-    const T gamma_c, const T Re_visc_min,
-    const T Pr_m, const T nu_air, const T g, 
-    const int maxiters_charnock,
-    const int grid_size)
-{
-    const int index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    T h, U, dT, Tsemi, dQ, z0_m;
-    T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
-    zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
-    Km, Pr_t_inv, Cm, Ct;
-
-    const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8));
-    const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock;
-
-    int surface_type;
-
-    if(index < grid_size)
-    {
-        U = U_[index];
-        Tsemi = Tsemi_[index];
-        dT = dT_[index];
-        dQ = dQ_[index];
-        h = h_[index];
-        z0_m = in_z0_m_[index];
-
-        if (z0_m < 0.0) surface_type = 0;
-        else            surface_type = 1;
-
-        if (surface_type == 0) 
-        {
-            get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock);
-            h0_m = h / z0_m;
-        }
-        if (surface_type == 1) 
-        {
-            h0_m = h / z0_m;
-            u_dyn0 = U * kappa / log(h0_m);
-        }
-
-        Re = u_dyn0 * z0_m / nu_air;
-        get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type);
-
-        // --- define relative height [thermal]
-        h0_t = h / z0_t;
-
-        // --- define Ri-bulk
-        Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
-
-        // --- get the fluxes
-        // ----------------------------------------------------------------------------
-        get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10);
-        // ----------------------------------------------------------------------------
-
-        get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h);
-        // ----------------------------------------------------------------------------
-
-        // --- define transfer coeff. (momentum) & (heat)
-        Cm = 0.0;
-        if (U > 0.0)
-            Cm = Udyn / U;
-        Ct = 0.0;
-        if (fabs(dT) > 0.0) 
-            Ct = Tdyn / dT;
-
-        // --- define eddy viscosity & inverse Prandtl number
-        Km = kappa * Cm * U * h / phi_m;
-        Pr_t_inv = phi_m / phi_h;
-
-        zeta_[index]         = zeta;
-        Rib_[index]          = Rib;
-        Re_[index]           = Re;
-        B_[index]            = B;
-        z0_m_[index]         = z0_m;
-        z0_t_[index]         = z0_t;
-        Rib_conv_lim_[index] = 0.0;
-        Cm_[index]           = Cm;
-        Ct_[index]           = Ct;
-        Km_[index]           = Km;
-        Pr_t_inv_[index]     = Pr_t_inv;
-    }
-}
-
-template __global__ void kernel_compute_flux_sheba(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
-    const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
-    const float kappa, const float Pr_t_0_inv,
-    const float alpha_m, const float alpha_h, 
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h,
-    const float Re_rough_min, 
-    const float B1_rough, const float B2_rough,
-    const float B_max_land, const float B_max_ocean, const float B_max_lake,
-    const float gamma_c, const float Re_visc_min,
-    const float Pr_m, const float nu_air, const float g, 
-    const int maxiters_charnock,
-    const int grid_size);
-template __global__ void kernel_compute_flux_sheba(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
-    const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
-    const double kappa, const double Pr_t_0_inv,
-    const double alpha_m, const double alpha_h, 
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h,
-    const double Re_rough_min, 
-    const double B1_rough, const double B2_rough,
-    const double B_max_land, const double B_max_ocean, const double B_max_lake,
-    const double gamma_c, const double Re_visc_min,
-    const double Pr_m, const double nu_air, const double g, 
-    const int maxiters_charnock,
-    const int grid_size);
-
-template<typename T>
-void compute_flux_sheba_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h, 
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough,
-    const T B_max_land, const T B_max_ocean, const T B_max_lake,
-    const T gamma_c, const T Re_visc_min,
-    const T Pr_m, const T nu_air, const T g, 
-    const int maxiters_charnock,
-    const int grid_size)
-{
-    const int BlockCount = int(ceil(float(grid_size) / 512.0));
-    dim3 cuBlock = dim3(512, 1, 1);
-	dim3 cuGrid = dim3(BlockCount, 1, 1);
-
-    kernel_compute_flux_sheba<<<cuGrid, cuBlock>>>(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
-    U_, dT_, Tsemi_, dQ_, h_, in_z0_m_,
-    kappa, Pr_t_0_inv, 
-    alpha_m, alpha_h, 
-    a_m, a_h, 
-    b_m, b_h,
-    c_h,
-    Re_rough_min, 
-    B1_rough, B2_rough,
-    B_max_land, B_max_ocean, B_max_lake,
-    gamma_c, Re_visc_min,
-    Pr_m, nu_air, g, 
-    maxiters_charnock, 
-    grid_size);
-    gpuErrchk( cudaPeekAtLastError() );
-}
-
-template void compute_flux_sheba_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
-    const float *U_, const float *dT_, const float *Tsemi_, const float *dQ_, const float *h_, const float *in_z0_m_,
-    const float kappa, const float Pr_t_0_inv,
-    const float alpha_m, const float alpha_h, 
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h,
-    const float Re_rough_min, 
-    const float B1_rough, const float B2_rough,
-    const float B_max_land, const float B_max_ocean, const float B_max_lake,
-    const float gamma_c, const float Re_visc_min,
-    const float Pr_m, const float nu_air, const float g, 
-    const int maxiters_charnock,
-    const int grid_size);
-template void compute_flux_sheba_gpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
-    const double *U_, const double *dT_, const double *Tsemi_, const double *dQ_, const double *h_, const double *in_z0_m_,
-    const double kappa, const double Pr_t_0_inv,
-    const double alpha_m, const double alpha_h, 
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h,
-    const double Re_rough_min, 
-    const double B1_rough, const double B2_rough,
-    const double B_max_land, const double B_max_ocean, const double B_max_lake,
-    const double gamma_c, const double Re_visc_min,
-    const double Pr_m, const double nu_air, const double g, 
-    const int maxiters_charnock,
-    const int grid_size);
\ No newline at end of file
diff --git a/srcCU/sfx_esm.cu b/srcCU/sfx_esm.cu
index c2d7aef58647901e46f277a699ce3dea8573a478..f8028d63a82ee44d920518671ce728799a2030e6 100644
--- a/srcCU/sfx_esm.cu
+++ b/srcCU/sfx_esm.cu
@@ -1,10 +1,10 @@
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 
-#include "../includeCXX/sfx_esm.h"
-#include "../includeCU/sfx_esm_compute_subfunc.cuh"
-#include "../includeCU/sfx_surface.cuh"
-#include "../includeCU/sfx_memory_processing.cuh"
+#include "sfx_esm.h"
+#include "sfx_model_compute_subfunc.cuh"
+#include "sfx_surface.cuh"
+#include "sfx_memory_processing.cuh"
 
 namespace sfx_kernel
 {
@@ -46,7 +46,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
 
         if (surface_type == surface_param.surface_ocean)
         {
-            get_charnock_roughness(z0_m, u_dyn0, h, U, model_param, surface_param, numerics);
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
             h0_m = h / z0_m;
         }
         if (surface_type == surface_param.surface_land) 
@@ -77,7 +77,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
         }
         else if (Rib < Rib_conv_lim) 
         {
-            get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics);
+            get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics.maxiters_convection);
 
             fval = pow(zeta_conv_lim / zeta, 1.0/3.0);
             phi_m = fval / f_m_conv_lim;
@@ -92,7 +92,7 @@ __global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
         }
         else
         {
-            get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics);
+            get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics.maxiters_convection);
             
             phi_m = pow(1.0 - model_param.alpha_m * zeta, -0.25);
             phi_h = 1.0 / (model_param.Pr_t_0_inv * sqrt(1.0 - model_param.alpha_h_fix * zeta));
diff --git a/srcCU/sfx_memory_processing.cu b/srcCU/sfx_memory_processing.cu
index 7dcc6cb5aedd07261cbe7d38ae1dcb5630859e73..8d622c1962312c9bb57f59832b099ed103c22e66 100644
--- a/srcCU/sfx_memory_processing.cu
+++ b/srcCU/sfx_memory_processing.cu
@@ -1,4 +1,4 @@
-#include "../includeCU/sfx_memory_processing.cuh"
+#include "sfx_memory_processing.cuh"
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 
diff --git a/srcCU/sfx_sheba.cu b/srcCU/sfx_sheba.cu
new file mode 100644
index 0000000000000000000000000000000000000000..2161b18798a531a8c81135c9a7e4c4058ff8bb45
--- /dev/null
+++ b/srcCU/sfx_sheba.cu
@@ -0,0 +1,144 @@
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+#include "sfx_sheba.h"
+#include "sfx_model_compute_subfunc.cuh"
+#include "sfx_surface.cuh"
+#include "sfx_memory_processing.cuh"
+
+namespace sfx_kernel
+{
+    template<typename T>
+    __global__ void compute_flux(sfxDataVecTypeC sfx,
+        meteoDataVecTypeC meteo,
+        const sfx_sheba_param model_param, 
+        const sfx_surface_param surface_param,
+        const sfx_sheba_numericsTypeC numerics,
+        const sfx_phys_constants phys_constants,
+        const int grid_size);
+}
+
+template<typename T>
+__global__ void sfx_kernel::compute_flux(sfxDataVecTypeC sfx,
+    meteoDataVecTypeC meteo,
+    const sfx_sheba_param model_param, 
+    const sfx_surface_param surface_param,
+    const sfx_sheba_numericsTypeC numerics,
+    const sfx_phys_constants phys_constants,
+    const int grid_size)
+{
+    const int index = blockIdx.x * blockDim.x + threadIdx.x;
+    T h, U, dT, Tsemi, dQ, z0_m;
+    T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
+    zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
+    Km, Pr_t_inv, Cm, Ct;
+    int surface_type;
+
+    if(index < grid_size)
+    {
+        U = meteo.U[index];
+        Tsemi = meteo.Tsemi[index];
+        dT = meteo.dT[index];
+        dQ = meteo.dQ[index];
+        h = meteo.h[index];
+        z0_m = meteo.z0_m[index];
+
+        surface_type = z0_m < 0.0 ? surface_param.surface_ocean : surface_param.surface_land;
+
+        if (surface_type == surface_param.surface_ocean) 
+        {
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
+            h0_m = h / z0_m;
+        }
+        if (surface_type == surface_param.surface_land) 
+        {
+            h0_m = h / z0_m;
+            u_dyn0 = U * model_param.kappa / log(h0_m);
+        }
+
+        Re = u_dyn0 * z0_m / phys_constants.nu_air;
+        get_thermal_roughness(z0_t, B, z0_m, Re, surface_param, surface_type);
+
+        // --- define relative height [thermal]
+        h0_t = h / z0_t;
+
+        // --- define Ri-bulk
+        Rib = (phys_constants.g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
+
+        // --- get the fluxes
+        // ----------------------------------------------------------------------------
+        get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, 
+            U, Tsemi, dT, dQ, h, z0_m, z0_t, (phys_constants.g / Tsemi), model_param, 10);
+        // ----------------------------------------------------------------------------
+
+        get_phi(phi_m, phi_h, zeta, model_param);
+        // ----------------------------------------------------------------------------
+
+        // --- define transfer coeff. (momentum) & (heat)
+        Cm = 0.0;
+        if (U > 0.0)
+            Cm = Udyn / U;
+        Ct = 0.0;
+        if (fabs(dT) > 0.0) 
+            Ct = Tdyn / dT;
+
+        // --- define eddy viscosity & inverse Prandtl number
+        Km = model_param.kappa * Cm * U * h / phi_m;
+        Pr_t_inv = phi_m / phi_h;
+
+        sfx.zeta[index]         = zeta;
+        sfx.Rib[index]          = Rib;
+        sfx.Re[index]           = Re;
+        sfx.B[index]            = B;
+        sfx.z0_m[index]         = z0_m;
+        sfx.z0_t[index]         = z0_t;
+        sfx.Rib_conv_lim[index] = T(0.0);
+        sfx.Cm[index]           = Cm;
+        sfx.Ct[index]           = Ct;
+        sfx.Km[index]           = Km;
+        sfx.Pr_t_inv[index]     = Pr_t_inv;
+    }
+}
+
+template<typename T, MemType memIn, MemType memOut >
+void FluxSheba<T, memIn, memOut, MemType::GPU>::compute_flux()
+{
+    const int BlockCount = int(ceil(float(grid_size) / 1024.0));
+    dim3 cuBlock = dim3(1024, 1, 1);
+	dim3 cuGrid = dim3(BlockCount, 1, 1);
+
+    sfx_kernel::compute_flux<T><<<cuGrid, cuBlock>>>(sfx, meteo, model_param, 
+                                                    surface_param, numerics, phys_constants, grid_size);
+
+    if(MemType::GPU != memOut)
+    {
+        const size_t new_size = grid_size * sizeof(T);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size);
+        memproc::memcopy<memOut, MemType::GPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size);
+    }
+}
+
+template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>;
+template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>;
+template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>;
+template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>;
+template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>;
+template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>;
+template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>;
+
+template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>;
+template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>;
+template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::GPU>;
+template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::GPU>;
+template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU>;
+template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::CPU>;
+template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::CPU>;
\ No newline at end of file
diff --git a/srcCXX/model_base.cpp b/srcCXX/model_base.cpp
index ca1bcdb48c8633d6b6e6e21f37d7fab9d5a0c1b4..a72d47d5973035b4a7dd639c004f566802273a8b 100644
--- a/srcCXX/model_base.cpp
+++ b/srcCXX/model_base.cpp
@@ -1,9 +1,9 @@
-#include "../includeCXX/model_base.h"
+#include "model_base.h"
 
 #ifdef INCLUDE_CUDA
-    #include "../includeCU/sfx_memory_processing.cuh"
+    #include "sfx_memory_processing.cuh"
 #endif
-#include "../includeCXX/sfx_memory_processing.h"
+#include "sfx_memory_processing.h"
 
 template<typename T, MemType memIn, MemType memOut, MemType RunMem >
 ModelBase<T, memIn, memOut, RunMem>::ModelBase(sfxDataVecTypeC* sfx_in,
diff --git a/srcCXX/sfx_call_class_func.cpp b/srcCXX/sfx_call_class_func.cpp
index 1cc665d1c8da5d3ae8c5f89e14c35dfb946811cc..9659c17b691ecb848a596bbf94de4accd181506f 100644
--- a/srcCXX/sfx_call_class_func.cpp
+++ b/srcCXX/sfx_call_class_func.cpp
@@ -1,8 +1,8 @@
 #include <stdlib.h>
 #include <stdio.h>
-#include "../includeCXX/sfx_call_class_func.h"
-#include "../includeCXX/sfx_esm.h"
-#include "../includeCXX/sfx_sheba.h"
+#include "sfx_call_class_func.h"
+#include "sfx_esm.h"
+#include "sfx_sheba.h"
 #include <vector>
 
 // -------------------------------------------------------------------------- //
diff --git a/srcCXX/sfx_compute_sheba.cpp b/srcCXX/sfx_compute_sheba.cpp
deleted file mode 100644
index 167fcbb65fad8863ca7837721e61ac23ff82c747..0000000000000000000000000000000000000000
--- a/srcCXX/sfx_compute_sheba.cpp
+++ /dev/null
@@ -1,325 +0,0 @@
-#include <cmath>
-#include <iostream>
-#include "../includeCXX/sfx_compute_sheba.h"
-// #include "../includeCXX/sfx_surface.h"
-
-template<typename T>
-void get_psi_mh(T &psi_m, T &psi_h,
-    const T zeta_m, const T zeta_h,
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h)
-{
-    T x_m, x_h;
-    T q_m, q_h;
-
-    if (zeta_m >= 0.0) 
-    {
-        q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
-        x_m = pow(1.0 + zeta_m, 1.0 / 3.0);
-
-        psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
-    }                                
-    else
-    {    x_m = pow(1.0 - alpha_m * zeta_m, 0.25);
-        psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
-    }
-
-    if (zeta_h >= 0.0)
-    {    
-        q_h = sqrt(c_h * c_h - 4.0);
-        x_h = zeta_h;
-
-        psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
-    }
-    else
-    {
-        x_h = pow(1.0 - alpha_h * zeta_h, 0.25);
-        psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
-    }
-}
-
-template void get_psi_mh(float &psi_m, float &psi_h,
-    const float zeta_m, const float zeta_h,
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h);
-template void get_psi_mh(double &psi_m, double &psi_h,
-    const double zeta_m, const double zeta_h,
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h);
-
-template<typename T>
-void get_psi(T &psi_m, T &psi_h,
-    const T zeta,
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h)
-{
-    T x_m, x_h;
-    T q_m, q_h;
-
-    if (zeta >= 0.0) 
-    {
-        q_m = pow((1.0 - b_m) / b_m, 1.0 / 3.0);
-        q_h = sqrt(c_h * c_h - 4.0);
-
-        x_m = pow(1.0 + zeta, 1.0 / 3.0);
-        x_h = zeta;
-
-        psi_m = -3.0 * (a_m / b_m) * (x_m - 1.0) + 0.5 * (a_m / b_m) * q_m * (2.0 * log((x_m + q_m) / (1.0 + q_m)) - log((x_m * x_m - x_m * q_m + q_m * q_m) / (1.0 - q_m + q_m * q_m)) + 2.0 * sqrt(3.0) * (atan((2.0 * x_m - q_m) / (sqrt(3.0) * q_m)) - atan((2.0 - q_m) / (sqrt(3.0) * q_m))));
-
-        psi_h = -0.5 * b_h * log(1.0 + c_h * x_h + x_h * x_h) + ((-a_h / q_h) + ((b_h * c_h) / (2.0 * q_h))) * (log((2.0 * x_h + c_h - q_h) / (2.0 * x_h + c_h + q_h)) - log((c_h - q_h) / (c_h + q_h)));
-    }
-    else
-    {
-        x_m = pow(1.0 - alpha_m * zeta, 0.25);
-        x_h = pow(1.0 - alpha_h * zeta, 0.25); 
-
-        psi_m = (4.0 * atan(1.0) / 2.0) + 2.0 * log(0.5 * (1.0 + x_m)) + log(0.5 * (1.0 + x_m * x_m)) - 2.0 * atan(x_m);
-        psi_h = 2.0 * log(0.5 * (1.0 + x_h * x_h));
-    }
-}
-
-template void get_psi(float &psi_m, float &psi_h,
-    const float zeta,
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h);
-template void get_psi(double &psi_m, double &psi_h,
-    const double zeta,
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h);
-
-template<typename T>
-void get_dynamic_scales(T &Udyn, T &Tdyn, T &Qdyn, T &zeta,
-    const T U, const T Tsemi, const T dT, const T dQ, const T z, const T z0_m, const T z0_t, const T beta,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const int maxiters)
-{
-    T psi_m, psi_h, psi0_m, psi0_h, Linv;
-    const T gamma = 0.61;
-
-    Udyn = kappa * U / log(z / z0_m);
-    Tdyn = kappa * dT * Pr_t_0_inv / log(z / z0_t);
-    Qdyn = kappa * dQ * Pr_t_0_inv / log(z / z0_t);
-    zeta = 0.0;
-
-    // --- no wind
-    if (Udyn < 1e-5) 
-        return;
-
-    Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
-    zeta = z * Linv;
-
-    // --- near neutral case
-    if (Linv < 1e-5) 
-        return;
-
-    for (int i = 0; i < maxiters; i++)
-    {
-        get_psi(psi_m, psi_h, zeta, alpha_m, alpha_h,
-        a_m, a_h, 
-        b_m, b_h,
-        c_h);
-        
-        get_psi_mh(psi0_m, psi0_h, z0_m * Linv, z0_t * Linv, 
-        alpha_m, alpha_h,
-        a_m, a_h, 
-        b_m, b_h,
-        c_h);
-
-        Udyn = kappa * U / (log(z / z0_m) - (psi_m - psi0_m));
-        Tdyn = kappa * dT * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
-        Qdyn = kappa * dQ * Pr_t_0_inv / (log(z / z0_t) - (psi_h - psi0_h));
-
-        if (Udyn < 1e-5) 
-            break;
-
-        Linv = kappa * beta * (Tdyn + gamma * Qdyn * Tsemi) / (Udyn * Udyn);
-        zeta = z * Linv;
-    }
-}
-
-template void get_dynamic_scales(float &Udyn, float &Tdyn, float &Qdyn, float & zeta,
-    const float U, const float Tsemi, const float dT, const float dQ, const float z, const float z0_m, const float z0_t, const float beta,
-    const float kappa, const float Pr_t_0_inv,
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h,
-    const int maxiters);
-template void get_dynamic_scales(double &Udyn, double &Tdyn, double &Qdyn, double & zeta,
-    const double U, const double Tsemi, const double dT, const double dQ, const double z, const double z0_m, const double z0_t, const double beta,
-    const double kappa, const double Pr_t_0_inv,
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h,
-    const int maxiters);
-
-template<typename T>
-void get_phi(T &phi_m, T &phi_h,
-    const T zeta, 
-    const T alpha_m, const T alpha_h,
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h)
-{
-    if (zeta >= 0.0) 
-    {
-        phi_m = 1.0 + (a_m * zeta * pow(1.0 + zeta, 1.0 / 3.0) ) / (1.0 + b_m * zeta);
-        phi_h = 1.0 + (a_h * zeta + b_h * zeta * zeta) / (1.0 + c_h * zeta + zeta * zeta);
-    }
-    else
-    {
-        phi_m = pow(1.0 - alpha_m * zeta, -0.25);
-        phi_h = pow(1.0 - alpha_h * zeta, -0.5);
-    }
-}
-
-template void get_phi(float &phi_m, float &phi_h,
-    const float zeta, 
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h);
-template void get_phi(double &phi_m, double &phi_h,
-    const double zeta, 
-    const double alpha_m, const double alpha_h,
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h);
-
-template<typename T>
-void compute_flux_sheba_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
-    const T kappa, const T Pr_t_0_inv,
-    const T alpha_m, const T alpha_h, 
-    const T a_m, const T a_h, 
-    const T b_m, const T b_h,
-    const T c_h,
-    const T Re_rough_min, 
-    const T B1_rough, const T B2_rough,
-    const T B_max_land, const T B_max_ocean, const T B_max_lake,
-    const T gamma_c, const T Re_visc_min,
-    const T Pr_m, const T nu_air, const T g, 
-    const int maxiters_charnock,
-    const int grid_size)
-{
-    // T h, U, dT, Tsemi, dQ, z0_m;
-    // T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
-    // zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
-    // Km, Pr_t_inv, Cm, Ct;
-
-    // const T B3_rough = kappa * Pr_m, B4_rough =(0.14 * (pow(30.0, B2_rough))) * (pow(Pr_m, 0.8));
-    // const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock;
-
-    // int surface_type;
-
-    // for (int step = 0; step < grid_size; step++)
-    // {
-    //     U = U_[step];
-    //     Tsemi = Tsemi_[step];
-    //     dT = dT_[step];
-    //     dQ = dQ_[step];
-    //     h = h_[step];
-    //     z0_m = in_z0_m_[step];
-
-    //     if (z0_m < 0.0) surface_type = 0;
-    //     else            surface_type = 1;
-
-    //     if (surface_type == 0) 
-    //     {
-    //         get_charnock_roughness(z0_m, u_dyn0, h, U, kappa, h_charnock, c1_charnock, c2_charnock, maxiters_charnock);
-    //         h0_m = h / z0_m;
-    //     }
-    //     if (surface_type == 1) 
-    //     {
-    //         h0_m = h / z0_m;
-    //         u_dyn0 = U * kappa / log(h0_m);
-    //     }
-
-    //     Re = u_dyn0 * z0_m / nu_air;
-    //     get_thermal_roughness(z0_t, B, z0_m, Re, Re_rough_min, B1_rough, B2_rough, B3_rough, B4_rough, B_max_ocean, B_max_lake, B_max_land, surface_type);
-
-    //     // --- define relative height [thermal]
-    //     h0_t = h / z0_t;
-
-    //     // --- define Ri-bulk
-    //     Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
-
-    //     // --- get the fluxes
-    //     // ----------------------------------------------------------------------------
-    //     get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), kappa, Pr_t_0_inv, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h, 10);
-    //     // ----------------------------------------------------------------------------
-
-    //     get_phi(phi_m, phi_h, zeta, alpha_m, alpha_h, a_m, a_h, b_m, b_h, c_h);
-    //     // ----------------------------------------------------------------------------
-
-    //     // --- define transfer coeff. (momentum) & (heat)
-    //     Cm = 0.0;
-    //     if (U > 0.0)
-    //         Cm = Udyn / U;
-    //     Ct = 0.0;
-    //     if (fabs(dT) > 0.0) 
-    //         Ct = Tdyn / dT;
-
-    //     // --- define eddy viscosity & inverse Prandtl number
-    //     Km = kappa * Cm * U * h / phi_m;
-    //     Pr_t_inv = phi_m / phi_h;
-
-    //     zeta_[step]         = zeta;
-    //     Rib_[step]          = Rib;
-    //     Re_[step]           = Re;
-    //     B_[step]            = B;
-    //     z0_m_[step]         = z0_m;
-    //     z0_t_[step]         = z0_t;
-    //     Rib_conv_lim_[step] = 0.0;
-    //     Cm_[step]           = Cm;
-    //     Ct_[step]           = Ct;
-    //     Km_[step]           = Km;
-    //     Pr_t_inv_[step]     = Pr_t_inv;
-    // }
-}
-
-template void compute_flux_sheba_cpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
-    const float *U, const float *dt, const float *T_semi, const float *dq, const float *H, const float *in_z0_m,
-    const float kappa, const float Pr_t_0_inv, 
-    const float alpha_m, const float alpha_h,
-    const float a_m, const float a_h, 
-    const float b_m, const float b_h,
-    const float c_h,
-    const float Re_rough_min, 
-    const float B1_rough, const float B2_rough,
-    const float B_max_land, const float B_max_ocean, const float B_max_lake,
-    const float gamma_c, const float Re_visc_min,
-    const float Pr_m, const float nu_air, const float g, 
-    const int maxiters_charnock, 
-    const int grid_size);
-template void compute_flux_sheba_cpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
-    const double *U, const double *dt, const double *T_semi, const double *dq, const double *H, const double *in_z0_m, 
-    const double kappa, const double Pr_t_0_inv, 
-    const double alpha_m, const double alpha_h, 
-    const double a_m, const double a_h, 
-    const double b_m, const double b_h,
-    const double c_h,
-    const double Re_rough_min, 
-    const double B1_rough, const double B2_rough,
-    const double B_max_land, const double B_max_ocean, const double B_max_lake,
-    const double gamma_c, const double Re_visc_min,
-    const double Pr_m, const double nu_air, const double g, 
-    const int maxiters_charnock,
-    const int grid_size);
\ No newline at end of file
diff --git a/srcCXX/sfx_esm.cpp b/srcCXX/sfx_esm.cpp
index ad77a702428a957d5c18d27f3e227d2f35e0b72b..c324e80f1fa91fd1fa26e7c5acfaa7dbd1af1713 100644
--- a/srcCXX/sfx_esm.cpp
+++ b/srcCXX/sfx_esm.cpp
@@ -1,14 +1,14 @@
 #include <iostream>
 #include <cmath>
 
-#include "../includeCXX/sfx_esm.h"
+#include "sfx_esm.h"
 #ifdef INCLUDE_CUDA
-    #include "../includeCU/sfx_memory_processing.cuh"
+    #include "sfx_memory_processing.cuh"
 #endif
 
-#include "../includeCXX/sfx_memory_processing.h"
-#include "../includeCU/sfx_surface.cuh"
-#include "../includeCU/sfx_esm_compute_subfunc.cuh"
+#include "sfx_memory_processing.h"
+#include "sfx_surface.cuh"
+#include "sfx_model_compute_subfunc.cuh"
 
 template<typename T, MemType memIn, MemType memOut, MemType RunMem >
 FluxEsmBase<T, memIn, memOut, RunMem>::FluxEsmBase(sfxDataVecTypeC* sfx_in,
@@ -49,7 +49,7 @@ void FluxEsm<T, memIn, memOut, MemType::CPU>::compute_flux()
 
         if (surface_type == surface_param.surface_ocean)
         {
-            get_charnock_roughness(z0_m, u_dyn0, h, U, model_param, surface_param, numerics);
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
             h0_m = h / z0_m;
         }
         if (surface_type == surface_param.surface_land) 
@@ -80,7 +80,7 @@ void FluxEsm<T, memIn, memOut, MemType::CPU>::compute_flux()
         }
         else if (Rib < Rib_conv_lim) 
         {
-            get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics);
+            get_psi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, model_param, numerics.maxiters_convection);
 
             fval = pow(zeta_conv_lim / zeta, 1.0/3.0);
             phi_m = fval / f_m_conv_lim;
@@ -95,7 +95,7 @@ void FluxEsm<T, memIn, memOut, MemType::CPU>::compute_flux()
         }
         else
         {
-            get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics);
+            get_psi_semi_convection(psi_m, psi_h, zeta, Rib, h0_m, h0_t, B, model_param, numerics.maxiters_convection);
             
             phi_m = pow(1.0 - model_param.alpha_m * zeta, -0.25);
             phi_h = 1.0 / (model_param.Pr_t_0_inv * sqrt(1.0 - model_param.alpha_h_fix * zeta));
diff --git a/srcCXX/sfx_memory_processing.cpp b/srcCXX/sfx_memory_processing.cpp
index 1d5805a4716eb7c1068a5665c432a8caa95441ba..6b13745da36f58e8cf909206678e6d3583a11da4 100644
--- a/srcCXX/sfx_memory_processing.cpp
+++ b/srcCXX/sfx_memory_processing.cpp
@@ -1,4 +1,4 @@
-#include "../includeCXX/sfx_memory_processing.h"
+#include "sfx_memory_processing.h"
 #include <cstdlib>
 #include <cstring>
 
diff --git a/srcCXX/sfx_sheba.cpp b/srcCXX/sfx_sheba.cpp
index af52cc2c78fcc2578815b2b9a75e02a96ff04c7e..abd48c71e3c01b939e5b517525cb4963b8a441c4 100644
--- a/srcCXX/sfx_sheba.cpp
+++ b/srcCXX/sfx_sheba.cpp
@@ -1,281 +1,136 @@
 #include <iostream>
+#include <cmath>
 
-#include "../includeCXX/sfx_sheba.h"
-#include "../includeCXX/sfx_compute_sheba.h"
+#include "sfx_sheba.h"
 #ifdef INCLUDE_CUDA
-    #include "../includeCU/sfx_compute_sheba.cuh"
-    #include "../includeCU/sfx_memory_processing.cuh"
+    #include "sfx_memory_processing.cuh"
 #endif
 
-#include "../includeCXX/sfx_memory_processing.h"
+#include "sfx_memory_processing.h"
+#include "sfx_surface.cuh"
+#include "sfx_model_compute_subfunc.cuh"
 
 template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-FluxSheba<T, memIn, memOut, RunMem>::FluxSheba()
+FluxShebaBase<T, memIn, memOut, RunMem>::FluxShebaBase(sfxDataVecTypeC* sfx_in,
+                meteoDataVecTypeC* meteo_in,
+                const sfx_sheba_param model_param_in, 
+                const sfx_surface_param surface_param_in,
+                const sfx_sheba_numericsTypeC numerics_in,
+                const sfx_phys_constants phys_constants_in,
+                const int grid_size_in) : ModelBase<T, memIn, memOut, RunMem>(sfx_in, meteo_in, grid_size_in)
 {
-    kappa = 0, Pr_t_0_inv = 0, 
-    alpha_m = 0, alpha_h = 0, 
-    a_m = 0, a_h = 0, 
-    b_m = 0, b_h = 0, 
-    c_h = 0,
-    Re_rough_min = 0, 
-    B1_rough = 0, B2_rough = 0, 
-    B_max_land = 0, B_max_ocean = 0, B_max_lake = 0,
-    gamma_c = 0,  
-    Re_visc_min = 0,
-    Pr_m = 0, nu_air = 0, g = 0;
-
-    grid_size = 0;
-
-    ifAllocated = false;
-    allocated_size = 0;
+    surface_param = surface_param_in;
+    phys_constants = phys_constants_in;
+    model_param = model_param_in;
+    numerics = numerics_in;
 }
 
 template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-void FluxSheba<T, memIn, memOut, RunMem>::set_params(const int grid_size_, const T kappa_, const T Pr_t_0_inv_,
-    const T alpha_m_, const T alpha_h_, 
-    const float a_m_, const float a_h_, 
-    const float b_m_, const float b_h_,
-    const float c_h_,
-    const T Re_rough_min_, 
-    const T B1_rough_, const T B2_rough_,
-    const T B_max_land_, const T B_max_ocean_, const T B_max_lake_,
-    const T gamma_c_, const T Re_visc_min_,
-    const T Pr_m_, const T nu_air_, const T g_)
-{
-    kappa = kappa_, Pr_t_0_inv = Pr_t_0_inv_, 
-    alpha_m = alpha_m_, alpha_h = alpha_h_, 
-    a_m = a_m_, a_h = a_h_, 
-    b_m = b_m_, b_h = b_h_, 
-    c_h = c_h_,
-    Re_rough_min = Re_rough_min_, B_max_lake = B_max_lake_,
-    B1_rough = B1_rough_, B2_rough = B2_rough_, 
-    B_max_land = B_max_land_, B_max_ocean = B_max_ocean_, B_max_lake = B_max_lake_,
-    gamma_c = gamma_c_,  
-    Re_visc_min = Re_visc_min_,
-    Pr_m = Pr_m_, nu_air = nu_air_, g = g_;
-
-    grid_size = grid_size_;
-
-    if(RunMem != memIn)
-    {
-        const size_t new_size = grid_size * sizeof(T);
+FluxShebaBase<T, memIn, memOut, RunMem>::~FluxShebaBase() {}
 
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(U), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(dT), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(Tsemi), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(dQ), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(h), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(in_z0_m), allocated_size, new_size);
+template<typename T, MemType memIn, MemType memOut >
+void FluxSheba<T, memIn, memOut, MemType::CPU>::compute_flux()
+{
+    T h, U, dT, Tsemi, dQ, z0_m;
+    T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
+    zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
+    Km, Pr_t_inv, Cm, Ct;
+    int surface_type;
 
-        ifAllocated = true;
-    }
-    if(RunMem != memOut)
+    for (int step = 0; step < grid_size; step++)
     {
-        const size_t new_size = grid_size * sizeof(T);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(zeta), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(Rib), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(Re), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(Rib_conv_lim), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(z0_m), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(z0_t), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(B), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(Cm), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(Ct), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(Km), allocated_size, new_size);
-        allocated_size = 0;
-        memproc::realloc<RunMem>((void *&)(Pr_t_inv), allocated_size, new_size);
-
-        // T* del = new T[grid_size];
-        // for (int i = 0; i < grid_size; i++)
-        //     del[i] = i;   
-
-        // memproc::memcopy<RunMem, memOut>(Cm, del, new_size);   
-
-        // for (int i = 0; i < grid_size; i++)
-        //     del[i] = 0;   
+        U = meteo.U[step];
+        Tsemi = meteo.Tsemi[step];
+        dT = meteo.dT[step];
+        dQ = meteo.dQ[step];
+        h = meteo.h[step];
+        z0_m = meteo.z0_m[step];
 
-        // printf("before\n");
-        // for (int i = 0; i < 10; i++)
-        //     printf("%f\n", del[i]);
+        surface_type = z0_m < 0.0 ? surface_param.surface_ocean : surface_param.surface_land;
 
-        // memproc::memcopy<memOut, RunMem>(del, Cm, new_size);
-
-        // for (int i = 0; i < grid_size; i++)
-        //     printf("%f\n", del[i]);
-
-        // delete[] del;  
-
-        ifAllocated = true;
-    }
-}
-
-template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-FluxSheba<T, memIn, memOut, RunMem>::~FluxSheba()
-{
-    kappa = 0, Pr_t_0_inv = 0, 
-    alpha_m = 0, alpha_h = 0, 
-    a_m = 0, a_h = 0, 
-    b_m = 0, b_h = 0, 
-    c_h = 0,
-    Re_rough_min = 0, 
-    B1_rough = 0, B2_rough = 0, 
-    B_max_land = 0, B_max_ocean = 0, B_max_lake = 0,
-    gamma_c = 0,  
-    Re_visc_min = 0,
-    Pr_m = 0, nu_air = 0, g = 0;
-
-    grid_size = 0;
-
-    if(ifAllocated == true)
-    {
-        if(RunMem != memIn)
-        {    
-            memproc::dealloc<RunMem>((void*&)U);
-            memproc::dealloc<RunMem>((void*&)dT);
-            memproc::dealloc<RunMem>((void*&)Tsemi);
-            memproc::dealloc<RunMem>((void*&)dQ);
-            memproc::dealloc<RunMem>((void*&)h);
+        if (surface_type == surface_param.surface_ocean) 
+        {
+            get_charnock_roughness(z0_m, u_dyn0, U, h, surface_param, numerics.maxiters_charnock);
+            h0_m = h / z0_m;
         }
-        if(RunMem != memOut)
+        if (surface_type == surface_param.surface_land) 
         {
-            memproc::dealloc<RunMem>((void*&)zeta);
-            memproc::dealloc<RunMem>((void*&)Rib);
-            memproc::dealloc<RunMem>((void*&)Re);
-            memproc::dealloc<RunMem>((void*&)Rib_conv_lim);
-            memproc::dealloc<RunMem>((void*&)z0_m);
-            memproc::dealloc<RunMem>((void*&)z0_t);
-            memproc::dealloc<RunMem>((void*&)B);
-            memproc::dealloc<RunMem>((void*&)Cm);
-            memproc::dealloc<RunMem>((void*&)Ct);
-            memproc::dealloc<RunMem>((void*&)Km);
-            memproc::dealloc<RunMem>((void*&)Pr_t_inv);
+            h0_m = h / z0_m;
+            u_dyn0 = U * model_param.kappa / log(h0_m);
         }
 
-        ifAllocated = false;
-        allocated_size = 0;
+        Re = u_dyn0 * z0_m / phys_constants.nu_air;
+        get_thermal_roughness(z0_t, B, z0_m, Re, surface_param, surface_type);
+
+        // --- define relative height [thermal]
+        h0_t = h / z0_t;
+
+        // --- define Ri-bulk
+        Rib = (phys_constants.g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
+
+        // --- get the fluxes
+        // ----------------------------------------------------------------------------
+        get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, 
+            U, Tsemi, dT, dQ, h, z0_m, z0_t, (phys_constants.g / Tsemi), model_param, 10);
+        // ----------------------------------------------------------------------------
+
+        get_phi(phi_m, phi_h, zeta, model_param);
+        // ----------------------------------------------------------------------------
+
+        // --- define transfer coeff. (momentum) & (heat)
+        Cm = 0.0;
+        if (U > 0.0)
+            Cm = Udyn / U;
+        Ct = 0.0;
+        if (fabs(dT) > 0.0) 
+            Ct = Tdyn / dT;
+
+        // --- define eddy viscosity & inverse Prandtl number
+        Km = model_param.kappa * Cm * U * h / phi_m;
+        Pr_t_inv = phi_m / phi_h;
+
+        sfx.zeta[step]         = zeta;
+        sfx.Rib[step]          = Rib;
+        sfx.Re[step]           = Re;
+        sfx.B[step]            = B;
+        sfx.z0_m[step]         = z0_m;
+        sfx.z0_t[step]         = z0_t;
+        sfx.Rib_conv_lim[step] = T(0.0);
+        sfx.Cm[step]           = Cm;
+        sfx.Ct[step]           = Ct;
+        sfx.Km[step]           = Km;
+        sfx.Pr_t_inv[step]     = Pr_t_inv;
     }
-}
-
-template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-void FluxSheba<T, memIn, memOut, RunMem>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_)
-{
-    if(RunMem == memIn)
-    {
-        U = U_;
-        dT = dT_;
-        Tsemi = Tsemi_;
-        dQ = dQ_;
-        h = h_;
-        in_z0_m = in_z0_m_;
-    }
-    else
-    {
-        const size_t new_size = grid_size * sizeof(T);
-
-        memproc::memcopy<RunMem, memIn>(U, U_, new_size);
-        memproc::memcopy<RunMem, memIn>(dT, dT_, new_size);
-        memproc::memcopy<RunMem, memIn>(Tsemi, Tsemi_, new_size);
-        memproc::memcopy<RunMem, memIn>(dQ, dQ_, new_size);
-        memproc::memcopy<RunMem, memIn>(h, h_, new_size);
-        memproc::memcopy<RunMem, memIn>(in_z0_m, in_z0_m_, new_size);
-    }
-
-    if(RunMem == memOut)
-    {
-        zeta = zeta_;
-        Rib = Rib_;
-        Re = Re_;
-        Rib_conv_lim = Rib_conv_lim_;
-        z0_m = z0_m_;
-        z0_t = z0_t_;
-        B = B_;
-        Cm = Cm_;
-        Ct = Ct_;
-        Km = Km_;
-        Pr_t_inv = Pr_t_inv_;
-    }
-}
-
-template<typename T, MemType memIn, MemType memOut, MemType RunMem >
-void FluxSheba<T, memIn, memOut, RunMem>::compute_flux_sheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
-    T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_, 
-    const int maxiters_charnock)
-{
-    set_data(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
-    U_, dT_, Tsemi_, dQ_, h_, in_z0_m_);
-
-    if(RunMem == MemType::CPU) 
-        compute_flux_sheba_cpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv,
-        U, dT, Tsemi, dQ, h, in_z0_m, 
-        kappa, Pr_t_0_inv, 
-        alpha_m, alpha_h,
-        a_m, a_h, 
-        b_m, b_h,
-        c_h,
-        Re_rough_min, 
-        B1_rough, B2_rough,
-        B_max_land, B_max_ocean, B_max_lake,
-        gamma_c, Re_visc_min,
-        Pr_m, nu_air, g, 
-        maxiters_charnock, 
-        grid_size);
 
-#ifdef INCLUDE_CUDA
-    else compute_flux_sheba_gpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv,
-        U, dT, Tsemi, dQ, h, in_z0_m, 
-        kappa, Pr_t_0_inv, 
-        alpha_m, alpha_h,
-        a_m, a_h, 
-        b_m, b_h,
-        c_h,
-        Re_rough_min, 
-        B1_rough, B2_rough,
-        B_max_land, B_max_ocean, B_max_lake,
-        gamma_c, Re_visc_min,
-        Pr_m, nu_air, g, 
-        maxiters_charnock, 
-        grid_size);
-#endif
-
-    if(RunMem != memOut)
+    if(MemType::CPU != memOut)
     {
         const size_t new_size = grid_size * sizeof(T);
-
-        memproc::memcopy<memOut, RunMem>(zeta_, zeta, new_size);
-        memproc::memcopy<memOut, RunMem>(Rib_, Rib, new_size);
-        memproc::memcopy<memOut, RunMem>(Re_, Re, new_size);
-        memproc::memcopy<memOut, RunMem>(Rib_conv_lim_, Rib_conv_lim, new_size);
-        memproc::memcopy<memOut, RunMem>(z0_m_, z0_m, new_size);
-        memproc::memcopy<memOut, RunMem>(z0_t_, z0_t, new_size);
-        memproc::memcopy<memOut, RunMem>(B_, B, new_size);
-        memproc::memcopy<memOut, RunMem>(Cm_, Cm, new_size);
-        memproc::memcopy<memOut, RunMem>(Ct_, Ct, new_size);
-        memproc::memcopy<memOut, RunMem>(Km_, Km, new_size);
-        memproc::memcopy<memOut, RunMem>(Pr_t_inv_, Pr_t_inv, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->zeta, (void*&)sfx.zeta, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Rib, (void*&)sfx.Rib, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Re, (void*&)sfx.Re, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->B, (void*&)sfx.B, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->z0_m, (void*&)sfx.z0_m, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->z0_t, (void*&)sfx.z0_t, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Rib_conv_lim, (void*&)sfx.Rib_conv_lim, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Cm, (void*&)sfx.Cm, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Ct, (void*&)sfx.Ct, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Km, (void*&)sfx.Km, new_size);
+        memproc::memcopy<memOut, MemType::CPU>((void*&)res_sfx->Pr_t_inv, (void*&)sfx.Pr_t_inv, new_size);
     }
 }
 
 template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU>;
-template class FluxSheba<double, MemType::CPU, MemType::CPU, MemType::CPU>;
+template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::CPU>;
 
 #ifdef INCLUDE_CUDA
+    template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::GPU>;
+    template class FluxShebaBase<float, MemType::GPU, MemType::GPU, MemType::CPU>;
+    template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::GPU>;
+    template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::GPU>;
+    template class FluxShebaBase<float, MemType::CPU, MemType::CPU, MemType::GPU>;
+    template class FluxShebaBase<float, MemType::CPU, MemType::GPU, MemType::CPU>;
+    template class FluxShebaBase<float, MemType::GPU, MemType::CPU, MemType::CPU>;
+
     template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>;
     template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>;
     template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::GPU>;
@@ -283,12 +138,4 @@ template class FluxSheba<double, MemType::CPU, MemType::CPU, MemType::CPU>;
     template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU>;
     template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::CPU>;
     template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::CPU>;
-
-    template class FluxSheba<double, MemType::GPU, MemType::GPU, MemType::GPU>;
-    template class FluxSheba<double, MemType::GPU, MemType::GPU, MemType::CPU>;
-    template class FluxSheba<double, MemType::GPU, MemType::CPU, MemType::GPU>;
-    template class FluxSheba<double, MemType::CPU, MemType::GPU, MemType::GPU>;
-    template class FluxSheba<double, MemType::CPU, MemType::CPU, MemType::GPU>;
-    template class FluxSheba<double, MemType::CPU, MemType::GPU, MemType::CPU>;
-    template class FluxSheba<double, MemType::GPU, MemType::CPU, MemType::CPU>;
 #endif 
\ No newline at end of file
diff --git a/srcF/sfx_data.f90 b/srcF/sfx_data.f90
index 4d6ceaa686a6ba2f73fa41158c002b6013b5f05a..1cdc8ebfa22ed16266362de6c48577070900fad3 100644
--- a/srcF/sfx_data.f90
+++ b/srcF/sfx_data.f90
@@ -41,12 +41,12 @@ module sfx_data
     !> &details using arrays as input
     type, public :: meteoDataVecType
 #if defined(INCLUDE_CXX)
-        real, pointer :: h(:)       !< constant flux layer height [m]
-        real, pointer :: U(:)       !< abs(wind speed) at 'h' [m/s]
-        real, pointer :: dT(:)      !< difference between potential temperature at 'h' and at surface [K]
-        real, pointer :: Tsemi(:)   !< semi-sum of potential temperature at 'h' and at surface [K]
-        real, pointer :: dQ(:)      !< difference between humidity at 'h' and at surface [g/g]
-        real, pointer :: z0_m(:)    !< surface aerodynamic roughness (should be < 0 for water bodies surface)
+        real, pointer, contiguous :: h(:)       !< constant flux layer height [m]
+        real, pointer, contiguous :: U(:)       !< abs(wind speed) at 'h' [m/s]
+        real, pointer, contiguous :: dT(:)      !< difference between potential temperature at 'h' and at surface [K]
+        real, pointer, contiguous :: Tsemi(:)   !< semi-sum of potential temperature at 'h' and at surface [K]
+        real, pointer, contiguous :: dQ(:)      !< difference between humidity at 'h' and at surface [g/g]
+        real, pointer, contiguous :: z0_m(:)    !< surface aerodynamic roughness (should be < 0 for water bodies surface)
 #else
         real, allocatable :: h(:)       !< constant flux layer height [m]
         real, allocatable :: U(:)       !< abs(wind speed) at 'h' [m/s]
@@ -91,17 +91,17 @@ module sfx_data
     !> &details using arrays as output
     type, public :: sfxDataVecType
 #if defined(INCLUDE_CXX)
-        real, pointer :: zeta(:)            !< = z/L [n/d]
-        real, pointer :: Rib(:)             !< bulk Richardson number [n/d]
-        real, pointer :: Re(:)              !< Reynolds number [n/d]
-        real, pointer :: B(:)               !< = log(z0_m / z0_h) [n/d]
-        real, pointer :: z0_m(:)            !< aerodynamic roughness [m]
-        real, pointer :: z0_t(:)            !< thermal roughness [m]
-        real, pointer :: Rib_conv_lim(:)    !< Ri-bulk convection critical value [n/d]
-        real, pointer :: Cm(:)              !< transfer coefficient for momentum [n/d]
-        real, pointer :: Ct(:)              !< transfer coefficient for heat [n/d]
-        real, pointer :: Km(:)              !< eddy viscosity coeff. at h [m^2/s]
-        real, pointer :: Pr_t_inv(:)        !< inverse turbulent Prandtl number at h [n/d]
+        real, pointer, contiguous :: zeta(:)            !< = z/L [n/d]
+        real, pointer, contiguous :: Rib(:)             !< bulk Richardson number [n/d]
+        real, pointer, contiguous :: Re(:)              !< Reynolds number [n/d]
+        real, pointer, contiguous :: B(:)               !< = log(z0_m / z0_h) [n/d]
+        real, pointer, contiguous :: z0_m(:)            !< aerodynamic roughness [m]
+        real, pointer, contiguous :: z0_t(:)            !< thermal roughness [m]
+        real, pointer, contiguous :: Rib_conv_lim(:)    !< Ri-bulk convection critical value [n/d]
+        real, pointer, contiguous :: Cm(:)              !< transfer coefficient for momentum [n/d]
+        real, pointer, contiguous :: Ct(:)              !< transfer coefficient for heat [n/d]
+        real, pointer, contiguous :: Km(:)              !< eddy viscosity coeff. at h [m^2/s]
+        real, pointer, contiguous :: Pr_t_inv(:)        !< inverse turbulent Prandtl number at h [n/d]
 #else
         real, allocatable :: zeta(:)            !< = z/L [n/d]
         real, allocatable :: Rib(:)             !< bulk Richardson number [n/d]
@@ -137,6 +137,7 @@ module sfx_data
         integer(C_INT) :: surface_land
         integer(C_INT) :: surface_lake
 
+        real(C_FLOAT)  :: kappa;
         real(C_FLOAT)  :: gamma_c;
         real(C_FLOAT)  :: Re_visc_min;
         real(C_FLOAT)  :: h_charnock;
diff --git a/srcF/sfx_fc_wrapper.F90 b/srcF/sfx_fc_wrapper.F90
index de16ec388125675daba6c2719445c832623174d5..0e84bb3f56d8e6b0134a4e40ea53118d72ff13bc 100644
--- a/srcF/sfx_fc_wrapper.F90
+++ b/srcF/sfx_fc_wrapper.F90
@@ -32,30 +32,6 @@ module C_FUNC
 
     contains
 
-    subroutine set_c_struct_sfx_surface_param_values(surface_param)
-        use sfx_data
-        use sfx_surface
-        implicit none
-        type (sfx_surface_param), intent(inout) :: surface_param
-        surface_param%surface_ocean = surface_ocean
-        surface_param%surface_land = surface_land
-        surface_param%surface_lake = surface_lake
-
-        surface_param%gamma_c = gamma_c
-        surface_param%Re_visc_min = Re_visc_min
-        surface_param%h_charnock = h_charnock
-        surface_param%c1_charnock = c1_charnock
-        surface_param%c2_charnock = c2_charnock
-        surface_param%Re_rough_min = Re_rough_min
-        surface_param%B1_rough = B1_rough
-        surface_param%B2_rough = B2_rough
-        surface_param%B3_rough = B3_rough
-        surface_param%B4_rough = B4_rough
-        surface_param%B_max_lake = B_max_lake
-        surface_param%B_max_ocean = B_max_ocean
-        surface_param%B_max_land = B_max_land
-    end subroutine set_c_struct_sfx_surface_param_values
-
     subroutine set_c_struct_sfx_phys_constants_values(constants)
         use sfx_data
         use sfx_phys_const
diff --git a/srcF/sfx_sheba.f90 b/srcF/sfx_sheba.f90
index e0ae6868c86c8df86afa77b65135040de957bca7..ffa5ae399323d619259a9d845596fad262475432 100644
--- a/srcF/sfx_sheba.f90
+++ b/srcF/sfx_sheba.f90
@@ -90,7 +90,7 @@ contains
         call set_meteo_vec_c(meteo, meteo_c)
         call set_sfx_vec_c(sfx, sfx_c)
 
-        ! call get_surface_fluxes_sheba(c_loc(sfx_c), c_loc(meteo_c), model_param, surface_param, numerics_c, phys_constants, n)
+        call get_surface_fluxes_sheba(c_loc(sfx_c), c_loc(meteo_c), model_param, surface_param, numerics_c, phys_constants, n)
 
         deallocate(meteo_c)
         deallocate(sfx_c)
diff --git a/srcF/sfx_surface.f90 b/srcF/sfx_surface.f90
index d5e369902c2992bfec5620cf08e1e4ea95637fb9..79ebca981a43d1dc6b1bfe91a939f78aacc26b59 100644
--- a/srcF/sfx_surface.f90
+++ b/srcF/sfx_surface.f90
@@ -15,6 +15,9 @@ module sfx_surface
 
     ! public interface
     ! --------------------------------------------------------------------------------
+#if defined(INCLUDE_CXX)
+    public :: set_c_struct_sfx_surface_param_values
+#endif
     public :: get_charnock_roughness
     public :: get_thermal_roughness
     ! --------------------------------------------------------------------------------
@@ -104,6 +107,31 @@ contains
 
     end function
 
+#if defined(INCLUDE_CXX)
+    subroutine set_c_struct_sfx_surface_param_values(surface_param)
+        use sfx_data
+        implicit none
+        type (sfx_surface_param), intent(inout) :: surface_param
+        surface_param%surface_ocean = surface_ocean
+        surface_param%surface_land = surface_land
+        surface_param%surface_lake = surface_lake
+
+        surface_param%kappa = kappa
+        surface_param%gamma_c = gamma_c
+        surface_param%Re_visc_min = Re_visc_min
+        surface_param%h_charnock = h_charnock
+        surface_param%c1_charnock = c1_charnock
+        surface_param%c2_charnock = c2_charnock
+        surface_param%Re_rough_min = Re_rough_min
+        surface_param%B1_rough = B1_rough
+        surface_param%B2_rough = B2_rough
+        surface_param%B3_rough = B3_rough
+        surface_param%B4_rough = B4_rough
+        surface_param%B_max_lake = B_max_lake
+        surface_param%B_max_ocean = B_max_ocean
+        surface_param%B_max_land = B_max_land
+    end subroutine set_c_struct_sfx_surface_param_values
+#endif
     ! charnock roughness definition
     ! --------------------------------------------------------------------------------
     subroutine get_charnock_roughness(z0_m, u_dyn0, U, h, maxiters)