diff --git a/CMakeLists.txt b/CMakeLists.txt
index f38adb2ab6aac4454be4d06ce13764b9f0d132fb..6bf4dd386921337b7f0cafc5f27e31907ab41dae 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,6 +55,10 @@ set(SOURCES_F
     srcF/sfx_main.f90
     srcF/sfx_phys_const.f90
     srcF/sfx_surface.f90
+    srcF/sfx_most.f90
+    srcF/sfx_most_param.f90
+    srcF/sfx_sheba.f90
+    srcF/sfx_sheba_param.f90
     srcF/sfx_fc_wrapper.F90
 )
 
diff --git a/includeCU/sfx_flux_compute_func.cuh b/includeCU/sfx_compute_esm.cuh
similarity index 81%
rename from includeCU/sfx_flux_compute_func.cuh
rename to includeCU/sfx_compute_esm.cuh
index 2628d5d4a44a1e5baedddc7a1913683d9af4c9c6..df2f94969a65182639b977541485ac4f044c7d2c 100644
--- a/includeCU/sfx_flux_compute_func.cuh
+++ b/includeCU/sfx_compute_esm.cuh
@@ -1,7 +1,7 @@
 #pragma once 
 
 template<typename T>
-void compute_flux_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+void compute_flux_esm_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
     const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
     const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
     const T alpha_m, const T alpha_h, const T alpha_h_fix, 
diff --git a/includeCXX/sfx_flux_compute_func.h b/includeCXX/sfx_compute_esm.h
similarity index 81%
rename from includeCXX/sfx_flux_compute_func.h
rename to includeCXX/sfx_compute_esm.h
index 2cfa0f408af3092c1da29b99e7891f3b8428205c..1d7564efa2c9acfe627f8bf75ee044192438b9fe 100644
--- a/includeCXX/sfx_flux_compute_func.h
+++ b/includeCXX/sfx_compute_esm.h
@@ -1,7 +1,7 @@
 #pragma once
 
 template<typename T>
-void compute_flux_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+void compute_flux_esm_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
     const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
     const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
     const T alpha_m, const T alpha_h, const T alpha_h_fix, 
diff --git a/includeCXX/sfx_compute_sheba.h b/includeCXX/sfx_compute_sheba.h
new file mode 100644
index 0000000000000000000000000000000000000000..92516a2746fa4c9c4116fb4a9b990dbee0c83667
--- /dev/null
+++ b/includeCXX/sfx_compute_sheba.h
@@ -0,0 +1,14 @@
+#pragma once
+
+template<typename T>
+void compute_flux_sheba_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
+    const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
+    const T alpha_m, const T alpha_h, const T alpha_h_fix, 
+    const T beta_m, const T beta_h, const T Rib_max, const T Re_rough_min, 
+    const T B1_rough, const T B2_rough,
+    const T B_max_land, const T B_max_ocean, const T B_max_lake,
+    const T gamma_c, const T Re_visc_min,
+    const T Pr_m, const T nu_air, const T g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size);
\ No newline at end of file
diff --git a/includeCXX/sfx_flux.h b/includeCXX/sfx_esm.h
similarity index 92%
rename from includeCXX/sfx_flux.h
rename to includeCXX/sfx_esm.h
index 8dcb5f09a6e8d239c409701a181bd09e69ec89fe..5f47957ae5fa5a6b4ffc4bf809486395a09aa517 100644
--- a/includeCXX/sfx_flux.h
+++ b/includeCXX/sfx_esm.h
@@ -2,8 +2,8 @@
 #include "sfx_template_parameters.h"
 #include <cstddef>
 
-template<typename T, MemType RunMem, MemType memIn>
-class Flux
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+class FluxEsm
 {
 private:
     T *U, *dT, *Tsemi, *dQ, *h, *in_z0_m;
@@ -23,7 +23,7 @@ private:
     bool ifAllocated;
     size_t allocated_size;
 public:
-    Flux();
+    FluxEsm();
     void set_params(const int grid_size, const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
     const T alpha_m, const T alpha_h, const T alpha_h_fix, 
     const T beta_m, const T beta_h, const T Rib_max, const T Re_rough_min, 
@@ -31,7 +31,7 @@ public:
     const T B_max_land, const T B_max_ocean, const T B_max_lake,
     const T gamma_c, const T Re_visc_min,
     const T Pr_m, const T nu_air, const T g);
-    ~Flux();
+    ~FluxEsm();
 
     void compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
     T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_, 
diff --git a/includeCXX/sfx_sheba.h b/includeCXX/sfx_sheba.h
new file mode 100644
index 0000000000000000000000000000000000000000..3b2cfd37e99d3b04563fbb445f4ea19eac0b78dd
--- /dev/null
+++ b/includeCXX/sfx_sheba.h
@@ -0,0 +1,43 @@
+#pragma once
+#include "sfx_template_parameters.h"
+#include <cstddef>
+
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+class FluxSheba
+{
+private:
+    T *U, *dT, *Tsemi, *dQ, *h, *in_z0_m;
+    T *zeta, *Rib, *Re, *B, *z0_m, *z0_t, *Rib_conv_lim, *Cm, *Ct, *Km, *Pr_t_inv;
+
+    // T kappa, Pr_t_0_inv, Pr_t_inf_inv, 
+    // alpha_m, alpha_h, alpha_h_fix, 
+    // beta_m, beta_h, 
+    // Rib_max, Re_rough_min, 
+    // B1_rough, B2_rough, 
+    // B_max_land, B_max_ocean, B_max_lake,
+    // gamma_c,  
+    // Re_visc_min,
+    // Pr_m, nu_air, g;
+
+    int grid_size;
+    bool ifAllocated;
+    size_t allocated_size;
+public:
+    FluxSheba();
+    void set_params(const int grid_size, const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
+    const T alpha_m, const T alpha_h, const T alpha_h_fix, 
+    const T beta_m, const T beta_h, const T Rib_max, const T Re_rough_min, 
+    const T B1_rough, const T B2_rough,
+    const T B_max_land, const T B_max_ocean, const T B_max_lake,
+    const T gamma_c, const T Re_visc_min,
+    const T Pr_m, const T nu_air, const T g);
+    ~Flux();
+
+    void compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+    T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_, 
+    const int maxiters_charnock, const int maxiters_convection);
+
+private:
+    void set_data( T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+    T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_);
+};
\ No newline at end of file
diff --git a/srcCU/sfx_flux_compute_func.cu b/srcCU/sfx_compute_esm.cu
similarity index 96%
rename from srcCU/sfx_flux_compute_func.cu
rename to srcCU/sfx_compute_esm.cu
index ff912b9a4c628ee9e15769e8e6eee1e1b57903c0..b9c4e2cb1d3ad48c1e0de91f59553eecd40bcd0f 100644
--- a/srcCU/sfx_flux_compute_func.cu
+++ b/srcCU/sfx_compute_esm.cu
@@ -1,6 +1,6 @@
 #include <cmath>
 #include <iostream>
-#include "../includeCU/sfx_flux_compute_func.cuh"
+#include "../includeCU/sfx_compute_esm.cuh"
 
 template<typename T>
 __device__ void get_charnock_roughness(T &z0_m, T &u_dyn0,
@@ -237,7 +237,7 @@ template __device__ void get_psi_semi_convection(double &psi_m, double &psi_h, d
     const int maxiters);
 
 template<typename T>
-__global__ void compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+__global__ void compute_flux_esm_gpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
     const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
     const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
     const T alpha_m, const T alpha_h, const T alpha_h_fix, 
@@ -351,7 +351,7 @@ __global__ void compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t
     }
 }
 
-template __global__ void compute_flux(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
+template __global__ void compute_flux_esm_gpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
     const float *U, const float *dt, const float *T_semi, const float *dq, const float *H, const float *in_z0_m,
     const float kappa, const float Pr_t_0_inv, const float Pr_t_inf_inv, 
     const float alpha_m, const float alpha_h, const float alpha_h_fix, 
@@ -362,7 +362,7 @@ template __global__ void compute_flux(float *zeta_, float *Rib_, float *Re_, flo
     const float Pr_m, const float nu_air, const float g, 
     const int maxiters_charnock, const int maxiters_convection, 
     const int grid_size);
-template __global__ void compute_flux(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
+template __global__ void compute_flux_esm_gpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
     const double *U, const double *dt, const double *T_semi, const double *dq, const double *H, const double *in_z0_m, 
     const double kappa, const double Pr_t_0_inv, const double Pr_t_inf_inv, 
     const double alpha_m, const double alpha_h, const double alpha_h_fix, 
diff --git a/srcCXX/sfx_call_class_func.cpp b/srcCXX/sfx_call_class_func.cpp
index 2243e0d8063a473c9044311ccd5ba19c30758516..2090d62de818b6b7fa995c13c118259d8aa9b011 100644
--- a/srcCXX/sfx_call_class_func.cpp
+++ b/srcCXX/sfx_call_class_func.cpp
@@ -1,15 +1,9 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "../includeCXX/sfx_call_class_func.h"
-#include "../includeCXX/sfx_flux.h"
+#include "../includeCXX/sfx_esm.h"
 #include <vector>
 
-#ifdef INCLUDE_CUDA
-    Flux<float, MemType::GPU, MemType::CPU> F;
-#else
-    Flux<float, MemType::CPU, MemType::CPU> F;
-#endif
-
 // -------------------------------------------------------------------------- //
 void surf_flux_CXX (float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
     float *U_, float *dT_, float *Tsemi_, float *dQ_, float *h_, float *in_z0_m_,
@@ -23,6 +17,12 @@ void surf_flux_CXX (float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_
     const int maxiters_charnock, const int maxiters_convection, 
     const int grid_size)
 {
+#ifdef INCLUDE_CUDA
+    static FluxEsm<float, MemType::CPU, MemType::CPU, MemType::GPU> F;
+#else
+    static FluxEsm<float, MemType::CPU, MemType::CPU, MemType::CPU> F;
+#endif
+
     F.set_params(grid_size, kappa, Pr_t_0_inv, Pr_t_inf_inv, 
     alpha_m, alpha_h, alpha_h_fix, 
     beta_m, beta_h, Rib_max, Re_rough_min, 
diff --git a/srcCXX/sfx_flux_compute_func.cpp b/srcCXX/sfx_compute_esm.cpp
similarity index 95%
rename from srcCXX/sfx_flux_compute_func.cpp
rename to srcCXX/sfx_compute_esm.cpp
index 1e6bebb16a0add5ff9621a5e52e25fb5b5a1d4c2..b50f49d160113b7e9242c0eb1b88261cf552b3c7 100644
--- a/srcCXX/sfx_flux_compute_func.cpp
+++ b/srcCXX/sfx_compute_esm.cpp
@@ -1,6 +1,6 @@
 #include <cmath>
 #include <iostream>
-#include "../includeCXX/sfx_flux_compute_func.h"
+#include "../includeCXX/sfx_compute_esm.h"
 
 template<typename T>
 void get_charnock_roughness(T &z0_m, T &u_dyn0,
@@ -237,7 +237,7 @@ template void get_psi_semi_convection(double &psi_m, double &psi_h, double &zeta
     const int maxiters);
 
 template<typename T>
-void compute_flux_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+void compute_flux_esm_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
     const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
     const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
     const T alpha_m, const T alpha_h, const T alpha_h_fix, 
@@ -349,7 +349,7 @@ void compute_flux_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *R
     }
 }
 
-template void compute_flux_cpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
+template void compute_flux_esm_cpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
     const float *U, const float *dt, const float *T_semi, const float *dq, const float *H, const float *in_z0_m,
     const float kappa, const float Pr_t_0_inv, const float Pr_t_inf_inv, 
     const float alpha_m, const float alpha_h, const float alpha_h_fix, 
@@ -360,7 +360,7 @@ template void compute_flux_cpu(float *zeta_, float *Rib_, float *Re_, float *B_,
     const float Pr_m, const float nu_air, const float g, 
     const int maxiters_charnock, const int maxiters_convection, 
     const int grid_size);
-template void compute_flux_cpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
+template void compute_flux_esm_cpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
     const double *U, const double *dt, const double *T_semi, const double *dq, const double *H, const double *in_z0_m, 
     const double kappa, const double Pr_t_0_inv, const double Pr_t_inf_inv, 
     const double alpha_m, const double alpha_h, const double alpha_h_fix, 
diff --git a/srcCXX/sfx_compute_sheba.cpp b/srcCXX/sfx_compute_sheba.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ad420c01c487995b84541f6d8743e5e2e0ddf0b2
--- /dev/null
+++ b/srcCXX/sfx_compute_sheba.cpp
@@ -0,0 +1,112 @@
+#include <cmath>
+#include <iostream>
+#include "../includeCXX/sfx_compute_sheba.h"
+
+template<typename T>
+void compute_flux_sheba_cpu(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+    const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
+    const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
+    const T alpha_m, const T alpha_h, const T alpha_h_fix, 
+    const T beta_m, const T beta_h, const T Rib_max, const T Re_rough_min, 
+    const T B1_rough, const T B2_rough,
+    const T B_max_land, const T B_max_ocean, const T B_max_lake,
+    const T gamma_c, const T Re_visc_min,
+    const T Pr_m, const T nu_air, const T g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size)
+{
+    T h, U, dT, Tsemi, dQ, z0_m;
+    T z0_t, B, h0_m, h0_t, u_dyn0, Re, 
+    zeta, Rib, Udyn, Tdyn, Qdyn, phi_m, phi_h,
+    Km, Pr_t_inv, Cm, Ct;
+
+    int surface_type;
+
+    for (int step = 0; step < grid_size; step++)
+    {
+        U = U_[step];
+        Tsemi = Tsemi_[step];
+        dT = dT_[step];
+        dQ = dQ_[step];
+        h = h_[step];
+        z0_m = in_z0_m_[step];
+
+        if (z0_m < 0.0) surface_type = 0;
+        else            surface_type = 1;
+
+        if (surface_type == surface_ocean) 
+        {
+            get_charnock_roughness(z0_m, u_dyn0, U, h, numerics%maxiters_charnock);
+            h0_m = h / z0_m;
+        }
+        if (surface_type == surface_land) 
+        {
+            h0_m = h / z0_m;
+            u_dyn0 = U * kappa / log(h0_m);
+        }
+
+        Re = u_dyn0 * z0_m / nu_air;
+        get_thermal_roughness(z0_t, B, z0_m, Re, surface_type);
+
+        // --- define relative height [thermal]
+        h0_t = h / z0_t;
+
+        // --- define Ri-bulk
+        Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
+
+        // --- get the fluxes
+        // ----------------------------------------------------------------------------
+        get_dynamic_scales(Udyn, Tdyn, Qdyn, zeta, U, Tsemi, dT, dQ, h, z0_m, z0_t, (g / Tsemi), 10);
+        // ----------------------------------------------------------------------------
+
+        get_phi(phi_m, phi_h, zeta);
+        // ----------------------------------------------------------------------------
+
+        // --- define transfer coeff. (momentum) & (heat)
+        Cm = 0.0;
+        if (U > 0.0)
+            Cm = Udyn / U;
+        Ct = 0.0;
+        if (fabs(dT) > 0.0) 
+            Ct = Tdyn / dT;
+
+        // --- define eddy viscosity & inverse Prandtl number
+        Km = kappa * Cm * U * h / phi_m;
+        Pr_t_inv = phi_m / phi_h;
+
+        zeta_[step]         = zeta;
+        Rib_[step]          = Rib;
+        Re_[step]           = Re;
+        B_[step]            = B;
+        z0_m_[step]         = z0_m;
+        z0_t_[step]         = z0_t;
+        Rib_conv_lim_[step] = Rib_conv_lim;
+        Cm_[step]           = Cm;
+        Ct_[step]           = Ct;
+        Km_[step]           = Km;
+        Pr_t_inv_[step]     = Pr_t_inv;
+    }
+}
+
+template void compute_flux_sheba_cpu(float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
+    const float *U, const float *dt, const float *T_semi, const float *dq, const float *H, const float *in_z0_m,
+    const float kappa, const float Pr_t_0_inv, const float Pr_t_inf_inv, 
+    const float alpha_m, const float alpha_h, const float alpha_h_fix, 
+    const float beta_m, const float beta_h, const float Rib_max, const float Re_rough_min, 
+    const float B1_rough, const float B2_rough,
+    const float B_max_land, const float B_max_ocean, const float B_max_lake,
+    const float gamma_c, const float Re_visc_min,
+    const float Pr_m, const float nu_air, const float g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size);
+template void compute_flux_sheba_cpu(double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
+    const double *U, const double *dt, const double *T_semi, const double *dq, const double *H, const double *in_z0_m, 
+    const double kappa, const double Pr_t_0_inv, const double Pr_t_inf_inv, 
+    const double alpha_m, const double alpha_h, const double alpha_h_fix, 
+    const double beta_m, const double beta_h, const double Rib_max, const double Re_rough_min, 
+    const double B1_rough, const double B2_rough,
+    const double B_max_land, const double B_max_ocean, const double B_max_lake,
+    const double gamma_c, const double Re_visc_min,
+    const double Pr_m, const double nu_air, const double g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size);
\ No newline at end of file
diff --git a/srcCXX/sfx_flux.cpp b/srcCXX/sfx_esm.cpp
similarity index 57%
rename from srcCXX/sfx_flux.cpp
rename to srcCXX/sfx_esm.cpp
index 8f70e1503a7d2e921ccc8f990121120fde053081..5564c28b181e944c85c2441a3126a66322e5a797 100644
--- a/srcCXX/sfx_flux.cpp
+++ b/srcCXX/sfx_esm.cpp
@@ -1,16 +1,16 @@
 #include <iostream>
 
-#include "../includeCXX/sfx_flux.h"
-#include "../includeCXX/sfx_flux_compute_func.h"
+#include "../includeCXX/sfx_esm.h"
+#include "../includeCXX/sfx_compute_esm.h"
 #ifdef INCLUDE_CUDA
-    #include "../includeCU/sfx_flux_compute_func.cuh"
+    #include "../includeCU/sfx_compute_esm.cuh"
     #include "../includeCU/sfx_memory_processing.cuh"
 #endif
 
 #include "../includeCXX/sfx_memory_processing.h"
 
-template<typename T, MemType RunMem, MemType memIn>
-Flux<T, RunMem, memIn>::Flux()
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+FluxEsm<T, memIn, memOut, RunMem>::FluxEsm()
 {
     kappa = 0, Pr_t_0_inv = 0, Pr_t_inf_inv = 0, 
     alpha_m = 0, alpha_h = 0, alpha_h_fix = 0, 
@@ -28,8 +28,8 @@ Flux<T, RunMem, memIn>::Flux()
     allocated_size = 0;
 }
 
-template<typename T, MemType RunMem, MemType memIn>
-void Flux<T, RunMem, memIn>::set_params(const int grid_size_, const T kappa_, const T Pr_t_0_inv_, const T Pr_t_inf_inv_, 
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+void FluxEsm<T, memIn, memOut, RunMem>::set_params(const int grid_size_, const T kappa_, const T Pr_t_0_inv_, const T Pr_t_inf_inv_, 
     const T alpha_m_, const T alpha_h_, const T alpha_h_fix_, 
     const T beta_m_, const T beta_h_, const T Rib_max_, const T Re_rough_min_, 
     const T B1_rough_, const T B2_rough_,
@@ -65,7 +65,10 @@ void Flux<T, RunMem, memIn>::set_params(const int grid_size_, const T kappa_, co
         memproc::realloc<RunMem>((void *&)(h), allocated_size, new_size);
         allocated_size = 0;
         memproc::realloc<RunMem>((void *&)(in_z0_m), allocated_size, new_size);
-
+    }
+    if(RunMem != memOut)
+    {
+        const size_t new_size = grid_size * sizeof(T);
         allocated_size = 0;
         memproc::realloc<RunMem>((void *&)(zeta), allocated_size, new_size);
         allocated_size = 0;
@@ -93,8 +96,8 @@ void Flux<T, RunMem, memIn>::set_params(const int grid_size_, const T kappa_, co
     }
 }
 
-template<typename T, MemType RunMem, MemType memIn>
-Flux<T, RunMem, memIn>::~Flux()
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+FluxEsm<T, memIn, memOut, RunMem>::~FluxEsm()
 {
     kappa = 0, Pr_t_0_inv = 0, Pr_t_inf_inv = 0, 
     alpha_m = 0, alpha_h = 0, alpha_h_fix = 0, 
@@ -110,31 +113,36 @@ Flux<T, RunMem, memIn>::~Flux()
 
     if(ifAllocated == true)
     {
-        memproc::dealloc<RunMem>((void*&)U);
-        memproc::dealloc<RunMem>((void*&)dT);
-        memproc::dealloc<RunMem>((void*&)Tsemi);
-        memproc::dealloc<RunMem>((void*&)dQ);
-        memproc::dealloc<RunMem>((void*&)h);
-
-        memproc::dealloc<RunMem>((void*&)zeta);
-        memproc::dealloc<RunMem>((void*&)Rib);
-        memproc::dealloc<RunMem>((void*&)Re);
-        memproc::dealloc<RunMem>((void*&)Rib_conv_lim);
-        memproc::dealloc<RunMem>((void*&)z0_m);
-        memproc::dealloc<RunMem>((void*&)z0_t);
-        memproc::dealloc<RunMem>((void*&)B);
-        memproc::dealloc<RunMem>((void*&)Cm);
-        memproc::dealloc<RunMem>((void*&)Ct);
-        memproc::dealloc<RunMem>((void*&)Km);
-        memproc::dealloc<RunMem>((void*&)Pr_t_inv);
+        if(RunMem != memIn)
+        {    
+            memproc::dealloc<RunMem>((void*&)U);
+            memproc::dealloc<RunMem>((void*&)dT);
+            memproc::dealloc<RunMem>((void*&)Tsemi);
+            memproc::dealloc<RunMem>((void*&)dQ);
+            memproc::dealloc<RunMem>((void*&)h);
+        }
+        if(RunMem != memOut)
+        {
+            memproc::dealloc<RunMem>((void*&)zeta);
+            memproc::dealloc<RunMem>((void*&)Rib);
+            memproc::dealloc<RunMem>((void*&)Re);
+            memproc::dealloc<RunMem>((void*&)Rib_conv_lim);
+            memproc::dealloc<RunMem>((void*&)z0_m);
+            memproc::dealloc<RunMem>((void*&)z0_t);
+            memproc::dealloc<RunMem>((void*&)B);
+            memproc::dealloc<RunMem>((void*&)Cm);
+            memproc::dealloc<RunMem>((void*&)Ct);
+            memproc::dealloc<RunMem>((void*&)Km);
+            memproc::dealloc<RunMem>((void*&)Pr_t_inv);
+        }
 
         ifAllocated = false;
         allocated_size = 0;
     }
 }
 
-template<typename T, MemType RunMem, MemType memIn>
-void Flux<T, RunMem, memIn>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+void FluxEsm<T, memIn, memOut, RunMem>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
     T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_)
 {
     if(RunMem == memIn)
@@ -145,18 +153,6 @@ void Flux<T, RunMem, memIn>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_
         dQ = dQ_;
         h = h_;
         in_z0_m = in_z0_m_;
-        
-        zeta = zeta_;
-        Rib = Rib_;
-        Re = Re_;
-        Rib_conv_lim = Rib_conv_lim_;
-        z0_m = z0_m_;
-        z0_t = z0_t_;
-        B = B_;
-        Cm = Cm_;
-        Ct = Ct_;
-        Km = Km_;
-        Pr_t_inv = Pr_t_inv_;
     }
     else
     {
@@ -169,17 +165,32 @@ void Flux<T, RunMem, memIn>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_
         memproc::memcopy<RunMem, memIn>(h, h_, new_size);
         memproc::memcopy<RunMem, memIn>(in_z0_m, in_z0_m_, new_size);
     }
+
+    if(RunMem == memOut)
+    {
+        zeta = zeta_;
+        Rib = Rib_;
+        Re = Re_;
+        Rib_conv_lim = Rib_conv_lim_;
+        z0_m = z0_m_;
+        z0_t = z0_t_;
+        B = B_;
+        Cm = Cm_;
+        Ct = Ct_;
+        Km = Km_;
+        Pr_t_inv = Pr_t_inv_;
+    }
 }
 
-template<typename T, MemType RunMem, MemType memIn>
-void Flux<T, RunMem, memIn>::compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+void FluxEsm<T, memIn, memOut, RunMem>::compute_FluxEsm(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
     T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_, 
     const int maxiters_charnock, const int maxiters_convection)
 {
     set_data(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
     U_, dT_, Tsemi_, dQ_, h_, in_z0_m_);
 
-    if(RunMem == MemType::CPU) compute_flux_cpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv,
+    if(RunMem == MemType::CPU) compute_flux_esm_cpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv,
     U, dT, Tsemi, dQ, h, in_z0_m, 
     kappa, Pr_t_0_inv, Pr_t_inf_inv, 
     alpha_m, alpha_h, alpha_h_fix, 
@@ -192,7 +203,7 @@ void Flux<T, RunMem, memIn>::compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z
     grid_size);
 
 #ifdef INCLUDE_CUDA
-    else compute_flux_gpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv,
+    else compute_flux_esm_gpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv,
     U, dT, Tsemi, dQ, h, in_z0_m, 
     kappa, Pr_t_0_inv, Pr_t_inf_inv, 
     alpha_m, alpha_h, alpha_h_fix, 
@@ -205,33 +216,41 @@ void Flux<T, RunMem, memIn>::compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z
     grid_size);
 #endif
 
-    if(RunMem != memIn)
+    if(RunMem != memOut)
     {
         const size_t new_size = grid_size * sizeof(T);
 
-        memproc::memcopy<memIn, RunMem>(zeta_, zeta, new_size);
-        memproc::memcopy<memIn, RunMem>(Rib_, Rib, new_size);
-        memproc::memcopy<memIn, RunMem>(Re_, Re, new_size);
-        memproc::memcopy<memIn, RunMem>(Rib_conv_lim_, Rib_conv_lim, new_size);
-        memproc::memcopy<memIn, RunMem>(z0_m_, z0_m, new_size);
-        memproc::memcopy<memIn, RunMem>(z0_t_, z0_t, new_size);
-        memproc::memcopy<memIn, RunMem>(B_, B, new_size);
-        memproc::memcopy<memIn, RunMem>(Cm_, Cm, new_size);
-        memproc::memcopy<memIn, RunMem>(Ct_, Ct, new_size);
-        memproc::memcopy<memIn, RunMem>(Km_, Km, new_size);
-        memproc::memcopy<memIn, RunMem>(Pr_t_inv_, Pr_t_inv, new_size);
+        memproc::memcopy<memOut, RunMem>(zeta_, zeta, new_size);
+        memproc::memcopy<memOut, RunMem>(Rib_, Rib, new_size);
+        memproc::memcopy<memOut, RunMem>(Re_, Re, new_size);
+        memproc::memcopy<memOut, RunMem>(Rib_conv_lim_, Rib_conv_lim, new_size);
+        memproc::memcopy<memOut, RunMem>(z0_m_, z0_m, new_size);
+        memproc::memcopy<memOut, RunMem>(z0_t_, z0_t, new_size);
+        memproc::memcopy<memOut, RunMem>(B_, B, new_size);
+        memproc::memcopy<memOut, RunMem>(Cm_, Cm, new_size);
+        memproc::memcopy<memOut, RunMem>(Ct_, Ct, new_size);
+        memproc::memcopy<memOut, RunMem>(Km_, Km, new_size);
+        memproc::memcopy<memOut, RunMem>(Pr_t_inv_, Pr_t_inv, new_size);
     }
 }
 
-template class Flux<float, MemType::CPU, MemType::CPU>;
-template class Flux<double, MemType::CPU, MemType::CPU>;
+template class FluxEsm<float, MemType::CPU, MemType::CPU, MemType::CPU>;
+template class FluxEsm<double, MemType::CPU, MemType::CPU, MemType::CPU>;
 
 #ifdef INCLUDE_CUDA
-    template class Flux<float, MemType::GPU, MemType::GPU>;
-    template class Flux<float, MemType::GPU, MemType::CPU>;
-    template class Flux<float, MemType::CPU, MemType::GPU>;
+    template class FluxEsm<float, MemType::GPU, MemType::GPU, MemType::GPU>;
+    template class FluxEsm<float, MemType::GPU, MemType::GPU, MemType::CPU>;
+    template class FluxEsm<float, MemType::GPU, MemType::CPU, MemType::GPU>;
+    template class FluxEsm<float, MemType::CPU, MemType::GPU, MemType::GPU>;
+    template class FluxEsm<float, MemType::CPU, MemType::CPU, MemType::GPU>;
+    template class FluxEsm<float, MemType::CPU, MemType::GPU, MemType::CPU>;
+    template class FluxEsm<float, MemType::GPU, MemType::CPU, MemType::CPU>;
 
-    template class Flux<double, MemType::GPU, MemType::GPU>;
-    template class Flux<double, MemType::GPU, MemType::CPU>;
-    template class Flux<double, MemType::CPU, MemType::GPU>;
+    template class FluxEsm<double, MemType::GPU, MemType::GPU, MemType::GPU>;
+    template class FluxEsm<double, MemType::GPU, MemType::GPU, MemType::CPU>;
+    template class FluxEsm<double, MemType::GPU, MemType::CPU, MemType::GPU>;
+    template class FluxEsm<double, MemType::CPU, MemType::GPU, MemType::GPU>;
+    template class FluxEsm<double, MemType::CPU, MemType::CPU, MemType::GPU>;
+    template class FluxEsm<double, MemType::CPU, MemType::GPU, MemType::CPU>;
+    template class FluxEsm<double, MemType::GPU, MemType::CPU, MemType::CPU>;
 #endif 
\ No newline at end of file
diff --git a/srcCXX/sfx_sheba.cpp b/srcCXX/sfx_sheba.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5edfa73250b09fa9e1c6c760390f2743eb90906f
--- /dev/null
+++ b/srcCXX/sfx_sheba.cpp
@@ -0,0 +1,256 @@
+#include <iostream>
+
+#include "../includeCXX/sfx_sheba.h"
+#include "../includeCXX/sfx_compute_sheba.h"
+#ifdef INCLUDE_CUDA
+    #include "../includeCU/sfx_compute_sheba.cuh"
+    #include "../includeCU/sfx_memory_processing.cuh"
+#endif
+
+#include "../includeCXX/sfx_memory_processing.h"
+
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+FluxSheba<T, memIn, memOut, RunMem>::FluxSheba()
+{
+    kappa = 0, Pr_t_0_inv = 0, Pr_t_inf_inv = 0, 
+    alpha_m = 0, alpha_h = 0, alpha_h_fix = 0, 
+    beta_m = 0, beta_h = 0, 
+    Rib_max = 0, Re_rough_min = 0, 
+    B1_rough = 0, B2_rough = 0, 
+    B_max_land = 0, B_max_ocean = 0, B_max_lake = 0,
+    gamma_c = 0,  
+    Re_visc_min = 0,
+    Pr_m = 0, nu_air = 0, g = 0;
+
+    grid_size = 0;
+
+    ifAllocated = false;
+    allocated_size = 0;
+}
+
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+void FluxSheba<T, memIn, memOut, RunMem>::set_params(const int grid_size_, const T kappa_, const T Pr_t_0_inv_, const T Pr_t_inf_inv_, 
+    const T alpha_m_, const T alpha_h_, const T alpha_h_fix_, 
+    const T beta_m_, const T beta_h_, const T Rib_max_, const T Re_rough_min_, 
+    const T B1_rough_, const T B2_rough_,
+    const T B_max_land_, const T B_max_ocean_, const T B_max_lake_,
+    const T gamma_c_, const T Re_visc_min_,
+    const T Pr_m_, const T nu_air_, const T g_)
+{
+    kappa = kappa_, Pr_t_0_inv = Pr_t_0_inv_, Pr_t_inf_inv = Pr_t_inf_inv_, 
+    alpha_m = alpha_m_, alpha_h = alpha_h_, alpha_h_fix = alpha_h_fix_, 
+    beta_m = beta_m_, beta_h = beta_h_, 
+    Rib_max = Rib_max_, Re_rough_min = Re_rough_min_, B_max_lake = B_max_lake_,
+    B1_rough = B1_rough_, B2_rough = B2_rough_, 
+    B_max_land = B_max_land_, B_max_ocean = B_max_ocean_,
+    gamma_c = gamma_c_,  
+    Re_visc_min = Re_visc_min_,
+    Pr_m = Pr_m_, nu_air = nu_air_, g = g_;
+
+    grid_size = grid_size_;
+
+    if(RunMem != memIn)
+    {
+        const size_t new_size = grid_size * sizeof(T);
+
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(U), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(dT), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Tsemi), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(dQ), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(h), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(in_z0_m), allocated_size, new_size);
+    }
+    if(RunMem != memOut)
+    {
+        const size_t new_size = grid_size * sizeof(T);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(zeta), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Rib), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Re), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Rib_conv_lim), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(z0_m), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(z0_t), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(B), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Cm), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Ct), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Km), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Pr_t_inv), allocated_size, new_size);
+
+        ifAllocated = true;
+    }
+}
+
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+FluxSheba<T, memIn, memOut, RunMem>::~FluxSheba()
+{
+    kappa = 0, Pr_t_0_inv = 0, Pr_t_inf_inv = 0, 
+    alpha_m = 0, alpha_h = 0, alpha_h_fix = 0, 
+    beta_m = 0, beta_h = 0, 
+    Rib_max = 0, Re_rough_min = 0, 
+    B1_rough = 0, B2_rough = 0, 
+    B_max_land = 0, B_max_ocean = 0, B_max_lake = 0,
+    gamma_c = 0,  
+    Re_visc_min = 0,
+    Pr_m = 0, nu_air = 0, g = 0;
+
+    grid_size = 0;
+
+    if(ifAllocated == true)
+    {
+        if(RunMem != memIn)
+        {    
+            memproc::dealloc<RunMem>((void*&)U);
+            memproc::dealloc<RunMem>((void*&)dT);
+            memproc::dealloc<RunMem>((void*&)Tsemi);
+            memproc::dealloc<RunMem>((void*&)dQ);
+            memproc::dealloc<RunMem>((void*&)h);
+        }
+        if(RunMem != memOut)
+        {
+            memproc::dealloc<RunMem>((void*&)zeta);
+            memproc::dealloc<RunMem>((void*&)Rib);
+            memproc::dealloc<RunMem>((void*&)Re);
+            memproc::dealloc<RunMem>((void*&)Rib_conv_lim);
+            memproc::dealloc<RunMem>((void*&)z0_m);
+            memproc::dealloc<RunMem>((void*&)z0_t);
+            memproc::dealloc<RunMem>((void*&)B);
+            memproc::dealloc<RunMem>((void*&)Cm);
+            memproc::dealloc<RunMem>((void*&)Ct);
+            memproc::dealloc<RunMem>((void*&)Km);
+            memproc::dealloc<RunMem>((void*&)Pr_t_inv);
+        }
+
+        ifAllocated = false;
+        allocated_size = 0;
+    }
+}
+
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+void FluxSheba<T, memIn, memOut, RunMem>::set_data(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+    T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_)
+{
+    if(RunMem == memIn)
+    {
+        U = U_;
+        dT = dT_;
+        Tsemi = Tsemi_;
+        dQ = dQ_;
+        h = h_;
+        in_z0_m = in_z0_m_;
+    }
+    else
+    {
+        const size_t new_size = grid_size * sizeof(T);
+
+        memproc::memcopy<RunMem, memIn>(U, U_, new_size);
+        memproc::memcopy<RunMem, memIn>(dT, dT_, new_size);
+        memproc::memcopy<RunMem, memIn>(Tsemi, Tsemi_, new_size);
+        memproc::memcopy<RunMem, memIn>(dQ, dQ_, new_size);
+        memproc::memcopy<RunMem, memIn>(h, h_, new_size);
+        memproc::memcopy<RunMem, memIn>(in_z0_m, in_z0_m_, new_size);
+    }
+
+    if(RunMem == memOut)
+    {
+        zeta = zeta_;
+        Rib = Rib_;
+        Re = Re_;
+        Rib_conv_lim = Rib_conv_lim_;
+        z0_m = z0_m_;
+        z0_t = z0_t_;
+        B = B_;
+        Cm = Cm_;
+        Ct = Ct_;
+        Km = Km_;
+        Pr_t_inv = Pr_t_inv_;
+    }
+}
+
+template<typename T, MemType memIn, MemType memOut, MemType RunMem >
+void FluxSheba<T, memIn, memOut, RunMem>::compute_FluxSheba(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+    T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_, 
+    const int maxiters_charnock, const int maxiters_convection)
+{
+    set_data(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
+    U_, dT_, Tsemi_, dQ_, h_, in_z0_m_);
+
+    if(RunMem == MemType::CPU) compute_FluxSheba_cpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv,
+    U, dT, Tsemi, dQ, h, in_z0_m, 
+    kappa, Pr_t_0_inv, Pr_t_inf_inv, 
+    alpha_m, alpha_h, alpha_h_fix, 
+    beta_m, beta_h, Rib_max, Re_rough_min, 
+    B1_rough, B2_rough,
+    B_max_land, B_max_ocean, B_max_lake,
+    gamma_c, Re_visc_min,
+    Pr_m, nu_air, g, 
+    maxiters_charnock, maxiters_convection, 
+    grid_size);
+
+#ifdef INCLUDE_CUDA
+    else compute_FluxSheba_gpu(zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv,
+    U, dT, Tsemi, dQ, h, in_z0_m, 
+    kappa, Pr_t_0_inv, Pr_t_inf_inv, 
+    alpha_m, alpha_h, alpha_h_fix, 
+    beta_m, beta_h, Rib_max, Re_rough_min, 
+    B1_rough, B2_rough,
+    B_max_land, B_max_ocean, B_max_lake,
+    gamma_c, Re_visc_min,
+    Pr_m, nu_air, g, 
+    maxiters_charnock, maxiters_convection, 
+    grid_size);
+#endif
+
+    if(RunMem != memOut)
+    {
+        const size_t new_size = grid_size * sizeof(T);
+
+        memproc::memcopy<memOut, RunMem>(zeta_, zeta, new_size);
+        memproc::memcopy<memOut, RunMem>(Rib_, Rib, new_size);
+        memproc::memcopy<memOut, RunMem>(Re_, Re, new_size);
+        memproc::memcopy<memOut, RunMem>(Rib_conv_lim_, Rib_conv_lim, new_size);
+        memproc::memcopy<memOut, RunMem>(z0_m_, z0_m, new_size);
+        memproc::memcopy<memOut, RunMem>(z0_t_, z0_t, new_size);
+        memproc::memcopy<memOut, RunMem>(B_, B, new_size);
+        memproc::memcopy<memOut, RunMem>(Cm_, Cm, new_size);
+        memproc::memcopy<memOut, RunMem>(Ct_, Ct, new_size);
+        memproc::memcopy<memOut, RunMem>(Km_, Km, new_size);
+        memproc::memcopy<memOut, RunMem>(Pr_t_inv_, Pr_t_inv, new_size);
+    }
+}
+
+template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::CPU>;
+template class FluxSheba<double, MemType::CPU, MemType::CPU, MemType::CPU>;
+
+#ifdef INCLUDE_CUDA
+    template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::GPU>;
+    template class FluxSheba<float, MemType::GPU, MemType::GPU, MemType::CPU>;
+    template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::GPU>;
+    template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::GPU>;
+    template class FluxSheba<float, MemType::CPU, MemType::CPU, MemType::GPU>;
+    template class FluxSheba<float, MemType::CPU, MemType::GPU, MemType::CPU>;
+    template class FluxSheba<float, MemType::GPU, MemType::CPU, MemType::CPU>;
+
+    template class FluxSheba<double, MemType::GPU, MemType::GPU, MemType::GPU>;
+    template class FluxSheba<double, MemType::GPU, MemType::GPU, MemType::CPU>;
+    template class FluxSheba<double, MemType::GPU, MemType::CPU, MemType::GPU>;
+    template class FluxSheba<double, MemType::CPU, MemType::GPU, MemType::GPU>;
+    template class FluxSheba<double, MemType::CPU, MemType::CPU, MemType::GPU>;
+    template class FluxSheba<double, MemType::CPU, MemType::GPU, MemType::CPU>;
+    template class FluxSheba<double, MemType::GPU, MemType::CPU, MemType::CPU>;
+#endif 
\ No newline at end of file