diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..478ed3ce0e3c581f87f9b785644dc5263e0dfbef
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,94 @@
+cmake_minimum_required(VERSION 3.0)
+
+option(INCLUDE_CUDA "GPU build in mode"    OFF)
+option(INCLUDE_CXX  "CXX build in mode"    OFF)
+
+project(INMCM_sfx)
+enable_language(Fortran)
+
+if(INCLUDE_CXX OR INCLUDE_CUDA)
+    set(MEMPROC_GIT http://tesla.parallel.ru/Lizzzka007/memory_processing.git)
+    include(FetchContent)
+    FetchContent_Declare(memory_processing                                   
+    GIT_REPOSITORY ${MEMPROC_GIT}                                    
+    GIT_TAG origin/main
+    )
+    FetchContent_MakeAvailable(memory_processing)                              
+    add_library(memory_processing INTERFACE)
+    target_compile_definitions(memory_processing INTERFACE INCLUDE_CUDA=${INCLUDE_CUDA})
+endif(INCLUDE_CXX OR INCLUDE_CUDA)
+
+if(INCLUDE_CXX)
+    set(RUN_MACRO -DINCLUDE_CXX)
+endif(INCLUDE_CXX)
+
+if(INCLUDE_CUDA)
+    enable_language(CUDA)
+    find_package(CUDA REQUIRED)
+    include_directories(${CUDA_INCLUDE_DIRS})
+    set(RUN_MACRO -DINCLUDE_CUDA)
+    set(INCLUDE_CXX ON)
+endif(INCLUDE_CUDA)
+
+if(INCLUDE_CXX)
+    enable_language(C)
+    enable_language(CXX)
+    set(CMAKE_CXX_STANDARD 11)
+endif(INCLUDE_CXX)
+
+set(SOURCES_F 
+    srcF/sfx_data.f90
+    srcF/sfx_common.f90
+    srcF/sfx_def.fi
+    srcF/sfx_esm.f90 
+    srcF/sfx_esm_param.f90
+    srcF/sfx_log.f90
+    srcF/sfx_log_param.f90
+    srcF/sfx_main.f90
+    srcF/sfx_phys_const.f90
+    srcF/sfx_surface.f90
+    srcF/FCWrapper.F90
+)
+
+if(INCLUDE_CXX)
+    set(SOURCES_C 
+        srcC/SubFunctionsWrapper.c
+    )
+
+    set(SOURCES_CXX 
+        srcCXX/Flux.cpp
+        srcCXX/FluxComputeFunc.cpp
+        srcCXX/SubFunctions.cpp
+    )
+    set(HEADERS_CXX 
+        includeCXX/Flux.h
+        includeCXX/FluxComputeFunc.h
+        includeCXX/SubFunctions.h
+        )
+endif(INCLUDE_CXX)
+
+if(INCLUDE_CUDA)
+    set(SOURCES_CU 
+    srcCU/Flux.cu
+    srcCU/FluxComputeFunc.cu
+    )
+    set(HEADERS_CU
+    includeCU/Flux.cuh
+    includeCXX/FluxComputeFunc.cuh
+    )
+endif(INCLUDE_CUDA)
+
+set(SOURCES ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${SOURCES_F})
+
+set(CMAKE_Fortran_FLAGS " -g -fbacktrace -ffpe-trap=zero,overflow,underflow -cpp ")
+set(CMAKE_CXX_FLAGS " -g ")
+set(CMAKE_C_FLAGS " -g ")
+
+add_executable(drag ${SOURCES})
+add_definitions(${RUN_MACRO})
+set_property(TARGET drag PROPERTY LINKER_LANGUAGE Fortran)
+
+if(INCLUDE_CXX OR INCLUDE_CUDA)
+    target_include_directories(drag PUBLIC ${memory_processing_SOURCE_DIR}/include)
+    target_link_libraries(drag memproc)
+endif(INCLUDE_CXX OR INCLUDE_CUDA)
\ No newline at end of file
diff --git a/includeCU/Flux.cuh b/includeCU/Flux.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..2eec280c1b8c177292a8db02ac2c0388bd01b247
--- /dev/null
+++ b/includeCU/Flux.cuh
@@ -0,0 +1,11 @@
+#pragma once 
+
+template<typename T>
+void compute_flux_gpu(const T *ws, const T *dt, const T *st, const T *dq, const T *cflh, const T *z0,
+    T *zl, T *ri, T *re, T *lnzuzt, T *zu, T *zt, T *rith, T *cm, T *ch, T *ct, T *ckt,
+    const T g, const T Pr_m, const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv,
+    const T alpha_m, const T alpha_h, const T b4, const T alfam, const T betam, 
+    const T an, const T h1, const T x8, const T an1, const T an2, const T g0, 
+    const T r0, 
+    const int it, const int lu_indx,
+    const int grid_size);
\ No newline at end of file
diff --git a/includeCU/FluxComputeFunc.cuh b/includeCU/FluxComputeFunc.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/includeCXX/Flux.h b/includeCXX/Flux.h
new file mode 100644
index 0000000000000000000000000000000000000000..cc7f6c2bb7fd77abcb2e2b21b094f6717a6d755f
--- /dev/null
+++ b/includeCXX/Flux.h
@@ -0,0 +1,46 @@
+#pragma once
+#include "TemplateParameters.h"
+#include <cstddef>
+
+template<typename T, MemType RunMem, MemType memIn>
+class Flux
+{
+private:
+    T *U, *dT, *Tsemi, *dQ, *h, *in_z0_m;
+    T *zeta, *Rib, *Re, *B, *z0_m, *z0_t, *Rib_conv_lim, *Cm, *Ct, *Km, *Pr_t_inv;
+
+    T kappa, Pr_t_0_inv, Pr_t_inf_inv, 
+    alpha_m, alpha_h, alpha_h_fix, 
+    beta_m, beta_h, 
+    Rib_max, Re_rough_min, 
+    B1_rough, B2_rough, 
+    B_max_land, B_max_ocean, B_max_lake,
+    gamma_c,  
+    Re_visc_min,
+    Pr_m, nu_air, g;
+
+    int grid_size;
+    bool ifAllocated;
+    size_t allocated_size;
+public:
+    Flux();
+    void set_params(const int grid_size, const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
+    const T alpha_m, const T alpha_h, const T alpha_h_fix, 
+    const T beta_m, const T beta_h, const T Rib_max, const T Re_rough_min, 
+    const T B1_rough, const T B2_rough,
+    const T B_max_land, const T B_max_ocean, const T B_max_lake,
+    const T gamma_c, const T Re_visc_min,
+    const T Pr_m, const T nu_air, const T g);
+    void set_data( T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_,
+    T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_);
+    ~Flux();
+
+    void compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, 
+    const int maxiters_charnock, const int maxiters_convection);
+};
+
+template<typename T, MemType RunMem, MemType memIn>
+class Compute_Flux
+{
+    static Flux<T, RunMem,memIn> F;
+};
\ No newline at end of file
diff --git a/includeCXX/FluxComputeFunc.h b/includeCXX/FluxComputeFunc.h
new file mode 100644
index 0000000000000000000000000000000000000000..d74553d8f78b8a4a0d70749abcfc40cdf52ce15a
--- /dev/null
+++ b/includeCXX/FluxComputeFunc.h
@@ -0,0 +1,14 @@
+#pragma once
+
+template<typename T>
+void compute_flux_cpu(const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
+    T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+    const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
+    const T alpha_m, const T alpha_h, const T alpha_h_fix, 
+    const T beta_m, const T beta_h, const T Rib_max, const T Re_rough_min, 
+    const T B1_rough, const T B2_rough,
+    const T B_max_land, const T B_max_ocean, const T B_max_lake,
+    const T gamma_c, const T Re_visc_min,
+    const T Pr_m, const T nu_air, const T g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size);
\ No newline at end of file
diff --git a/includeCXX/SubFunctions.h b/includeCXX/SubFunctions.h
new file mode 100644
index 0000000000000000000000000000000000000000..e041131b9881dc792faf14210ef4f05f2df1dc44
--- /dev/null
+++ b/includeCXX/SubFunctions.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    void surf_flux_CXX (float *U_, float *dT_, float *Tsemi_, float *dQ_, float *h_, float *in_z0_m_,
+    float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
+    const float kappa, const float Pr_t_0_inv, const float Pr_t_inf_inv, 
+    const float alpha_m, const float alpha_h, const float alpha_h_fix, 
+    const float beta_m, const float beta_h, const float Rib_max, const float Re_rough_min, 
+    const float B1_rough, const float B2_rough,
+    const float B_max_land, const float B_max_ocean, const float B_max_lake,
+    const float gamma_c, const float Re_visc_min,
+    const float Pr_m, const float nu_air, const float g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/srcC/SubFunctionsWrapper.c b/srcC/SubFunctionsWrapper.c
new file mode 100644
index 0000000000000000000000000000000000000000..91372f764c3fba510487feef8189044181613fed
--- /dev/null
+++ b/srcC/SubFunctionsWrapper.c
@@ -0,0 +1,28 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "../includeCXX/SubFunctions.h"
+// -------------------------------------------------------------------------- //
+void surf_flux (float *U_, float *dT_, float *Tsemi_, float *dQ_, float *h_, float *in_z0_m_,
+    float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
+    const float *kappa, const float *Pr_t_0_inv, const float *Pr_t_inf_inv, 
+    const float *alpha_m, const float *alpha_h, const float *alpha_h_fix, 
+    const float *beta_m, const float *beta_h, const float *Rib_max, const float *Re_rough_min, 
+    const float *B1_rough, const float *B2_rough,
+    const float *B_max_land, const float *B_max_ocean, const float *B_max_lake,
+    const float *gamma_c, const float *Re_visc_min,
+    const float *Pr_m, const float *nu_air, const float *g, 
+    const int *maxiters_charnock, const int *maxiters_convection, 
+    const int *grid_size)
+{
+	surf_flux_CXX ( U_, dT_, Tsemi_, dQ_, h_, in_z0_m_,
+  zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_,
+  *kappa, *Pr_t_0_inv, *Pr_t_inf_inv, 
+  *alpha_m, *alpha_h, *alpha_h_fix, 
+  *beta_m, *beta_h, *Rib_max, *Re_rough_min, 
+  *B1_rough, *B2_rough,
+  *B_max_land, *B_max_ocean, *B_max_lake,
+  *gamma_c, *Re_visc_min,
+  *Pr_m, *nu_air, *g, 
+  *maxiters_charnock, *maxiters_convection, 
+  *grid_size);
+}
\ No newline at end of file
diff --git a/srcCXX/Flux.cpp b/srcCXX/Flux.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..80dfaf54aac0cfa2910748dc67f3ea518f307fc8
--- /dev/null
+++ b/srcCXX/Flux.cpp
@@ -0,0 +1,242 @@
+#include <iostream>
+
+#include "../includeCXX/Flux.h"
+#include "../includeCXX/FluxComputeFunc.h"
+#ifdef INCLUDE_CUDA
+    #include "../includeCU/Flux.cuh"
+#endif
+
+#include "MemoryProcessing.h"
+
+template<typename T, MemType RunMem, MemType memIn>
+Flux<T, RunMem, memIn>::Flux()
+{
+    kappa = 0, Pr_t_0_inv = 0, Pr_t_inf_inv = 0, 
+    alpha_m = 0, alpha_h = 0, alpha_h_fix = 0, 
+    beta_m = 0, beta_h = 0, 
+    Rib_max = 0, Re_rough_min = 0, 
+    B1_rough = 0, B2_rough = 0, 
+    B_max_land = 0, B_max_ocean = 0, B_max_lake = 0,
+    gamma_c = 0,  
+    Re_visc_min = 0,
+    Pr_m = 0, nu_air = 0, g = 0;
+
+    grid_size = 0;
+
+    ifAllocated = false;
+    allocated_size = 0;
+}
+
+template<typename T, MemType RunMem, MemType memIn>
+void Flux<T, RunMem, memIn>::set_params(const int grid_size_, const T kappa_, const T Pr_t_0_inv_, const T Pr_t_inf_inv_, 
+    const T alpha_m_, const T alpha_h_, const T alpha_h_fix_, 
+    const T beta_m_, const T beta_h_, const T Rib_max_, const T Re_rough_min_, 
+    const T B1_rough_, const T B2_rough_,
+    const T B_max_land_, const T B_max_ocean_, const T B_max_lake_,
+    const T gamma_c_, const T Re_visc_min_,
+    const T Pr_m_, const T nu_air_, const T g_)
+{
+    kappa = kappa_, Pr_t_0_inv = Pr_t_0_inv_, Pr_t_inf_inv = Pr_t_inf_inv_, 
+    alpha_m = alpha_m_, alpha_h = alpha_h_, alpha_h_fix = alpha_h_fix_, 
+    beta_m = beta_m_, beta_h = beta_h_, 
+    Rib_max = Rib_max_, Re_rough_min = Re_rough_min_, B_max_lake = B_max_lake_,
+    B1_rough = B1_rough_, B2_rough = B2_rough_, 
+    B_max_land = B_max_land_, B_max_ocean = B_max_ocean_,
+    gamma_c = gamma_c_,  
+    Re_visc_min = Re_visc_min_,
+    Pr_m = Pr_m_, nu_air = nu_air_, g = g_;
+
+    grid_size = grid_size_;
+
+    if(RunMem != memIn)
+    {
+        const size_t new_size = grid_size * sizeof(T);
+
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(U), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(dT), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Tsemi), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(dQ), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(h), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(in_z0_m), allocated_size, new_size);
+
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(zeta), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Rib), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Re), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Rib_conv_lim), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(z0_m), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(z0_t), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(B), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Cm), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Ct), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Km), allocated_size, new_size);
+        allocated_size = 0;
+        memproc::realloc<RunMem>((void *&)(Pr_t_inv), allocated_size, new_size);
+
+        ifAllocated = true;
+    }
+}
+
+template<typename T, MemType RunMem, MemType memIn>
+Flux<T, RunMem, memIn>::~Flux()
+{
+    kappa = 0, Pr_t_0_inv = 0, Pr_t_inf_inv = 0, 
+    alpha_m = 0, alpha_h = 0, alpha_h_fix = 0, 
+    beta_m = 0, beta_h = 0, 
+    Rib_max = 0, Re_rough_min = 0, 
+    B1_rough = 0, B2_rough = 0, 
+    B_max_land = 0, B_max_ocean = 0, B_max_lake = 0,
+    gamma_c = 0,  
+    Re_visc_min = 0,
+    Pr_m = 0, nu_air = 0, g = 0;
+
+    grid_size = 0;
+
+    if(ifAllocated == true)
+    {
+        memproc::dealloc<RunMem>((void*&)U);
+        memproc::dealloc<RunMem>((void*&)dT);
+        memproc::dealloc<RunMem>((void*&)Tsemi);
+        memproc::dealloc<RunMem>((void*&)dQ);
+        memproc::dealloc<RunMem>((void*&)h);
+
+        memproc::dealloc<RunMem>((void*&)zeta);
+        memproc::dealloc<RunMem>((void*&)Rib);
+        memproc::dealloc<RunMem>((void*&)Re);
+        memproc::dealloc<RunMem>((void*&)Rib_conv_lim);
+        memproc::dealloc<RunMem>((void*&)z0_m);
+        memproc::dealloc<RunMem>((void*&)z0_t);
+        memproc::dealloc<RunMem>((void*&)B);
+        memproc::dealloc<RunMem>((void*&)Cm);
+        memproc::dealloc<RunMem>((void*&)Ct);
+        memproc::dealloc<RunMem>((void*&)Km);
+        memproc::dealloc<RunMem>((void*&)Pr_t_inv);
+
+        ifAllocated = false;
+        allocated_size = 0;
+    }
+}
+
+template<typename T, MemType RunMem, MemType memIn>
+void Flux<T, RunMem, memIn>::set_data(T *U_, T *dT_, T *Tsemi_, T *dQ_, T *h_, T *in_z0_m_,
+    T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_)
+{
+    if(RunMem == memIn)
+    {
+        U = U_;
+        dT = dT_;
+        Tsemi = Tsemi_;
+        dQ = dQ_;
+        h = h_;
+        in_z0_m = in_z0_m_;
+        
+        zeta = zeta_;
+        Rib = Rib_;
+        Re = Re_;
+        Rib_conv_lim = Rib_conv_lim_;
+        z0_m = z0_m_;
+        z0_t = z0_t_;
+        B = B_;
+        Cm = Cm_;
+        Ct = Ct_;
+        Km = Km_;
+        Pr_t_inv = Pr_t_inv_;
+    }
+    else
+    {
+        const size_t new_size = grid_size * sizeof(T);
+
+        memproc::memcopy<RunMem, memIn>(U, U_, new_size);
+        memproc::memcopy<RunMem, memIn>(dT, dT_, new_size);
+        memproc::memcopy<RunMem, memIn>(Tsemi, Tsemi_, new_size);
+        memproc::memcopy<RunMem, memIn>(dQ, dQ_, new_size);
+        memproc::memcopy<RunMem, memIn>(h, h_, new_size);
+        memproc::memcopy<RunMem, memIn>(in_z0_m, in_z0_m_, new_size);
+    }
+}
+
+template<typename T, MemType RunMem, MemType memIn>
+void Flux<T, RunMem, memIn>::compute_flux(T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_, 
+    const int maxiters_charnock, const int maxiters_convection)
+{
+    if(RunMem == MemType::CPU) compute_flux_cpu(U, dT, Tsemi, dQ, h, in_z0_m, 
+    zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, Cm, Ct, Km, Pr_t_inv,
+    kappa, Pr_t_0_inv, Pr_t_inf_inv, 
+    alpha_m, alpha_h, alpha_h_fix, 
+    beta_m, beta_h, Rib_max, Re_rough_min, 
+    B1_rough, B2_rough,
+    B_max_land, B_max_ocean, B_max_lake,
+    gamma_c, Re_visc_min,
+    Pr_m, nu_air, g, 
+    maxiters_charnock, maxiters_convection, 
+    grid_size);
+
+#ifdef INCLUDE_CUDA
+    else compute_flux_gpu(U, dT, Tsemi, dQ, h, zeta,
+    zl, Rib, Re, Rib_conv_lim, z0_m, z0_t, B, Cm, Ct, Km, Pr_t_inv,
+    g,  Pr_m,  kappa,  Pr_t_0_inv,  Pr_t_inf_inv,
+    alpha_m,  alpha_h,  b4,  alfam,  betam, 
+    an,  h1,  x8,  an1,  an2,  g0, 
+    r0, 
+    it, lu_indx,
+    grid_size);
+#endif
+
+    if(RunMem != memIn)
+    {
+        const size_t new_size = grid_size * sizeof(T);
+
+        memproc::memcopy<memIn, RunMem>(zeta_, zeta, new_size);
+        memproc::memcopy<memIn, RunMem>(Rib_, Rib, new_size);
+        memproc::memcopy<memIn, RunMem>(Re_, Re, new_size);
+        memproc::memcopy<memIn, RunMem>(Rib_conv_lim_, Rib_conv_lim, new_size);
+        memproc::memcopy<memIn, RunMem>(z0_m_, z0_m, new_size);
+        memproc::memcopy<memIn, RunMem>(z0_t_, z0_t, new_size);
+        memproc::memcopy<memIn, RunMem>(B_, B, new_size);
+        memproc::memcopy<memIn, RunMem>(Cm_, Cm, new_size);
+        memproc::memcopy<memIn, RunMem>(Ct_, Ct, new_size);
+        memproc::memcopy<memIn, RunMem>(Km_, Km, new_size);
+        memproc::memcopy<memIn, RunMem>(Pr_t_inv_, Pr_t_inv, new_size);
+    }
+}
+
+template class Flux<float, MemType::CPU, MemType::CPU>;
+template class Flux<double, MemType::CPU, MemType::CPU>;
+
+#ifdef INCLUDE_CUDA
+    template class Flux<float, MemType::GPU, MemType::GPU>;
+    template class Flux<float, MemType::GPU, MemType::CPU>;
+    template class Flux<float, MemType::CPU, MemType::GPU>;
+
+    template class Flux<double, MemType::GPU, MemType::GPU>;
+    template class Flux<double, MemType::GPU, MemType::CPU>;
+    template class Flux<double, MemType::CPU, MemType::GPU>;
+#endif 
+
+template class Compute_Flux<float, MemType::CPU, MemType::CPU>;
+template class Compute_Flux<double, MemType::CPU, MemType::CPU>;
+
+#ifdef INCLUDE_CUDA
+    template class Compute_Flux<float, MemType::GPU, MemType::GPU>;
+    template class Compute_Flux<float, MemType::GPU, MemType::CPU>;
+    template class Compute_Flux<float, MemType::CPU, MemType::GPU>;
+
+    template class Compute_Flux<double, MemType::GPU, MemType::GPU>;
+    template class Compute_Flux<double, MemType::GPU, MemType::CPU>;
+    template class Compute_Flux<double, MemType::CPU, MemType::GPU>;
+#endif 
\ No newline at end of file
diff --git a/srcCXX/FluxComputeFunc.cpp b/srcCXX/FluxComputeFunc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7e1dbd1ef5e8c5bba2e159753b4dca75996db984
--- /dev/null
+++ b/srcCXX/FluxComputeFunc.cpp
@@ -0,0 +1,377 @@
+#include <cmath>
+#include <iostream>
+#include "../includeCXX/FluxComputeFunc.h"
+
+template<typename T>
+void get_charnock_roughness(const T h, const T U,
+    const T kappa, 
+    const T h_charnock, const T c1_charnock, const T c2_charnock,
+    T &z0_m, T &u_dyn0, 
+    const int maxiters)
+{
+    T Uc, a, b, c, c_min, f;
+
+    Uc = U;
+    a = 0.0;
+    b = 25.0;
+    c_min = log(h_charnock) / kappa;
+
+    for (int i = 0; i < maxiters; i++)
+    {
+        f = c1_charnock - 2.0 * log(Uc);
+        for (int j = 0; j < maxiters; j++)
+        {
+            c = (f + 2.0 * log(b)) / kappa;
+            if (U <= 8.0e0) 
+                a = log(1.0 + c2_charnock * ( pow(b / Uc, 3) ) ) / kappa;
+            c = std::max(c - a, c_min);
+            b = c;
+        }
+        z0_m = h_charnock * exp(-c * kappa);
+        printf("%f and 0.000015e0\n", z0_m);
+        z0_m = std::max(z0_m, T(0.000015e0));
+        Uc = U * log(h_charnock / z0_m) / log(h / z0_m);
+    }
+    
+    u_dyn0 = Uc / c;
+}
+
+template void get_charnock_roughness(const float h, const float U,
+    const float kappa, 
+    const float h_charnock, const float c1_charnock, const float c2_charnock,
+    float &z0_m, float &u_dyn0, 
+    const int maxiters);
+template void get_charnock_roughness(const double h, const double U,
+    const double kappa, 
+    const double h_charnock, const double c1_charnock, const double c2_charnock,
+    double &z0_m, double &u_dyn0, 
+    const int maxiters);
+
+template<typename T>
+void get_convection_lim(const T h0_m, const T h0_t, const T B,
+    const T Pr_t_inf_inv, const T Pr_t_0_inv,
+    const T alpha_h, const T alpha_m, const T alpha_h_fix,
+    T &zeta_lim, T &Rib_lim, T &f_m_lim, T &f_h_lim)
+{
+    T psi_m, psi_h, f_m, f_h, c;
+
+    c = pow(Pr_t_inf_inv / Pr_t_0_inv, 4);
+    zeta_lim = (2.0 * alpha_h - c * alpha_m - sqrt( (c * alpha_m)*(c * alpha_m) + 4.0 * c * alpha_h * (alpha_h - alpha_m))) / (2.0 * alpha_h*alpha_h);
+
+    f_m_lim = pow(1.0 - alpha_m * zeta_lim, 0.25);
+    f_h_lim = sqrt(1.0 - alpha_h * zeta_lim);
+
+    f_m = zeta_lim / h0_m;
+    f_h = zeta_lim / h0_t;
+    if (fabs(B) < 1.0e-10) f_h = f_m;
+
+    f_m = pow(1.0 - alpha_m * f_m, 0.25);
+    f_h = sqrt(1.0 - alpha_h_fix * f_h);
+
+    psi_m = 2.0 * (atan(f_m_lim) - atan(f_m)) + log((f_m_lim - 1.0) * (f_m + 1.0)/((f_m_lim + 1.0) * (f_m - 1.0)));
+    psi_h = log((f_h_lim - 1.0) * (f_h + 1.0)/((f_h_lim + 1.0) * (f_h - 1.0))) / Pr_t_0_inv;
+
+    Rib_lim = zeta_lim * psi_h / (psi_m * psi_m);
+}
+
+template void get_convection_lim(const float h0_m, const float h0_t, const float B,
+    const float Pr_t_inf_inv, const float Pr_t_0_inv,
+    const float alpha_h, const float alpha_m, const float alpha_h_fix,
+    float &zeta_lim, float &Rib_lim, float &f_m_lim, float &f_h_lim);
+template void get_convection_lim(const double h0_m, const double h0_t, const double B,
+    const double Pr_t_inf_inv, const double Pr_t_0_inv,
+    const double alpha_h, const double alpha_m, const double alpha_h_fix,
+    double &zeta_lim, double &Rib_lim, double &f_m_lim, double &f_h_lim);
+
+template<typename T>
+void get_psi_stable(const T Rib, const T h0_m, const T h0_t, const T B,
+    const T Pr_t_0_inv, const T beta_m,
+    T &psi_m, T &psi_h, T &zeta)
+{
+    T Rib_coeff, psi0_m, psi0_h, phi, c;
+    
+    psi0_m = log(h0_m);
+    psi0_h = B / psi0_m;
+
+    Rib_coeff = beta_m * Rib;
+    c = (psi0_h + 1.0) / Pr_t_0_inv - 2.0 * Rib_coeff;
+    zeta = psi0_m * (sqrt(c*c + 4.0 * Rib_coeff * (1.0 - Rib_coeff)) - c) / (2.0 * beta_m * (1.0 - Rib_coeff));
+
+    phi = beta_m * zeta;
+
+    psi_m = psi0_m + phi;
+    psi_h = (psi0_m + B) / Pr_t_0_inv + phi;
+}
+
+template void get_psi_stable(const float Rib, const float h0_m, const float h0_t, const float B,
+    const float Pr_t_0_inv, const float beta_m,
+    float &psi_m, float &psi_h, float &zeta);
+template void get_psi_stable(const double Rib, const double h0_m, const double h0_t, const double B,
+    const double Pr_t_0_inv, const double beta_m,
+    double &psi_m, double &psi_h, double &zeta);
+
+template<typename T>
+void get_psi_convection(const T Rib, const T h0_m, const T h0_t, const T B, 
+    const T zeta_conv_lim, const T f_m_conv_lim, const T f_h_conv_lim,
+    const T Pr_t_0_inv,
+    const T alpha_h, const T alpha_m, const T alpha_h_fix,
+    T &psi_m, T &psi_h, T &zeta, 
+    const int maxiters)
+{
+    T zeta0_m, zeta0_h, f0_m, f0_h, p_m, p_h, a_m, a_h, c_lim, f;
+
+    p_m = 2.0 * atan(f_m_conv_lim) + log((f_m_conv_lim - 1.0) / (f_m_conv_lim + 1.0));
+    p_h = log((f_h_conv_lim - 1.0) / (f_h_conv_lim + 1.0));
+
+    zeta = zeta_conv_lim;
+
+    for (int i = 1; i <= maxiters + 1; i++)
+    {
+        zeta0_m = zeta / h0_m;
+        zeta0_h = zeta / h0_t;
+        if (fabs(B) < 1.0e-10) 
+            zeta0_h = zeta0_m;
+
+        f0_m = pow(1.0 - alpha_m * zeta0_m, 0.25);
+        f0_h = sqrt(1.0 - alpha_h_fix * zeta0_h);
+
+        a_m = -2.0*atan(f0_m) + log((f0_m + 1.0)/(f0_m - 1.0));
+        a_h = log((f0_h + 1.0)/(f0_h - 1.0));
+
+        c_lim = pow(zeta_conv_lim / zeta, 1.0 / 3.0);
+        f = 3.0 * (1.0 - c_lim);
+
+        psi_m = f / f_m_conv_lim + p_m + a_m;
+        psi_h = (f / f_h_conv_lim + p_h + a_h) / Pr_t_0_inv;
+
+        if (i == maxiters + 1) 
+            break;
+
+        zeta = Rib * psi_m * psi_m / psi_h;
+    }
+}
+
+template void get_psi_convection(const float Rib, const float h0_m, const float h0_t, const float B, 
+    const float zeta_conv_lim, const float f_m_conv_lim, const float f_h_conv_lim,
+    const float Pr_t_0_inv,
+    const float alpha_h, const float alpha_m, const float alpha_h_fix,
+    float &psi_m, float &psi_h, float &zeta, 
+    const int maxiters);
+template void get_psi_convection(const double Rib, const double h0_m, const double h0_t, const double B, 
+    const double zeta_conv_lim, const double f_m_conv_lim, const double f_h_conv_lim,
+    const double Pr_t_0_inv,
+    const double alpha_h, const double alpha_m, const double alpha_h_fix,
+    double &psi_m, double &psi_h, double &zeta, 
+    const int maxiters);
+
+template<typename T>
+void get_psi_neutral(const T h0_m, const T h0_t, const T B,
+    const T Pr_t_0_inv,
+    T &psi_m, T &psi_h, T &zeta)
+{
+    zeta = 0.0;
+    psi_m = log(h0_m);
+    psi_h = log(h0_t) / Pr_t_0_inv;
+    if (fabs(B) < 1.0e-10) 
+        psi_h = psi_m / Pr_t_0_inv;
+}
+
+template void get_psi_neutral(const float h0_m, const float h0_t, const float B,
+    const float Pr_t_0_inv,
+    float &psi_m, float &psi_h, float &zeta);
+template void get_psi_neutral(const double h0_m, const double h0_t, const double B,
+    const double Pr_t_0_inv,
+    double &psi_m, double &psi_h, double &zeta);
+
+template<typename T>
+void get_psi_semi_convection(const T Rib, const T h0_m, const T h0_t, const T B, 
+    const T Pr_t_0_inv,
+    const T alpha_m, const T alpha_h_fix,
+    T &psi_m, T &psi_h, T &zeta,
+    const int maxiters)
+{
+    T zeta0_m, zeta0_h, f0_m, f0_h, f_m, f_h;
+
+    psi_m = log(h0_m);
+    psi_h = log(h0_t);
+
+    if (fabs(B) < 1.0e-10) 
+        psi_h = psi_m;
+
+    zeta = Rib * Pr_t_0_inv * psi_m * psi_m / psi_h;
+
+    for (int i = 1; i <= maxiters + 1; i++)
+    {
+        zeta0_m = zeta / h0_m;
+        zeta0_h = zeta / h0_t;
+        if (fabs(B) < 1.0e-10) 
+            zeta0_h = zeta0_m;
+
+        f_m = pow(1.0 - alpha_m * zeta, 0.25e0);
+        f_h = sqrt(1.0 - alpha_h_fix * zeta);
+
+        f0_m = pow(1.0 - alpha_m * zeta0_m, 0.25e0);
+        f0_h = sqrt(1.0 - alpha_h_fix * zeta0_h);
+
+        f0_m = std::max(f0_m, T(1.000001e0));
+        f0_h = std::max(f0_h, T(1.000001e0));
+
+        psi_m = log((f_m - 1.0e0)*(f0_m + 1.0e0)/((f_m + 1.0e0)*(f0_m - 1.0e0))) + 2.0e0*(atan(f_m) - atan(f0_m));
+        psi_h = log((f_h - 1.0e0)*(f0_h + 1.0e0)/((f_h + 1.0e0)*(f0_h - 1.0e0))) / Pr_t_0_inv;
+
+        if (i == maxiters + 1) 
+            break;
+
+        zeta = Rib * psi_m * psi_m / psi_h;
+    }
+}
+
+template void get_psi_semi_convection(const float Rib, const float h0_m, const float h0_t, const float B, 
+    const float Pr_t_0_inv,
+    const float alpha_m, const float alpha_h_fix,
+    float &psi_m, float &psi_h, float &zeta,
+    const int maxiters);
+template void get_psi_semi_convection(const double Rib, const double h0_m, const double h0_t, const double B, 
+    const double Pr_t_0_inv,
+    const double alpha_m, const double alpha_h_fix,
+    double &psi_m, double &psi_h, double &zeta,
+    const int maxiters);
+
+template<typename T>
+void compute_flux_cpu(const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, const T *h_, const T *in_z0_m_,
+    T *zeta_, T *Rib_, T *Re_, T *B_, T *z0_m_, T *z0_t_, T *Rib_conv_lim_, T *Cm_, T *Ct_, T *Km_, T *Pr_t_inv_,
+    const T kappa, const T Pr_t_0_inv, const T Pr_t_inf_inv, 
+    const T alpha_m, const T alpha_h, const T alpha_h_fix, 
+    const T beta_m, const T beta_h, const T Rib_max, const T Re_rough_min, 
+    const T B1_rough, const T B2_rough,
+    const T B_max_land, const T B_max_ocean, const T B_max_lake,
+    const T gamma_c, const T Re_visc_min,
+    const T Pr_m, const T nu_air, const T g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size)
+{
+    T h, U, dT, Tsemi, dQ, z0_m;
+    T Re, z0_t, B, h0_m, h0_t, u_dyn0, zeta, Rib, zeta_conv_lim, Rib_conv_lim, f_m_conv_lim, f_h_conv_lim, psi_m, psi_h, phi_m, phi_h, Km, Pr_t_inv, Cm, Ct;
+    int surface_type;
+    T fval;
+
+    const T B3_rough = kappa * Pr_m, B4_rough =( 0.14 * ( pow(30.0, B2_rough) ) ) * (pow(Pr_m, 0.8));
+    const T h_charnock = 10.0, c1_charnock = log(h_charnock * (g / gamma_c)), c2_charnock = Re_visc_min * nu_air * c1_charnock;
+
+    for (int step = 0; step < grid_size; step++)
+    {
+        U = U_[step];
+        Tsemi = Tsemi_[step];
+        dT = dT_[step];
+        dQ = dQ_[step];
+        h = h_[step];
+        z0_m = in_z0_m_[step];
+
+        if (z0_m < 0.0) surface_type = 0;
+        else            surface_type = 1;
+
+        if (surface_type == 0)
+        {
+            get_charnock_roughness(h, U, kappa, h_charnock, c1_charnock, c2_charnock, z0_m, u_dyn0, maxiters_charnock);
+            h0_m = h / z0_m;
+        }
+        if (surface_type == 1) 
+        {
+            h0_m = h / z0_m;
+            u_dyn0 = U * kappa / log(h0_m);
+        }
+
+        Re = u_dyn0 * z0_m / nu_air;
+
+        if(Re <= Re_rough_min) B = B1_rough * log(B3_rough * Re) + B2_rough;
+        else                   B = B4_rough * (pow(Re, B2_rough));
+
+        if (surface_type == 0)  B = std::min(B, B_max_ocean);
+        if (surface_type == 1)   B = std::min(B, B_max_land);
+        if (surface_type == 2)   B = std::min(B, B_max_lake);
+
+        z0_t = z0_m / exp(B);
+        h0_t = h / z0_t;
+        Rib = (g / Tsemi) * h * (dT + 0.61e0 * Tsemi * dQ) / (U*U);
+
+        get_convection_lim(h0_m, h0_t, B, Pr_t_inf_inv, Pr_t_0_inv, alpha_h, alpha_m, alpha_h_fix, zeta_conv_lim, Rib_conv_lim, f_m_conv_lim, f_h_conv_lim);
+
+
+        if (Rib > 0.0) 
+        {
+            Rib = std::min(Rib, Rib_max);
+            get_psi_stable(Rib, h0_m, h0_t, B, Pr_t_0_inv, beta_m, psi_m, psi_h, zeta);
+
+            if(step == 353)
+                printf("get_psi_stable zeta = %f\n", zeta);
+
+            fval = beta_m * zeta;
+            phi_m = 1.0 + fval;
+            phi_h = 1.0/Pr_t_0_inv + fval;
+        }
+
+        else if (Rib < Rib_conv_lim) 
+        {
+            get_psi_convection(Rib, h0_m, h0_t, B, zeta_conv_lim, f_m_conv_lim, f_h_conv_lim, Pr_t_0_inv, alpha_h, alpha_m, alpha_h_fix, psi_m, psi_h, zeta, maxiters_convection);
+
+            fval = pow(zeta_conv_lim / zeta, 1.0/3.0);
+            phi_m = fval / f_m_conv_lim;
+            phi_h = fval / (Pr_t_0_inv * f_h_conv_lim);
+        }
+        else if (Rib > -0.001) 
+        {
+            get_psi_neutral(h0_m, h0_t, B, Pr_t_0_inv, psi_m, psi_h, zeta);
+        
+            phi_m = 1.0;
+            phi_h = 1.0 / Pr_t_0_inv;
+        }
+        else
+        {
+            get_psi_semi_convection(Rib, h0_m, h0_t, B, Pr_t_0_inv, alpha_m, alpha_h_fix, psi_m, psi_h, zeta, maxiters_convection);
+            
+            phi_m = pow(1.0 - alpha_m * zeta, -0.25);
+            phi_h = 1.0 / (Pr_t_0_inv * sqrt(1.0 - alpha_h_fix * zeta));
+        }
+
+        Cm = kappa / psi_m;
+        Ct = kappa / psi_h;
+
+        Km = kappa * Cm * U * h / phi_m;
+        Pr_t_inv = phi_m / phi_h;
+
+        zeta_[step]         = zeta;
+        Rib_[step]          = Rib;
+        Re_[step]           = Re;
+        B_[step]            = B;
+        z0_m_[step]         = z0_m;
+        z0_t_[step]         = z0_t;
+        Rib_conv_lim_[step] = Rib_conv_lim;
+        Cm_[step]           = Cm;
+        Ct_[step]           = Ct;
+        Km_[step]           = Km;
+        Pr_t_inv_[step]     = Pr_t_inv;
+    }
+}
+
+template void compute_flux_cpu(const float *U, const float *dt, const float *T_semi, const float *dq, const float *H, const float *in_z0_m,
+    float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
+    const float kappa, const float Pr_t_0_inv, const float Pr_t_inf_inv, 
+    const float alpha_m, const float alpha_h, const float alpha_h_fix, 
+    const float beta_m, const float beta_h, const float Rib_max, const float Re_rough_min, 
+    const float B1_rough, const float B2_rough,
+    const float B_max_land, const float B_max_ocean, const float B_max_lake,
+    const float gamma_c, const float Re_visc_min,
+    const float Pr_m, const float nu_air, const float g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size);
+template void compute_flux_cpu(const double *U, const double *dt, const double *T_semi, const double *dq, const double *H, const double *in_z0_m, 
+    double *zeta_, double *Rib_, double *Re_, double *B_, double *z0_m_, double *z0_t_, double *Rib_conv_lim_, double *Cm_, double *Ct_, double *Km_, double *Pr_t_inv_,
+    const double kappa, const double Pr_t_0_inv, const double Pr_t_inf_inv, 
+    const double alpha_m, const double alpha_h, const double alpha_h_fix, 
+    const double beta_m, const double beta_h, const double Rib_max, const double Re_rough_min, 
+    const double B1_rough, const double B2_rough,
+    const double B_max_land, const double B_max_ocean, const double B_max_lake,
+    const double gamma_c, const double Re_visc_min,
+    const double Pr_m, const double nu_air, const double g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size);
\ No newline at end of file
diff --git a/srcCXX/SubFunctions.cpp b/srcCXX/SubFunctions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..010f45af503e0dffa05f15d05f50a09a807441e0
--- /dev/null
+++ b/srcCXX/SubFunctions.cpp
@@ -0,0 +1,38 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "../includeCXX/SubFunctions.h"
+#include "../includeCXX/Flux.h"
+#include <vector>
+
+#ifdef INCLUDE_CUDA
+    Flux<float, MemType::GPU, MemType::CPU> F;
+#else
+    Flux<float, MemType::CPU, MemType::CPU> F;
+#endif
+
+// -------------------------------------------------------------------------- //
+void surf_flux_CXX (float *U_, float *dT_, float *Tsemi_, float *dQ_, float *h_, float *in_z0_m_,
+    float *zeta_, float *Rib_, float *Re_, float *B_, float *z0_m_, float *z0_t_, float *Rib_conv_lim_, float *Cm_, float *Ct_, float *Km_, float *Pr_t_inv_,
+    const float kappa, const float Pr_t_0_inv, const float Pr_t_inf_inv, 
+    const float alpha_m, const float alpha_h, const float alpha_h_fix, 
+    const float beta_m, const float beta_h, const float Rib_max, const float Re_rough_min, 
+    const float B1_rough, const float B2_rough,
+    const float B_max_land, const float B_max_ocean, const float B_max_lake,
+    const float gamma_c, const float Re_visc_min,
+    const float Pr_m, const float nu_air, const float g, 
+    const int maxiters_charnock, const int maxiters_convection, 
+    const int grid_size)
+{
+    F.set_params(grid_size, kappa, Pr_t_0_inv, Pr_t_inf_inv, 
+    alpha_m, alpha_h, alpha_h_fix, 
+    beta_m, beta_h, Rib_max, Re_rough_min, 
+    B1_rough, B2_rough,
+    B_max_land, B_max_ocean, B_max_lake,
+    gamma_c, Re_visc_min,
+    Pr_m, nu_air, g);
+
+    F.set_data(U_, dT_, Tsemi_, dQ_, h_, in_z0_m_,
+    zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_);
+
+    F.compute_flux(zeta_, Rib_, Re_, B_, z0_m_, z0_t_, Rib_conv_lim_, Cm_, Ct_, Km_, Pr_t_inv_, maxiters_charnock, maxiters_convection);
+}
\ No newline at end of file
diff --git a/srcF/FCWrapper.F90 b/srcF/FCWrapper.F90
new file mode 100644
index 0000000000000000000000000000000000000000..b946e9756a99705fe7fb1d14c5d7ce4b238e83ee
--- /dev/null
+++ b/srcF/FCWrapper.F90
@@ -0,0 +1,29 @@
+module C_FUNC
+    INTERFACE
+#if defined(INCLUDE_CUDA) || defined(INCLUDE_CXX)
+        SUBROUTINE surf_flux(U, dT, Tsemi, dQ, h, in_z0_m, & 
+            zeta, Rib, Re, B, z0_m, z0_t, Rib_conv_lim, &
+            Cm, Ct, Km, Prt_inv, & 
+            kappa, Pr_t_0_inv, Pr_t_inf_inv, & 
+            alpha_m, alpha_h, alpha_h_fix, & 
+            beta_m, beta_h, Rib_max, Re_rough_min, & 
+            B1_rough, B2_rough, & 
+            B_max_land, B_max_ocean, B_max_lake, & 
+            gamma_c, Re_visc_min, & 
+            Pr_m, nu_air, g, & 
+            maxiters_charnock, maxiters_convection, & 
+            grid_size) BIND(C)
+            USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT
+            USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_FLOAT
+            IMPLICIT NONE
+            INTEGER(C_INT) :: grid_size, maxiters_charnock, maxiters_convection
+            REAL(C_FLOAT)  :: kappa, Pr_t_0_inv, Pr_t_inf_inv, alpha_m, alpha_h, alpha_h_fix, & 
+            beta_m, beta_h, Rib_max, Re_rough_min, B1_rough, B2_rough, B_max_land, B_max_ocean, & 
+            B_max_lake, gamma_c, Re_visc_min, Pr_m, nu_air, g
+            REAL(C_FLOAT), dimension(grid_size) :: U, dT, Tsemi, dQ, h, in_z0_m, zeta, Rib, Re, &
+            Rib_conv_lim, z0_m, z0_t, B, Cm, Ct, Km, Prt_inv
+        END SUBROUTINE surf_flux
+#endif
+    END INTERFACE
+end module C_FUNC
+
diff --git a/srcF/sfx_esm.f90 b/srcF/sfx_esm.f90
index 8910c976bd7d61714c70492a4567670b88dafd49..cb4d4889908e91a683beec5be1eb4fc0103d21dc 100644
--- a/srcF/sfx_esm.f90
+++ b/srcF/sfx_esm.f90
@@ -11,6 +11,9 @@ module sfx_esm
     use sfx_data
     use sfx_surface
     use sfx_esm_param
+#if defined(INCLUDE_CUDA) || defined(INCLUDE_CXX)
+    use C_FUNC
+#endif
     ! --------------------------------------------------------------------------------
 
     ! directives list
@@ -51,7 +54,22 @@ contains
         type (sfxDataType) sfx_cell
         integer i
         ! ----------------------------------------------------------------------------
-
+#if defined(INCLUDE_CUDA) || defined(INCLUDE_CXX)
+        write(*, *) 'CXX'
+        call surf_flux(meteo%U, meteo%dT, meteo%Tsemi, meteo%dQ, meteo%h, meteo%z0_m, &
+        sfx%zeta, sfx%Rib, sfx%Re, sfx%B, sfx%z0_m, sfx%z0_t,  &
+        sfx%Rib_conv_lim, sfx%Cm, sfx%Ct, sfx%Km, sfx%Pr_t_inv, &
+        kappa, Pr_t_0_inv, Pr_t_inf_inv, & 
+        alpha_m, alpha_h, alpha_h_fix, & 
+        beta_m, beta_h, Rib_max, Re_rough_min, & 
+        B1_rough, B2_rough, & 
+        B_max_land, B_max_ocean, B_max_lake, & 
+        gamma_c, Re_visc_min, & 
+        Pr_m, nu_air, g, & 
+        numerics%maxiters_charnock, numerics%maxiters_convection, & 
+        n)
+#else
+        write(*, *) 'FORTRAN'
         do i = 1, n
 #ifdef SFX_FORCE_DEPRECATED_ESM_CODE
 #else
@@ -65,6 +83,7 @@ contains
             call push_sfx_data(sfx, sfx_cell, i)
 #endif
         end do
+#endif
 
     end subroutine get_surface_fluxes_vec
     ! --------------------------------------------------------------------------------
diff --git a/srcF/sfx_surface.f90 b/srcF/sfx_surface.f90
index 628626fb16c0899c50855a90e14250ec76c0110a..f5952aac053b0313646d5c6c69ca0ac7a6bb68bb 100644
--- a/srcF/sfx_surface.f90
+++ b/srcF/sfx_surface.f90
@@ -11,7 +11,6 @@ module sfx_surface
     ! directives list
     ! --------------------------------------------------------------------------------
     implicit none
-    private
     ! --------------------------------------------------------------------------------
 
     ! public interface
@@ -27,12 +26,13 @@ module sfx_surface
     integer, public, parameter :: surface_lake = 2      !> lake surface
 
     ! --------------------------------------------------------------------------------
-    real, parameter :: kappa = 0.40         !> von Karman constant [n/d]
+    real, parameter, private :: kappa = 0.40         !> von Karman constant [n/d]
     ! --------------------------------------------------------------------------------
 
     !> Charnock parameters
     !>     z0 = Re_visc_min * (nu / u_dyn) + gamma_c * (u_dyn^2 / g)
     ! --------------------------------------------------------------------------------
+    
     real, parameter :: gamma_c = 0.0144
     real, parameter :: Re_visc_min = 0.111