From b2f001061c738f0704f7cc70b42c48d01af7667f Mon Sep 17 00:00:00 2001
From: Lizzzka007 <gashchuk2011@mail.ru>
Date: Mon, 18 Dec 2023 13:52:10 +0300
Subject: [PATCH] Vendor MemoryProcessing sources in-tree, move sfx_def.fi to includeF, drop debug prints

---
 CMakeLists.txt                  | 68 ++++++++++++++++----------------
 includeCU/MemoryProcessing.cuh  | 21 ++++++++++
 includeCXX/MemoryProcessing.h   | 21 ++++++++++
 includeCXX/TemplateParameters.h |  3 ++
 {srcF => includeF}/sfx_def.fi   |  0
 srcCU/MemoryProcessing.cu       | 70 +++++++++++++++++++++++++++++++++
 srcCXX/Flux.cpp                 |  3 +-
 srcCXX/FluxComputeFunc.cpp      |  4 --
 srcCXX/MemoryProcessing.cpp     | 57 +++++++++++++++++++++++++++
 srcF/sfx_esm.f90                |  4 +-
 srcF/sfx_log.f90                |  2 +-
 srcF/sfx_surface.f90            |  2 +-
 12 files changed, 212 insertions(+), 43 deletions(-)
 create mode 100644 includeCU/MemoryProcessing.cuh
 create mode 100644 includeCXX/MemoryProcessing.h
 create mode 100644 includeCXX/TemplateParameters.h
 rename {srcF => includeF}/sfx_def.fi (100%)
 create mode 100644 srcCU/MemoryProcessing.cu
 create mode 100644 srcCXX/MemoryProcessing.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 478ed3c..911a3c8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,18 +6,6 @@ option(INCLUDE_CXX  "CXX build in mode"    OFF)
 project(INMCM_sfx)
 enable_language(Fortran)
 
-if(INCLUDE_CXX OR INCLUDE_CUDA)
-    set(MEMPROC_GIT http://tesla.parallel.ru/Lizzzka007/memory_processing.git)
-    include(FetchContent)
-    FetchContent_Declare(memory_processing                                   
-    GIT_REPOSITORY ${MEMPROC_GIT}                                    
-    GIT_TAG origin/main
-    )
-    FetchContent_MakeAvailable(memory_processing)                              
-    add_library(memory_processing INTERFACE)
-    target_compile_definitions(memory_processing INTERFACE INCLUDE_CUDA=${INCLUDE_CUDA})
-endif(INCLUDE_CXX OR INCLUDE_CUDA)
-
 if(INCLUDE_CXX)
     set(RUN_MACRO -DINCLUDE_CXX)
 endif(INCLUDE_CXX)
@@ -39,7 +27,6 @@ endif(INCLUDE_CXX)
 set(SOURCES_F 
     srcF/sfx_data.f90
     srcF/sfx_common.f90
-    srcF/sfx_def.fi
     srcF/sfx_esm.f90 
     srcF/sfx_esm_param.f90
     srcF/sfx_log.f90
@@ -50,45 +37,60 @@ set(SOURCES_F
     srcF/FCWrapper.F90
 )
 
+set(HEADERS_F 
+    includeF/sfx_def.fi
+)
+
 if(INCLUDE_CXX)
     set(SOURCES_C 
         srcC/SubFunctionsWrapper.c
     )
 
     set(SOURCES_CXX 
-        srcCXX/Flux.cpp
-        srcCXX/FluxComputeFunc.cpp
-        srcCXX/SubFunctions.cpp
+            srcCXX/Flux.cpp
+            srcCXX/FluxComputeFunc.cpp
+            srcCXX/SubFunctions.cpp
     )
     set(HEADERS_CXX 
-        includeCXX/Flux.h
-        includeCXX/FluxComputeFunc.h
-        includeCXX/SubFunctions.h
+            includeCXX/Flux.h
+            includeCXX/FluxComputeFunc.h
+            includeCXX/SubFunctions.h
         )
 endif(INCLUDE_CXX)
 
 if(INCLUDE_CUDA)
     set(SOURCES_CU 
-    srcCU/Flux.cu
-    srcCU/FluxComputeFunc.cu
+        srcCU/Flux.cu
+        srcCU/FluxComputeFunc.cu
     )
     set(HEADERS_CU
-    includeCU/Flux.cuh
-    includeCXX/FluxComputeFunc.cuh
+        includeCU/Flux.cuh
+        includeCU/FluxComputeFunc.cuh
     )
 endif(INCLUDE_CUDA)
 
-set(SOURCES ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${SOURCES_F})
+if(INCLUDE_CXX OR INCLUDE_CUDA)
+    set(MEMPROC_SOURCES_CXX 
+        srcCXX/MemoryProcessing.cpp
+    )
+    set(MEMPROC_HEADERS_CXX 
+        includeCXX/MemoryProcessing.h
+        includeCXX/TemplateParameters.h
+    )
 
-set(CMAKE_Fortran_FLAGS " -g -fbacktrace -ffpe-trap=zero,overflow,underflow -cpp ")
-set(CMAKE_CXX_FLAGS " -g ")
-set(CMAKE_C_FLAGS " -g ")
+    if(INCLUDE_CUDA)
+        set(MEMPROC_SOURCES_CU 
+            srcCU/MemoryProcessing.cu
+        )
+        set(MEMPROC_HEADERS_CU
+            includeCU/MemoryProcessing.cuh
+        )
+    endif(INCLUDE_CUDA)
+endif(INCLUDE_CXX OR INCLUDE_CUDA)
 
+set(SOURCES ${MEMPROC_HEADERS_CU} ${MEMPROC_SOURCES_CU} ${MEMPROC_HEADERS_CXX} ${MEMPROC_SOURCES_CXX} ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${HEADERS_F} ${SOURCES_F})
+
+string(APPEND CMAKE_Fortran_FLAGS " -cpp")  # append so flags supplied by the user/toolchain are kept
 add_executable(drag ${SOURCES})
 add_definitions(${RUN_MACRO})
-set_property(TARGET drag PROPERTY LINKER_LANGUAGE Fortran)
-
-if(INCLUDE_CXX OR INCLUDE_CUDA)
-    target_include_directories(drag PUBLIC ${memory_processing_SOURCE_DIR}/include)
-    target_link_libraries(drag memproc)
-endif(INCLUDE_CXX OR INCLUDE_CUDA)
\ No newline at end of file
+set_property(TARGET drag PROPERTY LINKER_LANGUAGE Fortran)
diff --git a/includeCU/MemoryProcessing.cuh b/includeCU/MemoryProcessing.cuh
new file mode 100644
index 0000000..2ef88c8
--- /dev/null
+++ b/includeCU/MemoryProcessing.cuh
@@ -0,0 +1,32 @@
+#pragma once
+#include "../includeCXX/TemplateParameters.h"
+#include <cstddef>
+
+// Memory helpers selected at compile time through MemType.
+// This header only declares the templates: the MemType::GPU specializations
+// are defined in srcCU/MemoryProcessing.cu, the MemType::CPU ones in
+// srcCXX/MemoryProcessing.cpp. Every size argument is a byte count.
+namespace memproc
+{
+    // Allocate new_size zero-filled bytes and store their address in array.
+    template <MemType memtype>
+    bool alloc(void *&array, const size_t new_size);
+
+    // Grow the buffer when new_size > allocated_size; the old buffer is
+    // released rather than copied. Requests that already fit change nothing.
+    template <MemType memtype>
+    bool realloc(void *&array, size_t &allocated_size, const size_t new_size);
+
+    // Release array when allocated_size > 0 and reset allocated_size to 0.
+    template<MemType memtype>
+    bool dealloc(void *&array, size_t &allocated_size);
+
+    // Release array unconditionally.
+    template<MemType memtype>
+    bool dealloc(void *&array);
+
+    // Copy copy_elem_size bytes from src (src_memtype memory) to dst
+    // (dst_memtype memory).
+    template <MemType dst_memtype, MemType src_memtype>
+    bool memcopy(void *dst, const void* src, const size_t copy_elem_size);
+}
diff --git a/includeCXX/MemoryProcessing.h b/includeCXX/MemoryProcessing.h
new file mode 100644
index 0000000..b3db1fe
--- /dev/null
+++ b/includeCXX/MemoryProcessing.h
@@ -0,0 +1,31 @@
+#pragma once
+#include "TemplateParameters.h"
+#include <cstddef>
+
+/* Declarations of the memproc helpers for the C++ sources.
+ * Only the templates are declared here; the MemType::CPU specializations
+ * live in srcCXX/MemoryProcessing.cpp. Sizes are expressed in bytes. */
+namespace memproc
+{
+    /* Obtain new_size zero-initialised bytes; array receives the address. */
+    template<MemType memtype>
+    bool alloc(void*& array, const size_t new_size);
+
+    /* Enlarge the buffer if new_size exceeds allocated_size. The previous
+     * buffer is freed, not copied; a request that fits is a no-op. */
+    template<MemType memtype>
+    bool realloc(void*& array, size_t& allocated_size, const size_t new_size);
+
+    /* Free array when allocated_size is non-zero, then zero allocated_size. */
+    template<MemType memtype>
+    bool dealloc(void*& array, size_t& allocated_size);
+
+    /* Free array without consulting any size bookkeeping. */
+    template<MemType memtype>
+    bool dealloc(void*& array);
+
+    /* Copy copy_elem_size bytes from src to dst; the template arguments
+     * name the memory kind of the destination and of the source. */
+    template<MemType dst_memtype, MemType src_memtype>
+    bool memcopy(void* dst, const void* src, const size_t copy_elem_size);
+}
diff --git a/includeCXX/TemplateParameters.h b/includeCXX/TemplateParameters.h
new file mode 100644
index 0000000..b63e394
--- /dev/null
+++ b/includeCXX/TemplateParameters.h
@@ -0,0 +1,8 @@
+#pragma once
+
+// Memory kind used as a template argument by the memproc helpers.
+enum MemType
+{
+    CPU, // buffers managed with malloc/free (srcCXX/MemoryProcessing.cpp)
+    GPU  // buffers managed with cudaMalloc/cudaFree (srcCU/MemoryProcessing.cu)
+};
diff --git a/srcF/sfx_def.fi b/includeF/sfx_def.fi
similarity index 100%
rename from srcF/sfx_def.fi
rename to includeF/sfx_def.fi
diff --git a/srcCU/MemoryProcessing.cu b/srcCU/MemoryProcessing.cu
new file mode 100644
index 0000000..4c866ed
--- /dev/null
+++ b/srcCU/MemoryProcessing.cu
@@ -0,0 +1,94 @@
+#include "../includeCU/MemoryProcessing.cuh"
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+// MemType::GPU specializations of the memproc helpers on top of the CUDA
+// runtime API. Each function returns true on success and false as soon as a
+// CUDA call reports an error. Every size argument is a byte count.
+namespace memproc
+{
+    // Free the device buffer when allocated_size > 0, then clear the pointer
+    // and the recorded size so the same buffer cannot be freed twice.
+    template<>
+    bool dealloc<MemType::GPU>(void *&array, size_t &allocated_size)
+    {
+        if(allocated_size == 0) return true;
+
+        const cudaError_t status = cudaFree(array);
+        array = nullptr;
+        allocated_size = 0;
+
+        return status == cudaSuccess;
+    }
+
+    // Free the device buffer unconditionally and clear the pointer.
+    template<>
+    bool dealloc<MemType::GPU>(void *&array)
+    {
+        const cudaError_t status = cudaFree(array);
+        array = nullptr;
+
+        return status == cudaSuccess;
+    }
+
+    // Allocate new_size zero-filled bytes of device memory.
+    // On failure nothing stays allocated and array is left null.
+    template <>
+    bool alloc<MemType::GPU>(void *&array, const size_t new_size)
+    {
+        array = nullptr;
+        if(new_size == 0) return true; // nothing to allocate or clear
+
+        if(cudaMalloc(&array, new_size) != cudaSuccess)
+        {
+            array = nullptr;
+            return false;
+        }
+        if(cudaMemset(array, 0, new_size) != cudaSuccess)
+        {
+            cudaFree(array); // do not leak the buffer we could not clear
+            array = nullptr;
+            return false;
+        }
+
+        return true;
+    }
+
+    // Make sure the device buffer holds at least new_size bytes. A larger
+    // request releases the old buffer (its contents are not preserved) and
+    // allocates a zero-filled one; a request that already fits is a no-op.
+    template <>
+    bool realloc<MemType::GPU>(void *&array, size_t &allocated_size, const size_t new_size)
+    {
+        if(new_size <= allocated_size) return true;
+
+        // dealloc leaves allocated_size at 0, so a failed allocation below
+        // cannot leave a stale size next to a null pointer.
+        if(!dealloc<MemType::GPU>(array, allocated_size)) return false;
+        if(!alloc<MemType::GPU>(array, new_size)) return false;
+
+        allocated_size = new_size;
+        return true;
+    }
+
+    // Host -> device copy of copy_elem_size bytes.
+    template <>
+    bool memcopy<MemType::GPU, MemType::CPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        return cudaMemcpy(dst, src, copy_elem_size, cudaMemcpyHostToDevice) == cudaSuccess;
+    }
+
+    // Device -> host copy of copy_elem_size bytes.
+    template <>
+    bool memcopy<MemType::CPU, MemType::GPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        return cudaMemcpy(dst, src, copy_elem_size, cudaMemcpyDeviceToHost) == cudaSuccess;
+    }
+
+    // Device -> device copy of copy_elem_size bytes.
+    template <>
+    bool memcopy<MemType::GPU, MemType::GPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        return cudaMemcpy(dst, src, copy_elem_size, cudaMemcpyDeviceToDevice) == cudaSuccess;
+    }
+}
diff --git a/srcCXX/Flux.cpp b/srcCXX/Flux.cpp
index 80dfaf5..8157b1a 100644
--- a/srcCXX/Flux.cpp
+++ b/srcCXX/Flux.cpp
@@ -4,9 +4,10 @@
 #include "../includeCXX/FluxComputeFunc.h"
 #ifdef INCLUDE_CUDA
     #include "../includeCU/Flux.cuh"
+    #include "../includeCU/MemoryProcessing.cuh"
 #endif
 
-#include "MemoryProcessing.h"
+#include "../includeCXX/MemoryProcessing.h"
 
 template<typename T, MemType RunMem, MemType memIn>
 Flux<T, RunMem, memIn>::Flux()
diff --git a/srcCXX/FluxComputeFunc.cpp b/srcCXX/FluxComputeFunc.cpp
index 7e1dbd1..15fbb6c 100644
--- a/srcCXX/FluxComputeFunc.cpp
+++ b/srcCXX/FluxComputeFunc.cpp
@@ -28,7 +28,6 @@ void get_charnock_roughness(const T h, const T U,
             b = c;
         }
         z0_m = h_charnock * exp(-c * kappa);
-        printf("%f and 0.000015e0\n", z0_m);
         z0_m = std::max(z0_m, T(0.000015e0));
         Uc = U * log(h_charnock / z0_m) / log(h / z0_m);
     }
@@ -302,9 +301,6 @@ void compute_flux_cpu(const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_,
             Rib = std::min(Rib, Rib_max);
             get_psi_stable(Rib, h0_m, h0_t, B, Pr_t_0_inv, beta_m, psi_m, psi_h, zeta);
 
-            if(step == 353)
-                printf("get_psi_stable zeta = %f\n", zeta);
-
             fval = beta_m * zeta;
             phi_m = 1.0 + fval;
             phi_h = 1.0/Pr_t_0_inv + fval;
diff --git a/srcCXX/MemoryProcessing.cpp b/srcCXX/MemoryProcessing.cpp
new file mode 100644
index 0000000..d387605
--- /dev/null
+++ b/srcCXX/MemoryProcessing.cpp
@@ -0,0 +1,72 @@
+#include "../includeCXX/MemoryProcessing.h"
+#include <cstdlib>
+#include <cstring>
+
+// MemType::CPU specializations of the memproc helpers on top of the C
+// allocator. Allocation functions return false when memory cannot be
+// obtained. Every size argument is a byte count.
+namespace memproc
+{
+    // Free the buffer when allocated_size > 0, then clear the pointer and the
+    // recorded size so the same buffer cannot be freed twice.
+    template<>
+    bool dealloc<MemType::CPU>(void *&array, size_t &allocated_size)
+    {
+        if(allocated_size > 0)
+        {
+            free(array);
+            array = nullptr;
+            allocated_size = 0;
+        }
+
+        return true;
+    }
+
+    // Free the buffer unconditionally and clear the pointer.
+    template<>
+    bool dealloc<MemType::CPU>(void *&array)
+    {
+        free(array);
+        array = nullptr;
+
+        return true;
+    }
+
+    // Allocate new_size zero-filled bytes.
+    template <>
+    bool alloc<MemType::CPU>(void *&array, const size_t new_size)
+    {
+        // calloc returns zeroed memory, so a failed allocation is never
+        // written through (malloc followed by memset dereferenced null).
+        array = calloc(1, new_size);
+
+        // A zero-byte request may legitimately yield a null pointer.
+        return array != nullptr || new_size == 0;
+    }
+
+    // Make sure the buffer holds at least new_size bytes. A larger request
+    // frees the old buffer (its contents are not preserved) and allocates a
+    // zero-filled one; a request that already fits is a no-op.
+    template <>
+    bool realloc<MemType::CPU>(void *&array, size_t &allocated_size, const size_t new_size)
+    {
+        if(new_size <= allocated_size) return true;
+
+        // dealloc leaves allocated_size at 0, so a failed allocation below
+        // cannot leave a stale size next to a null pointer.
+        dealloc<MemType::CPU>(array, allocated_size);
+        if(!alloc<MemType::CPU>(array, new_size)) return false;
+
+        allocated_size = new_size;
+        return true;
+    }
+
+    // Host -> host copy of copy_elem_size bytes; the ranges must not overlap.
+    template <>
+    bool memcopy<MemType::CPU, MemType::CPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        if(copy_elem_size > 0) memcpy(dst, src, copy_elem_size);
+
+        return true;
+    }
+}
diff --git a/srcF/sfx_esm.f90 b/srcF/sfx_esm.f90
index cb4d488..8b6c68f 100644
--- a/srcF/sfx_esm.f90
+++ b/srcF/sfx_esm.f90
@@ -1,4 +1,4 @@
-#include "sfx_def.fi"
+#include "../includeF/sfx_def.fi"
 
 module sfx_esm
     !> @brief main Earth System Model surface flux module
@@ -55,7 +55,6 @@ contains
         integer i
         ! ----------------------------------------------------------------------------
 #if defined(INCLUDE_CUDA) || defined(INCLUDE_CXX)
-        write(*, *) 'CXX'
         call surf_flux(meteo%U, meteo%dT, meteo%Tsemi, meteo%dQ, meteo%h, meteo%z0_m, &
         sfx%zeta, sfx%Rib, sfx%Re, sfx%B, sfx%z0_m, sfx%z0_t,  &
         sfx%Rib_conv_lim, sfx%Cm, sfx%Ct, sfx%Km, sfx%Pr_t_inv, &
@@ -69,7 +68,6 @@ contains
         numerics%maxiters_charnock, numerics%maxiters_convection, & 
         n)
 #else
-        write(*, *) 'FORTRAN'
         do i = 1, n
 #ifdef SFX_FORCE_DEPRECATED_ESM_CODE
 #else
diff --git a/srcF/sfx_log.f90 b/srcF/sfx_log.f90
index 39f8674..592d159 100644
--- a/srcF/sfx_log.f90
+++ b/srcF/sfx_log.f90
@@ -1,4 +1,4 @@
-#include "sfx_def.fi"
+#include "../includeF/sfx_def.fi"
 
 module sfx_log
     !> @brief simple log-roughness surface flux module
diff --git a/srcF/sfx_surface.f90 b/srcF/sfx_surface.f90
index f5952aa..33e7159 100644
--- a/srcF/sfx_surface.f90
+++ b/srcF/sfx_surface.f90
@@ -1,4 +1,4 @@
-#include "sfx_def.fi"
+#include "../includeF/sfx_def.fi"
 
 module sfx_surface
     !> @brief surface roughness parameterizations
-- 
GitLab