From b2f001061c738f0704f7cc70b42c48d01af7667f Mon Sep 17 00:00:00 2001
From: Lizzzka007 <gashchuk2011@mail.ru>
Date: Mon, 18 Dec 2023 13:52:10 +0300
Subject: [PATCH] .
---
CMakeLists.txt | 68 ++++++++++++++++----------------
includeCU/MemoryProcessing.cuh | 21 ++++++++++
includeCXX/MemoryProcessing.h | 21 ++++++++++
includeCXX/TemplateParameters.h | 3 ++
{srcF => includeF}/sfx_def.fi | 0
srcCU/MemoryProcessing.cu | 70 +++++++++++++++++++++++++++++++++
srcCXX/Flux.cpp | 3 +-
srcCXX/FluxComputeFunc.cpp | 4 --
srcCXX/MemoryProcessing.cpp | 57 +++++++++++++++++++++++++++
srcF/sfx_esm.f90 | 4 +-
srcF/sfx_log.f90 | 2 +-
srcF/sfx_surface.f90 | 2 +-
12 files changed, 212 insertions(+), 43 deletions(-)
create mode 100644 includeCU/MemoryProcessing.cuh
create mode 100644 includeCXX/MemoryProcessing.h
create mode 100644 includeCXX/TemplateParameters.h
rename {srcF => includeF}/sfx_def.fi (100%)
create mode 100644 srcCU/MemoryProcessing.cu
create mode 100644 srcCXX/MemoryProcessing.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 478ed3c..911a3c8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,18 +6,6 @@ option(INCLUDE_CXX "CXX build in mode" OFF)
project(INMCM_sfx)
enable_language(Fortran)
-if(INCLUDE_CXX OR INCLUDE_CUDA)
- set(MEMPROC_GIT http://tesla.parallel.ru/Lizzzka007/memory_processing.git)
- include(FetchContent)
- FetchContent_Declare(memory_processing
- GIT_REPOSITORY ${MEMPROC_GIT}
- GIT_TAG origin/main
- )
- FetchContent_MakeAvailable(memory_processing)
- add_library(memory_processing INTERFACE)
- target_compile_definitions(memory_processing INTERFACE INCLUDE_CUDA=${INCLUDE_CUDA})
-endif(INCLUDE_CXX OR INCLUDE_CUDA)
-
if(INCLUDE_CXX)
set(RUN_MACRO -DINCLUDE_CXX)
endif(INCLUDE_CXX)
@@ -39,7 +27,6 @@ endif(INCLUDE_CXX)
set(SOURCES_F
srcF/sfx_data.f90
srcF/sfx_common.f90
- srcF/sfx_def.fi
srcF/sfx_esm.f90
srcF/sfx_esm_param.f90
srcF/sfx_log.f90
@@ -50,45 +37,60 @@ set(SOURCES_F
srcF/FCWrapper.F90
)
+set(HEADERS_F
+ includeF/sfx_def.fi
+)
+
if(INCLUDE_CXX)
set(SOURCES_C
srcC/SubFunctionsWrapper.c
)
set(SOURCES_CXX
- srcCXX/Flux.cpp
- srcCXX/FluxComputeFunc.cpp
- srcCXX/SubFunctions.cpp
+ srcCXX/Flux.cpp
+ srcCXX/FluxComputeFunc.cpp
+ srcCXX/SubFunctions.cpp
)
set(HEADERS_CXX
- includeCXX/Flux.h
- includeCXX/FluxComputeFunc.h
- includeCXX/SubFunctions.h
+ includeCXX/Flux.h
+ includeCXX/FluxComputeFunc.h
+ includeCXX/SubFunctions.h
)
endif(INCLUDE_CXX)
if(INCLUDE_CUDA)
set(SOURCES_CU
- srcCU/Flux.cu
- srcCU/FluxComputeFunc.cu
+ srcCU/Flux.cu
+ srcCU/FluxComputeFunc.cu
)
set(HEADERS_CU
- includeCU/Flux.cuh
- includeCXX/FluxComputeFunc.cuh
+ includeCU/Flux.cuh
+ includeCU/FluxComputeFunc.cuh
)
endif(INCLUDE_CUDA)
-set(SOURCES ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${SOURCES_F})
+if(INCLUDE_CXX OR INCLUDE_CUDA)   # in-tree memory helpers are needed by both the C++ and the CUDA configuration
+    set(MEMPROC_SOURCES_CXX
+        srcCXX/MemoryProcessing.cpp
+    )
+    set(MEMPROC_HEADERS_CXX
+        includeCXX/MemoryProcessing.h
+        includeCXX/TemplateParameters.h
+    )
-set(CMAKE_Fortran_FLAGS " -g -fbacktrace -ffpe-trap=zero,overflow,underflow -cpp ")
-set(CMAKE_CXX_FLAGS " -g ")
-set(CMAKE_C_FLAGS " -g ")
+    if(INCLUDE_CUDA)   # the CUDA specializations are only compiled when CUDA is enabled
+        set(MEMPROC_SOURCES_CU
+            srcCU/MemoryProcessing.cu
+        )
+        set(MEMPROC_HEADERS_CU
+            includeCU/MemoryProcessing.cuh
+        )
+    endif(INCLUDE_CUDA)
+endif(INCLUDE_CXX OR INCLUDE_CUDA)
+set(SOURCES ${MEMPROC_HEADERS_CU} ${MEMPROC_SOURCES_CU} ${MEMPROC_HEADERS_CXX} ${MEMPROC_SOURCES_CXX} ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${HEADERS_F} ${SOURCES_F})   # lists left unset by a disabled option expand to nothing
+
+set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp")   # append -cpp (preprocessing) instead of discarding user/toolchain flags
add_executable(drag ${SOURCES})
add_definitions(${RUN_MACRO})
-set_property(TARGET drag PROPERTY LINKER_LANGUAGE Fortran)
-
-if(INCLUDE_CXX OR INCLUDE_CUDA)
- target_include_directories(drag PUBLIC ${memory_processing_SOURCE_DIR}/include)
- target_link_libraries(drag memproc)
-endif(INCLUDE_CXX OR INCLUDE_CUDA)
\ No newline at end of file
+set_property(TARGET drag PROPERTY LINKER_LANGUAGE Fortran)
\ No newline at end of file
diff --git a/includeCU/MemoryProcessing.cuh b/includeCU/MemoryProcessing.cuh
new file mode 100644
index 0000000..2ef88c8
--- /dev/null
+++ b/includeCU/MemoryProcessing.cuh
@@ -0,0 +1,21 @@
+#pragma once
+#include "../includeCXX/TemplateParameters.h"   // MemType is defined in includeCXX/, not next to this header
+#include <cstddef>
+// Memory helpers selected at compile time by MemType; the GPU specializations are defined in srcCU/MemoryProcessing.cu.
+namespace memproc
+{
+    template <MemType memtype>
+    bool alloc(void *&array, const size_t new_size);   // allocate new_size zero-filled bytes into array
+    // Grow array to new_size bytes when new_size > allocated_size (old contents are not copied over).
+    template <MemType memtype>
+    bool realloc(void *&array, size_t &allocated_size, const size_t new_size);
+    // Free array when allocated_size > 0 and reset allocated_size to 0.
+    template<MemType memtype>
+    bool dealloc(void *&array, size_t &allocated_size);
+    // Free array unconditionally.
+    template<MemType memtype>
+    bool dealloc(void *&array);
+    // Copy copy_elem_size bytes from src (memory of kind src_memtype) to dst (memory of kind dst_memtype).
+    template <MemType dst_memtype, MemType src_memtype>
+    bool memcopy(void *dst, const void* src, const size_t copy_elem_size);
+}
\ No newline at end of file
diff --git a/includeCXX/MemoryProcessing.h b/includeCXX/MemoryProcessing.h
new file mode 100644
index 0000000..b3db1fe
--- /dev/null
+++ b/includeCXX/MemoryProcessing.h
@@ -0,0 +1,21 @@
+#pragma once
+#include "TemplateParameters.h"
+#include <cstddef>
+// Memory helpers selected at compile time by MemType; the CPU specializations are defined in srcCXX/MemoryProcessing.cpp.
+namespace memproc
+{
+ template <MemType memtype>
+ bool alloc(void *&array, const size_t new_size); // allocate new_size zero-filled bytes into array
+ // Grow array to new_size bytes when new_size > allocated_size (old contents are not copied over).
+ template <MemType memtype>
+ bool realloc(void *&array, size_t &allocated_size, const size_t new_size);
+ // Free array when allocated_size > 0 and reset allocated_size to 0.
+ template<MemType memtype>
+ bool dealloc(void *&array, size_t &allocated_size);
+ // Free array unconditionally.
+ template<MemType memtype>
+ bool dealloc(void *&array);
+ // Copy copy_elem_size bytes from src (memory of kind src_memtype) to dst (memory of kind dst_memtype).
+ template <MemType dst_memtype, MemType src_memtype>
+ bool memcopy(void *dst, const void* src, const size_t copy_elem_size);
+}
diff --git a/includeCXX/TemplateParameters.h b/includeCXX/TemplateParameters.h
new file mode 100644
index 0000000..b63e394
--- /dev/null
+++ b/includeCXX/TemplateParameters.h
@@ -0,0 +1,3 @@
+#pragma once
+// Memory-kind tag used as a template argument by the memproc helpers: CPU selects the malloc/free-based specializations, GPU the CUDA ones.
+enum MemType {CPU, GPU};
\ No newline at end of file
diff --git a/srcF/sfx_def.fi b/includeF/sfx_def.fi
similarity index 100%
rename from srcF/sfx_def.fi
rename to includeF/sfx_def.fi
diff --git a/srcCU/MemoryProcessing.cu b/srcCU/MemoryProcessing.cu
new file mode 100644
index 0000000..4c866ed
--- /dev/null
+++ b/srcCU/MemoryProcessing.cu
@@ -0,0 +1,70 @@
+#include "../includeCU/MemoryProcessing.cuh"
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+// GPU (CUDA) specializations of the memproc helpers. Each function reports CUDA
+// runtime failures through its bool result instead of unconditionally returning true.
+namespace memproc
+{
+    // Frees `array` when `allocated_size` is non-zero, then clears both so stale state is not reused.
+    template<>
+    bool dealloc<MemType::GPU>(void *&array, size_t &allocated_size)
+    {
+        if(allocated_size == 0) return true;
+        const bool freed = (cudaFree(array) == cudaSuccess);
+        array = nullptr;
+        allocated_size = 0;
+        return freed;
+    }
+
+    // Frees `array` unconditionally and nulls the pointer.
+    template<>
+    bool dealloc<MemType::GPU>(void *&array)
+    {
+        const bool freed = (cudaFree(array) == cudaSuccess);
+        array = nullptr;
+        return freed;
+    }
+
+    // Allocates `new_size` zero-filled bytes of device memory; `array` is null on allocation failure.
+    template <>
+    bool alloc<MemType::GPU>(void *&array, const size_t new_size)
+    {
+        if(cudaMalloc(&array, new_size) != cudaSuccess) { array = nullptr; return false; }
+        return new_size == 0 || cudaMemset(array, 0, new_size) == cudaSuccess;
+    }
+
+    // Grows the buffer to `new_size` bytes when that is larger than `allocated_size`.
+    // Old contents are discarded; the new buffer is zero-filled.
+    template <>
+    bool realloc<MemType::GPU>(void *&array, size_t &allocated_size, const size_t new_size)
+    {
+        if(new_size <= allocated_size) return true;
+        if(!dealloc<MemType::GPU>(array, allocated_size)) return false;
+        if(!alloc<MemType::GPU>(array, new_size)) return false;
+        allocated_size = new_size;   // recorded only once the allocation succeeded
+        return true;
+    }
+
+    // Host -> device copy of `copy_elem_size` bytes.
+    template <>
+    bool memcopy<MemType::GPU, MemType::CPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        return cudaMemcpy(dst, src, copy_elem_size, cudaMemcpyHostToDevice) == cudaSuccess;
+    }
+
+    // Device -> host copy of `copy_elem_size` bytes.
+    template <>
+    bool memcopy<MemType::CPU, MemType::GPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        return cudaMemcpy(dst, src, copy_elem_size, cudaMemcpyDeviceToHost) == cudaSuccess;
+    }
+
+    // Device -> device copy of `copy_elem_size` bytes.
+    template <>
+    bool memcopy<MemType::GPU, MemType::GPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        return cudaMemcpy(dst, src, copy_elem_size, cudaMemcpyDeviceToDevice) == cudaSuccess;
+    }
+}
\ No newline at end of file
diff --git a/srcCXX/Flux.cpp b/srcCXX/Flux.cpp
index 80dfaf5..8157b1a 100644
--- a/srcCXX/Flux.cpp
+++ b/srcCXX/Flux.cpp
@@ -4,9 +4,10 @@
#include "../includeCXX/FluxComputeFunc.h"
#ifdef INCLUDE_CUDA
#include "../includeCU/Flux.cuh"
+ #include "../includeCU/MemoryProcessing.cuh" // the header in includeCU/ has the .cuh extension
#endif
-#include "MemoryProcessing.h"
+#include "../includeCXX/MemoryProcessing.h"
template<typename T, MemType RunMem, MemType memIn>
Flux<T, RunMem, memIn>::Flux()
diff --git a/srcCXX/FluxComputeFunc.cpp b/srcCXX/FluxComputeFunc.cpp
index 7e1dbd1..15fbb6c 100644
--- a/srcCXX/FluxComputeFunc.cpp
+++ b/srcCXX/FluxComputeFunc.cpp
@@ -28,7 +28,6 @@ void get_charnock_roughness(const T h, const T U,
b = c;
}
z0_m = h_charnock * exp(-c * kappa);
- printf("%f and 0.000015e0\n", z0_m);
z0_m = std::max(z0_m, T(0.000015e0));
Uc = U * log(h_charnock / z0_m) / log(h / z0_m);
}
@@ -302,9 +301,6 @@ void compute_flux_cpu(const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_,
Rib = std::min(Rib, Rib_max);
get_psi_stable(Rib, h0_m, h0_t, B, Pr_t_0_inv, beta_m, psi_m, psi_h, zeta);
- if(step == 353)
- printf("get_psi_stable zeta = %f\n", zeta);
-
fval = beta_m * zeta;
phi_m = 1.0 + fval;
phi_h = 1.0/Pr_t_0_inv + fval;
diff --git a/srcCXX/MemoryProcessing.cpp b/srcCXX/MemoryProcessing.cpp
new file mode 100644
index 0000000..d387605
--- /dev/null
+++ b/srcCXX/MemoryProcessing.cpp
@@ -0,0 +1,57 @@
+#include "../includeCXX/MemoryProcessing.h"
+#include <cstdlib>
+#include <cstring>
+
+// Host (CPU) specializations of the memproc helpers, backed by the C allocator.
+namespace memproc
+{
+    // Frees `array` when `allocated_size` is non-zero, then nulls the pointer and zeroes the size.
+    template<>
+    bool dealloc<MemType::CPU>(void *&array, size_t &allocated_size)
+    {
+        if(allocated_size > 0)
+        {
+            free(array);
+            array = nullptr;   // do not leave a dangling pointer behind
+            allocated_size = 0;
+        }
+        return true;
+    }
+
+    // Frees `array` unconditionally (free(nullptr) is a no-op) and nulls the pointer.
+    template<>
+    bool dealloc<MemType::CPU>(void *&array)
+    {
+        free(array);
+        array = nullptr;
+        return true;
+    }
+
+    // Allocates `new_size` zero-filled bytes; returns false and leaves `array` null on failure.
+    template <>
+    bool alloc<MemType::CPU>(void *&array, const size_t new_size)
+    {
+        array = calloc(1, new_size);   // zero-fills without writing through a null pointer
+        return array != nullptr || new_size == 0;
+    }
+
+    // Grows the buffer to `new_size` bytes when that is larger than `allocated_size`.
+    // Old contents are discarded; the new buffer is zero-filled.
+    template <>
+    bool realloc<MemType::CPU>(void *&array, size_t &allocated_size, const size_t new_size)
+    {
+        if(new_size <= allocated_size) return true;
+        dealloc<MemType::CPU>(array, allocated_size);
+        if(!alloc<MemType::CPU>(array, new_size)) return false;
+        allocated_size = new_size;   // recorded only once the allocation succeeded
+        return true;
+    }
+
+    // Host -> host copy of `copy_elem_size` bytes.
+    template <>
+    bool memcopy<MemType::CPU, MemType::CPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        memcpy(dst, src, copy_elem_size);
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/srcF/sfx_esm.f90 b/srcF/sfx_esm.f90
index cb4d488..8b6c68f 100644
--- a/srcF/sfx_esm.f90
+++ b/srcF/sfx_esm.f90
@@ -1,4 +1,4 @@
-#include "sfx_def.fi"
+#include "../includeF/sfx_def.fi"
module sfx_esm
!> @brief main Earth System Model surface flux module
@@ -55,7 +55,6 @@ contains
integer i
! ----------------------------------------------------------------------------
#if defined(INCLUDE_CUDA) || defined(INCLUDE_CXX)
- write(*, *) 'CXX'
call surf_flux(meteo%U, meteo%dT, meteo%Tsemi, meteo%dQ, meteo%h, meteo%z0_m, &
sfx%zeta, sfx%Rib, sfx%Re, sfx%B, sfx%z0_m, sfx%z0_t, &
sfx%Rib_conv_lim, sfx%Cm, sfx%Ct, sfx%Km, sfx%Pr_t_inv, &
@@ -69,7 +68,6 @@ contains
numerics%maxiters_charnock, numerics%maxiters_convection, &
n)
#else
- write(*, *) 'FORTRAN'
do i = 1, n
#ifdef SFX_FORCE_DEPRECATED_ESM_CODE
#else
diff --git a/srcF/sfx_log.f90 b/srcF/sfx_log.f90
index 39f8674..592d159 100644
--- a/srcF/sfx_log.f90
+++ b/srcF/sfx_log.f90
@@ -1,4 +1,4 @@
-#include "sfx_def.fi"
+#include "../includeF/sfx_def.fi"
module sfx_log
!> @brief simple log-roughness surface flux module
diff --git a/srcF/sfx_surface.f90 b/srcF/sfx_surface.f90
index f5952aa..33e7159 100644
--- a/srcF/sfx_surface.f90
+++ b/srcF/sfx_surface.f90
@@ -1,4 +1,4 @@
-#include "sfx_def.fi"
+#include "../includeF/sfx_def.fi"
module sfx_surface
!> @brief surface roughness parameterizations
--
GitLab