From b2f001061c738f0704f7cc70b42c48d01af7667f Mon Sep 17 00:00:00 2001 From: Lizzzka007 <gashchuk2011@mail.ru> Date: Mon, 18 Dec 2023 13:52:10 +0300 Subject: [PATCH] Build memory_processing from in-tree sources instead of FetchContent, move sfx_def.fi to includeF, remove debug output --- CMakeLists.txt | 68 ++++++++++++++++---------------- includeCU/MemoryProcessing.cuh | 21 ++++++++++ includeCXX/MemoryProcessing.h | 21 ++++++++++ includeCXX/TemplateParameters.h | 3 ++ {srcF => includeF}/sfx_def.fi | 0 srcCU/MemoryProcessing.cu | 70 +++++++++++++++++++++++++++++++++ srcCXX/Flux.cpp | 3 +- srcCXX/FluxComputeFunc.cpp | 4 -- srcCXX/MemoryProcessing.cpp | 57 +++++++++++++++++++++++++++ srcF/sfx_esm.f90 | 4 +- srcF/sfx_log.f90 | 2 +- srcF/sfx_surface.f90 | 2 +- 12 files changed, 212 insertions(+), 43 deletions(-) create mode 100644 includeCU/MemoryProcessing.cuh create mode 100644 includeCXX/MemoryProcessing.h create mode 100644 includeCXX/TemplateParameters.h rename {srcF => includeF}/sfx_def.fi (100%) create mode 100644 srcCU/MemoryProcessing.cu create mode 100644 srcCXX/MemoryProcessing.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 478ed3c..911a3c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,18 +6,6 @@ option(INCLUDE_CXX "CXX build in mode" OFF) project(INMCM_sfx) enable_language(Fortran) -if(INCLUDE_CXX OR INCLUDE_CUDA) - set(MEMPROC_GIT http://tesla.parallel.ru/Lizzzka007/memory_processing.git) - include(FetchContent) - FetchContent_Declare(memory_processing - GIT_REPOSITORY ${MEMPROC_GIT} - GIT_TAG origin/main - ) - FetchContent_MakeAvailable(memory_processing) - add_library(memory_processing INTERFACE) - target_compile_definitions(memory_processing INTERFACE INCLUDE_CUDA=${INCLUDE_CUDA}) -endif(INCLUDE_CXX OR INCLUDE_CUDA) - if(INCLUDE_CXX) set(RUN_MACRO -DINCLUDE_CXX) endif(INCLUDE_CXX) @@ -39,7 +27,6 @@ endif(INCLUDE_CXX) set(SOURCES_F srcF/sfx_data.f90 srcF/sfx_common.f90 - srcF/sfx_def.fi srcF/sfx_esm.f90 srcF/sfx_esm_param.f90 srcF/sfx_log.f90 @@ -50,45 +37,60 @@ set(SOURCES_F srcF/FCWrapper.F90 ) +set(HEADERS_F + includeF/sfx_def.fi +) + 
if(INCLUDE_CXX) set(SOURCES_C srcC/SubFunctionsWrapper.c ) set(SOURCES_CXX - srcCXX/Flux.cpp - srcCXX/FluxComputeFunc.cpp - srcCXX/SubFunctions.cpp + srcCXX/Flux.cpp + srcCXX/FluxComputeFunc.cpp + srcCXX/SubFunctions.cpp ) set(HEADERS_CXX - includeCXX/Flux.h - includeCXX/FluxComputeFunc.h - includeCXX/SubFunctions.h + includeCXX/Flux.h + includeCXX/FluxComputeFunc.h + includeCXX/SubFunctions.h ) endif(INCLUDE_CXX) if(INCLUDE_CUDA) set(SOURCES_CU - srcCU/Flux.cu - srcCU/FluxComputeFunc.cu + srcCU/Flux.cu + srcCU/FluxComputeFunc.cu ) set(HEADERS_CU - includeCU/Flux.cuh - includeCXX/FluxComputeFunc.cuh + includeCU/Flux.cuh + includeCU/FluxComputeFunc.cuh ) endif(INCLUDE_CUDA) -set(SOURCES ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${SOURCES_F}) +if(INCLUDE_CXX OR INCLUDE_CUDA) + set(MEMPROC_SOURCES_CXX + srcCXX/MemoryProcessing.cpp + ) + set(MEMPROC_HEADERS_CXX + includeCXX/MemoryProcessing.h + includeCXX/TemplateParameters.h + ) -set(CMAKE_Fortran_FLAGS " -g -fbacktrace -ffpe-trap=zero,overflow,underflow -cpp ") -set(CMAKE_CXX_FLAGS " -g ") -set(CMAKE_C_FLAGS " -g ") + if(INCLUDE_CUDA) + set(MEMPROC_SOURCES_CU + srcCU/MemoryProcessing.cu + ) + set(MEMPROC_HEADERS_CU + includeCU/MemoryProcessing.cuh + ) + endif(INCLUDE_CUDA) +endif(INCLUDE_CXX OR INCLUDE_CUDA) +set(SOURCES ${MEMPROC_HEADERS_CU} ${MEMPROC_SOURCES_CU} ${MEMPROC_HEADERS_CXX} ${MEMPROC_SOURCES_CXX} ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${HEADERS_F} ${SOURCES_F}) + +set(CMAKE_Fortran_FLAGS " -cpp ") add_executable(drag ${SOURCES}) add_definitions(${RUN_MACRO}) -set_property(TARGET drag PROPERTY LINKER_LANGUAGE Fortran) - -if(INCLUDE_CXX OR INCLUDE_CUDA) - target_include_directories(drag PUBLIC ${memory_processing_SOURCE_DIR}/include) - target_link_libraries(drag memproc) -endif(INCLUDE_CXX OR INCLUDE_CUDA) \ No newline at end of file +set_property(TARGET drag PROPERTY LINKER_LANGUAGE Fortran) \ No newline at end of file diff --git 
a/includeCU/MemoryProcessing.cuh b/includeCU/MemoryProcessing.cuh new file mode 100644 index 0000000..2ef88c8 --- /dev/null +++ b/includeCU/MemoryProcessing.cuh @@ -0,0 +1,21 @@ +#pragma once +#include "../includeCXX/TemplateParameters.h" +#include <cstddef> + +namespace memproc +{ + template <MemType memtype> + bool alloc(void *&array, const size_t new_size); + + template <MemType memtype> + bool realloc(void *&array, size_t &allocated_size, const size_t new_size); + + template<MemType memtype> + bool dealloc(void *&array, size_t &allocated_size); + + template<MemType memtype> + bool dealloc(void *&array); + + template <MemType dst_memtype, MemType src_memtype> + bool memcopy(void *dst, const void* src, const size_t copy_elem_size); +} \ No newline at end of file diff --git a/includeCXX/MemoryProcessing.h b/includeCXX/MemoryProcessing.h new file mode 100644 index 0000000..b3db1fe --- /dev/null +++ b/includeCXX/MemoryProcessing.h @@ -0,0 +1,21 @@ +#pragma once +#include "TemplateParameters.h" +#include <cstddef> + +namespace memproc +{ + template <MemType memtype> + bool alloc(void *&array, const size_t new_size); + + template <MemType memtype> + bool realloc(void *&array, size_t &allocated_size, const size_t new_size); + + template<MemType memtype> + bool dealloc(void *&array, size_t &allocated_size); + + template<MemType memtype> + bool dealloc(void *&array); + + template <MemType dst_memtype, MemType src_memtype> + bool memcopy(void *dst, const void* src, const size_t copy_elem_size); +} diff --git a/includeCXX/TemplateParameters.h b/includeCXX/TemplateParameters.h new file mode 100644 index 0000000..b63e394 --- /dev/null +++ b/includeCXX/TemplateParameters.h @@ -0,0 +1,3 @@ +#pragma once + +enum MemType {CPU, GPU}; \ No newline at end of file diff --git a/srcF/sfx_def.fi b/includeF/sfx_def.fi similarity index 100% rename from srcF/sfx_def.fi rename to includeF/sfx_def.fi diff --git a/srcCU/MemoryProcessing.cu b/srcCU/MemoryProcessing.cu new file mode 100644 index 
0000000..4c866ed --- /dev/null +++ b/srcCU/MemoryProcessing.cu @@ -0,0 +1,70 @@ +#include "../includeCU/MemoryProcessing.cuh" +#include <cuda.h> +#include <cuda_runtime_api.h> + +namespace memproc +{ + template<> + bool dealloc<MemType::GPU>(void *&array, size_t &allocated_size) + { + if(allocated_size > 0) + { + cudaFree(array); + allocated_size = 0; + } + + return true; + } + + template<> + bool dealloc<MemType::GPU>(void *&array) + { + cudaFree(array); + return true; + } + + template <> + bool alloc<MemType::GPU>(void *&array, const size_t new_size) + { + cudaMalloc ( (void **)&array, new_size); + cudaMemset(array, 0, new_size); + + return true; + } + + template <> + bool realloc<MemType::GPU>(void *&array, size_t &allocated_size, const size_t new_size) + { + if(new_size > allocated_size) + { + if(allocated_size > 0) dealloc<MemType::GPU>(array, allocated_size); + allocated_size = new_size; + cudaMalloc ( (void **)&array, new_size); + cudaMemset(array, 0, new_size); + } + + return true; + } + + template <> + bool memcopy<MemType::GPU, MemType::CPU>(void *dst, const void* src, const size_t copy_elem_size) + { + cudaMemcpy ( dst, src, copy_elem_size, cudaMemcpyHostToDevice); + + return true; + } + + template <> + bool memcopy<MemType::CPU, MemType::GPU>(void *dst, const void* src, const size_t copy_elem_size) + { + cudaMemcpy ( dst, src, copy_elem_size, cudaMemcpyDeviceToHost); + return true; + } + + template <> + bool memcopy<MemType::GPU, MemType::GPU>(void *dst, const void* src, const size_t copy_elem_size) + { + cudaMemcpy ( dst, src, copy_elem_size, cudaMemcpyDeviceToDevice); + return true; + } +} \ No newline at end of file diff --git a/srcCXX/Flux.cpp b/srcCXX/Flux.cpp index 80dfaf5..8157b1a 100644 --- a/srcCXX/Flux.cpp +++ b/srcCXX/Flux.cpp @@ -4,9 +4,10 @@ #include "../includeCXX/FluxComputeFunc.h" #ifdef INCLUDE_CUDA #include "../includeCU/Flux.cuh" + #include "../includeCU/MemoryProcessing.cuh" #endif -#include "MemoryProcessing.h" +#include 
"../includeCXX/MemoryProcessing.h" template<typename T, MemType RunMem, MemType memIn> Flux<T, RunMem, memIn>::Flux() diff --git a/srcCXX/FluxComputeFunc.cpp b/srcCXX/FluxComputeFunc.cpp index 7e1dbd1..15fbb6c 100644 --- a/srcCXX/FluxComputeFunc.cpp +++ b/srcCXX/FluxComputeFunc.cpp @@ -28,7 +28,6 @@ void get_charnock_roughness(const T h, const T U, b = c; } z0_m = h_charnock * exp(-c * kappa); - printf("%f and 0.000015e0\n", z0_m); z0_m = std::max(z0_m, T(0.000015e0)); Uc = U * log(h_charnock / z0_m) / log(h / z0_m); } @@ -302,9 +301,6 @@ void compute_flux_cpu(const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_, Rib = std::min(Rib, Rib_max); get_psi_stable(Rib, h0_m, h0_t, B, Pr_t_0_inv, beta_m, psi_m, psi_h, zeta); - if(step == 353) - printf("get_psi_stable zeta = %f\n", zeta); - fval = beta_m * zeta; phi_m = 1.0 + fval; phi_h = 1.0/Pr_t_0_inv + fval; diff --git a/srcCXX/MemoryProcessing.cpp b/srcCXX/MemoryProcessing.cpp new file mode 100644 index 0000000..d387605 --- /dev/null +++ b/srcCXX/MemoryProcessing.cpp @@ -0,0 +1,57 @@ +#include "../includeCXX/MemoryProcessing.h" +#include <cstdlib> +#include <cstring> + +namespace memproc +{ + template<> + bool dealloc<MemType::CPU>(void *&array, size_t &allocated_size) + { + if(allocated_size > 0) + { + free(array); + allocated_size = 0; + } + + return true; + } + + template<> + bool dealloc<MemType::CPU>(void *&array) + { + free(array); + return true; + } + + template <> + bool alloc<MemType::CPU>(void *&array, const size_t new_size) + { + array = malloc(new_size); + memset(array, 0, new_size); + + return true; + } + + + template <> + bool realloc<MemType::CPU>(void *&array, size_t &allocated_size, const size_t new_size) + { + if(new_size > allocated_size) + { + if(allocated_size > 0) dealloc<MemType::CPU>(array, allocated_size); + allocated_size = new_size; + array = malloc(new_size); + memset(array, 0, new_size); + } + + return true; + } + + template <> + bool memcopy<MemType::CPU, MemType::CPU>(void *dst, const 
void* src, const size_t copy_elem_size) + { + memcpy(dst, src, copy_elem_size); + + return true; + } +} \ No newline at end of file diff --git a/srcF/sfx_esm.f90 b/srcF/sfx_esm.f90 index cb4d488..8b6c68f 100644 --- a/srcF/sfx_esm.f90 +++ b/srcF/sfx_esm.f90 @@ -1,4 +1,4 @@ -#include "sfx_def.fi" +#include "../includeF/sfx_def.fi" module sfx_esm !> @brief main Earth System Model surface flux module @@ -55,7 +55,6 @@ contains integer i ! ---------------------------------------------------------------------------- #if defined(INCLUDE_CUDA) || defined(INCLUDE_CXX) - write(*, *) 'CXX' call surf_flux(meteo%U, meteo%dT, meteo%Tsemi, meteo%dQ, meteo%h, meteo%z0_m, & sfx%zeta, sfx%Rib, sfx%Re, sfx%B, sfx%z0_m, sfx%z0_t, & sfx%Rib_conv_lim, sfx%Cm, sfx%Ct, sfx%Km, sfx%Pr_t_inv, & @@ -69,7 +68,6 @@ contains numerics%maxiters_charnock, numerics%maxiters_convection, & n) #else - write(*, *) 'FORTRAN' do i = 1, n #ifdef SFX_FORCE_DEPRECATED_ESM_CODE #else diff --git a/srcF/sfx_log.f90 b/srcF/sfx_log.f90 index 39f8674..592d159 100644 --- a/srcF/sfx_log.f90 +++ b/srcF/sfx_log.f90 @@ -1,4 +1,4 @@ -#include "sfx_def.fi" +#include "../includeF/sfx_def.fi" module sfx_log !> @brief simple log-roughness surface flux module diff --git a/srcF/sfx_surface.f90 b/srcF/sfx_surface.f90 index f5952aa..33e7159 100644 --- a/srcF/sfx_surface.f90 +++ b/srcF/sfx_surface.f90 @@ -1,4 +1,4 @@ -#include "sfx_def.fi" +#include "../includeF/sfx_def.fi" module sfx_surface !> @brief surface roughness parameterizations -- GitLab