Vendor the memory_processing helpers into the source tree.

Summary of the hunks below:
  * CMakeLists.txt stops downloading memory_processing through FetchContent and
    lists the in-tree MemoryProcessing sources and headers in SOURCES instead.
  * srcF/sfx_def.fi moves to includeF/ and the Fortran sources include it by
    relative path.
  * Debug printf()/write() statements are removed from the flux code.

Review fixes folded into this patch:
  * MemoryProcessing.cu, MemoryProcessing.cuh and Flux.cpp include headers by
    the paths at which this patch actually creates them.
  * The memproc alloc/realloc/dealloc/memcopy specializations report failures
    through their bool result instead of always returning true.
  * CMAKE_Fortran_FLAGS is appended to, so user-supplied flags are kept.
  * Files created or rewritten here end with a newline.

NOTE(review): indentation and blank context lines were rebuilt from the hunk
line counts, and the blob ids on the "index" lines of files changed by the
review fixes are stale. Regenerate the patch from a work tree before relying on
an exact or 3-way apply.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 478ed3ce0e3c581f87f9b785644dc5263e0dfbef..911a3c875b0077dd3ced5201a05f5f741c5259aa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,18 +6,6 @@ option(INCLUDE_CXX "CXX build in mode" OFF)
 project(INMCM_sfx)
 enable_language(Fortran)
 
-if(INCLUDE_CXX OR INCLUDE_CUDA)
-    set(MEMPROC_GIT http://tesla.parallel.ru/Lizzzka007/memory_processing.git)
-    include(FetchContent)
-    FetchContent_Declare(memory_processing
-        GIT_REPOSITORY ${MEMPROC_GIT}
-        GIT_TAG origin/main
-    )
-    FetchContent_MakeAvailable(memory_processing)
-    add_library(memory_processing INTERFACE)
-    target_compile_definitions(memory_processing INTERFACE INCLUDE_CUDA=${INCLUDE_CUDA})
-endif(INCLUDE_CXX OR INCLUDE_CUDA)
-
 if(INCLUDE_CXX)
     set(RUN_MACRO -DINCLUDE_CXX)
 endif(INCLUDE_CXX)
@@ -39,7 +27,6 @@ endif(INCLUDE_CXX)
 set(SOURCES_F
     srcF/sfx_data.f90
     srcF/sfx_common.f90
-    srcF/sfx_def.fi
     srcF/sfx_esm.f90
     srcF/sfx_esm_param.f90
     srcF/sfx_log.f90
@@ -50,45 +37,60 @@ set(SOURCES_F
     srcF/FCWrapper.F90
 )
 
+set(HEADERS_F
+    includeF/sfx_def.fi
+)
+
 if(INCLUDE_CXX)
     set(SOURCES_C
         srcC/SubFunctionsWrapper.c
     )
 
     set(SOURCES_CXX
-            srcCXX/Flux.cpp
-            srcCXX/FluxComputeFunc.cpp
-            srcCXX/SubFunctions.cpp
+        srcCXX/Flux.cpp
+        srcCXX/FluxComputeFunc.cpp
+        srcCXX/SubFunctions.cpp
     )
     set(HEADERS_CXX
-            includeCXX/Flux.h
-            includeCXX/FluxComputeFunc.h
-            includeCXX/SubFunctions.h
+        includeCXX/Flux.h
+        includeCXX/FluxComputeFunc.h
+        includeCXX/SubFunctions.h
     )
 endif(INCLUDE_CXX)
 
 if(INCLUDE_CUDA)
     set(SOURCES_CU
-            srcCU/Flux.cu
-            srcCU/FluxComputeFunc.cu
+        srcCU/Flux.cu
+        srcCU/FluxComputeFunc.cu
     )
     set(HEADERS_CU
-            includeCU/Flux.cuh
-            includeCXX/FluxComputeFunc.cuh
+        includeCU/Flux.cuh
+        includeCU/FluxComputeFunc.cuh
     )
 endif(INCLUDE_CUDA)
 
-set(SOURCES ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${SOURCES_F})
+if(INCLUDE_CXX OR INCLUDE_CUDA)
+    set(MEMPROC_SOURCES_CXX
+        srcCXX/MemoryProcessing.cpp
+    )
+    set(MEMPROC_HEADERS_CXX
+        includeCXX/MemoryProcessing.h
+        includeCXX/TemplateParameters.h
+    )
 
-set(CMAKE_Fortran_FLAGS " -g -fbacktrace -ffpe-trap=zero,overflow,underflow -cpp ")
-set(CMAKE_CXX_FLAGS " -g ")
-set(CMAKE_C_FLAGS " -g ")
+    if(INCLUDE_CUDA)
+        set(MEMPROC_SOURCES_CU
+            srcCU/MemoryProcessing.cu
+        )
+        set(MEMPROC_HEADERS_CU
+            includeCU/MemoryProcessing.cuh
+        )
+    endif(INCLUDE_CUDA)
+endif(INCLUDE_CXX OR INCLUDE_CUDA)
 
+set(SOURCES ${MEMPROC_HEADERS_CU} ${MEMPROC_SOURCES_CU} ${MEMPROC_HEADERS_CXX} ${MEMPROC_SOURCES_CXX} ${HEADERS_CU} ${SOURCES_CU} ${HEADERS_CXX} ${SOURCES_CXX} ${SOURCES_C} ${HEADERS_F} ${SOURCES_F})
+
+set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -cpp")
 add_executable(drag ${SOURCES})
 add_definitions(${RUN_MACRO})
-set_property(TARGET drag PROPERTY LINKER_LANGUAGE Fortran)
-
-if(INCLUDE_CXX OR INCLUDE_CUDA)
-    target_include_directories(drag PUBLIC ${memory_processing_SOURCE_DIR}/include)
-    target_link_libraries(drag memproc)
-endif(INCLUDE_CXX OR INCLUDE_CUDA)
\ No newline at end of file
+set_property(TARGET drag PROPERTY LINKER_LANGUAGE Fortran)
diff --git a/includeCU/MemoryProcessing.cuh b/includeCU/MemoryProcessing.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..2ef88c84258752fb3e945bde002c3239e7fea77a
--- /dev/null
+++ b/includeCU/MemoryProcessing.cuh
@@ -0,0 +1,21 @@
+#pragma once
+#include "../includeCXX/TemplateParameters.h"
+#include <cstddef>
+
+namespace memproc
+{
+    template <MemType memtype>
+    bool alloc(void *&array, const size_t new_size);
+
+    template <MemType memtype>
+    bool realloc(void *&array, size_t &allocated_size, const size_t new_size);
+
+    template<MemType memtype>
+    bool dealloc(void *&array, size_t &allocated_size);
+
+    template<MemType memtype>
+    bool dealloc(void *&array);
+
+    template <MemType dst_memtype, MemType src_memtype>
+    bool memcopy(void *dst, const void* src, const size_t copy_elem_size);
+}
diff --git a/includeCXX/MemoryProcessing.h b/includeCXX/MemoryProcessing.h
new file mode 100644
index 0000000000000000000000000000000000000000..b3db1fece0388f6a471dfbd833fed4e62bd7be8b
--- /dev/null
+++ b/includeCXX/MemoryProcessing.h
@@ -0,0 +1,21 @@
+#pragma once
+#include "TemplateParameters.h"
+#include <cstddef>
+
+namespace memproc
+{
+    template <MemType memtype>
+    bool alloc(void *&array, const size_t new_size);
+
+    template <MemType memtype>
+    bool realloc(void *&array, size_t &allocated_size, const size_t new_size);
+
+    template<MemType memtype>
+    bool dealloc(void *&array, size_t &allocated_size);
+
+    template<MemType memtype>
+    bool dealloc(void *&array);
+
+    template <MemType dst_memtype, MemType src_memtype>
+    bool memcopy(void *dst, const void* src, const size_t copy_elem_size);
+}
diff --git a/includeCXX/TemplateParameters.h b/includeCXX/TemplateParameters.h
new file mode 100644
index 0000000000000000000000000000000000000000..b63e3944c11ed1214ab0fbe75c4c081c3562c4ef
--- /dev/null
+++ b/includeCXX/TemplateParameters.h
@@ -0,0 +1,3 @@
+#pragma once
+
+enum MemType {CPU, GPU};
diff --git a/srcF/sfx_def.fi b/includeF/sfx_def.fi
similarity index 100%
rename from srcF/sfx_def.fi
rename to includeF/sfx_def.fi
diff --git a/srcCU/MemoryProcessing.cu b/srcCU/MemoryProcessing.cu
new file mode 100644
index 0000000000000000000000000000000000000000..4c866edff474f039eca1ef2ddd4c535ee0574ac2
--- /dev/null
+++ b/srcCU/MemoryProcessing.cu
@@ -0,0 +1,78 @@
+#include "../includeCU/MemoryProcessing.cuh"
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+namespace memproc
+{
+    // Release a size-tracked device buffer and reset the pointer and its size.
+    template<>
+    bool dealloc<MemType::GPU>(void *&array, size_t &allocated_size)
+    {
+        bool ok = true;
+        if(allocated_size > 0)
+        {
+            ok = (cudaFree(array) == cudaSuccess);
+            array = nullptr;
+            allocated_size = 0;
+        }
+
+        return ok;
+    }
+
+    // Release a device buffer; returns false if cudaFree reports an error.
+    template<>
+    bool dealloc<MemType::GPU>(void *&array)
+    {
+        const bool ok = (cudaFree(array) == cudaSuccess);
+        array = nullptr;
+        return ok;
+    }
+
+    // Allocate new_size zero-filled device bytes; returns false on a CUDA error.
+    template <>
+    bool alloc<MemType::GPU>(void *&array, const size_t new_size)
+    {
+        if(cudaMalloc((void **)&array, new_size) != cudaSuccess)
+        {
+            array = nullptr;
+            return false;
+        }
+
+        return new_size == 0 || cudaMemset(array, 0, new_size) == cudaSuccess;
+    }
+
+    // Grow a size-tracked device buffer; the old contents are discarded.
+    template <>
+    bool realloc<MemType::GPU>(void *&array, size_t &allocated_size, const size_t new_size)
+    {
+        if(new_size > allocated_size)
+        {
+            if(allocated_size > 0) dealloc<MemType::GPU>(array, allocated_size);
+            if(!alloc<MemType::GPU>(array, new_size)) return false;
+            allocated_size = new_size;
+        }
+
+        return true;
+    }
+
+    // Copy copy_elem_size bytes from a host buffer to a device buffer.
+    template <>
+    bool memcopy<MemType::GPU, MemType::CPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        return cudaMemcpy(dst, src, copy_elem_size, cudaMemcpyHostToDevice) == cudaSuccess;
+    }
+
+    // Copy copy_elem_size bytes from a device buffer to a host buffer.
+    template <>
+    bool memcopy<MemType::CPU, MemType::GPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        return cudaMemcpy(dst, src, copy_elem_size, cudaMemcpyDeviceToHost) == cudaSuccess;
+    }
+
+    // Copy copy_elem_size bytes between two device buffers.
+    template <>
+    bool memcopy<MemType::GPU, MemType::GPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        return cudaMemcpy(dst, src, copy_elem_size, cudaMemcpyDeviceToDevice) == cudaSuccess;
+    }
+}
diff --git a/srcCXX/Flux.cpp b/srcCXX/Flux.cpp
index 80dfaf54aac0cfa2910748dc67f3ea518f307fc8..8157b1af7e1c673a4289fc891a80a70a5079d072 100644
--- a/srcCXX/Flux.cpp
+++ b/srcCXX/Flux.cpp
@@ -4,9 +4,10 @@
 #include "../includeCXX/FluxComputeFunc.h"
 #ifdef INCLUDE_CUDA
     #include "../includeCU/Flux.cuh"
+    #include "../includeCU/MemoryProcessing.cuh"
 #endif
 
-#include "MemoryProcessing.h"
+#include "../includeCXX/MemoryProcessing.h"
 
 template<typename T, MemType RunMem, MemType memIn>
 Flux<T, RunMem, memIn>::Flux()
diff --git a/srcCXX/FluxComputeFunc.cpp b/srcCXX/FluxComputeFunc.cpp
index 7e1dbd1ef5e8c5bba2e159753b4dca75996db984..15fbb6c28cffcbd3738db9b2e0f2ad0e0351fee0 100644
--- a/srcCXX/FluxComputeFunc.cpp
+++ b/srcCXX/FluxComputeFunc.cpp
@@ -28,7 +28,6 @@ void get_charnock_roughness(const T h, const T U,
             b = c;
         }
         z0_m = h_charnock * exp(-c * kappa);
-        printf("%f and 0.000015e0\n", z0_m);
         z0_m = std::max(z0_m, T(0.000015e0));
         Uc = U * log(h_charnock / z0_m) / log(h / z0_m);
     }
@@ -302,9 +301,6 @@ void compute_flux_cpu(const T *U_, const T *dT_, const T *Tsemi_, const T *dQ_,
                 Rib = std::min(Rib, Rib_max);
                 get_psi_stable(Rib, h0_m, h0_t, B, Pr_t_0_inv, beta_m, psi_m, psi_h, zeta);
 
-                if(step == 353)
-                    printf("get_psi_stable zeta = %f\n", zeta);
-
                 fval = beta_m * zeta;
                 phi_m = 1.0 + fval;
                 phi_h = 1.0/Pr_t_0_inv + fval;
diff --git a/srcCXX/MemoryProcessing.cpp b/srcCXX/MemoryProcessing.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d3876059de4b7e03dcafef383d200f89c71a8dc8
--- /dev/null
+++ b/srcCXX/MemoryProcessing.cpp
@@ -0,0 +1,64 @@
+#include "../includeCXX/MemoryProcessing.h"
+#include <cstdlib>
+#include <cstring>
+
+namespace memproc
+{
+    // Release a size-tracked host buffer and reset the pointer and its size.
+    template<>
+    bool dealloc<MemType::CPU>(void *&array, size_t &allocated_size)
+    {
+        if(allocated_size > 0)
+        {
+            free(array);
+            array = nullptr;
+            allocated_size = 0;
+        }
+
+        return true;
+    }
+
+    // Release a host buffer; free() accepts a null pointer.
+    template<>
+    bool dealloc<MemType::CPU>(void *&array)
+    {
+        free(array);
+        array = nullptr;
+        return true;
+    }
+
+    // Allocate new_size zero-filled host bytes; returns false if malloc fails.
+    template <>
+    bool alloc<MemType::CPU>(void *&array, const size_t new_size)
+    {
+        array = malloc(new_size);
+        // malloc(0) may legitimately return a null pointer.
+        if(array == nullptr) return new_size == 0;
+        memset(array, 0, new_size);
+
+        return true;
+    }
+
+    // Grow a size-tracked host buffer; the old contents are discarded.
+    template <>
+    bool realloc<MemType::CPU>(void *&array, size_t &allocated_size, const size_t new_size)
+    {
+        if(new_size > allocated_size)
+        {
+            if(allocated_size > 0) dealloc<MemType::CPU>(array, allocated_size);
+            if(!alloc<MemType::CPU>(array, new_size)) return false;
+            allocated_size = new_size;
+        }
+
+        return true;
+    }
+
+    // Copy copy_elem_size bytes between two host buffers.
+    template <>
+    bool memcopy<MemType::CPU, MemType::CPU>(void *dst, const void* src, const size_t copy_elem_size)
+    {
+        memcpy(dst, src, copy_elem_size);
+
+        return true;
+    }
+}
diff --git a/srcF/sfx_esm.f90 b/srcF/sfx_esm.f90
index cb4d4889908e91a683beec5be1eb4fc0103d21dc..8b6c68f733d0153a1f712c52e0442e31b9a5fc57 100644
--- a/srcF/sfx_esm.f90
+++ b/srcF/sfx_esm.f90
@@ -1,4 +1,4 @@
-#include "sfx_def.fi"
+#include "../includeF/sfx_def.fi"
 
 module sfx_esm
     !> @brief main Earth System Model surface flux module
@@ -55,7 +55,6 @@ contains
         integer i
         ! ----------------------------------------------------------------------------
 #if defined(INCLUDE_CUDA) || defined(INCLUDE_CXX)
-        write(*, *) 'CXX'
         call surf_flux(meteo%U, meteo%dT, meteo%Tsemi, meteo%dQ, meteo%h, meteo%z0_m, &
             sfx%zeta, sfx%Rib, sfx%Re, sfx%B, sfx%z0_m, sfx%z0_t, &
             sfx%Rib_conv_lim, sfx%Cm, sfx%Ct, sfx%Km, sfx%Pr_t_inv, &
@@ -69,7 +68,6 @@ contains
             numerics%maxiters_charnock, numerics%maxiters_convection, &
             n)
 #else
-        write(*, *) 'FORTRAN'
         do i = 1, n
 #ifdef SFX_FORCE_DEPRECATED_ESM_CODE
 #else
diff --git a/srcF/sfx_log.f90 b/srcF/sfx_log.f90
index 39f867484af0ea6c6f23ac9b4d3a6aaafaa8cdb1..592d1592cbfde17200d5c1530267925007f05ce1 100644
--- a/srcF/sfx_log.f90
+++ b/srcF/sfx_log.f90
@@ -1,4 +1,4 @@
-#include "sfx_def.fi"
+#include "../includeF/sfx_def.fi"
 
 module sfx_log
     !> @brief simple log-roughness surface flux module
diff --git a/srcF/sfx_surface.f90 b/srcF/sfx_surface.f90
index f5952aac053b0313646d5c6c69ca0ac7a6bb68bb..33e71594ffc865b4a5bb1f5be6b99d51697b8fe1 100644
--- a/srcF/sfx_surface.f90
+++ b/srcF/sfx_surface.f90
@@ -1,4 +1,4 @@
-#include "sfx_def.fi"
+#include "../includeF/sfx_def.fi"
 
 module sfx_surface
     !> @brief surface roughness parameterizations