#include "mesh.hpp"
#include "TemplateParameters.h"
#include "MemoryProcessing.h"

#ifdef INCLUDE_CUDA
    #include "MemoryProcessing.cuh"
#endif

using namespace icethermo;

int main(void)
{
#ifdef INCLUDE_CUDA
    Mesh<float, MemType::GPU> mesh1(1.0f);
#else
    Mesh<float, MemType::CPU> mesh1(1.0f);
#endif
    
    // how to create cells data (lhs is shared ptr to vector)
    auto cells_temp =  mesh1.CreateCellsData("cells_temperature");
    auto cells_capacity = mesh1.CreateCellsData("cells_capacity", true);
    auto cells_enthalpy = mesh1.CreateCellsData("cells_enthalpy", false);

    // one can modify cell data
#ifdef INCLUDE_CUDA
    float* farray;
    size_t farray_size = 0;
    memproc::realloc<MemType::CPU>((void *&)(farray), farray_size, mesh1.GetCellsNum() * sizeof(float));
    memproc::memcopy<MemType::CPU, MemType::GPU>(farray, *cells_temp.get(), mesh1.GetCellsNum() * sizeof(float));
    farray[0] = 1.0f; farray[1] = 2.0f; farray[2] = 3.0f;
    memproc::memcopy<MemType::GPU, MemType::CPU>(*cells_temp.get(), farray, mesh1.GetCellsNum() * sizeof(float));
#else
    (*cells_temp)[0] = 1.0f; (*cells_temp)[1] = 2.0f; (*cells_temp)[2] = 3.0f;
#endif 

    // how to create nodes data (lhs is shared ptr to vector)
    auto nodes_k = mesh1.CreateNodesData("nodes_k");
    auto nodes_enthalpy = mesh1.CreateNodesData("nodes_enthalpy", false);

    // one can modify nodes data 
#ifdef INCLUDE_CUDA
    memproc::realloc<MemType::CPU>((void *&)(farray), farray_size, mesh1.GetNodesNum() * sizeof(float));
    memproc::memcopy<MemType::CPU, MemType::GPU>(farray, *nodes_k.get(), mesh1.GetNodesNum() * sizeof(float));
    farray[0] = -5.0f; farray[mesh1.GetNodesNum() - 1] = -3.0f;
    memproc::memcopy<MemType::GPU, MemType::CPU>(*nodes_k.get(), farray, mesh1.GetNodesNum() * sizeof(float));
#else
    (*nodes_k)[0] = -5.0f; (*nodes_k)[mesh1.GetNodesNum() - 1] = -3.0f;
#endif 

    // how to create single data 
    auto temp_ib = mesh1.CreateSingleData("temp_ib");
    auto temp_is = mesh1.CreateSingleData("temp_is");
    
    // one can modify single data
    (*temp_ib) = -1.0f; (*temp_is) = 2.0f; 

    // one can delete cells, nodes or single data 
    mesh1.DeleteCellsData("cells_enthalpy");
    mesh1.DeleteNodesData("nodes_enthalpy");
    mesh1.DeleteSingleData("temp_is");

    // its is better to avoid this, but one can get another pointer to created data

    auto another_cells_temp = mesh1.GetCellsData("cells_temperature");
    auto another_nodes_k = mesh1.GetNodesData("nodes_k");
    auto another_temp_ib = mesh1.GetSingleData("temp_ib");

#ifdef INCLUDE_CUDA
    memproc::realloc<MemType::CPU>((void *&)(farray), farray_size, mesh1.GetCellsNum() * sizeof(float));
    memproc::memcopy<MemType::CPU, MemType::GPU>(farray, *another_cells_temp.get(), mesh1.GetCellsNum() * sizeof(float));
    farray[0] = -5.0f;
    memproc::memcopy<MemType::GPU, MemType::CPU>(*another_cells_temp.get(), farray, mesh1.GetCellsNum() * sizeof(float));

    memproc::realloc<MemType::CPU>((void *&)(farray), farray_size, mesh1.GetNodesNum() * sizeof(float));
    memproc::memcopy<MemType::CPU, MemType::GPU>(farray, *another_nodes_k.get(), mesh1.GetNodesNum() * sizeof(float));
    farray[0] *= 2.0f;
    memproc::memcopy<MemType::GPU, MemType::CPU>(*another_nodes_k.get(), farray, mesh1.GetNodesNum() * sizeof(float));
#else
    (*another_cells_temp)[0] = -5.0f;
    (*another_nodes_k)[0] *= 2.0f;
#endif 
    (*another_temp_ib) = -30.0f;

    // one can get total thickness
    std::cout << "current total cell thickness: " << mesh1.GetTotalThickness() << std::endl;

    // one could manually mute and unmute variables (muted variables will not be writed to the output)
    mesh1.MuteCellData("cells_temperature");
    mesh1.UnmuteCellData("cells_temperature");

    mesh1.MuteNodeData("nodes_enthalpy");
    mesh1.UnmuteNodeData("nodes_enthalpy");
    
    // one can save mesh to .txt file
    mesh1.SaveTXT("./mesh");

    // one can save mesh to .txt file with postfix number (relevant for time series)
    mesh1.SaveTXT("./mesh", 1488);

    // ### examples of another Mesh class constructor ###

    // construct uniform mesh with given cells num and total thickness
#ifdef INCLUDE_CUDA
    Mesh<double, MemType::GPU> mesh2(15, 1.0);
#else
    Mesh<double, MemType::CPU> mesh2(15, 1.0);
#endif
    mesh2.SaveTXT("./mesh2");

    // construct arbitrary mesh with given unit segment partition and total thickness
#ifdef INCLUDE_CUDA
    double dsegment_partition[2] = {0.5, 0.5};
    double *dev_segment_partition;
    size_t dev_segment_partition_size = 0;

    memproc::realloc<MemType::GPU>((void *&)(dev_segment_partition), dev_segment_partition_size, 2 * sizeof(double));
    memproc::memcopy<MemType::GPU, MemType::CPU>(dev_segment_partition, dsegment_partition, 2 * sizeof(double));

    Mesh<double, MemType::GPU> mesh3(dev_segment_partition, 2, 5.0);
#else
    double dsegment_partition[2] = {0.5, 0.5};
    Mesh<double, MemType::CPU> mesh3(dsegment_partition, 2, 5.0);
#endif

    mesh3.SaveTXT("./mesh3");

#ifdef INCLUDE_CUDA
    Mesh<double, MemType::GPU> mesh_vis(15, 4.0);
#else
    Mesh<double, MemType::CPU> mesh_vis(15, 4.0);
#endif

    auto cells_thick = mesh_vis.CreateCellsData("cells_temp_array");
    int N = mesh_vis.GetCellsNum();

#ifdef INCLUDE_CUDA
    double *darray;
    size_t darray_size = 0;

    memproc::realloc<MemType::CPU>((void *&)(darray), darray_size, N * sizeof(double));
    memproc::memcopy<MemType::CPU, MemType::GPU>(darray, *cells_thick.get(), N * sizeof(double));

    for (int i = 0; i < N; ++i)
        darray[i] = -5.0 + i*1.0/N * (-5.0);

    memproc::memcopy<MemType::GPU, MemType::CPU>(*cells_thick.get(), darray, N * sizeof(double));
#else
    for (int i = 0; i < N; ++i)
        (*cells_thick)[i] = -5.0 + i*1.0/N * (-5.0);
#endif

    mesh_vis.SaveTXT("./mesh_vis");

    // wrong constructor (it should be unit segment partition 0.5 + 0.4 != 1.0)
#ifdef INCLUDE_CUDA
    dsegment_partition[0] = 0.5; dsegment_partition[1] = 0.4;

    memproc::realloc<MemType::GPU>((void *&)(dev_segment_partition), dev_segment_partition_size, 2 * sizeof(double));
    memproc::memcopy<MemType::GPU, MemType::CPU>(dev_segment_partition, dsegment_partition, 2 * sizeof(double));

    Mesh<double, MemType::GPU> mesh4(dev_segment_partition, 2, 5.0);
#else
    dsegment_partition[0] = 0.5; dsegment_partition[1] = 0.4;
    Mesh<double, MemType::CPU> mesh4(dsegment_partition, 2, 5.0);
#endif

#ifdef INCLUDE_CUDA
    memproc::dealloc<MemType::CPU>((void *&)(farray));
    memproc::dealloc<MemType::GPU>((void *&)(dev_segment_partition));
    memproc::dealloc<MemType::CPU>((void *&)(darray));
#endif
    
    return 0;
}