From b1a97e5ce35dad5d0ffd8a7ffa73a8b2f6a31271 Mon Sep 17 00:00:00 2001
From: Debolskiy Andrey <and.debol@gmail.com>
Date: Tue, 23 Jun 2020 00:43:22 +0300
Subject: [PATCH] moved plutils

---
 ParLib.src/CMakeLists.txt          |   10 +-
 ParLib.src/{plutils => }/plutils.c | 1438 ++++++++++++++--------------
 ParLib.src/{plutils => }/plutils.h |  102 +-
 ParLib.src/plutils/CMakeLists.txt  |    8 -
 4 files changed, 779 insertions(+), 779 deletions(-)
 rename ParLib.src/{plutils => }/plutils.c (96%)
 rename ParLib.src/{plutils => }/plutils.h (96%)
 delete mode 100644 ParLib.src/plutils/CMakeLists.txt

diff --git a/ParLib.src/CMakeLists.txt b/ParLib.src/CMakeLists.txt
index 289767f..4288a7b 100644
--- a/ParLib.src/CMakeLists.txt
+++ b/ParLib.src/CMakeLists.txt
@@ -1,6 +1,14 @@
 
 add_library(plutils STATIC "")
-INCLUDE(plutils/CMakeLists.txt)
+target_sources(plutils
+        PRIVATE
+        plutils.c
+        PUBLIC
+        plutils.h
+        )
+# Export this directory to targets that link plutils, and keep it on the directory-wide include path as before.
+target_include_directories(plutils PUBLIC ${CMAKE_CURRENT_LIST_DIR})
+include_directories(${CMAKE_CURRENT_LIST_DIR})
 if(BUILD_SHARED_LIBS)
     add_library(parlibc-shared SHARED "")
     target_sources(parlibc-shared
diff --git a/ParLib.src/plutils/plutils.c b/ParLib.src/plutils.c
similarity index 96%
rename from ParLib.src/plutils/plutils.c
rename to ParLib.src/plutils.c
index 3b1566a..8bcaf03 100644
--- a/ParLib.src/plutils/plutils.c
+++ b/ParLib.src/plutils.c
@@ -1,719 +1,719 @@
-#include "plutils.h"
-
-#include <stdlib.h>
-#include <string.h>
-
-// parLIB buffers [definition]
-// -------------------------------------------------------------------------- //
-void *plbuf[MAX_PL_BUFS];
-int plbuf_size[MAX_PL_BUFS];
-int plbuf_status[MAX_PL_BUFS];
-
-int plbuf_ptr;
-// -------------------------------------------------------------------------- //
-
-// using loop_count pragma expectations
-// ----------------------------------------------------------------------------------- //
-#define CP_EXPECT_5D_J1		21			// number of vertical levels[1]
-#define CP_EXPECT_5D_J2		73			// number of vertical levels[2]
-#define CP_EXPECT_5D_K1		1			// number of variables[1]
-#define CP_EXPECT_5D_K2		2			// number of variables[2]
-#define CP_EXPECT_5D_K3		5			// number of variables[3]
-#define CP_EXPECT_5D_K4		10			// number of variables[4]
-#define CP_EXPECT_5D_Q1		1			// number of time scices[1]
-#define CP_EXPECT_5D_Q2		2			// number of time scices[2]
-// ----------------------------------------------------------------------------------- //
-
-
-// buffer memory interface
-// ----------------------------------------------------------------------------------- //
-void init_plbuf()
-{
-	int k;
-	for (k = 0; k < MAX_PL_BUFS; k++) {
-		plbuf_size[k] = 0;
-		plbuf_status[k] = 0;
-	}
-	plbuf_ptr = 0;
-}
-
-void deinit_plbuf()
-{
-	int k;
-	for (k = 0; k < MAX_PL_BUFS; k++) {
-		if (plbuf_size[k] > 0) {
-			free(plbuf[k]);
-			plbuf_size[k] = 0;
-		}
-		plbuf_status[k] = 0;
-	}
-	plbuf_ptr = 0;
-}
-
-
-void* get_plbuf(int msize, int* id)
-{
-	int k, kbeg = plbuf_ptr;
-
-	for (k = kbeg; k < MAX_PL_BUFS; k++) {
-		if (!plbuf_status[k]) {
-			if (msize > plbuf_size[k]) {
-				if (plbuf_size[k] > 0) free(plbuf[k]);
-				plbuf_size[k] = msize;
-				plbuf[k] = (void*)malloc(plbuf_size[k]);
-			}
-
-			plbuf_status[k] = 1;
-			plbuf_ptr = k + 1;
-
-			(*id) = k;
-			return plbuf[k];
-		}
-	}
-
-	// no free buffer found:
-	(*id) = MAX_PL_BUFS;
-	return (void*)malloc(msize);
-}
-
-void free_plbuf(void* ptr, int id)
-{
-	if (id < 0) return;
-	if (id >= MAX_PL_BUFS) {
-		free(ptr);
-		return;
-	}
-
-	plbuf_status[id] = 0;
-	if (id < plbuf_ptr) plbuf_ptr = id;
-}
-// ----------------------------------------------------------------------------------- //
-
-// 1D copy
-// ----------------------------------------------------------------------------------- //
-static inline void copy_to_buffer_1d(char* _RESTRICT buf, const char* _RESTRICT const a,
-	const int nx)
-{
-	int i;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (i = 0; i < nx; i++)
-		buf[i] = a[i];
-	else
-		memcpy(buf, a, nx * sizeof(char));
-}
-
-static inline void copy_from_buffer_1d(char* _RESTRICT a, const char* _RESTRICT const buf,
-	const int nx)
-{
-	int i;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (i = 0; i < nx; i++)
-		a[i] = buf[i];
-	else
-		memcpy(a, buf, nx * sizeof(char));
-}
-// ----------------------------------------------------------------------------------- //
-
-// 2D copy
-// ----------------------------------------------------------------------------------- //
-static inline void copy_to_buffer_2d(char* _RESTRICT buf, const char* _RESTRICT const a,
-	const int nx, const int ny,
-	const int shx)
-{
-	int i, j, idx = 0, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-	for (i = 0; i < nx; i++) {
-		buf[bidx + i] = a[idx + i];
-	}
-	else
-	{
-		const int nbx = nx * sizeof(char);
-		for (j = 0; j < ny; j++, bidx += nx) {
-			idx = j * shx;
-			memcpy(&buf[bidx], &a[idx], nbx);
-		}
-	}
-}
-
-static inline void copy_from_buffer_2d(char* _RESTRICT a, const char* _RESTRICT const buf,
-	const int nx, const int ny,
-	const int shx)
-{
-	int i, j, idx = 0, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-	for (i = 0; i < nx; i++) {
-		a[idx + i] = buf[bidx + i];
-	}
-	else
-	{
-		const int nbx = nx * sizeof(char);
-		for (j = 0; j < ny; j++, bidx += nx) {
-			idx = j * shx;
-			memcpy(&a[idx], &buf[bidx], nbx);
-		}
-	}
-}
-// ----------------------------------------------------------------------------------- //
-
-// 3D copy
-// ----------------------------------------------------------------------------------- //
-static inline void copy_to_buffer_3d(char* _RESTRICT buf, const char* _RESTRICT const a,
-	const int nx, const int ny, const int nz,
-	const int shx, const int shxy)
-{
-	int i, j, k, idx, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (k = 0; k < nz; k++)
-	{
-		idx = k * shxy;
-		for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-		for (i = 0; i < nx; i++) {
-			buf[bidx + i] = a[idx + i];
-		}
-	}
-	else
-	{
-		const int nbx = nx * sizeof(char);
-
-		for (k = 0; k < nz; k++)
-		{
-			idx = k * shxy;
-			for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
-				memcpy(&buf[bidx], &a[idx], nbx);
-			}
-		}
-	}
-}
-
-static inline void copy_from_buffer_3d(char* _RESTRICT a, const char* _RESTRICT const buf,
-	const int nx, const int ny, const int nz,
-	const int shx, const int shxy)
-{
-	int i, j, k, idx, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (k = 0; k < nz; k++)
-	{
-		idx = k * shxy;
-		for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-		for (i = 0; i < nx; i++) {
-			a[idx + i] = buf[bidx + i];
-		}
-	}
-	else
-	{
-		const int nbx = nx * sizeof(char);
-
-		for (k = 0; k < nz; k++)
-		{
-			idx = k * shxy;
-			for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
-				memcpy(&a[idx], &buf[bidx], nbx);
-			}
-		}
-	}
-}
-// ----------------------------------------------------------------------------------- //
-
-// 4D copy
-// ----------------------------------------------------------------------------------- //
-static inline void copy_to_buffer_4d(char* _RESTRICT buf, const char* _RESTRICT const a,
-	const int nx, const int ny, const int nz, const int np,
-	const int shx, const int shxy, const int shxyz)
-{
-	int i, j, k, p, idx, shidx, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (p = 0; p < np; p++)
-	{
-		shidx = p * shxyz;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
-#endif
-		for (k = 0; k < nz; k++, shidx += shxy)
-		{
-			idx = shidx;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
-#endif
-			for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-			for (i = 0; i < nx; i++) {
-				buf[bidx + i] = a[idx + i];
-			}
-		}
-	}
-	else
-	for (p = 0; p < np; p++)
-	{
-		shidx = p * shxyz;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
-#endif
-		for (k = 0; k < nz; k++, shidx += shxy)
-		{
-			idx = shidx;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
-#endif
-			for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
-				memcpy(&buf[bidx], &a[idx], nx * sizeof(char));
-			}
-		}
-	}
-}
-
-static inline void copy_from_buffer_4d(char* _RESTRICT a, const char* _RESTRICT const buf,
-	const int nx, const int ny, const int nz, const int np,
-	const int shx, const int shxy, const int shxyz)
-{
-	int i, j, k, p, idx, shidx, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (p = 0; p < np; p++)
-	{
-		shidx = p * shxyz;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
-#endif
-		for (k = 0; k < nz; k++, shidx += shxy)
-		{
-			idx = shidx;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
-#endif
-			for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-			for (i = 0; i < nx; i++) {
-				a[idx + i] = buf[bidx + i];
-			}
-		}
-	}
-	else
-	for (p = 0; p < np; p++)
-	{
-		shidx = p * shxyz;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
-#endif
-		for (k = 0; k < nz; k++, shidx += shxy)
-		{
-			idx = shidx;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
-#endif
-			for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
-				memcpy(&a[idx], &buf[bidx], nx * sizeof(char));
-			}
-		}
-	}
-}
-// ----------------------------------------------------------------------------------- //
-
-// 5D copy
-// ----------------------------------------------------------------------------------- //
-static inline void copy_to_buffer_5d(char* _RESTRICT buf, const char* _RESTRICT const a,
-	const int nx, const int ny, const int nz, const int np, const int nq,
-	const int shx, const int shxy, const int shxyz, const int shxyzp)
-{
-	int i, j, k, p, q, shidx_q, shidx_p, idx, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
-#endif
-	for (q = 0; q < nq; q++)
-	{
-		shidx_q = q * shxyzp;
-		for (p = 0; p < np; p++, shidx_q += shxyz)
-		{
-			shidx_p = shidx_q;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
-#endif
-			for (k = 0; k < nz; k++, shidx_p += shxy)
-			{
-				idx = shidx_p;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
-#endif
-				for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-				for (i = 0; i < nx; i++) {
-					buf[bidx + i] = a[idx + i];
-				}
-			}
-		}
-	}
-	else
-	{
-		const int nbx = nx * sizeof(char);
-
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
-#endif
-		for (q = 0; q < nq; q++)
-		{
-			shidx_q = q * shxyzp;
-			for (p = 0; p < np; p++, shidx_q += shxyz)
-			{
-				shidx_p = shidx_q;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
-#endif
-				for (k = 0; k < nz; k++, shidx_p += shxy)
-				{
-					idx = shidx_p;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
-#endif
-					for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
-						memcpy(&buf[bidx], &a[idx], nbx);
-					}
-				}
-			}
-		}
-	}
-}
-
-static inline void copy_from_buffer_5d(char* _RESTRICT a, const char* _RESTRICT const buf,
-	const int nx, const int ny, const int nz, const int np, const int nq,
-	const int shx, const int shxy, const int shxyz, const int shxyzp)
-{
-	int i, j, k, p, q, shidx_q, shidx_p, idx, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
-#endif
-	for (q = 0; q < nq; q++)
-	{
-		shidx_q = q * shxyzp;
-		for (p = 0; p < np; p++, shidx_q += shxyz)
-		{
-			shidx_p = shidx_q;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
-#endif
-			for (k = 0; k < nz; k++, shidx_p += shxy)
-			{
-				idx = shidx_p;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
-#endif
-				for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-				for (i = 0; i < nx; i++) {
-					a[idx + i] = buf[bidx + i];
-				}
-			}
-		}
-	}
-	else
-	{
-		const int nbx = nx * sizeof(char);
-
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
-#endif
-		for (q = 0; q < nq; q++)
-		{
-			shidx_q = q * shxyzp;
-			for (p = 0; p < np; p++, shidx_q += shxyz)
-			{
-				shidx_p = shidx_q;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
-#endif
-				for (k = 0; k < nz; k++, shidx_p += shxy)
-				{
-					idx = shidx_p;
-#if defined(__INTEL_COMPILER)
-#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
-#endif
-					for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
-						memcpy(&a[idx], &buf[bidx], nbx);
-					}
-				}
-			}
-		}
-	}
-}
-// ----------------------------------------------------------------------------------- //
-
-
-// 6D copy
-// ----------------------------------------------------------------------------------- //
-static inline void copy_to_buffer_6d(char* _RESTRICT buf, const char* _RESTRICT const a,
-	const int nx, const int ny, const int nz, const int np, const int nq, const int ns,
-	const int shx, const int shxy, const int shxyz, const int shxyzp, const int shxyzpq)
-{
-	int i, j, k, p, q, s, idx, shidx_q, shidx_p, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (s = 0; s < ns; s++)
-	for (q = 0; q < nq; q++)
-	{
-		shidx_q = s * shxyzpq + q * shxyzp;
-		for (p = 0; p < np; p++, shidx_q += shxyz)
-		{
-			shidx_p = shidx_q;
-			for (k = 0; k < nz; k++, shidx_p += shxy)
-			{
-				idx = shidx_p;
-				for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-				for (i = 0; i < nx; i++) {
-					buf[bidx + i] = a[idx + i];
-				}
-			}
-		}
-	}
-	else
-	{
-		const int nbx = nx * sizeof(char);
-
-		for (s = 0; s < ns; s++)
-		for (q = 0; q < nq; q++)
-		{
-			shidx_q = s * shxyzpq + q * shxyzp;
-			for (p = 0; p < np; p++, shidx_q += shxyz)
-			{
-				shidx_p = shidx_q;
-				for (k = 0; k < nz; k++, shidx_p += shxy)
-				{
-					idx = shidx_p;
-					for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
-						memcpy(&buf[bidx], &a[idx], nbx);
-					}
-				}
-			}
-		}
-	}
-}
-
-static inline void copy_from_buffer_6d(char* _RESTRICT a, const char* _RESTRICT const buf,
-	const int nx, const int ny, const int nz, const int np, const int nq, const int ns,
-	const int shx, const int shxy, const int shxyz, const int shxyzp, const int shxyzpq)
-{
-	int i, j, k, p, q, s, idx, shidx_q, shidx_p, bidx = 0;
-
-	if (nx < MIN_MEMCPY_BLOCK)
-	for (s = 0; s < ns; s++)
-	for (q = 0; q < nq; q++)
-	{
-		shidx_q = s * shxyzpq + q * shxyzp;
-		for (p = 0; p < np; p++, shidx_q += shxyz)
-		{
-			shidx_p = shidx_q;
-			for (k = 0; k < nz; k++, shidx_p += shxy)
-			{
-				idx = shidx_p;
-				for (j = 0; j < ny; j++, idx += shx, bidx += nx)
-				for (i = 0; i < nx; i++) {
-					a[idx + i] = buf[bidx + i];
-				}
-			}
-		}
-	}
-	else
-	{
-		const int nbx = nx * sizeof(char);
-
-		for (s = 0; s < ns; s++)
-		for (q = 0; q < nq; q++)
-		{
-			shidx_q = s * shxyzpq + q * shxyzp;
-			for (p = 0; p < np; p++, shidx_q += shxyz)
-			{
-				shidx_p = shidx_q;
-				for (k = 0; k < nz; k++, shidx_p += shxy)
-				{
-					idx = shidx_p;
-					for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
-						memcpy(&a[idx], &buf[bidx], nbx);
-					}
-				}
-			}
-		}
-	}
-}
-// ----------------------------------------------------------------------------------- //
-
-
-// COPY-TO
-// ----------------------------------------------------------------------------------- //
-void copy_to_buffer(char* _RESTRICT buf, const char* _RESTRICT const a,
-	const int ndims,
-	const int* _RESTRICT const msgdim,
-	const int* _RESTRICT const stride,
-	const int fsize)
-{
-	if ((ndims < 1) || (ndims > MAX_PARLIB_MP_DIMS)) return;
-
-	if (ndims == 1) {
-		const int nx = msgdim[0] * fsize;
-
-		copy_to_buffer_1d(buf, a, nx);
-		return;
-	}
-
-	if (ndims == 2) {
-		const int nx = msgdim[0] * fsize,
-			ny = msgdim[1];
-
-		const int shx = stride[0] * fsize;
-
-		copy_to_buffer_2d(buf, a, nx, ny, 
-			shx);
-		return;
-	}
-
-	if (ndims == 3) {
-		const int nx = msgdim[0] * fsize,
-			ny = msgdim[1], nz = msgdim[2];
-
-		const int shx = stride[0] * fsize;
-		const int shxy = stride[1] * shx;
-
-		copy_to_buffer_3d(buf, a, nx, ny, nz,
-			shx, shxy);
-		return;
-	}
-
-	if (ndims == 4) {
-		const int nx = msgdim[0] * fsize,
-			ny = msgdim[1], nz = msgdim[2], 
-			np = msgdim[3];
-
-		const int shx = stride[0] * fsize;
-		const int shxy = stride[1] * shx;
-		const int shxyz = stride[2] * shxy;
-
-		copy_to_buffer_4d(buf, a, nx, ny, nz, np,
-			shx, shxy, shxyz);
-		return;
-	}
-
-	if (ndims == 5) {
-		const int nx = msgdim[0] * fsize,
-			ny = msgdim[1], nz = msgdim[2], 
-			np = msgdim[3], nq = msgdim[4];
-
-		const int shx = stride[0] * fsize;
-		const int shxy = stride[1] * shx;
-		const int shxyz = stride[2] * shxy;
-		const int shxyzp = stride[3] * shxyz;
-
-		copy_to_buffer_5d(buf, a, nx, ny, nz, np, nq,
-			shx, shxy, shxyz, shxyzp);
-		return;
-	}
-
-	if (ndims == 6) {
-		const int nx = msgdim[0] * fsize, 
-			ny = msgdim[1], nz = msgdim[2],
-			np = msgdim[3], nq = msgdim[4],
-			ns = msgdim[5];
-
-		const int shx = stride[0] * fsize;
-		const int shxy = stride[1] * shx;
-		const int shxyz = stride[2] * shxy;
-		const int shxyzp = stride[3] * shxyz;
-		const int shxyzpq = stride[4] * shxyzp;
-
-		copy_to_buffer_6d(buf, a, nx, ny, nz, np, nq, ns,
-			shx, shxy, shxyz, shxyzp, shxyzpq);
-		return;
-	}
-}
-// ----------------------------------------------------------------------------------- //
-
-// COPY-FROM
-// ----------------------------------------------------------------------------------- //
-void copy_from_buffer(char* _RESTRICT a, const char* _RESTRICT const buf,
-	const int ndims,
-	const int* _RESTRICT const msgdim,
-	const int* _RESTRICT const stride,
-	const int fsize)
-{
-	if ((ndims < 1) || (ndims > MAX_PARLIB_MP_DIMS)) return;
-
-	if (ndims == 1) {
-		const int nx = msgdim[0] * fsize;
-
-		copy_from_buffer_1d(a, buf, nx);
-		return;
-	}
-
-	if (ndims == 2) {
-		const int nx = msgdim[0] * fsize,
-			ny = msgdim[1];
-
-		const int shx = stride[0] * fsize;
-
-		copy_from_buffer_2d(a, buf, nx, ny,
-			shx);
-		return;
-	}
-	
-	if (ndims == 3) {
-		const int nx = msgdim[0] * fsize,
-			ny = msgdim[1], nz = msgdim[2];
-
-		const int shx = stride[0] * fsize;
-		const int shxy = stride[1] * shx;
-
-		copy_from_buffer_3d(a, buf, nx, ny, nz,
-			shx, shxy);
-		return;
-	}
-	
-	if (ndims == 4) {
-		const int nx = msgdim[0] * fsize,
-			ny = msgdim[1], nz = msgdim[2],
-			np = msgdim[3];
-
-		const int shx = stride[0] * fsize;
-		const int shxy = stride[1] * shx;
-		const int shxyz = stride[2] * shxy;
-
-		copy_from_buffer_4d(a, buf, nx, ny, nz, np,
-			shx, shxy, shxyz);
-		return;
-	}
-
-	if (ndims == 5) {
-		const int nx = msgdim[0] * fsize,
-			ny = msgdim[1], nz = msgdim[2],
-			np = msgdim[3], nq = msgdim[4];
-
-		const int shx = stride[0] * fsize;
-		const int shxy = stride[1] * shx;
-		const int shxyz = stride[2] * shxy;
-		const int shxyzp = stride[3] * shxyz;
-
-		copy_from_buffer_5d(a, buf, nx, ny, nz, np, nq,
-			shx, shxy, shxyz, shxyzp);
-		return;
-	}
-
-	if (ndims == 6) {
-		const int nx = msgdim[0] * fsize,
-			ny = msgdim[1], nz = msgdim[2],
-			np = msgdim[3], nq = msgdim[4], ns = msgdim[5];
-
-		const int shx = stride[0] * fsize;
-		const int shxy = stride[1] * shx;
-		const int shxyz = stride[2] * shxy;
-		const int shxyzp = stride[3] * shxyz;
-		const int shxyzpq = stride[4] * shxyzp;
-
-		copy_from_buffer_6d(a, buf, nx, ny, nz, np, nq, ns,
-			shx, shxy, shxyz, shxyzp, shxyzpq);
-		return;
-	}
-}
-// ----------------------------------------------------------------------------------- //
+#include "plutils.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+// parLIB buffers [definition]
+// -------------------------------------------------------------------------- //
+void *plbuf[MAX_PL_BUFS];
+int plbuf_size[MAX_PL_BUFS];
+int plbuf_status[MAX_PL_BUFS];
+
+int plbuf_ptr;
+// -------------------------------------------------------------------------- //
+
+// using loop_count pragma expectations
+// ----------------------------------------------------------------------------------- //
+#define CP_EXPECT_5D_J1		21			// number of vertical levels[1]
+#define CP_EXPECT_5D_J2		73			// number of vertical levels[2]
+#define CP_EXPECT_5D_K1		1			// number of variables[1]
+#define CP_EXPECT_5D_K2		2			// number of variables[2]
+#define CP_EXPECT_5D_K3		5			// number of variables[3]
+#define CP_EXPECT_5D_K4		10			// number of variables[4]
+#define CP_EXPECT_5D_Q1		1			// number of time slices[1]
+#define CP_EXPECT_5D_Q2		2			// number of time slices[2]
+// ----------------------------------------------------------------------------------- //
+
+
+// buffer memory interface
+// ----------------------------------------------------------------------------------- //
+void init_plbuf()	// reset the bookkeeping of every pool slot; frees nothing and leaves the plbuf[] pointers untouched
+{
+	int k;
+	for (k = 0; k < MAX_PL_BUFS; k++) {
+		plbuf_size[k] = 0;	// no storage recorded for slot k
+		plbuf_status[k] = 0;	// slot k is free (get_plbuf() takes slots whose status is 0)
+	}
+	plbuf_ptr = 0;	// get_plbuf() starts its search at slot 0
+}
+
+void deinit_plbuf()	// free all storage cached in the pool and mark every slot empty and free
+{
+	int k;
+	for (k = 0; k < MAX_PL_BUFS; k++) {
+		if (plbuf_size[k] > 0) {	// a positive recorded size means the slot owns storage
+			free(plbuf[k]);
+			plbuf[k] = NULL;	// no dangling pointer for get_plbuf() to return when msize <= 0
+		}
+		plbuf_size[k] = plbuf_status[k] = 0;
+	}
+	plbuf_ptr = 0;
+}
+
+
+void* get_plbuf(int msize, int* id)	// hand out >= msize bytes; *id receives the handle to pass to free_plbuf()
+{
+	int k, kbeg = plbuf_ptr;	// the search for a free slot starts at plbuf_ptr
+
+	for (k = kbeg; k < MAX_PL_BUFS; k++) {
+		if (!plbuf_status[k]) {
+			if (msize > plbuf_size[k]) {	// cached storage too small: replace it
+				if (plbuf_size[k] > 0) free(plbuf[k]);
+				plbuf[k] = malloc(msize);
+				plbuf_size[k] = (plbuf[k] != NULL) ? msize : 0;	// a failed malloc must not leave a stale size behind
+			}
+
+			plbuf_status[k] = 1;
+			plbuf_ptr = k + 1;
+
+			(*id) = k;
+			return plbuf[k];	// NULL if the allocation above failed
+		}
+	}
+
+	// no free buffer found: plain heap block, tagged with an id that makes free_plbuf() call free()
+	(*id) = MAX_PL_BUFS;
+	return (void*)malloc(msize);
+}
+
+void free_plbuf(void* ptr, int id)	// release a buffer obtained from get_plbuf(); id is the handle it returned
+{
+	if (id < 0) return;	// negative handle: nothing to release
+	if (id >= MAX_PL_BUFS) {	// overflow handle: ptr was malloc'ed outside the pool
+		free(ptr);
+		return;
+	}
+
+	plbuf_status[id] = 0;	// pooled slot: its storage is kept for reuse, only the busy flag is cleared
+	if (id < plbuf_ptr) plbuf_ptr = id;	// let the next get_plbuf() search start no later than this slot
+}
+// ----------------------------------------------------------------------------------- //
+
+// 1D copy
+// ----------------------------------------------------------------------------------- //
+static inline void copy_to_buffer_1d(char* _RESTRICT buf, const char* _RESTRICT const a,
+	const int nx)
+{
+	int i;
+	// Copy nx bytes from a into buf.
+	if (nx < MIN_MEMCPY_BLOCK)	// short run: plain byte loop instead of a memcpy call
+	for (i = 0; i < nx; i++)
+		buf[i] = a[i];
+	else
+		memcpy(buf, a, nx * sizeof(char));
+}
+
+static inline void copy_from_buffer_1d(char* _RESTRICT a, const char* _RESTRICT const buf,
+	const int nx)
+{
+	int i;
+	// Copy nx bytes from buf back into a.
+	if (nx < MIN_MEMCPY_BLOCK)	// short run: plain byte loop instead of a memcpy call
+	for (i = 0; i < nx; i++)
+		a[i] = buf[i];
+	else
+		memcpy(a, buf, nx * sizeof(char));
+}
+// ----------------------------------------------------------------------------------- //
+
+// 2D copy
+// ----------------------------------------------------------------------------------- //
+static inline void copy_to_buffer_2d(char* _RESTRICT buf, const char* _RESTRICT const a,
+	const int nx, const int ny,
+	const int shx)
+{
+	int i, j, idx = 0, bidx = 0;
+	// Gather ny rows of nx bytes from a (row pitch shx bytes) into buf, rows stored back to back.
+	if (nx < MIN_MEMCPY_BLOCK)
+	for (j = 0; j < ny; j++, idx += shx, bidx += nx)
+	for (i = 0; i < nx; i++) {
+		buf[bidx + i] = a[idx + i];
+	}
+	else	// long rows: one memcpy per row
+	{
+		const int nbx = nx * sizeof(char);
+		for (j = 0; j < ny; j++, bidx += nx) {
+			idx = j * shx;	// start of row j in a
+			memcpy(&buf[bidx], &a[idx], nbx);
+		}
+	}
+}
+
+static inline void copy_from_buffer_2d(char* _RESTRICT a, const char* _RESTRICT const buf,
+	const int nx, const int ny,
+	const int shx)
+{
+	int i, j, idx = 0, bidx = 0;
+	// Scatter ny back-to-back rows of nx bytes from buf into a (row pitch shx bytes).
+	if (nx < MIN_MEMCPY_BLOCK)
+	for (j = 0; j < ny; j++, idx += shx, bidx += nx)
+	for (i = 0; i < nx; i++) {
+		a[idx + i] = buf[bidx + i];
+	}
+	else	// long rows: one memcpy per row
+	{
+		const int nbx = nx * sizeof(char);
+		for (j = 0; j < ny; j++, bidx += nx) {
+			idx = j * shx;	// start of row j in a
+			memcpy(&a[idx], &buf[bidx], nbx);
+		}
+	}
+}
+// ----------------------------------------------------------------------------------- //
+
+// 3D copy
+// ----------------------------------------------------------------------------------- //
+static inline void copy_to_buffer_3d(char* _RESTRICT buf, const char* _RESTRICT const a,
+	const int nx, const int ny, const int nz,
+	const int shx, const int shxy)
+{
+	int i, j, k, idx, bidx = 0;
+	// Gather nz*ny rows of nx bytes from a into buf (rows stored back to back); shx/shxy are the byte pitches of a along j/k.
+	if (nx < MIN_MEMCPY_BLOCK)
+	for (k = 0; k < nz; k++)
+	{
+		idx = k * shxy;	// start of plane k in a
+		for (j = 0; j < ny; j++, idx += shx, bidx += nx)
+		for (i = 0; i < nx; i++) {
+			buf[bidx + i] = a[idx + i];
+		}
+	}
+	else
+	{
+		const int nbx = nx * sizeof(char);
+		// long rows: same traversal, one memcpy per row
+		for (k = 0; k < nz; k++)
+		{
+			idx = k * shxy;
+			for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
+				memcpy(&buf[bidx], &a[idx], nbx);
+			}
+		}
+	}
+}
+
+static inline void copy_from_buffer_3d(char* _RESTRICT a, const char* _RESTRICT const buf,
+	const int nx, const int ny, const int nz,
+	const int shx, const int shxy)
+{
+	int i, j, k, idx, bidx = 0;
+	// Scatter nz*ny back-to-back rows of nx bytes from buf into a; shx/shxy are the byte pitches of a along j/k.
+	if (nx < MIN_MEMCPY_BLOCK)
+	for (k = 0; k < nz; k++)
+	{
+		idx = k * shxy;	// start of plane k in a
+		for (j = 0; j < ny; j++, idx += shx, bidx += nx)
+		for (i = 0; i < nx; i++) {
+			a[idx + i] = buf[bidx + i];
+		}
+	}
+	else
+	{
+		const int nbx = nx * sizeof(char);
+		// long rows: same traversal, one memcpy per row
+		for (k = 0; k < nz; k++)
+		{
+			idx = k * shxy;
+			for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
+				memcpy(&a[idx], &buf[bidx], nbx);
+			}
+		}
+	}
+}
+// ----------------------------------------------------------------------------------- //
+
+// 4D copy
+// ----------------------------------------------------------------------------------- //
+static inline void copy_to_buffer_4d(char* _RESTRICT buf, const char* _RESTRICT const a,
+	const int nx, const int ny, const int nz, const int np,
+	const int shx, const int shxy, const int shxyz)
+{
+	int i, j, k, p, idx, shidx, bidx = 0;
+	// Gather np*nz*ny rows of nx bytes from a into buf (rows stored back to back); shx/shxy/shxyz are the byte pitches of a along j/k/p.
+	if (nx < MIN_MEMCPY_BLOCK)
+	for (p = 0; p < np; p++)
+	{
+		shidx = p * shxyz;	// start of hyper-plane p in a
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
+#endif
+		for (k = 0; k < nz; k++, shidx += shxy)
+		{
+			idx = shidx;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
+#endif
+			for (j = 0; j < ny; j++, idx += shx, bidx += nx)
+			for (i = 0; i < nx; i++) {
+				buf[bidx + i] = a[idx + i];
+			}
+		}
+	}
+	else	// long rows: same traversal, one memcpy per row
+	for (p = 0; p < np; p++)
+	{
+		shidx = p * shxyz;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
+#endif
+		for (k = 0; k < nz; k++, shidx += shxy)
+		{
+			idx = shidx;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
+#endif
+			for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
+				memcpy(&buf[bidx], &a[idx], nx * sizeof(char));
+			}
+		}
+	}
+}
+
+static inline void copy_from_buffer_4d(char* _RESTRICT a, const char* _RESTRICT const buf,
+	const int nx, const int ny, const int nz, const int np,
+	const int shx, const int shxy, const int shxyz)
+{
+	int i, j, k, p, idx, shidx, bidx = 0;
+	// Scatter np*nz*ny back-to-back rows of nx bytes from buf into a; shx/shxy/shxyz are the byte pitches of a along j/k/p.
+	if (nx < MIN_MEMCPY_BLOCK)
+	for (p = 0; p < np; p++)
+	{
+		shidx = p * shxyz;	// start of hyper-plane p in a
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
+#endif
+		for (k = 0; k < nz; k++, shidx += shxy)
+		{
+			idx = shidx;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
+#endif
+			for (j = 0; j < ny; j++, idx += shx, bidx += nx)
+			for (i = 0; i < nx; i++) {
+				a[idx + i] = buf[bidx + i];
+			}
+		}
+	}
+	else	// long rows: same traversal, one memcpy per row
+	for (p = 0; p < np; p++)
+	{
+		shidx = p * shxyz;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
+#endif
+		for (k = 0; k < nz; k++, shidx += shxy)
+		{
+			idx = shidx;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
+#endif
+			for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
+				memcpy(&a[idx], &buf[bidx], nx * sizeof(char));
+			}
+		}
+	}
+}
+// ----------------------------------------------------------------------------------- //
+
+// 5D copy
+// ----------------------------------------------------------------------------------- //
+static inline void copy_to_buffer_5d(char* _RESTRICT buf, const char* _RESTRICT const a,
+	const int nx, const int ny, const int nz, const int np, const int nq,
+	const int shx, const int shxy, const int shxyz, const int shxyzp)
+{
+	int i, j, k, p, q, shidx_q, shidx_p, idx, bidx = 0;
+	// Gather nq*np*nz*ny rows of nx bytes from a into buf (rows stored back to back); shx..shxyzp are the byte pitches of a along j/k/p/q.
+	if (nx < MIN_MEMCPY_BLOCK)
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
+#endif
+	for (q = 0; q < nq; q++)
+	{
+		shidx_q = q * shxyzp;	// start of slab q in a; advanced by shxyz for each p
+		for (p = 0; p < np; p++, shidx_q += shxyz)
+		{
+			shidx_p = shidx_q;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
+#endif
+			for (k = 0; k < nz; k++, shidx_p += shxy)
+			{
+				idx = shidx_p;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
+#endif
+				for (j = 0; j < ny; j++, idx += shx, bidx += nx)
+				for (i = 0; i < nx; i++) {
+					buf[bidx + i] = a[idx + i];
+				}
+			}
+		}
+	}
+	else
+	{
+		const int nbx = nx * sizeof(char);
+		// long rows: same traversal, one memcpy per row
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
+#endif
+		for (q = 0; q < nq; q++)
+		{
+			shidx_q = q * shxyzp;
+			for (p = 0; p < np; p++, shidx_q += shxyz)
+			{
+				shidx_p = shidx_q;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
+#endif
+				for (k = 0; k < nz; k++, shidx_p += shxy)
+				{
+					idx = shidx_p;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
+#endif
+					for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
+						memcpy(&buf[bidx], &a[idx], nbx);
+					}
+				}
+			}
+		}
+	}
+}
+
+static inline void copy_from_buffer_5d(char* _RESTRICT a, const char* _RESTRICT const buf,
+	const int nx, const int ny, const int nz, const int np, const int nq,
+	const int shx, const int shxy, const int shxyz, const int shxyzp)
+{
+	int i, j, k, p, q, shidx_q, shidx_p, idx, bidx = 0;
+	// Scatter nq*np*nz*ny back-to-back rows of nx bytes from buf into a; shx..shxyzp are the byte pitches of a along j/k/p/q.
+	if (nx < MIN_MEMCPY_BLOCK)
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
+#endif
+	for (q = 0; q < nq; q++)
+	{
+		shidx_q = q * shxyzp;	// start of slab q in a; advanced by shxyz for each p
+		for (p = 0; p < np; p++, shidx_q += shxyz)
+		{
+			shidx_p = shidx_q;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
+#endif
+			for (k = 0; k < nz; k++, shidx_p += shxy)
+			{
+				idx = shidx_p;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
+#endif
+				for (j = 0; j < ny; j++, idx += shx, bidx += nx)
+				for (i = 0; i < nx; i++) {
+					a[idx + i] = buf[bidx + i];
+				}
+			}
+		}
+	}
+	else
+	{
+		const int nbx = nx * sizeof(char);
+		// long rows: same traversal, one memcpy per row
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
+#endif
+		for (q = 0; q < nq; q++)
+		{
+			shidx_q = q * shxyzp;
+			for (p = 0; p < np; p++, shidx_q += shxyz)
+			{
+				shidx_p = shidx_q;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
+#endif
+				for (k = 0; k < nz; k++, shidx_p += shxy)
+				{
+					idx = shidx_p;
+#if defined(__INTEL_COMPILER)
+#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
+#endif
+					for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
+						memcpy(&a[idx], &buf[bidx], nbx);
+					}
+				}
+			}
+		}
+	}
+}
+// ----------------------------------------------------------------------------------- //
+
+
+// 6D copy
+// ----------------------------------------------------------------------------------- //
+static inline void copy_to_buffer_6d(char* _RESTRICT buf, const char* _RESTRICT const a,
+	const int nx, const int ny, const int nz, const int np, const int nq, const int ns,
+	const int shx, const int shxy, const int shxyz, const int shxyzp, const int shxyzpq)
+{
+	int i, j, k, p, q, s, idx, shidx_q, shidx_p, bidx = 0;
+	// Gather ns*nq*np*nz*ny rows of nx bytes from a into buf (rows stored back to back); shx..shxyzpq are the byte pitches of a along j/k/p/q/s.
+	if (nx < MIN_MEMCPY_BLOCK)
+	for (s = 0; s < ns; s++)
+	for (q = 0; q < nq; q++)
+	{
+		shidx_q = s * shxyzpq + q * shxyzp;	// start of slab (s, q) in a; advanced by shxyz for each p
+		for (p = 0; p < np; p++, shidx_q += shxyz)
+		{
+			shidx_p = shidx_q;
+			for (k = 0; k < nz; k++, shidx_p += shxy)
+			{
+				idx = shidx_p;
+				for (j = 0; j < ny; j++, idx += shx, bidx += nx)
+				for (i = 0; i < nx; i++) {
+					buf[bidx + i] = a[idx + i];
+				}
+			}
+		}
+	}
+	else
+	{
+		const int nbx = nx * sizeof(char);
+		// long rows: same traversal, one memcpy per row
+		for (s = 0; s < ns; s++)
+		for (q = 0; q < nq; q++)
+		{
+			shidx_q = s * shxyzpq + q * shxyzp;
+			for (p = 0; p < np; p++, shidx_q += shxyz)
+			{
+				shidx_p = shidx_q;
+				for (k = 0; k < nz; k++, shidx_p += shxy)
+				{
+					idx = shidx_p;
+					for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
+						memcpy(&buf[bidx], &a[idx], nbx);
+					}
+				}
+			}
+		}
+	}
+}
+
+static inline void copy_from_buffer_6d(char* _RESTRICT a, const char* _RESTRICT const buf,	// unpack: consecutive rows of buf -> strided 6D region of a
+	const int nx, const int ny, const int nz, const int np, const int nq, const int ns,	// loop extents; nx is the length (in chars) of one contiguous row
+	const int shx, const int shxy, const int shxyz, const int shxyzp, const int shxyzpq)	// offsets (in chars) inside a per step of j, k, p, q and s respectively
+{
+	int i, j, k, p, q, s, idx, shidx_q, shidx_p, bidx = 0;	// bidx: read position in buf, advanced by nx after every row
+
+	if (nx < MIN_MEMCPY_BLOCK)	// short rows: plain char loop; this unbraced if governs the whole s/q loop nest that follows
+	for (s = 0; s < ns; s++)
+	for (q = 0; q < nq; q++)
+	{
+		shidx_q = s * shxyzpq + q * shxyzp;	// offset of slab (s, q) in a; stepped by shxyz for each p below
+		for (p = 0; p < np; p++, shidx_q += shxyz)
+		{
+			shidx_p = shidx_q;	// offset of (s, q, p); stepped by shxy for each k below
+			for (k = 0; k < nz; k++, shidx_p += shxy)
+			{
+				idx = shidx_p;	// offset of the first row of plane (s, q, p, k) in a
+				for (j = 0; j < ny; j++, idx += shx, bidx += nx)	// per row: a moves by shx, buf by nx
+				for (i = 0; i < nx; i++) {
+					a[idx + i] = buf[bidx + i];
+				}
+			}
+		}
+	}
+	else	// rows of at least MIN_MEMCPY_BLOCK chars: same traversal, one memcpy per row
+	{
+		const int nbx = nx * sizeof(char);	// row length in bytes for memcpy
+
+		for (s = 0; s < ns; s++)
+		for (q = 0; q < nq; q++)
+		{
+			shidx_q = s * shxyzpq + q * shxyzp;
+			for (p = 0; p < np; p++, shidx_q += shxyz)
+			{
+				shidx_p = shidx_q;
+				for (k = 0; k < nz; k++, shidx_p += shxy)
+				{
+					idx = shidx_p;
+					for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
+						memcpy(&a[idx], &buf[bidx], nbx);	// destination is the strided array, source is the buffer
+					}
+				}
+			}
+		}
+	}
+}
+// ----------------------------------------------------------------------------------- //
+
+
+// COPY-TO
+// ----------------------------------------------------------------------------------- //
+void copy_to_buffer(char* _RESTRICT buf, const char* _RESTRICT const a,	// pack a strided region of a into buf; dispatches on ndims to copy_to_buffer_<N>d
+	const int ndims,	// number of dimensions; values outside 1..MAX_PARLIB_MP_DIMS make the call a no-op
+	const int* _RESTRICT const msgdim,	// extents of the region; entries [0..ndims-1] are read
+	const int* _RESTRICT const stride,	// extents used to build offsets in a; entries [0..ndims-2] are read
+	const int fsize)	// scales msgdim[0] and stride[0] (presumably the element size in bytes -- confirm with callers)
+{
+	if ((ndims < 1) || (ndims > MAX_PARLIB_MP_DIMS)) return;	// unsupported rank: return silently, nothing is copied
+
+	if (ndims == 1) {
+		const int nx = msgdim[0] * fsize;	// innermost extent converted to chars
+
+		copy_to_buffer_1d(buf, a, nx);
+		return;
+	}
+
+	if (ndims == 2) {
+		const int nx = msgdim[0] * fsize,
+			ny = msgdim[1];
+
+		const int shx = stride[0] * fsize;	// offset in chars between consecutive rows of a
+
+		copy_to_buffer_2d(buf, a, nx, ny, 
+			shx);
+		return;
+	}
+
+	if (ndims == 3) {
+		const int nx = msgdim[0] * fsize,
+			ny = msgdim[1], nz = msgdim[2];
+
+		const int shx = stride[0] * fsize;
+		const int shxy = stride[1] * shx;	// each higher-level offset is the previous one times the next stride entry
+
+		copy_to_buffer_3d(buf, a, nx, ny, nz,
+			shx, shxy);
+		return;
+	}
+
+	if (ndims == 4) {
+		const int nx = msgdim[0] * fsize,
+			ny = msgdim[1], nz = msgdim[2], 
+			np = msgdim[3];
+
+		const int shx = stride[0] * fsize;
+		const int shxy = stride[1] * shx;
+		const int shxyz = stride[2] * shxy;
+
+		copy_to_buffer_4d(buf, a, nx, ny, nz, np,
+			shx, shxy, shxyz);
+		return;
+	}
+
+	if (ndims == 5) {
+		const int nx = msgdim[0] * fsize,
+			ny = msgdim[1], nz = msgdim[2], 
+			np = msgdim[3], nq = msgdim[4];
+
+		const int shx = stride[0] * fsize;
+		const int shxy = stride[1] * shx;
+		const int shxyz = stride[2] * shxy;
+		const int shxyzp = stride[3] * shxyz;
+
+		copy_to_buffer_5d(buf, a, nx, ny, nz, np, nq,
+			shx, shxy, shxyz, shxyzp);
+		return;
+	}
+
+	if (ndims == 6) {
+		const int nx = msgdim[0] * fsize, 
+			ny = msgdim[1], nz = msgdim[2],
+			np = msgdim[3], nq = msgdim[4],
+			ns = msgdim[5];
+
+		const int shx = stride[0] * fsize;
+		const int shxy = stride[1] * shx;
+		const int shxyz = stride[2] * shxy;
+		const int shxyzp = stride[3] * shxyz;
+		const int shxyzpq = stride[4] * shxyzp;	// all offsets are plain int products; no overflow check is performed here
+
+		copy_to_buffer_6d(buf, a, nx, ny, nz, np, nq, ns,
+			shx, shxy, shxyz, shxyzp, shxyzpq);
+		return;
+	}
+}
+// ----------------------------------------------------------------------------------- //
+
+// COPY-FROM
+// ----------------------------------------------------------------------------------- //
+void copy_from_buffer(char* _RESTRICT a, const char* _RESTRICT const buf,	// unpack buf into a strided region of a; dispatches on ndims to copy_from_buffer_<N>d
+	const int ndims,	// number of dimensions; values outside 1..MAX_PARLIB_MP_DIMS make the call a no-op
+	const int* _RESTRICT const msgdim,	// extents of the region; entries [0..ndims-1] are read
+	const int* _RESTRICT const stride,	// extents used to build offsets in a; entries [0..ndims-2] are read
+	const int fsize)	// scales msgdim[0] and stride[0] (presumably the element size in bytes -- confirm with callers)
+{
+	if ((ndims < 1) || (ndims > MAX_PARLIB_MP_DIMS)) return;	// unsupported rank: return silently, nothing is copied
+
+	if (ndims == 1) {
+		const int nx = msgdim[0] * fsize;	// innermost extent converted to chars
+
+		copy_from_buffer_1d(a, buf, nx);
+		return;
+	}
+
+	if (ndims == 2) {
+		const int nx = msgdim[0] * fsize,
+			ny = msgdim[1];
+
+		const int shx = stride[0] * fsize;	// offset in chars between consecutive rows of a
+
+		copy_from_buffer_2d(a, buf, nx, ny,
+			shx);
+		return;
+	}
+	
+	if (ndims == 3) {
+		const int nx = msgdim[0] * fsize,
+			ny = msgdim[1], nz = msgdim[2];
+
+		const int shx = stride[0] * fsize;
+		const int shxy = stride[1] * shx;	// each higher-level offset is the previous one times the next stride entry
+
+		copy_from_buffer_3d(a, buf, nx, ny, nz,
+			shx, shxy);
+		return;
+	}
+	
+	if (ndims == 4) {
+		const int nx = msgdim[0] * fsize,
+			ny = msgdim[1], nz = msgdim[2],
+			np = msgdim[3];
+
+		const int shx = stride[0] * fsize;
+		const int shxy = stride[1] * shx;
+		const int shxyz = stride[2] * shxy;
+
+		copy_from_buffer_4d(a, buf, nx, ny, nz, np,
+			shx, shxy, shxyz);
+		return;
+	}
+
+	if (ndims == 5) {
+		const int nx = msgdim[0] * fsize,
+			ny = msgdim[1], nz = msgdim[2],
+			np = msgdim[3], nq = msgdim[4];
+
+		const int shx = stride[0] * fsize;
+		const int shxy = stride[1] * shx;
+		const int shxyz = stride[2] * shxy;
+		const int shxyzp = stride[3] * shxyz;
+
+		copy_from_buffer_5d(a, buf, nx, ny, nz, np, nq,
+			shx, shxy, shxyz, shxyzp);
+		return;
+	}
+
+	if (ndims == 6) {
+		const int nx = msgdim[0] * fsize,
+			ny = msgdim[1], nz = msgdim[2],
+			np = msgdim[3], nq = msgdim[4], ns = msgdim[5];
+
+		const int shx = stride[0] * fsize;
+		const int shxy = stride[1] * shx;
+		const int shxyz = stride[2] * shxy;
+		const int shxyzp = stride[3] * shxyz;
+		const int shxyzpq = stride[4] * shxyzp;	// all offsets are plain int products; no overflow check is performed here
+
+		copy_from_buffer_6d(a, buf, nx, ny, nz, np, nq, ns,
+			shx, shxy, shxyz, shxyzp, shxyzpq);
+		return;
+	}
+}
+// ----------------------------------------------------------------------------------- //
diff --git a/ParLib.src/plutils/plutils.h b/ParLib.src/plutils.h
similarity index 96%
rename from ParLib.src/plutils/plutils.h
rename to ParLib.src/plutils.h
index 2219efb..7ffc008 100644
--- a/ParLib.src/plutils/plutils.h
+++ b/ParLib.src/plutils.h
@@ -1,51 +1,51 @@
-#pragma once
-
-#define MAX_PARLIB_MP_DIMS	6		// maximum number of dims for manual packing
-
-#define MIN_MEMCPY_BLOCK	256		// minimum block (in bytes) for memcpy copy (magic number)
-
-#define MAX_PL_BUFS		4096		// maximum number of parLIB internal buffers
-
-// _RESTRICT definition 
-// ------------------------------------------------------------------- //
-#if defined(__INTEL_COMPILER)
-#define _RESTRICT restrict
-#elif defined(__GNUC__) && !defined(_WIN32) && !defined(_CYGWIN32__)
-#define _RESTRICT __restrict__
-#elif defined(_MSC_VER)
-#define _RESTRICT __restrict
-#else
-#define _RESTRICT
-#endif
-// ------------------------------------------------------------------- //
-
-
-// parLIB buffers [declaration]
-// -------------------------------------------------------------------------- //
-extern void *plbuf[MAX_PL_BUFS];
-extern int plbuf_size[MAX_PL_BUFS];
-extern int plbuf_status[MAX_PL_BUFS];
-
-extern int plbuf_ptr;
-// -------------------------------------------------------------------------- //
-
-
-void init_plbuf();
-void deinit_plbuf();
-
-void* get_plbuf(int msize, int* id);
-void free_plbuf(void* ptr, int id);
-// -------------------------------------------------------------------------- //
-
-void copy_to_buffer(char* _RESTRICT buf, const char* _RESTRICT const a,
-	const int ndims,
-	const int* _RESTRICT const msgdim,
-	const int* _RESTRICT const stride,
-	const int fsize);
-
-void copy_from_buffer(char* _RESTRICT a, const char* _RESTRICT const buf,
-	const int ndims,
-	const int* _RESTRICT const msgdim,
-	const int* _RESTRICT const stride,
-	const int fsize);
-// -------------------------------------------------------------------------- //
+#pragma once
+
+#define MAX_PARLIB_MP_DIMS	6		// maximum number of dims for manual packing (copy_to_buffer / copy_from_buffer ignore larger ndims)
+
+#define MIN_MEMCPY_BLOCK	256		// minimum row length (in bytes) copied with memcpy; shorter rows use a plain loop (magic number)
+
+#define MAX_PL_BUFS		4096		// maximum number of parLIB internal buffers (size of the plbuf* arrays below)
+
+// _RESTRICT definition: maps to the compiler's spelling of the restrict qualifier, or to nothing if none is known
+// ------------------------------------------------------------------- //
+#if defined(__INTEL_COMPILER)
+#define _RESTRICT restrict
+#elif defined(__GNUC__) && !defined(_WIN32) && !defined(_CYGWIN32__)	// NOTE(review): `_CYGWIN32__` has one leading underscore; possibly meant `__CYGWIN32__` -- confirm
+#define _RESTRICT __restrict__
+#elif defined(_MSC_VER)
+#define _RESTRICT __restrict
+#else
+#define _RESTRICT	// unknown compiler: the qualifier is dropped
+#endif
+// ------------------------------------------------------------------- //
+
+
+// parLIB buffers [declaration]
+// -------------------------------------------------------------------------- //
+extern void *plbuf[MAX_PL_BUFS];
+extern int plbuf_size[MAX_PL_BUFS];
+extern int plbuf_status[MAX_PL_BUFS];
+
+extern int plbuf_ptr;
+// -------------------------------------------------------------------------- //
+
+
+void init_plbuf();	// NOTE(review): empty parentheses are not a prototype before C23; `(void)` may be intended -- confirm
+void deinit_plbuf();
+
+void* get_plbuf(int msize, int* id);
+void free_plbuf(void* ptr, int id);
+// -------------------------------------------------------------------------- //
+
+void copy_to_buffer(char* _RESTRICT buf, const char* _RESTRICT const a,	// pack a strided region of a into buf
+	const int ndims,
+	const int* _RESTRICT const msgdim,
+	const int* _RESTRICT const stride,
+	const int fsize);
+
+void copy_from_buffer(char* _RESTRICT a, const char* _RESTRICT const buf,	// unpack buf into a strided region of a
+	const int ndims,
+	const int* _RESTRICT const msgdim,
+	const int* _RESTRICT const stride,
+	const int fsize);
+// -------------------------------------------------------------------------- //
diff --git a/ParLib.src/plutils/CMakeLists.txt b/ParLib.src/plutils/CMakeLists.txt
deleted file mode 100644
index 0fc8a32..0000000
--- a/ParLib.src/plutils/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-target_sources(plutils
-        PRIVATE
-        ${CMAKE_CURRENT_LIST_DIR}/plutils.c
-        PUBLIC
-        ${CMAKE_CURRENT_LIST_DIR}/plutils.h
-       )
-include_directories(plutils
-        PUBLIC ${CMAKE_CURRENT_LIST_DIR})
\ No newline at end of file
-- 
GitLab