#include "parlib.h"
#include "plutils.h"

#include <stdlib.h>
#include <string.h>

/*  
 *  Error codes:
 *     0 - success
 *     1 - nonpositive number of dimensions
 *     2 - wrong communicated dimension
 *     3 - negative boundary width
 *     4 - nonpositive dimension (an entry of stride[] is <= 0)
 *     5 - boundary width exceeds the array block length
 *     6 - number of dimensions exceeds maximum value (only for MP - manual packing)
 *     999 - incorrect exchange mode (only for generic calls)
 */

/*
 * P_BExchange_ginit - build the descriptor for a boundary exchange that uses
 * MPI derived datatypes.
 *
 *   ndims        number of array dimensions (must be >= 1)
 *   stride       stride[i] > 0 for every dimension; running products of the
 *                leading entries give the element offset between layers
 *   blklen       block length of every dimension, in elements
 *   bdim         1-based index of the dimension being exchanged
 *   overlap      boundary widths: overlap[0] layers are sent to the previous
 *                rank, overlap[1] layers to the next rank
 *   datatype     MPI type of one array element
 *   comm         communicator whose ranks form the chain (or ring)
 *   period       nonzero: rank 0 and rank nproc-1 also exchange with each other
 *   only_period  nonzero: exchange ONLY across that wrap-around link
 *   bexchange    descriptor to fill in
 *
 * Returns 0 on success or one of the codes 1..5 from the table at the top of
 * this file.  On every early "success" return (zero overlaps, only_period
 * without period, MPI_COMM_NULL, ...) bexchange->overlap[] is left at 0, which
 * is the condition the start/free routines test before touching anything else.
 */
int P_BExchange_ginit(ndims, stride, blklen, bdim, overlap, datatype,
	comm, period, only_period, bexchange)

	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period, only_period;
	BExchange *bexchange;
{
	int nproc, iproc, direct, idim, sendproc[2], recvproc[2];
	int count, strd, sbind[2], rbind[2], send[2], recv[2];
	MPI_Aint fsize, lb;
	MPI_Datatype oldtype, btype[2];


	// Setting degenerate-success cases conditions for consistency
	//	including: overlap[]=0, nproc=0, iproc=MPI_UNDEFINED
	for (direct = 0; direct < 2; direct++) {
		bexchange->overlap[direct] = 0;
	}

	//
	// Check input parameters
	//
	if (ndims < 1) { return 1; }
	if (bdim < 1 || bdim > ndims) { return 2; }
	if (overlap[0] == 0 && overlap[1] == 0) { return 0; } /* success */
	for (idim = 0; idim < ndims; idim++) {
		if (stride[idim] <= 0) { return 4; }
	}
	for (direct = 0; direct < 2; direct++) {
		if (overlap[direct] < 0) { return 3; }
		if (overlap[direct] > blklen[bdim - 1]) { return 5; }
	}
	// a wrap-around-only exchange without periodicity has nothing to do
	if ((only_period) && (!period)) { return 0; }

	//
	// Define the number of processors in the group and the rank
	//
	if (comm == MPI_COMM_NULL) { return 0; } // empty communicator

	MPI_Comm_size(comm, &nproc);
	if (nproc == 0) { return 0; } /* success */
	MPI_Comm_rank(comm, &iproc);
	if (iproc == MPI_UNDEFINED) { return 0; } /* the process does not belong to the group */

	// direction 0: send to the previous rank, receive from the next one;
	// direction 1 is the mirror image.  Neighbours wrap around the ends.
	sendproc[0] = (iproc == 0 ? nproc - 1 : iproc - 1);
	recvproc[0] = (iproc == nproc - 1 ? 0 : iproc + 1);
	sendproc[1] = recvproc[0];
	recvproc[1] = sendproc[0];

	// interior links are used unless only_period is set; the wrap-around
	// link (rank 0 <-> rank nproc-1) is used only when period is set
	send[0] = ((iproc > 0) && (!only_period)) || ((iproc == 0) && period);
	recv[0] = ((iproc < nproc - 1) && (!only_period)) || ((iproc == nproc - 1) && period);
	send[1] = recv[0];
	recv[1] = send[0];
	MPI_Type_get_extent(datatype, &lb, &fsize);

	//
	// Define data types for the boundaries
	//
	// Fix: give both handles a defined value up front.  When the overlaps
	// differ and one of them is 0, that direction's handle is never built
	// below but is still copied into the descriptor; previously that copy
	// read an uninitialized variable.  The placeholder is never used or
	// freed, because start/free only touch directions with overlap > 0.
	btype[0] = datatype;
	btype[1] = datatype;

	// checking if one data type will suffice ...
	const int ndsize = (overlap[0] == overlap[1]) ? 1 : 2;

	for (direct = 0; direct < ndsize; direct++) {
		if (overlap[direct] > 0) {

			// innermost dimension: a contiguous run of elements
			if (bdim == 1) {
				count = overlap[direct];
			}
			else {
				count = blklen[0];
			}
			MPI_Type_contiguous(count, datatype, &btype[direct]);
			oldtype = btype[direct];
			strd = stride[0];

			// wrap one hvector level per remaining dimension; the byte
			// stride is the accumulated element stride times the extent
			for (idim = 1; idim < ndims; idim++) {
				if (bdim == idim + 1) {
					count = overlap[direct];
				}
				else {
					count = blklen[idim];
				}
				MPI_Type_create_hvector(count, 1, strd * fsize, oldtype,
					&btype[direct]);

				// the intermediate type is no longer needed once nested
				MPI_Type_free(&oldtype);
				oldtype = btype[direct];
				strd = strd * stride[idim];
			}
			MPI_Type_commit(&btype[direct]);
		}
	}
	if (ndsize == 1) btype[1] = btype[0];	// using same MPI-datatype


	//
	// Determine the beginning of boundaries
	//
	// strd = element offset between consecutive layers along bdim
	strd = 1;
	for (idim = 0; idim < bdim - 1; idim++) {
		strd = strd * stride[idim];
	}
	sbind[0] = 0;                                        // first layers of the block
	rbind[0] = blklen[bdim - 1] * strd;                  // just past the block
	sbind[1] = (blklen[bdim - 1] - overlap[1])*strd;     // last layers of the block
	rbind[1] = -overlap[1] * strd;                       // just before the block

	for (direct = 0; direct < 2; direct++) {
		bexchange->overlap[direct] = overlap[direct];
		bexchange->send[direct] = send[direct];
		bexchange->recv[direct] = recv[direct];
		bexchange->btype[direct] = btype[direct];
		bexchange->sendproc[direct] = sendproc[direct];
		bexchange->recvproc[direct] = recvproc[direct];
		bexchange->sbind[direct] = sbind[direct];
		bexchange->rbind[direct] = rbind[direct];
	}
	bexchange->comm = comm;
	bexchange->fsize = fsize;
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Typed-exchange initialisation for the regular case: forwards every argument
 * to P_BExchange_ginit with only_period switched off and returns its result.
 */
int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype,
		comm, period, bexchange )
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
	BExchange *bexchange;
{
	/* 0 = not restricted to the wrap-around link */
	return P_BExchange_ginit(ndims, stride, blklen, bdim, overlap, datatype,
		comm, period, 0, bexchange);
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Post the nonblocking sends and receives of a typed boundary exchange.
 *
 * `a` is the base address of the local array; boundary positions are the
 * byte offsets sbind[]/rbind[] * fsize from it.  Send requests are stored in
 * req[0..1], receive requests in req[2..3]; every slot without a posted
 * operation ends up as MPI_REQUEST_NULL.  Always returns 0.
 */
int P_BExchange_start ( a, bexchange )
	void *a;
	BExchange *bexchange;
{
	char *base = (char *) a;
	int d;

	for (d = 0; d < 2; d++) {
		// start from "nothing pending" and overwrite only what gets posted
		bexchange->req[d] = MPI_REQUEST_NULL;
		bexchange->req[2 + d] = MPI_REQUEST_NULL;

		if (bexchange->overlap[d] <= 0) {
			continue;
		}

		if (bexchange->send[d]) {
			MPI_Isend(base + bexchange->sbind[d] * bexchange->fsize, 1,
				bexchange->btype[d], bexchange->sendproc[d], 0,
				bexchange->comm, &bexchange->req[d]);
		}
		if (bexchange->recv[d]) {
			MPI_Irecv(base + bexchange->rbind[d] * bexchange->fsize, 1,
				bexchange->btype[d], bexchange->recvproc[d], 0,
				bexchange->comm, &bexchange->req[2 + d]);
		}
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Complete an exchange: waits on all four request slots of the descriptor
 * (two sends, two receives).  Always returns 0.
 */
int P_BExchange_end ( bexchange )
	BExchange *bexchange;
{
	enum { NUM_REQUESTS = 4 };
	MPI_Status completed[NUM_REQUESTS];

	MPI_Waitall(NUM_REQUESTS, bexchange->req, completed);
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Release the derived datatypes held by a typed-exchange descriptor.
 *
 * When both overlaps are equal only btype[0] is considered, since the init
 * routine shares one datatype between the two directions in that case.
 * Directions with a zero overlap own no datatype.  Always returns 0.
 */
int P_BExchange_free ( bexchange )
	BExchange *bexchange;
{
	const int same_width = (bexchange->overlap[0] == bexchange->overlap[1]);

	if (bexchange->overlap[0] > 0) {
		MPI_Type_free(&bexchange->btype[0]);
	}
	// the second handle is a distinct type only when the widths differ
	if (!same_width && bexchange->overlap[1] > 0) {
		MPI_Type_free(&bexchange->btype[1]);
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot typed boundary exchange: init, start, end and free in a single
 * call.  Arguments are those of P_BExchange_init plus the array base `a`.
 *
 * Returns 0 on success, otherwise the error code reported by
 * P_BExchange_init (see the table at the top of the file).
 */
int P_BExchange ( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm, period )

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
{
	BExchange bexchange;
	int ierr;

	// Fix: `ierr = f(...) != 0` assigned the result of the comparison, so
	// every failure was reported as 1.  Assign first, then test.
	ierr = P_BExchange_init(ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, &bexchange);
	if (ierr != 0) {
		return ierr;
	}
	P_BExchange_start(a, &bexchange);
	P_BExchange_end(&bexchange);
	P_BExchange_free(&bexchange);
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot typed exchange across the periodic (wrap-around) link only:
 * calls P_BExchange_ginit with period = 1 and only_period = 1, then
 * start/end/free.
 *
 * Returns 0 on success, otherwise the error code reported by
 * P_BExchange_ginit.
 */
int P_BExchange_period( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm)

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
{
	const int only_period = 1;
	const int period = 1;

	BExchange bexchange;
	int ierr;

	// Fix: `ierr = f(...) != 0` assigned the result of the comparison, so
	// every failure was reported as 1.  Assign first, then test.
	ierr = P_BExchange_ginit(ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, only_period, &bexchange);
	if (ierr != 0) {
		return ierr;
	}
	P_BExchange_start(a, &bexchange);
	P_BExchange_end(&bexchange);
	P_BExchange_free(&bexchange);
	return 0;
}
// -------------------------------------------------------------------------- //


// v.1.3 - persistent exchanges //
// -------------------------------------------------------------------------- //
/*
 * Persistent variant of P_BExchange_ginit.
 *
 * Builds the descriptor with P_BExchange_ginit and then creates persistent
 * send/receive requests bound to array `a` for every direction that has a
 * positive overlap and is flagged to send/receive.  Send requests go to
 * req[0..1], receive requests to req[2..3].
 *
 * Returns the P_BExchange_ginit error code if that call fails, 0 otherwise.
 */
int PST_BExchange_ginit ( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm, period, only_period, bexchange )

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period, only_period;
	BExchange *bexchange;
{
	char *base = (char *)a;
	int d;
	int rc;

	rc = P_BExchange_ginit(ndims, stride, blklen, bdim, overlap, datatype,
		comm, period, only_period, bexchange);
	if (rc != 0) {
		return rc;
	}

	for (d = 0; d < 2; d++) {
		if (bexchange->overlap[d] <= 0) {
			continue;	// nothing to exchange in this direction
		}
		if (bexchange->send[d]) {
			MPI_Send_init(base + bexchange->sbind[d] * bexchange->fsize, 1,
				bexchange->btype[d], bexchange->sendproc[d], 0,
				bexchange->comm, &bexchange->req[d]);
		}
		if (bexchange->recv[d]) {
			MPI_Recv_init(base + bexchange->rbind[d] * bexchange->fsize, 1,
				bexchange->btype[d], bexchange->recvproc[d], 0,
				bexchange->comm, &bexchange->req[2 + d]);
		}
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Persistent typed-exchange initialisation for the regular case: forwards to
 * PST_BExchange_ginit with only_period switched off and returns its result.
 */
int PST_BExchange_init ( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm, period, bexchange )

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
	BExchange *bexchange;
{
	/* 0 = not restricted to the wrap-around link */
	return PST_BExchange_ginit(a, ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, 0, bexchange);
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Start the persistent requests of a typed exchange.
 *
 * For each direction, the send request (req[d]) and receive request
 * (req[2 + d]) are started when the overlap is positive and the matching
 * send/recv flag is set; otherwise the slot is set to MPI_REQUEST_NULL.
 * Always returns 0.
 */
int PST_BExchange_start ( bexchange )
	BExchange *bexchange;
{
	int d;

	for (d = 0; d < 2; d++) {
		const int has_boundary = (bexchange->overlap[d] > 0);

		if (has_boundary && bexchange->send[d]) {
			MPI_Start(&bexchange->req[d]);
		}
		else {
			bexchange->req[d] = MPI_REQUEST_NULL;
		}

		if (has_boundary && bexchange->recv[d]) {
			MPI_Start(&bexchange->req[2 + d]);
		}
		else {
			bexchange->req[2 + d] = MPI_REQUEST_NULL;
		}
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Complete a persistent typed exchange.  Completion is the same as in the
 * non-persistent case, so this simply delegates to P_BExchange_end.
 */
int PST_BExchange_end(bexchange)
	BExchange *bexchange;
{
	const int rc = P_BExchange_end(bexchange);

	return rc;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Release everything owned by a persistent typed-exchange descriptor: first
 * the derived datatypes (via P_BExchange_free), then the persistent requests
 * of every direction that has a positive overlap and is flagged to
 * send/receive.  Always returns 0.
 */
int PST_BExchange_free(bexchange)
	BExchange *bexchange;
{
	int d;

	P_BExchange_free(bexchange);

	for (d = 0; d < 2; d++) {
		if (bexchange->overlap[d] <= 0) {
			continue;	// no requests were created for this direction
		}
		if (bexchange->send[d]) {
			MPI_Request_free(&bexchange->req[d]);
		}
		if (bexchange->recv[d]) {
			MPI_Request_free(&bexchange->req[2 + d]);
		}
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot boundary exchange through the persistent-request code path:
 * PST_BExchange_init, start, end and free in a single call.
 *
 * Returns 0 on success, otherwise the error code reported by
 * PST_BExchange_init (see the table at the top of the file).
 */
int PST_BExchange( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm, period )

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
{
	BExchange bexchange;
	int ierr;

	// Fix: `ierr = f(...) != 0` assigned the result of the comparison, so
	// every failure was reported as 1.  Assign first, then test.
	ierr = PST_BExchange_init(a, ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, &bexchange);
	if (ierr != 0) {
		return ierr;
	}
	PST_BExchange_start(&bexchange);
	PST_BExchange_end(&bexchange);
	PST_BExchange_free(&bexchange);
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot exchange across the periodic (wrap-around) link only, through the
 * persistent-request code path: PST_BExchange_ginit with period = 1 and
 * only_period = 1, then start/end/free.
 *
 * Returns 0 on success, otherwise the error code reported by
 * PST_BExchange_ginit.
 */
int PST_BExchange_period( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm )

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
{
	const int only_period = 1;
	const int period = 1;

	BExchange bexchange;
	int ierr;

	// Fix: `ierr = f(...) != 0` assigned the result of the comparison, so
	// every failure was reported as 1.  Assign first, then test.
	ierr = PST_BExchange_ginit(a, ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, only_period, &bexchange);
	if (ierr != 0) {
		return ierr;
	}
	PST_BExchange_start(&bexchange);
	PST_BExchange_end(&bexchange);
	PST_BExchange_free(&bexchange);
	return 0;
}
// -------------------------------------------------------------------------- //


// v.1.4 - manual packing //
// -------------------------------------------------------------------------- //
/*
 * P_BExchange_mp_ginit - build the descriptor for a boundary exchange that
 * packs boundaries into contiguous buffers by hand ("manual packing")
 * instead of using MPI derived datatypes.
 *
 *   ndims        number of array dimensions (1 .. MAX_PARLIB_MP_DIMS)
 *   stride       stride[i] > 0 for every dimension; copied into the descriptor
 *   blklen       block length of every dimension, in elements
 *   bdim         1-based index of the dimension being exchanged
 *   overlap      boundary widths: overlap[0] layers are sent to the previous
 *                rank, overlap[1] layers to the next rank
 *   datatype     MPI type of one array element
 *   comm         communicator whose ranks form the chain (or ring)
 *   period       nonzero: rank 0 and rank nproc-1 also exchange with each other
 *   only_period  nonzero: exchange ONLY across that wrap-around link
 *   bexchange    descriptor to fill in
 *
 * Returns 0 on success or one of the codes 1..6 from the table at the top of
 * this file.  On every early "success" return bexchange->overlap[] is left
 * at 0, which is the condition the start/end/free routines test first.
 *
 * NOTE(review): the pointers returned by get_plbuf are stored unchecked;
 * confirm how that helper reports allocation failure.
 */
int P_BExchange_mp_ginit ( ndims, stride, blklen, bdim, overlap, datatype,
	comm, period, only_period, bexchange )

	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period, only_period;
	BExchange *bexchange;
{
	int nproc, iproc, direct, idim, sendproc[2], recvproc[2];
	int count, strd, sbind[2], rbind[2], send[2], recv[2];
	void *sbuf[2], *rbuf[2];
	int buf_id[4];
	int mdims[2][MAX_PARLIB_MP_DIMS];
	int msize[2];
	MPI_Aint lb, fsize;
	MPI_Datatype btype[2];

	// Setting degenerate-success cases conditions for consistency
	//	including: overlap[]=0, nproc=0, iproc=MPI_UNDEFINED
	for (direct = 0; direct < 2; direct++) {
		bexchange->overlap[direct] = 0;
	}

	//
	// Check input parameters
	//
	if (ndims < 1) { return 1; }
	if (bdim < 1 || bdim > ndims) { return 2; }
	if (overlap[0] == 0 && overlap[1] == 0) { return 0; } /* success */
	for (idim = 0; idim < ndims; idim++) {
		if (stride[idim] <= 0) { return 4; }
	}
	for (direct = 0; direct < 2; direct++) {
		if (overlap[direct] < 0) { return 3; }
		if (overlap[direct] > blklen[bdim - 1]) { return 5; }
	}
	// mdims[][] and the descriptor arrays hold at most MAX_PARLIB_MP_DIMS entries
	if (ndims > MAX_PARLIB_MP_DIMS) { return 6; }

	// a wrap-around-only exchange without periodicity has nothing to do
	if ((only_period) && (!period)) { return 0; }

	//
	// Define the number of processors in the group and the rank
	//
	if (comm == MPI_COMM_NULL) { return 0; } // empty communicator

	MPI_Comm_size(comm, &nproc);
	if (nproc == 0) { return 0; } /* success */
	MPI_Comm_rank(comm, &iproc);
	if (iproc == MPI_UNDEFINED) { return 0; } /* the process does not belong to the group */

	// direction 0: send to the previous rank, receive from the next one;
	// direction 1 is the mirror image.  Neighbours wrap around the ends.
	sendproc[0] = (iproc == 0 ? nproc - 1 : iproc - 1);
	recvproc[0] = (iproc == nproc - 1 ? 0 : iproc + 1);
	sendproc[1] = recvproc[0];
	recvproc[1] = sendproc[0];

	// interior links are used unless only_period is set; the wrap-around
	// link (rank 0 <-> rank nproc-1) is used only when period is set
	send[0] = ((iproc > 0) && (!only_period)) || ((iproc == 0) && period);
	recv[0] = ((iproc < nproc - 1) && (!only_period)) || ((iproc == nproc - 1) && period);
	send[1] = recv[0];
	recv[1] = send[0];
	MPI_Type_get_extent(datatype, &lb, &fsize);

	//
	// Define data type, message sizes and buffers for the boundaries
	//
	for (direct = 0; direct < 2; direct++) {
		// Packed buffers are sent as msize[] items of the element type.
		// Fix: assign this for BOTH directions.  It used to be set only
		// when overlap[direct] > 0, so a zero-width direction copied an
		// uninitialized handle into the descriptor further down.
		btype[direct] = datatype;

		if (overlap[direct] > 0) {

			// shape of the boundary slab and its total element count
			msize[direct] = 1;
			for (idim = 0; idim < ndims; idim++) {
				if (bdim == idim + 1) {
					count = overlap[direct];
				}
				else {
					count = blklen[idim];
				}
				mdims[direct][idim] = count;
				msize[direct] *= count;
			}

			if (send[direct]) {
				sbuf[direct] = get_plbuf(msize[direct] * fsize * sizeof(char),
					&buf_id[direct]);
			}
			else
			{
				sbuf[direct] = NULL;
				buf_id[direct] = -1;
			}

			if (recv[direct]) {
				rbuf[direct] = get_plbuf(msize[direct] * fsize * sizeof(char),
					&buf_id[2 + direct]);
			}
			else
			{
				rbuf[direct] = NULL;
				buf_id[2 + direct] = -1;
			}
		}
		else
		{
			// nothing to exchange in this direction: empty shape, no buffers
			msize[direct] = 0;
			for (idim = 0; idim < ndims; idim++) {
				mdims[direct][idim] = 0;
			}

			sbuf[direct] = NULL;
			rbuf[direct] = NULL;

			buf_id[direct] = -1;
			buf_id[2 + direct] = -1;
		}
	}

	//
	// Determine the beginning of boundaries
	//
	// strd = element offset between consecutive layers along bdim
	strd = 1;
	for (idim = 0; idim < bdim - 1; idim++) {
		strd = strd * stride[idim];
	}
	sbind[0] = 0;                                        // first layers of the block
	rbind[0] = blklen[bdim - 1] * strd;                  // just past the block
	sbind[1] = (blklen[bdim - 1] - overlap[1])*strd;     // last layers of the block
	rbind[1] = -overlap[1] * strd;                       // just before the block

	for (direct = 0; direct < 2; direct++) {
		bexchange->overlap[direct] = overlap[direct];
		bexchange->send[direct] = send[direct];
		bexchange->recv[direct] = recv[direct];
		bexchange->btype[direct] = btype[direct];
		bexchange->sendproc[direct] = sendproc[direct];
		bexchange->recvproc[direct] = recvproc[direct];
		bexchange->sbind[direct] = sbind[direct];
		bexchange->rbind[direct] = rbind[direct];

		bexchange->sbuf[direct] = sbuf[direct];
		bexchange->rbuf[direct] = rbuf[direct];
		bexchange->buf_id[direct] = buf_id[direct];
		bexchange->buf_id[2 + direct] = buf_id[2 + direct];

		memcpy(bexchange->mdims[direct], mdims[direct], ndims * sizeof(int));
		bexchange->msize[direct] = msize[direct];
	}
	bexchange->comm = comm;
	bexchange->fsize = fsize;

	// the pack/unpack helpers need the array layout at start/end time
	bexchange->ndims = ndims;
	memcpy(bexchange->stride, stride, ndims * sizeof(int));
	return 0;
}
// -------------------------------------------------------------------------- //


/*
 * Manual-packing initialisation for the regular case: forwards to
 * P_BExchange_mp_ginit with only_period switched off and returns its result.
 */
int P_BExchange_mp_init ( ndims, stride, blklen, bdim, overlap, datatype,
	comm, period, bexchange )

	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
	BExchange *bexchange;
{
	/* 0 = not restricted to the wrap-around link */
	return P_BExchange_mp_ginit(ndims, stride, blklen, bdim, overlap, datatype,
		comm, period, 0, bexchange);
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Post the nonblocking operations of a manual-packing exchange.
 *
 * Per direction with a positive overlap: the receive into rbuf[] is posted
 * first; then, if the direction sends, the boundary of `a` is packed into
 * sbuf[] with copy_to_buffer and the send is posted.  Send requests are
 * stored in req[0..1], receive requests in req[2..3]; every slot without a
 * posted operation ends up as MPI_REQUEST_NULL.  Always returns 0.
 */
int P_BExchange_mp_start( a, bexchange )
	void *a;
	BExchange *bexchange;
{
	char *base = (char *)a;
	int d;

	for (d = 0; d < 2; d++) {
		// start from "nothing pending" and overwrite only what gets posted
		bexchange->req[d] = MPI_REQUEST_NULL;
		bexchange->req[2 + d] = MPI_REQUEST_NULL;

		if (bexchange->overlap[d] <= 0) {
			continue;
		}

		if (bexchange->recv[d]) {
			MPI_Irecv(bexchange->rbuf[d], bexchange->msize[d],
				bexchange->btype[d], bexchange->recvproc[d], 0,
				bexchange->comm, &bexchange->req[2 + d]);
		}

		if (bexchange->send[d]) {
			// pack the outgoing boundary before handing the buffer to MPI
			copy_to_buffer(
				(char*)bexchange->sbuf[d],
				base + bexchange->sbind[d] * bexchange->fsize,
				bexchange->ndims, bexchange->mdims[d],
				bexchange->stride, bexchange->fsize);

			MPI_Isend(bexchange->sbuf[d], bexchange->msize[d],
				bexchange->btype[d], bexchange->sendproc[d], 0,
				bexchange->comm, &bexchange->req[d]);
		}
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Complete a manual-packing exchange.
 *
 * Waits for the two receive requests (req[2..3]), unpacks each received
 * buffer into `a` at byte offset rbind[] * fsize with copy_from_buffer, and
 * finally waits for the two send requests (req[0..1]).  Always returns 0.
 */
int P_BExchange_mp_end ( a, bexchange )
	void *a;
	BExchange *bexchange;
{
	MPI_Status status[4];
	char *base = (char *)a;
	int d;

	// receives first, so unpacking can proceed while sends drain
	MPI_Waitall(2, &bexchange->req[2], &status[2]);

	for (d = 0; d < 2; d++) {
		if (bexchange->overlap[d] > 0 && bexchange->recv[d]) {
			copy_from_buffer(base + bexchange->rbind[d] * bexchange->fsize,
				(char*)bexchange->rbuf[d],
				bexchange->ndims, bexchange->mdims[d],
				bexchange->stride, bexchange->fsize);
		}
	}

	MPI_Waitall(2, bexchange->req, status);
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Hand the pack/unpack buffers of a manual-packing descriptor back through
 * free_plbuf.  Only directions with a positive overlap are considered, and
 * within those only the send/receive sides that are flagged as active.
 * Always returns 0.
 */
int P_BExchange_mp_free ( bexchange )
	BExchange *bexchange;
{
	int d;

	for (d = 0; d < 2; d++) {
		if (bexchange->overlap[d] <= 0) {
			continue;	// no buffers were obtained for this direction
		}
		if (bexchange->send[d]) {
			free_plbuf(bexchange->sbuf[d], bexchange->buf_id[d]);
		}
		if (bexchange->recv[d]) {
			free_plbuf(bexchange->rbuf[d], bexchange->buf_id[2 + d]);
		}
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot manual-packing boundary exchange: P_BExchange_mp_init, start, end
 * and free in a single call.
 *
 * Returns 0 on success, otherwise the error code reported by
 * P_BExchange_mp_init (see the table at the top of the file).
 */
int P_BExchange_mp(a, ndims, stride, blklen, bdim, overlap, datatype,
	comm, period)

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
{
	BExchange bexchange;
	int ierr;

	// Fix: `ierr = f(...) != 0` assigned the result of the comparison, so
	// every failure was reported as 1.  Assign first, then test.
	ierr = P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, &bexchange);
	if (ierr != 0) {
		return ierr;
	}
	P_BExchange_mp_start(a, &bexchange);
	P_BExchange_mp_end(a, &bexchange);
	P_BExchange_mp_free(&bexchange);
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot manual-packing exchange across the periodic (wrap-around) link
 * only: P_BExchange_mp_ginit with period = 1 and only_period = 1, then
 * start/end/free.
 *
 * Returns 0 on success, otherwise the error code reported by
 * P_BExchange_mp_ginit.
 */
int P_BExchange_period_mp(a, ndims, stride, blklen, bdim, overlap, datatype,
	comm)

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
{
	const int only_period = 1;
	const int period = 1;

	BExchange bexchange;
	int ierr;

	// Fix: `ierr = f(...) != 0` assigned the result of the comparison, so
	// every failure was reported as 1.  Assign first, then test.
	ierr = P_BExchange_mp_ginit(ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, only_period, &bexchange);
	if (ierr != 0) {
		return ierr;
	}
	P_BExchange_mp_start(a, &bexchange);
	P_BExchange_mp_end(a, &bexchange);
	P_BExchange_mp_free(&bexchange);
	return 0;
}
// -------------------------------------------------------------------------- //


// v.1.5 - persistent exchanges for manual packing //
// -------------------------------------------------------------------------- //
/*
 * Persistent variant of P_BExchange_mp_ginit.
 *
 * Builds the descriptor with P_BExchange_mp_ginit and then creates persistent
 * requests on the pack buffers sbuf[]/rbuf[] for every direction that has a
 * positive overlap and is flagged to send/receive.  Send requests go to
 * req[0..1], receive requests to req[2..3].
 *
 * Returns the P_BExchange_mp_ginit error code if that call fails, 0 otherwise.
 */
int PST_BExchange_mp_ginit(ndims, stride, blklen, bdim, overlap, datatype,
	comm, period, only_period, bexchange)

	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period, only_period;
	BExchange *bexchange;
{
	int d;
	int rc;

	rc = P_BExchange_mp_ginit(ndims, stride, blklen, bdim, overlap, datatype,
		comm, period, only_period, bexchange);
	if (rc != 0) {
		return rc;
	}

	for (d = 0; d < 2; d++) {
		if (bexchange->overlap[d] <= 0) {
			continue;	// nothing to exchange in this direction
		}
		if (bexchange->send[d]) {
			MPI_Send_init(bexchange->sbuf[d], bexchange->msize[d],
				bexchange->btype[d], bexchange->sendproc[d], 0,
				bexchange->comm, &bexchange->req[d]);
		}
		if (bexchange->recv[d]) {
			MPI_Recv_init(bexchange->rbuf[d], bexchange->msize[d],
				bexchange->btype[d], bexchange->recvproc[d], 0,
				bexchange->comm, &bexchange->req[2 + d]);
		}
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Persistent manual-packing initialisation for the regular case: forwards to
 * PST_BExchange_mp_ginit with only_period switched off and returns its result.
 */
int PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype,
	comm, period, bexchange)

	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
	BExchange *bexchange;
{
	/* 0 = not restricted to the wrap-around link */
	return PST_BExchange_mp_ginit(ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, 0, bexchange);
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Start the persistent requests of a manual-packing exchange.
 *
 * Per direction: the receive request (req[2 + d]) is started first; then, if
 * the direction sends, the boundary of `a` is packed into sbuf[] with
 * copy_to_buffer and the send request (req[d]) is started.  A slot whose
 * direction has no overlap, or whose send/recv flag is clear, is set to
 * MPI_REQUEST_NULL.  Always returns 0.
 */
int PST_BExchange_mp_start( a, bexchange )
	void *a;
	BExchange *bexchange;
{
	char *base = (char *)a;
	int d;

	for (d = 0; d < 2; d++) {
		const int has_boundary = (bexchange->overlap[d] > 0);

		if (has_boundary && bexchange->recv[d]) {
			MPI_Start(&bexchange->req[2 + d]);
		}
		else {
			bexchange->req[2 + d] = MPI_REQUEST_NULL;
		}

		if (has_boundary && bexchange->send[d]) {
			// refresh the send buffer before the request is restarted
			copy_to_buffer((char*)bexchange->sbuf[d],
				base + bexchange->sbind[d] * bexchange->fsize,
				bexchange->ndims, bexchange->mdims[d],
				bexchange->stride, bexchange->fsize);

			MPI_Start(&bexchange->req[d]);
		}
		else {
			bexchange->req[d] = MPI_REQUEST_NULL;
		}
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Complete a persistent manual-packing exchange.  Completion and unpacking
 * are the same as in the non-persistent case, so this delegates to
 * P_BExchange_mp_end.
 */
int PST_BExchange_mp_end( a, bexchange )
	void* a;
	BExchange *bexchange;
{
	const int rc = P_BExchange_mp_end(a, bexchange);

	return rc;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Release everything owned by a persistent manual-packing descriptor: first
 * the pack buffers (via P_BExchange_mp_free), then the persistent requests of
 * every direction that has a positive overlap and is flagged to send/receive.
 * Always returns 0.
 */
int PST_BExchange_mp_free( bexchange )
	BExchange *bexchange;
{
	int d;

	P_BExchange_mp_free(bexchange);

	for (d = 0; d < 2; d++) {
		if (bexchange->overlap[d] <= 0) {
			continue;	// no requests were created for this direction
		}
		if (bexchange->send[d]) {
			MPI_Request_free(&bexchange->req[d]);
		}
		if (bexchange->recv[d]) {
			MPI_Request_free(&bexchange->req[2 + d]);
		}
	}
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot manual-packing exchange through the persistent-request code path:
 * PST_BExchange_mp_init, start, end and free in a single call.
 *
 * Returns 0 on success, otherwise the error code reported by
 * PST_BExchange_mp_init (see the table at the top of the file).
 */
int PST_BExchange_mp( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm, period )

	void *a;
	MPI_Datatype datatype;
	int ndims, *stride, *blklen, bdim, overlap[2];
	int period;
	MPI_Comm comm;
{
	BExchange bexchange;
	int ierr;

	// Fix: `ierr = f(...) != 0` assigned the result of the comparison, so
	// every failure was reported as 1.  Assign first, then test.
	ierr = PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, &bexchange);
	if (ierr != 0) {
		return ierr;
	}
	PST_BExchange_mp_start(a, &bexchange);
	PST_BExchange_mp_end(a, &bexchange);
	PST_BExchange_mp_free(&bexchange);
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot manual-packing exchange across the periodic (wrap-around) link
 * only, through the persistent-request code path: PST_BExchange_mp_ginit
 * with period = 1 and only_period = 1, then start/end/free.
 *
 * Returns 0 on success, otherwise the error code reported by
 * PST_BExchange_mp_ginit.
 */
int PST_BExchange_period_mp( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm )

	void *a;
	MPI_Datatype datatype;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Comm comm;
{
	const int only_period = 1;
	const int period = 1;

	BExchange bexchange;
	int ierr;

	// Fix: `ierr = f(...) != 0` assigned the result of the comparison, so
	// every failure was reported as 1.  Assign first, then test.
	ierr = PST_BExchange_mp_ginit(ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, only_period, &bexchange);
	if (ierr != 0) {
		return ierr;
	}
	PST_BExchange_mp_start(a, &bexchange);
	PST_BExchange_mp_end(a, &bexchange);
	PST_BExchange_mp_free(&bexchange);
	return 0;
}
// -------------------------------------------------------------------------- //

// v.1.95 - choice subroutines //
// -------------------------------------------------------------------------- //
/*
 * Mode-selected initialisation: dispatches on exch_mode to the matching
 * *_init routine and returns its result.  The array base `a` is passed on
 * only to PST_BExchange_init; the other three init routines do not take it.
 * Returns 999 when exch_mode is none of the four known modes.
 */
int P_BExchange_opt_init ( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm, period, bexchange, exch_mode )

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
	BExchange *bexchange;
	int exch_mode;
{
	int rc = 999;	/* incorrect exchange mode unless a branch below matches */

	if (exch_mode == IS_MPI_TYPED) {
		rc = P_BExchange_init(ndims, stride, blklen, bdim, overlap, datatype,
			comm, period, bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK) {
		rc = P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype,
			comm, period, bexchange);
	}
	else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
		rc = PST_BExchange_init(a, ndims, stride, blklen, bdim, overlap, datatype,
			comm, period, bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
		rc = PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype,
			comm, period, bexchange);
	}
	return rc;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Mode-selected start: dispatches on exch_mode to the matching *_start
 * routine and returns its result.  PST_BExchange_start takes no array
 * argument, so `a` is not passed to it.
 * Returns 999 when exch_mode is none of the four known modes.
 */
int P_BExchange_opt_start ( a, bexchange, exch_mode )
	void *a;
	BExchange *bexchange;
	int exch_mode;
{
	int rc = 999;	/* incorrect exchange mode unless a branch below matches */

	if (exch_mode == IS_MPI_TYPED) {
		rc = P_BExchange_start(a, bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK) {
		rc = P_BExchange_mp_start(a, bexchange);
	}
	else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
		rc = PST_BExchange_start(bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
		rc = PST_BExchange_mp_start(a, bexchange);
	}
	return rc;
}
// -------------------------------------------------------------------------- //


// -------------------------------------------------------------------------- //
/*
 * Mode-selected completion: dispatches on exch_mode to the matching *_end
 * routine and returns its result.  Only the manual-packing variants receive
 * the array base `a`.
 * Returns 999 when exch_mode is none of the four known modes.
 */
int P_BExchange_opt_end ( a, bexchange, exch_mode )
	void *a;
	BExchange *bexchange;
	int exch_mode;
{
	int rc = 999;	/* incorrect exchange mode unless a branch below matches */

	if (exch_mode == IS_MPI_TYPED) {
		rc = P_BExchange_end(bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK) {
		rc = P_BExchange_mp_end(a, bexchange);
	}
	else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
		rc = PST_BExchange_end(bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
		rc = PST_BExchange_mp_end(a, bexchange);
	}
	return rc;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * Mode-selected cleanup: dispatches on exch_mode to the matching *_free
 * routine and returns its result.
 * Returns 999 when exch_mode is none of the four known modes.
 */
int P_BExchange_opt_free ( bexchange, exch_mode )
	BExchange *bexchange;
	int exch_mode;
{
	int rc = 999;	/* incorrect exchange mode unless a branch below matches */

	if (exch_mode == IS_MPI_TYPED) {
		rc = P_BExchange_free(bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK) {
		rc = P_BExchange_mp_free(bexchange);
	}
	else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
		rc = PST_BExchange_free(bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
		rc = PST_BExchange_mp_free(bexchange);
	}
	return rc;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot boundary exchange with the implementation chosen by exch_mode
 * (IS_MPI_TYPED, IS_MPI_MANUAL_PACK, IS_MPI_TYPED_PERSISTENT or
 * IS_MPI_MANUAL_PACK_PERSISTENT): init, start, end and free in one call.
 *
 * Returns 0 on success, 999 for an unknown exch_mode, otherwise the error
 * code reported by the selected init routine (see the table at the top of
 * the file).
 */
int P_BExchange_opt ( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm, period, exch_mode )

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
	int exch_mode;
{
	BExchange bexchange;
	int ierr;

	// The four per-mode branches called exactly the routines that the
	// P_BExchange_opt_* dispatchers select, so go through those instead.
	// Fix: the old `ierr = f(...) != 0` stored the comparison result, so
	// every init failure was reported as 1 rather than the real code.
	// An unknown mode comes back from the init dispatcher as 999.
	ierr = P_BExchange_opt_init(a, ndims, stride, blklen, bdim, overlap,
		datatype, comm, period, &bexchange, exch_mode);
	if (ierr != 0) {
		return ierr;
	}

	P_BExchange_opt_start(a, &bexchange, exch_mode);
	P_BExchange_opt_end(a, &bexchange, exch_mode);
	P_BExchange_opt_free(&bexchange, exch_mode);
	return 0;
}
// -------------------------------------------------------------------------- //

// -------------------------------------------------------------------------- //
/*
 * One-shot exchange across the periodic (wrap-around) link only, with the
 * implementation chosen by exch_mode.  The selected *_ginit routine is
 * called with period = 1 and only_period = 1, followed by start/end/free.
 *
 * Returns 0 on success, 999 for an unknown exch_mode, otherwise the error
 * code reported by the selected ginit routine.
 */
int P_BExchange_period_opt ( a, ndims, stride, blklen, bdim, overlap, datatype,
	comm, exch_mode )

	void *a;
	int ndims, *stride, *blklen, bdim, overlap[2];
	MPI_Datatype datatype;
	MPI_Comm comm;
	int exch_mode;
{
	const int period = 1;
	const int only_period = 1;

	BExchange bexchange;
	int ierr;

	// Fix: the old `ierr = f(...) != 0` stored the comparison result, so
	// every init failure was reported as 1 rather than the real code.
	if (exch_mode == IS_MPI_TYPED) {
		ierr = P_BExchange_ginit(ndims, stride, blklen, bdim, overlap, datatype,
			comm, period, only_period, &bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK) {
		ierr = P_BExchange_mp_ginit(ndims, stride, blklen, bdim, overlap, datatype,
			comm, period, only_period, &bexchange);
	}
	else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
		ierr = PST_BExchange_ginit(a, ndims, stride, blklen, bdim, overlap, datatype,
			comm, period, only_period, &bexchange);
	}
	else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
		ierr = PST_BExchange_mp_ginit(ndims, stride, blklen, bdim, overlap, datatype,
			comm, period, only_period, &bexchange);
	}
	else {
		return 999;	// incorrect exchange mode
	}
	if (ierr != 0) {
		return ierr;
	}

	// start/end/free do not depend on only_period, so the existing
	// mode dispatchers select exactly the routines used here before
	P_BExchange_opt_start(a, &bexchange, exch_mode);
	P_BExchange_opt_end(a, &bexchange, exch_mode);
	P_BExchange_opt_free(&bexchange, exch_mode);
	return 0;
}
// -------------------------------------------------------------------------- //