Skip to content
Snippets Groups Projects
Commit a177419e authored by Debolskiy Andrey's avatar Debolskiy Andrey :bicyclist_tone5:
Browse files

Merge branch '21withman' of http://tesla.parallel.ru/debol/parlib

parents e9c54389 c683b375
Branches
Tags
No related merge requests found
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
AR = ar crl AR = ar crl
RANLIB = ranlib RANLIB = ranlib
INSTALLDIR = $(HOME)/ParLib.v1.1 INSTALLDIR = $(HOME)/ParLib.v2.1
#INSTALLDIR = $(HOME) #INSTALLDIR = $(HOME)
...@@ -13,7 +13,7 @@ all: setvars libparlib.a libparlibf.a ...@@ -13,7 +13,7 @@ all: setvars libparlib.a libparlibf.a
# #
# For INM's HP-cluster # For INM's HP-cluster
# #
CC = mpicc CC = mpicc -restrict -no-ansi-alias
LIBS = -lmpich LIBS = -lmpich
# LIBPATH = -L/opt/mpich-gm/lib # LIBPATH = -L/opt/mpich-gm/lib
# INCPATH = -I/opt/mpich-gm/include # INCPATH = -I/opt/mpich-gm/include
...@@ -21,7 +21,7 @@ all: setvars libparlib.a libparlibf.a ...@@ -21,7 +21,7 @@ all: setvars libparlib.a libparlibf.a
setvars: setvars:
libparlib.a: bexchange.o transpose.o libparlib.a: parlib.o plutils.o bexchange.o transpose.o
$(AR) $@ $? $(AR) $@ $?
$(RANLIB) $@ $(RANLIB) $@
...@@ -74,8 +74,10 @@ install: libparlib.a libparlibf.a ...@@ -74,8 +74,10 @@ install: libparlib.a libparlibf.a
ln -s $(INSTALLDIR)/man/man3/P_Transpose_init.3 \ ln -s $(INSTALLDIR)/man/man3/P_Transpose_init.3 \
$(INSTALLDIR)/man/man3/P_Transpose_free.3 $(INSTALLDIR)/man/man3/P_Transpose_free.3
parlibf.o: parlib.h plutils.o: plutils.h
bexchange.o: parlib.h parlib.o: parlib.h plutils.h
bexchangef.o: parlib.h parlibf.o: parlib.h plutils.h
transpose.o: parlib.h bexchange.o: parlib.h plutils.h
transposef.o: parlib.h bexchangef.o: parlib.h plutils.h
transpose.o: parlib.h plutils.h
transposef.o: parlib.h plutils.h
#include "parlib.h" #include "parlib.h"
#include "plutils.h"
#include <stdlib.h>
#include <string.h>
/* /*
* Error codes: * Error codes:
...@@ -8,23 +12,33 @@ ...@@ -8,23 +12,33 @@
* 3 - negative boundary width * 3 - negative boundary width
* 4 - nonpositive dimension * 4 - nonpositive dimension
* 5 - boundary width exceeds the array block length * 5 - boundary width exceeds the array block length
* 6 - number of dimensions exceeds maximum value (only for MP - manual packing)
* 999 - incorrect exchange mode (only for generic calls)
*/ */
int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype, int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange ) comm, period, bexchange )
int ndims, *stride, *blklen, bdim, overlap[2], period; int ndims, *stride, *blklen, bdim, overlap[2];
MPI_Datatype datatype; MPI_Datatype datatype;
MPI_Comm comm; MPI_Comm comm;
int period;
BExchange *bexchange; BExchange *bexchange;
{ {
int nproc, iproc, direct, idim, sendproc[2], recvproc[2]; int nproc, iproc, direct, idim, sendproc[2], recvproc[2];
int count, strd, sbind[2], rbind[2], send[2], recv[2]; int count, strd, sbind[2], rbind[2], send[2], recv[2];
MPI_Aint fsize; MPI_Aint fsize, lb;
MPI_Datatype oldtype, btype[2]; MPI_Datatype oldtype, btype[2];
MPI_Request sreq[2], rreq[2];
/*
* Check input parameters // Setting degenerate-success cases conditions for consistency
*/ // including: overlap[]=0, nproc=0, iproc=MPI_UNDEFINED
for (direct = 0; direct < 2; direct++) {
bexchange->overlap[direct] = 0;
}
//
// Check input parameters
//
if (ndims < 1) { return 1; } if (ndims < 1) { return 1; }
if (bdim < 1 || bdim > ndims) { return 2; } if (bdim < 1 || bdim > ndims) { return 2; }
if (overlap[0] == 0 && overlap[1] == 0) { return 0; } /* success */ if (overlap[0] == 0 && overlap[1] == 0) { return 0; } /* success */
...@@ -35,9 +49,12 @@ int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype, ...@@ -35,9 +49,12 @@ int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype,
if (overlap[direct] < 0) { return 3; } if (overlap[direct] < 0) { return 3; }
if (overlap[direct] > blklen[bdim - 1]) { return 5; } if (overlap[direct] > blklen[bdim - 1]) { return 5; }
} }
/*
* Define the number of processors in the group and the rank //
*/ // Define the number of processors in the group and the rank
//
if (comm == MPI_COMM_NULL) { return 0; } // empty communicator
MPI_Comm_size(comm, &nproc); MPI_Comm_size(comm, &nproc);
if (nproc == 0) { return 0; } /* success */ if (nproc == 0) { return 0; } /* success */
MPI_Comm_rank(comm, &iproc); MPI_Comm_rank(comm, &iproc);
...@@ -50,34 +67,49 @@ int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype, ...@@ -50,34 +67,49 @@ int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype,
recv[0] = iproc < nproc - 1 || period; recv[0] = iproc < nproc - 1 || period;
send[1] = recv[0]; send[1] = recv[0];
recv[1] = send[0]; recv[1] = send[0];
MPI_Type_extent ( datatype, &fsize ); MPI_Type_get_extent(datatype, &lb, &fsize);
/* //
* Define data types for the boundaries // Define data types for the boundaries
*/ //
for ( direct = 0; direct < 2; direct++ ) { // checking if one data type will suffice ...
const int ndsize = (overlap[0] == overlap[1]) ? 1 : 2;
for (direct = 0; direct < ndsize; direct++) {
if (overlap[direct] > 0) { if (overlap[direct] > 0) {
oldtype = datatype;
strd = 1; if (bdim == 1) {
for ( idim = 0; idim < ndims; idim++ ) { count = overlap[direct];
if ( idim+1 == bdim ) { }
else {
count = blklen[0];
}
MPI_Type_contiguous(count, datatype, &btype[direct]);
oldtype = btype[direct];
strd = stride[0];
for (idim = 1; idim < ndims; idim++) {
if (bdim == idim + 1) {
count = overlap[direct]; count = overlap[direct];
} else { }
else {
count = blklen[idim]; count = blklen[idim];
} }
MPI_Type_hvector ( count, 1, strd * fsize, oldtype, MPI_Type_create_hvector(count, 1, strd * fsize, oldtype,
&btype[direct]); &btype[direct]);
if ( idim > 0 ) {
MPI_Type_free(&oldtype); MPI_Type_free(&oldtype);
}
oldtype = btype[direct]; oldtype = btype[direct];
strd = strd * stride[idim]; strd = strd * stride[idim];
} }
MPI_Type_commit(&btype[direct]); MPI_Type_commit(&btype[direct]);
} }
} }
/* if (ndsize == 1) btype[1] = btype[0]; // using same MPI-datatype
* Determine the begining of boundaries
*/
//
// Determine the beginning of boundaries
//
strd = 1; strd = 1;
for (idim = 0; idim < bdim - 1; idim++) { for (idim = 0; idim < bdim - 1; idim++) {
strd = strd * stride[idim]; strd = strd * stride[idim];
...@@ -101,97 +133,602 @@ int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype, ...@@ -101,97 +133,602 @@ int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype,
bexchange->fsize = fsize; bexchange->fsize = fsize;
return 0; return 0;
} }
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_BExchange_start ( a, bexchange ) int P_BExchange_start ( a, bexchange )
void *a; void *a;
BExchange *bexchange; BExchange *bexchange;
{ {
int direct, overlap[2], send[2], recv[2], btype[2]; int direct;
int sendproc[2], recvproc[2], sbind[2], rbind[2]; char *ach = (char *) a;
for (direct = 0; direct < 2; direct++) {
if (bexchange->overlap[direct] > 0) {
if (bexchange->send[direct]) {
MPI_Isend(ach + bexchange->sbind[direct] * bexchange->fsize, 1, bexchange->btype[direct],
bexchange->sendproc[direct], 0, bexchange->comm,
&bexchange->req[direct]);
}
else
{
bexchange->req[direct] = MPI_REQUEST_NULL;
}
if (bexchange->recv[direct]) {
MPI_Irecv(ach + bexchange->rbind[direct] * bexchange->fsize, 1, bexchange->btype[direct],
bexchange->recvproc[direct], 0, bexchange->comm,
&bexchange->req[2 + direct]);
}
else
{
bexchange->req[2 + direct] = MPI_REQUEST_NULL;
}
}
else
{
bexchange->req[direct] = MPI_REQUEST_NULL;
bexchange->req[2 + direct] = MPI_REQUEST_NULL;
}
}
return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
// Complete a boundary exchange: block until every request stored in
// bexchange->req has finished.
//
//   bexchange - exchange descriptor whose req[] array holds the four
//               request slots waited on here
//
// Returns 0 unconditionally; the MPI_Waitall result is not inspected.
int P_BExchange_end ( bexchange )
    BExchange *bexchange;
{
    enum { NREQ = 4 };          // number of request slots in bexchange->req
    MPI_Status done[NREQ];

    MPI_Waitall(NREQ, bexchange->req, done);
    return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
// Release the MPI datatypes referenced by bexchange->btype.
//
//   bexchange - exchange descriptor; overlap[] and btype[] are read
//
// When both overlap widths are equal only btype[0] is considered, so the
// datatype is freed once; otherwise each direction with a positive overlap
// has its own datatype freed.
//
// Returns 0 unconditionally.
int P_BExchange_free ( bexchange )
    BExchange *bexchange;
{
    int side;
    int ntypes = 2;

    // equal overlaps: a single datatype slot is examined
    if (bexchange->overlap[0] == bexchange->overlap[1]) {
        ntypes = 1;
    }

    for (side = 0; side < ntypes; side++) {
        if (bexchange->overlap[side] <= 0) {
            continue;           // nothing to free for this direction
        }
        MPI_Type_free(&bexchange->btype[side]);
    }
    return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_BExchange ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period )
void *a;
int ndims, *stride, *blklen, bdim, overlap[2];
MPI_Datatype datatype;
MPI_Comm comm; MPI_Comm comm;
MPI_Request sreq[2], rreq[2]; int period;
MPI_Aint fsize; {
BExchange bexchange;
int ierr;
if (ierr = P_BExchange_init(ndims, stride, blklen, bdim, overlap,
datatype, comm, period, &bexchange) != 0) {
return ierr;
}
P_BExchange_start(a, &bexchange);
P_BExchange_end(&bexchange);
P_BExchange_free(&bexchange);
return 0;
}
// -------------------------------------------------------------------------- //
// v.1.3 - persistent exchanges //
// -------------------------------------------------------------------------- //
int PST_BExchange_init ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange )
void *a;
int ndims, *stride, *blklen, bdim, overlap[2];
MPI_Datatype datatype;
MPI_Comm comm;
int period;
BExchange *bexchange;
{
int direct;
char *ach = (char *)a; char *ach = (char *)a;
int ierr = P_BExchange_init(ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange);
if (ierr != 0) return ierr;
for (direct = 0; direct < 2; direct++) { for (direct = 0; direct < 2; direct++) {
overlap[direct] = bexchange->overlap[direct]; if (bexchange->overlap[direct] > 0) {
send[direct] = bexchange->send[direct]; if (bexchange->send[direct]) {
recv[direct] = bexchange->recv[direct]; MPI_Send_init(ach + bexchange->sbind[direct] * bexchange->fsize, 1, bexchange->btype[direct],
btype[direct] = bexchange->btype[direct]; bexchange->sendproc[direct], 0, bexchange->comm,
sendproc[direct] = bexchange->sendproc[direct]; &bexchange->req[direct]);
recvproc[direct] = bexchange->recvproc[direct];
sbind[direct] = bexchange->sbind[direct];
rbind[direct] = bexchange->rbind[direct];
} }
comm = bexchange->comm; if (bexchange->recv[direct]) {
fsize = bexchange->fsize; MPI_Recv_init(ach + bexchange->rbind[direct] * bexchange->fsize, 1, bexchange->btype[direct],
bexchange->recvproc[direct], 0, bexchange->comm,
&bexchange->req[2 + direct]);
}
}
}
return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int PST_BExchange_start ( bexchange )
BExchange *bexchange;
{
int direct;
for (direct = 0; direct < 2; direct++) { for (direct = 0; direct < 2; direct++) {
if ( overlap[direct] > 0 ) { if (bexchange->overlap[direct] > 0) {
if ( send[direct] ) {
MPI_Isend ( ach+sbind[direct]*fsize, 1, btype[direct], if (bexchange->send[direct]) {
sendproc[direct], 0, comm, &sreq[direct] ); MPI_Start(&bexchange->req[direct]);
} }
if ( recv[direct] ) { else
MPI_Irecv ( ach+rbind[direct]*fsize, 1, btype[direct], {
recvproc[direct], 0, comm, &rreq[direct] ); bexchange->req[direct] = MPI_REQUEST_NULL;
}
if (bexchange->recv[direct]) {
MPI_Start(&bexchange->req[2 + direct]);
}
else
{
bexchange->req[2 + direct] = MPI_REQUEST_NULL;
}
}
else
{
bexchange->req[direct] = MPI_REQUEST_NULL;
bexchange->req[2 + direct] = MPI_REQUEST_NULL;
} }
} }
return 0;
} }
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
// Complete a persistent boundary exchange.
//
// Delegates to P_BExchange_end and returns its result unchanged.
int PST_BExchange_end(bexchange)
    BExchange *bexchange;
{
    const int ierr = P_BExchange_end(bexchange);

    return ierr;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int PST_BExchange_free(bexchange)
BExchange *bexchange;
{
int direct;
P_BExchange_free(bexchange);
for (direct = 0; direct < 2; direct++) { for (direct = 0; direct < 2; direct++) {
bexchange->sreq[direct]=sreq[direct]; if (bexchange->overlap[direct] > 0) {
bexchange->rreq[direct]=rreq[direct]; if (bexchange->send[direct]) {
MPI_Request_free(&bexchange->req[direct]);
}
if (bexchange->recv[direct]) {
MPI_Request_free(&bexchange->req[2 + direct]);
}
}
} }
return 0; return 0;
} }
// -------------------------------------------------------------------------- //
int P_BExchange_end ( bexchange ) // -------------------------------------------------------------------------- //
// One-shot boundary exchange using persistent requests: init, start, end
// and free in a single call.
//
//   a        - base address of the local array block
//   ndims    - number of array dimensions
//   stride   - per-dimension strides
//   blklen   - per-dimension block lengths
//   bdim     - 1-based index of the dimension whose boundaries are exchanged
//   overlap  - boundary widths for the two directions
//   datatype - MPI datatype of one array element
//   comm     - communicator of the participating processes
//   period   - non-zero for periodic exchange
//
// Returns 0 on success, otherwise the non-zero error code reported by
// PST_BExchange_init (in that case nothing is started or freed).
int PST_BExchange( a, ndims, stride, blklen, bdim, overlap, datatype,
                   comm, period )
    void *a;
    int ndims, *stride, *blklen, bdim, overlap[2];
    MPI_Datatype datatype;
    MPI_Comm comm;
    int period;
{
    BExchange bexchange;
    int ierr;

    // Assign first, compare afterwards: "ierr = f(...) != 0" would store the
    // result of the comparison (0 or 1) and lose the real error code.
    ierr = PST_BExchange_init(a, ndims, stride, blklen, bdim, overlap,
                              datatype, comm, period, &bexchange);
    if (ierr != 0) {
        return ierr;
    }

    PST_BExchange_start(&bexchange);
    PST_BExchange_end(&bexchange);
    PST_BExchange_free(&bexchange);
    return 0;
}
// -------------------------------------------------------------------------- //
// v.1.4 - manual packing //
// -------------------------------------------------------------------------- //
int P_BExchange_mp_init ( ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange )
int ndims, *stride, *blklen, bdim, overlap[2];
MPI_Datatype datatype;
MPI_Comm comm;
int period;
BExchange *bexchange; BExchange *bexchange;
{ {
MPI_Status status; int nproc, iproc, direct, idim, sendproc[2], recvproc[2];
int direct, overlap[2], send[2], recv[2]; int count, strd, sbind[2], rbind[2], send[2], recv[2];
MPI_Request sreq[2], rreq[2]; void *sbuf[2], *rbuf[2];
int buf_id[4];
int mdims[2][MAX_PARLIB_MP_DIMS];
int msize[2];
MPI_Aint lb, fsize;
MPI_Datatype btype[2];
// Setting degenerate-success cases conditions for consistency
// including: overlap[]=0, nproc=0, iproc=MPI_UNDEFINED
for (direct = 0; direct < 2; direct++) { for (direct = 0; direct < 2; direct++) {
overlap[direct] = bexchange->overlap[direct]; bexchange->overlap[direct] = 0;
send[direct] = bexchange->send[direct];
recv[direct] = bexchange->recv[direct];
sreq[direct] = bexchange->sreq[direct];
rreq[direct] = bexchange->rreq[direct];
} }
//
// Check input parameters
//
if (ndims < 1) { return 1; }
if (bdim < 1 || bdim > ndims) { return 2; }
if (overlap[0] == 0 && overlap[1] == 0) { return 0; } /* success */
for (idim = 0; idim < ndims; idim++) {
if (stride[idim] <= 0) { return 4; }
}
for (direct = 0; direct < 2; direct++) {
if (overlap[direct] < 0) { return 3; }
if (overlap[direct] > blklen[bdim - 1]) { return 5; }
}
if (ndims > MAX_PARLIB_MP_DIMS) { return 6; }
//
// Define the number of processors in the group and the rank
//
if (comm == MPI_COMM_NULL) { return 0; } // empty communicator
MPI_Comm_size(comm, &nproc);
if (nproc == 0) { return 0; } /* success */
MPI_Comm_rank(comm, &iproc);
if (iproc == MPI_UNDEFINED) { return 0; } /* the process does not belong to the group */
sendproc[0] = (iproc == 0 ? nproc - 1 : iproc - 1);
recvproc[0] = (iproc == nproc - 1 ? 0 : iproc + 1);
sendproc[1] = recvproc[0];
recvproc[1] = sendproc[0];
send[0] = iproc > 0 || period;
recv[0] = iproc < nproc - 1 || period;
send[1] = recv[0];
recv[1] = send[0];
MPI_Type_get_extent(datatype, &lb, &fsize);
//
// Define data type, message sizes and buffers for the boundaries
//
for (direct = 0; direct < 2; direct++) { for (direct = 0; direct < 2; direct++) {
if (overlap[direct] > 0) { if (overlap[direct] > 0) {
msize[direct] = 1;
btype[direct] = datatype;
for (idim = 0; idim < ndims; idim++) {
if (bdim == idim + 1) {
count = overlap[direct];
}
else {
count = blklen[idim];
}
mdims[direct][idim] = count;
msize[direct] *= count;
}
if (send[direct]) { if (send[direct]) {
MPI_Wait ( &sreq[direct], &status ); sbuf[direct] = get_plbuf(msize[direct] * fsize * sizeof(char),
&buf_id[direct]);
}
else
{
sbuf[direct] = NULL;
buf_id[direct] = -1;
} }
if (recv[direct]) { if (recv[direct]) {
MPI_Wait ( &rreq[direct], &status ); rbuf[direct] = get_plbuf(msize[direct] * fsize * sizeof(char),
&buf_id[2 + direct]);
} }
else
{
rbuf[direct] = NULL;
buf_id[2 + direct] = -1;
} }
} }
else
{
msize[direct] = 0;
for (idim = 0; idim < ndims; idim++) {
mdims[direct][idim] = 0;
}
sbuf[direct] = NULL;
rbuf[direct] = NULL;
buf_id[direct] = -1;
buf_id[2 + direct] = -1;
}
}
//
// Determine the beginning of boundaries
//
strd = 1;
for (idim = 0; idim < bdim - 1; idim++) {
strd = strd * stride[idim];
}
sbind[0] = 0;
rbind[0] = blklen[bdim - 1] * strd;
sbind[1] = (blklen[bdim - 1] - overlap[1])*strd;
rbind[1] = -overlap[1] * strd;
for (direct = 0; direct < 2; direct++) {
bexchange->overlap[direct] = overlap[direct];
bexchange->send[direct] = send[direct];
bexchange->recv[direct] = recv[direct];
bexchange->btype[direct] = btype[direct];
bexchange->sendproc[direct] = sendproc[direct];
bexchange->recvproc[direct] = recvproc[direct];
bexchange->sbind[direct] = sbind[direct];
bexchange->rbind[direct] = rbind[direct];
bexchange->sbuf[direct] = sbuf[direct];
bexchange->rbuf[direct] = rbuf[direct];
bexchange->buf_id[direct] = buf_id[direct];
bexchange->buf_id[2 + direct] = buf_id[2 + direct];
memcpy(bexchange->mdims[direct], mdims[direct], ndims * sizeof(int));
bexchange->msize[direct] = msize[direct];
}
bexchange->comm = comm;
bexchange->fsize = fsize;
bexchange->ndims = ndims;
memcpy(bexchange->stride, stride, ndims * sizeof(int));
return 0; return 0;
} }
// -------------------------------------------------------------------------- //
int P_BExchange_free ( bexchange ) // -------------------------------------------------------------------------- //
int P_BExchange_mp_start( a, bexchange )
void *a;
BExchange *bexchange; BExchange *bexchange;
{ {
int direct, overlap[2]; int direct;
MPI_Datatype btype[2]; char *ach = (char *)a;
for (direct = 0; direct < 2; direct++) { for (direct = 0; direct < 2; direct++) {
overlap[direct] = bexchange->overlap[direct];
btype[direct] = bexchange->btype[direct]; if (bexchange->overlap[direct] > 0) {
if (bexchange->recv[direct]) {
MPI_Irecv(bexchange->rbuf[direct], bexchange->msize[direct], bexchange->btype[direct],
bexchange->recvproc[direct], 0, bexchange->comm,
&bexchange->req[2 + direct]);
}
else
{
bexchange->req[2 + direct] = MPI_REQUEST_NULL;
} }
if (bexchange->send[direct]) {
copy_to_buffer(
(char*)bexchange->sbuf[direct],
ach + bexchange->sbind[direct] * bexchange->fsize,
bexchange->ndims, bexchange->mdims[direct], bexchange->stride, bexchange->fsize);
MPI_Isend(bexchange->sbuf[direct], bexchange->msize[direct], bexchange->btype[direct],
bexchange->sendproc[direct], 0, bexchange->comm,
&bexchange->req[direct]);
}
else
{
bexchange->req[direct] = MPI_REQUEST_NULL;
}
}
else
{
bexchange->req[direct] = MPI_REQUEST_NULL;
bexchange->req[2 + direct] = MPI_REQUEST_NULL;
}
}
return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_BExchange_mp_end ( a, bexchange )
void *a;
BExchange *bexchange;
{
MPI_Status status[4];
int direct;
char *ach = (char *)a;
MPI_Waitall(2, &bexchange->req[2], &status[2]);
for (direct = 0; direct < 2; direct++) { for (direct = 0; direct < 2; direct++) {
if ( overlap[direct] > 0 ) { if (bexchange->overlap[direct] > 0) {
MPI_Type_free ( &btype[direct] ); if (bexchange->recv[direct]) {
copy_from_buffer(ach + bexchange->rbind[direct] * bexchange->fsize,
(char*)bexchange->rbuf[direct],
bexchange->ndims, bexchange->mdims[direct], bexchange->stride, bexchange->fsize);
}
} }
} }
MPI_Waitall(2, bexchange->req, status);
return 0; return 0;
} }
// -------------------------------------------------------------------------- //
int P_BExchange ( a, ndims, stride, blklen, bdim, overlap, datatype, // -------------------------------------------------------------------------- //
// Release the pack buffers of a manual-packing exchange.
//
//   bexchange - exchange descriptor; overlap[], send[], recv[], sbuf[],
//               rbuf[] and buf_id[] are read
//
// For each direction with a positive overlap, the send buffer is handed to
// free_plbuf when that direction sends, and the receive buffer when it
// receives. Send buffer ids live in buf_id[0..1], receive ids in buf_id[2..3].
//
// Returns 0 unconditionally.
int P_BExchange_mp_free ( bexchange )
    BExchange *bexchange;
{
    int side;

    for (side = 0; side < 2; side++) {
        if (bexchange->overlap[side] <= 0) {
            continue;           // no buffers were set up for this direction
        }
        if (bexchange->send[side]) {
            free_plbuf(bexchange->sbuf[side], bexchange->buf_id[side]);
        }
        if (bexchange->recv[side]) {
            free_plbuf(bexchange->rbuf[side], bexchange->buf_id[2 + side]);
        }
    }
    return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
// One-shot boundary exchange with manual packing: init, start, end and
// free in a single call.
//
//   a        - base address of the local array block
//   ndims    - number of array dimensions
//   stride   - per-dimension strides
//   blklen   - per-dimension block lengths
//   bdim     - 1-based index of the dimension whose boundaries are exchanged
//   overlap  - boundary widths for the two directions
//   datatype - MPI datatype of one array element
//   comm     - communicator of the participating processes
//   period   - non-zero for periodic exchange
//
// Returns 0 on success, otherwise the non-zero error code reported by
// P_BExchange_mp_init (in that case nothing is started or freed).
int P_BExchange_mp(a, ndims, stride, blklen, bdim, overlap, datatype,
                   comm, period)
    void *a;
    int ndims, *stride, *blklen, bdim, overlap[2];
    MPI_Datatype datatype;
    MPI_Comm comm;
    int period;
{
    BExchange bexchange;
    int ierr;

    // Assign first, compare afterwards: "ierr = f(...) != 0" would store the
    // result of the comparison (0 or 1) and lose the real error code.
    ierr = P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap,
                               datatype, comm, period, &bexchange);
    if (ierr != 0) {
        return ierr;
    }

    P_BExchange_mp_start(a, &bexchange);
    P_BExchange_mp_end(a, &bexchange);
    P_BExchange_mp_free(&bexchange);
    return 0;
}
// -------------------------------------------------------------------------- //
// v.1.5 - persistent exchanges for manual packing //
// -------------------------------------------------------------------------- //
// Initialise a persistent manual-packing exchange.
//
// Runs P_BExchange_mp_init and, if it succeeds, creates persistent MPI
// requests on the pack buffers: the send request for direction d goes to
// req[d], the receive request to req[2 + d]. Requests are created only for
// directions with a positive overlap and only for the send/recv roles that
// are enabled in the descriptor.
//
// Returns the non-zero code of P_BExchange_mp_init on failure, else 0.
int PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype,
                          comm, period, bexchange)
    int ndims, *stride, *blklen, bdim, overlap[2];
    MPI_Datatype datatype;
    MPI_Comm comm;
    int period;
    BExchange *bexchange;
{
    int side;
    const int ierr = P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap,
                                         datatype, comm, period, bexchange);

    if (ierr != 0) {
        return ierr;
    }

    for (side = 0; side < 2; side++) {
        if (bexchange->overlap[side] <= 0) {
            continue;
        }
        if (bexchange->send[side]) {
            MPI_Send_init(bexchange->sbuf[side], bexchange->msize[side],
                          bexchange->btype[side], bexchange->sendproc[side],
                          0, bexchange->comm, &bexchange->req[side]);
        }
        if (bexchange->recv[side]) {
            MPI_Recv_init(bexchange->rbuf[side], bexchange->msize[side],
                          bexchange->btype[side], bexchange->recvproc[side],
                          0, bexchange->comm, &bexchange->req[2 + side]);
        }
    }
    return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int PST_BExchange_mp_start( a, bexchange )
void *a;
BExchange *bexchange;
{
int direct;
char *ach = (char *)a;
for (direct = 0; direct < 2; direct++) {
if (bexchange->overlap[direct] > 0) {
if (bexchange->recv[direct]) {
MPI_Start(&bexchange->req[2 + direct]);
}
else
bexchange->req[2 + direct] = MPI_REQUEST_NULL;
if (bexchange->send[direct]) {
copy_to_buffer((char*)bexchange->sbuf[direct],
ach + bexchange->sbind[direct] * bexchange->fsize,
bexchange->ndims, bexchange->mdims[direct], bexchange->stride, bexchange->fsize);
MPI_Start(&bexchange->req[direct]);
}
else
bexchange->req[direct] = MPI_REQUEST_NULL;
}
else
{
bexchange->req[direct] = MPI_REQUEST_NULL;
bexchange->req[2 + direct] = MPI_REQUEST_NULL;
}
}
return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
// Complete a persistent manual-packing exchange.
//
// Delegates to P_BExchange_mp_end with the same arguments and returns its
// result unchanged.
int PST_BExchange_mp_end( a, bexchange )
    void* a;
    BExchange *bexchange;
{
    const int ierr = P_BExchange_mp_end(a, bexchange);

    return ierr;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
// Tear down a persistent manual-packing exchange.
//
// First calls P_BExchange_mp_free, then frees the persistent requests:
// req[d] for every direction d that sends and req[2 + d] for every
// direction that receives, restricted to directions with positive overlap.
//
// Returns 0 unconditionally.
int PST_BExchange_mp_free( bexchange )
    BExchange *bexchange;
{
    int side;

    P_BExchange_mp_free(bexchange);

    for (side = 0; side < 2; side++) {
        if (bexchange->overlap[side] <= 0) {
            continue;           // no requests exist for this direction
        }
        if (bexchange->send[side]) {
            MPI_Request_free(&bexchange->req[side]);
        }
        if (bexchange->recv[side]) {
            MPI_Request_free(&bexchange->req[2 + side]);
        }
    }
    return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int PST_BExchange_mp( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period ) comm, period )
void *a; void *a;
MPI_Datatype datatype; MPI_Datatype datatype;
int ndims, *stride, *blklen, bdim, overlap[2]; int ndims, *stride, *blklen, bdim, overlap[2];
...@@ -200,10 +737,178 @@ int P_BExchange ( a, ndims, stride, blklen, bdim, overlap, datatype, ...@@ -200,10 +737,178 @@ int P_BExchange ( a, ndims, stride, blklen, bdim, overlap, datatype,
{ {
BExchange bexchange; BExchange bexchange;
int ierr; int ierr;
if ( ierr = P_BExchange_init ( ndims, stride, blklen, bdim, overlap, if (ierr = PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap,
datatype, comm, period, &bexchange ) != 0 ) { return ierr; } datatype, comm, period, &bexchange) != 0) {
return ierr;
}
PST_BExchange_mp_start(a, &bexchange);
PST_BExchange_mp_end(a, &bexchange);
PST_BExchange_mp_free(&bexchange);
return 0;
}
// -------------------------------------------------------------------------- //
// v.1.95 - choice subroutines //
// -------------------------------------------------------------------------- //
// Generic initialisation: dispatch to the init routine selected by
// exch_mode.
//
//   a         - base address of the local array block; forwarded only to
//               PST_BExchange_init (typed persistent mode)
//   exch_mode - one of IS_MPI_TYPED, IS_MPI_MANUAL_PACK,
//               IS_MPI_TYPED_PERSISTENT, IS_MPI_MANUAL_PACK_PERSISTENT
//   (remaining arguments are forwarded unchanged)
//
// Returns the selected routine's result, or 999 for an unrecognised mode.
int P_BExchange_opt_init ( a, ndims, stride, blklen, bdim, overlap, datatype,
                           comm, period, bexchange, exch_mode )
    void *a;
    int ndims, *stride, *blklen, bdim, overlap[2];
    MPI_Datatype datatype;
    MPI_Comm comm;
    int period;
    BExchange *bexchange;
    int exch_mode;
{
    int ierr = 999;             // stays 999 when no mode matches

    if (exch_mode == IS_MPI_TYPED) {
        ierr = P_BExchange_init(ndims, stride, blklen, bdim, overlap,
                                datatype, comm, period, bexchange);
    } else if (exch_mode == IS_MPI_MANUAL_PACK) {
        ierr = P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap,
                                   datatype, comm, period, bexchange);
    } else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
        ierr = PST_BExchange_init(a, ndims, stride, blklen, bdim, overlap,
                                  datatype, comm, period, bexchange);
    } else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
        ierr = PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap,
                                     datatype, comm, period, bexchange);
    }
    return ierr;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
// Generic start: dispatch to the start routine selected by exch_mode.
//
//   a         - base address of the local array block; not forwarded in
//               typed persistent mode, whose start routine takes only the
//               descriptor
//   bexchange - exchange descriptor prepared by the matching init routine
//   exch_mode - exchange mode selector
//
// Returns the selected routine's result, or 999 for an unrecognised mode.
int P_BExchange_opt_start ( a, bexchange, exch_mode )
    void *a;
    BExchange *bexchange;
    int exch_mode;
{
    int ierr = 999;             // stays 999 when no mode matches

    if (exch_mode == IS_MPI_TYPED) {
        ierr = P_BExchange_start(a, bexchange);
    } else if (exch_mode == IS_MPI_MANUAL_PACK) {
        ierr = P_BExchange_mp_start(a, bexchange);
    } else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
        ierr = PST_BExchange_start(bexchange);
    } else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
        ierr = PST_BExchange_mp_start(a, bexchange);
    }
    return ierr;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
// Generic completion: dispatch to the end routine selected by exch_mode.
//
//   a         - base address of the local array block; forwarded only to
//               the manual-packing end routines
//   bexchange - exchange descriptor of the exchange being completed
//   exch_mode - exchange mode selector
//
// Returns the selected routine's result, or 999 for an unrecognised mode.
int P_BExchange_opt_end ( a, bexchange, exch_mode )
    void *a;
    BExchange *bexchange;
    int exch_mode;
{
    int ierr = 999;             // stays 999 when no mode matches

    if (exch_mode == IS_MPI_TYPED) {
        ierr = P_BExchange_end(bexchange);
    } else if (exch_mode == IS_MPI_MANUAL_PACK) {
        ierr = P_BExchange_mp_end(a, bexchange);
    } else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
        ierr = PST_BExchange_end(bexchange);
    } else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
        ierr = PST_BExchange_mp_end(a, bexchange);
    }
    return ierr;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
// Generic teardown: dispatch to the free routine selected by exch_mode.
//
//   bexchange - exchange descriptor to release
//   exch_mode - exchange mode selector
//
// Returns the selected routine's result, or 999 for an unrecognised mode.
int P_BExchange_opt_free ( bexchange, exch_mode )
    BExchange *bexchange;
    int exch_mode;
{
    int ierr = 999;             // stays 999 when no mode matches

    if (exch_mode == IS_MPI_TYPED) {
        ierr = P_BExchange_free(bexchange);
    } else if (exch_mode == IS_MPI_MANUAL_PACK) {
        ierr = P_BExchange_mp_free(bexchange);
    } else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
        ierr = PST_BExchange_free(bexchange);
    } else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
        ierr = PST_BExchange_mp_free(bexchange);
    }
    return ierr;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_BExchange_opt ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, exch_mode )
void *a;
int ndims, *stride, *blklen, bdim, overlap[2];
MPI_Datatype datatype;
MPI_Comm comm;
int period;
int exch_mode;
{
BExchange bexchange;
int ierr;
if (exch_mode == IS_MPI_TYPED) {
if (ierr = P_BExchange_init(ndims, stride, blklen, bdim, overlap, datatype,
comm, period, &bexchange) != 0) {
return ierr;
}
P_BExchange_start(a, &bexchange); P_BExchange_start(a, &bexchange);
P_BExchange_end(&bexchange); P_BExchange_end(&bexchange);
P_BExchange_free(&bexchange); P_BExchange_free(&bexchange);
return 0; return 0;
} }
if (exch_mode == IS_MPI_MANUAL_PACK) {
if (ierr = P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype,
comm, period, &bexchange) != 0) {
return ierr;
}
P_BExchange_mp_start(a, &bexchange);
P_BExchange_mp_end(a, &bexchange);
P_BExchange_mp_free(&bexchange);
return 0;
}
if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
if (ierr = PST_BExchange_init(a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, &bexchange) != 0) {
return ierr;
}
PST_BExchange_start(&bexchange);
PST_BExchange_end(&bexchange);
PST_BExchange_free(&bexchange);
return 0;
}
if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
if (ierr = PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype,
comm, period, &bexchange) != 0) {
return ierr;
}
PST_BExchange_mp_start(a, &bexchange);
PST_BExchange_mp_end(a, &bexchange);
PST_BExchange_mp_free(&bexchange);
return 0;
}
return 999;
}
// -------------------------------------------------------------------------- //
\ No newline at end of file
#include <stdlib.h> #include <stdlib.h>
#include "parlib.h" #include "parlib.h"
// -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE #ifdef FORTRANUNDERSCORE
void p_bexchange_init_ ( ndims, stride, blklen, bdim, overlap, datatype, void p_bexchange_init_ ( ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, ierr ) comm, period, bexchange, ierr )
...@@ -58,6 +59,7 @@ void p_bexchange_free__ ( bexchange, ierr ) ...@@ -58,6 +59,7 @@ void p_bexchange_free__ ( bexchange, ierr )
#else #else
void p_bexchange_free ( bexchange, ierr ) void p_bexchange_free ( bexchange, ierr )
#endif #endif
BExchange **bexchange; BExchange **bexchange;
MPI_Fint *ierr; MPI_Fint *ierr;
{ {
...@@ -75,12 +77,487 @@ void p_bexchange__ ( a, ndims, stride, blklen, bdim, overlap, datatype, ...@@ -75,12 +77,487 @@ void p_bexchange__ ( a, ndims, stride, blklen, bdim, overlap, datatype,
void p_bexchange ( a, ndims, stride, blklen, bdim, overlap, datatype, void p_bexchange ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr ) comm, period, ierr )
#endif #endif
void *a; void *a;
int *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr; MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr;
MPI_Datatype *datatype; MPI_Fint *datatype;
MPI_Comm *comm; MPI_Fint *comm;
{ {
*ierr = P_BExchange(a, (int)*ndims, (int *)stride, (int *)blklen, *ierr = P_BExchange(a, (int)*ndims, (int *)stride, (int *)blklen,
(int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype),
MPI_Comm_f2c(*comm), (int)*period); MPI_Comm_f2c(*comm), (int)*period);
} }
// -------------------------------------------------------------------------- //
// v.1.3 - persistent exchanges //
// -------------------------------------------------------------------------- //
// Fortran binding for PST_BExchange_init.
//
// Allocates a BExchange descriptor on the heap, stores its address in
// *bexchange, converts the Fortran datatype/communicator handles with
// MPI_Type_f2c / MPI_Comm_f2c, and stores the result of PST_BExchange_init
// in *ierr. The symbol name carries zero, one or two trailing underscores
// depending on FORTRANUNDERSCORE / FORTRANDOUBLEUNDERSCORE.
//
// NOTE(review): the malloc result is not checked before use.
// NOTE(review): the (int *) casts assume MPI_Fint has the same
// representation as int - confirm for the target platform.
#ifdef FORTRANUNDERSCORE
void pst_bexchange_init_ ( a, ndims, stride, blklen, bdim, overlap, datatype,
                           comm, period, bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange_init__ ( a, ndims, stride, blklen, bdim, overlap, datatype,
                            comm, period, bexchange, ierr )
#else
void pst_bexchange_init(a, ndims, stride, blklen, bdim, overlap, datatype,
                        comm, period, bexchange, ierr)
#endif
    void *a;
    MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap,
             *period, *ierr, *comm;
    BExchange **bexchange;
{
    BExchange *handle = malloc(sizeof *handle);

    *bexchange = handle;        // hand the descriptor back to the caller
    *ierr = PST_BExchange_init(a, (int)*ndims, (int *)stride, (int *)blklen,
                               (int)*bdim, (int *)overlap,
                               MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm),
                               (int)*period, handle);
}
// Fortran binding for PST_BExchange_start.
// Forwards the descriptor stored in *bexchange and writes the return code
// to *ierr.
#ifdef FORTRANUNDERSCORE
void pst_bexchange_start_ ( bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange_start__ ( bexchange, ierr )
#else
void pst_bexchange_start ( bexchange, ierr )
#endif
MPI_Fint *ierr;
BExchange **bexchange;
{
    BExchange *handle = *bexchange;

    *ierr = PST_BExchange_start(handle);
}
// Fortran binding for PST_BExchange_end.
// Forwards the descriptor stored in *bexchange and writes the return code
// to *ierr.
#ifdef FORTRANUNDERSCORE
void pst_bexchange_end_ ( bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange_end__ ( bexchange, ierr )
#else
void pst_bexchange_end ( bexchange, ierr )
#endif
MPI_Fint *ierr;
BExchange **bexchange;
{
    BExchange *handle = *bexchange;

    *ierr = PST_BExchange_end(handle);
}
// Fortran binding for PST_BExchange_free.
// Releases the exchange resources, then the descriptor itself (allocated by
// pst_bexchange_init). The return code of PST_BExchange_free is reported
// through *ierr; previously *ierr was left untouched, so the caller read an
// undefined status.
#ifdef FORTRANUNDERSCORE
void pst_bexchange_free_ ( bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange_free__ ( bexchange, ierr )
#else
void pst_bexchange_free ( bexchange, ierr )
#endif
BExchange **bexchange;
MPI_Fint *ierr;
{
    *ierr = PST_BExchange_free(*bexchange);
    free(*bexchange);
    // clear the handle so a stale pointer is not kept on the Fortran side
    *bexchange = NULL;
}
#ifdef FORTRANUNDERSCORE
void pst_bexchange_ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange__ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr )
#else
void pst_bexchange ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr )
#endif
void *a;
MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr;
MPI_Fint *datatype;
MPI_Fint *comm;
{
*ierr = PST_BExchange(a, (int)*ndims, (int *)stride, (int *)blklen,
(int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype),
MPI_Comm_f2c(*comm), (int)*period);
}
// -------------------------------------------------------------------------- //
// v.1.4 - manual packing //
// -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE
void p_bexchange_mp_init_ ( ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_mp_init__ ( ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, ierr )
#else
void p_bexchange_mp_init ( ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, ierr )
#endif
MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap,
*period, *ierr, *comm;
BExchange **bexchange;
{
*bexchange = (BExchange *)malloc(sizeof (BExchange));
*ierr = P_BExchange_mp_init((int)*ndims, (int *)stride, (int *)blklen,
(int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype),
MPI_Comm_f2c(*comm), (int)*period, *bexchange);
}
// Fortran binding for P_BExchange_mp_start.
// Passes the array `a` and the descriptor stored in *bexchange through and
// writes the return code to *ierr.
#ifdef FORTRANUNDERSCORE
void p_bexchange_mp_start_ ( a, bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_mp_start__ ( a, bexchange, ierr )
#else
void p_bexchange_mp_start ( a, bexchange, ierr )
#endif
void *a;
MPI_Fint *ierr;
BExchange **bexchange;
{
    BExchange *handle = *bexchange;

    *ierr = P_BExchange_mp_start(a, handle);
}
// Fortran binding for P_BExchange_mp_end.
// Passes the array `a` and the descriptor stored in *bexchange through and
// writes the return code to *ierr.
#ifdef FORTRANUNDERSCORE
void p_bexchange_mp_end_ ( a, bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_mp_end__ ( a, bexchange, ierr )
#else
void p_bexchange_mp_end(a, bexchange, ierr)
#endif
void *a;
MPI_Fint *ierr;
BExchange **bexchange;
{
    BExchange *handle = *bexchange;

    *ierr = P_BExchange_mp_end(a, handle);
}
// Fortran binding for P_BExchange_mp_free.
// Releases the exchange resources, then the descriptor itself (allocated by
// p_bexchange_mp_init). The return code of P_BExchange_mp_free is reported
// through *ierr; previously *ierr was left untouched, so the caller read an
// undefined status.
#ifdef FORTRANUNDERSCORE
void p_bexchange_mp_free_ ( bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_mp_free__ ( bexchange, ierr )
#else
void p_bexchange_mp_free ( bexchange, ierr )
#endif
BExchange **bexchange;
MPI_Fint *ierr;
{
    *ierr = P_BExchange_mp_free(*bexchange);
    free(*bexchange);
    // clear the handle so a stale pointer is not kept on the Fortran side
    *bexchange = NULL;
}
#ifdef FORTRANUNDERSCORE
void p_bexchange_mp_ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_mp__ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr )
#else
void p_bexchange_mp ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr )
#endif
void *a;
MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr;
MPI_Fint *datatype;
MPI_Fint *comm;
{
*ierr = P_BExchange_mp(a, (int)*ndims, (int *)stride, (int *)blklen,
(int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype),
MPI_Comm_f2c(*comm), (int)*period);
}
// -------------------------------------------------------------------------- //
// v.1.4 - persistent exchanges for manual packing //
// -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE
void pst_bexchange_mp_init_ ( ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange_mp_init__ ( ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, ierr )
#else
void pst_bexchange_mp_init ( ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, ierr )
#endif
MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap,
*period, *ierr, *comm;
BExchange **bexchange;
{
*bexchange = (BExchange *)malloc(sizeof (BExchange));
*ierr = PST_BExchange_mp_init((int)*ndims, (int *)stride, (int *)blklen,
(int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype),
MPI_Comm_f2c(*comm), (int)*period, *bexchange);
}
// Fortran binding for PST_BExchange_mp_start.
// Passes the array `a` and the descriptor stored in *bexchange through and
// writes the return code to *ierr.
#ifdef FORTRANUNDERSCORE
void pst_bexchange_mp_start_ ( a, bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange_mp_start__ ( a, bexchange, ierr )
#else
void pst_bexchange_mp_start ( a, bexchange, ierr )
#endif
void *a;
MPI_Fint *ierr;
BExchange **bexchange;
{
    BExchange *handle = *bexchange;

    *ierr = PST_BExchange_mp_start(a, handle);
}
// Fortran binding for PST_BExchange_mp_end.
// Passes the array `a` and the descriptor stored in *bexchange through and
// writes the return code to *ierr.
#ifdef FORTRANUNDERSCORE
void pst_bexchange_mp_end_ ( a, bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange_mp_end__ ( a, bexchange, ierr )
#else
void pst_bexchange_mp_end(a, bexchange, ierr)
#endif
void *a;
MPI_Fint *ierr;
BExchange **bexchange;
{
    BExchange *handle = *bexchange;

    *ierr = PST_BExchange_mp_end(a, handle);
}
// Fortran binding for PST_BExchange_mp_free.
// Releases the exchange resources, then the descriptor itself (allocated by
// pst_bexchange_mp_init). The return code of PST_BExchange_mp_free is
// reported through *ierr; previously *ierr was left untouched, so the caller
// read an undefined status.
#ifdef FORTRANUNDERSCORE
void pst_bexchange_mp_free_ ( bexchange, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange_mp_free__ ( bexchange, ierr )
#else
void pst_bexchange_mp_free ( bexchange, ierr )
#endif
BExchange **bexchange;
MPI_Fint *ierr;
{
    *ierr = PST_BExchange_mp_free(*bexchange);
    free(*bexchange);
    // clear the handle so a stale pointer is not kept on the Fortran side
    *bexchange = NULL;
}
#ifdef FORTRANUNDERSCORE
void pst_bexchange_mp_ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_bexchange_mp__ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr )
#else
void pst_bexchange_mp ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, ierr )
#endif
void *a;
MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr;
MPI_Fint *datatype;
MPI_Fint *comm;
{
*ierr = PST_BExchange_mp(a, (int)*ndims, (int *)stride, (int *)blklen,
(int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype),
MPI_Comm_f2c(*comm), (int)*period);
}
// -------------------------------------------------------------------------- //
// v.1.95 - choice subroutines //
// -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE
void p_bexchange_opt_init_ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_opt_init__ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, exch_mode, ierr )
#else
void p_bexchange_opt_init ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, bexchange, exch_mode, ierr )
#endif
void *a;
MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap,
*period, *ierr, *comm, *exch_mode;
BExchange **bexchange;
{
*bexchange = (BExchange *) malloc ( sizeof (BExchange) );
*ierr = P_BExchange_opt_init ( a, (int)*ndims, (int *)stride, (int *)blklen,
(int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype),
MPI_Comm_f2c(*comm), (int)*period, *bexchange, (int)*exch_mode );
}
// Fortran binding for P_BExchange_opt_start.
// Forwards `a`, the descriptor stored in *bexchange and the exchange mode;
// the return code is written to *ierr.
#ifdef FORTRANUNDERSCORE
void p_bexchange_opt_start_ ( a, bexchange, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_opt_start__ ( a, bexchange, exch_mode, ierr )
#else
void p_bexchange_opt_start ( a, bexchange, exch_mode, ierr )
#endif
void *a;
MPI_Fint *exch_mode;
MPI_Fint *ierr;
BExchange **bexchange;
{
    const int mode = (int)*exch_mode;

    *ierr = P_BExchange_opt_start(a, *bexchange, mode);
}
// Fortran binding for P_BExchange_opt_end.
// Forwards `a`, the descriptor stored in *bexchange and the exchange mode;
// the return code is written to *ierr.
#ifdef FORTRANUNDERSCORE
void p_bexchange_opt_end_ ( a, bexchange, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_opt_end__ ( a, bexchange, exch_mode, ierr )
#else
void p_bexchange_opt_end ( a, bexchange, exch_mode, ierr )
#endif
void *a;
MPI_Fint *exch_mode;
MPI_Fint *ierr;
BExchange **bexchange;
{
    const int mode = (int)*exch_mode;

    *ierr = P_BExchange_opt_end(a, *bexchange, mode);
}
// Fortran binding for P_BExchange_opt_free.
// Releases the exchange resources for the given mode, then the descriptor
// itself (allocated by p_bexchange_opt_init). The return code of
// P_BExchange_opt_free is reported through *ierr; previously *ierr was left
// untouched, so the caller read an undefined status.
#ifdef FORTRANUNDERSCORE
void p_bexchange_opt_free_ ( bexchange, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_opt_free__ ( bexchange, exch_mode, ierr )
#else
void p_bexchange_opt_free ( bexchange, exch_mode, ierr )
#endif
BExchange **bexchange;
MPI_Fint *ierr, *exch_mode;
{
    *ierr = P_BExchange_opt_free(*bexchange, (int)*exch_mode);
    free(*bexchange);
    // clear the handle so a stale pointer is not kept on the Fortran side
    *bexchange = NULL;
}
#ifdef FORTRANUNDERSCORE
void p_bexchange_opt_ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_bexchange_opt__ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, exch_mode, ierr )
#else
void p_bexchange_opt ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, exch_mode, ierr )
#endif
void *a;
MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr;
MPI_Fint *datatype;
MPI_Fint *comm;
MPI_Fint *exch_mode;
{
*ierr = P_BExchange_opt(a, (int)*ndims, (int *)stride, (int *)blklen,
(int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype),
MPI_Comm_f2c(*comm), (int)*period, (int)*exch_mode);
}
// -------------------------------------------------------------------------- //
// v.2.0 - regular communications [removed only on correct program exit] //
// -------------------------------------------------------------------------- //
// Fortran binding: create an exchange and register it in the library's
// handle table.
// On success *ierr is 0 and *exch_id receives the id returned by
// save_bexch_handle. On failure *ierr holds the code from
// P_BExchange_opt_init and *exch_id is not written.
#ifdef FORTRANUNDERSCORE
void reg_bexchange_ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, exch_id, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void reg_bexchange__ ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, exch_id, exch_mode, ierr )
#else
void reg_bexchange ( a, ndims, stride, blklen, bdim, overlap, datatype,
comm, period, exch_id, exch_mode, ierr )
#endif
void *a;
MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap,
*period, *ierr, *comm, *exch_id, *exch_mode;
{
    BExchange *bexchange;

    bexchange = (BExchange *) malloc ( sizeof (BExchange) );
    *ierr = P_BExchange_opt_init ( a, (int)*ndims, (int *)stride, (int *)blklen,
        (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype),
        MPI_Comm_f2c(*comm), (int)*period, bexchange, (int)*exch_mode );
    if ((int)*ierr != 0) {
        // the descriptor was never registered, so nobody else can release it
        free(bexchange);
        return;
    }
    *exch_id = save_bexch_handle(bexchange, (int)*exch_mode);
}
// Fortran binding: start the registered exchange identified by *exch_id on
// array `a`. The descriptor and its mode are looked up in the handle table;
// the return code of P_BExchange_opt_start is written to *ierr.
#ifdef FORTRANUNDERSCORE
void start_bexchange_ ( a, exch_id, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void start_bexchange__ ( a, exch_id, ierr )
#else
void start_bexchange ( a, exch_id, ierr )
#endif
void *a;
MPI_Fint *exch_id;
MPI_Fint *ierr;
{
    BExchange *handle;
    int mode;

    get_bexch_handle(&handle, &mode, (int)*exch_id);
    *ierr = P_BExchange_opt_start(a, handle, mode);
}
// Fortran binding: finish the registered exchange identified by *exch_id on
// array `a`. The descriptor and its mode are looked up in the handle table;
// the return code of P_BExchange_opt_end is written to *ierr.
#ifdef FORTRANUNDERSCORE
void end_bexchange_ ( a, exch_id, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void end_bexchange__ ( a, exch_id, ierr )
#else
void end_bexchange ( a, exch_id, ierr )
#endif
void *a;
MPI_Fint *exch_id;
MPI_Fint *ierr;
{
    BExchange *handle;
    int mode;

    get_bexch_handle(&handle, &mode, (int)*exch_id);
    *ierr = P_BExchange_opt_end(a, handle, mode);
}
// Fortran binding: run a complete registered exchange (start followed by
// end) on array `a`. If the start phase reports a non-zero code, that code
// is left in *ierr and the end phase is not attempted.
#ifdef FORTRANUNDERSCORE
void run_bexchange_ ( a, exch_id, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void run_bexchange__ ( a, exch_id, ierr )
#else
void run_bexchange ( a, exch_id, ierr )
#endif
void *a;
MPI_Fint *exch_id;
MPI_Fint *ierr;
{
    BExchange *handle;
    int mode;

    get_bexch_handle(&handle, &mode, (int)*exch_id);

    *ierr = P_BExchange_opt_start(a, handle, mode);
    if ((int)*ierr == 0) {
        *ierr = P_BExchange_opt_end(a, handle, mode);
    }
}
// Fortran binding: release the registered exchange identified by *exch_id.
// The return code of P_BExchange_opt_free is written to *ierr. Only when it
// is zero is the descriptor freed and its slot removed from the handle table;
// on failure the entry stays registered.
// NOTE: remove_bexch_handle compacts the table, so ids handed out for
// exchanges registered after this one shift down by one.
#ifdef FORTRANUNDERSCORE
void unreg_bexchange_ ( exch_id, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void unreg_bexchange__ ( exch_id, ierr )
#else
void unreg_bexchange ( exch_id, ierr )
#endif
MPI_Fint *exch_id;
MPI_Fint *ierr;
{
    BExchange *handle;
    int mode;

    get_bexch_handle(&handle, &mode, (int)*exch_id);

    *ierr = P_BExchange_opt_free(handle, mode);
    if ((int)*ierr == 0) {
        free(handle);
        remove_bexch_handle((int)*exch_id);
    }
}
// -------------------------------------------------------------------------- //
#include "parlib.h"
#include "plutils.h"
#include <stdlib.h>
// BExchange list [definition]
// Storage behind save/get/remove_bexch_handle. Entries [0, bexch_hptr) are
// in use; an exchange id is an index into these parallel arrays.
// -------------------------------------------------------------------------- //
BExchange* bexch_hlist[MAX_BEXCH_HANDLES]; // registered BExchange descriptors
int bexch_hmode[MAX_BEXCH_HANDLES]; // exchange mode stored with each descriptor
int bexch_hptr; // number of stored handles == index of the next free slot
// -------------------------------------------------------------------------- //
// Transpose list [definition]
// Storage behind save/get/remove_transp_handle. Entries [0, transp_hptr) are
// in use; a transposition id is an index into these parallel arrays.
// -------------------------------------------------------------------------- //
Transposition* transp_hlist[MAX_TRANSP_HANDLES]; // registered Transposition descriptors
int transp_hmode[MAX_TRANSP_HANDLES]; // exchange mode stored with each descriptor
int transp_hptr; // number of stored handles == index of the next free slot
// -------------------------------------------------------------------------- //
// ParLib init-deinit
// -------------------------------------------------------------------------- //
// Put the library into its initial state: both handle tables are marked
// empty and the reusable buffer pool is reset.
void ParLib_init()
{
    // handle tables start out empty
    bexch_hptr = 0;
    transp_hptr = 0;

    // reusable pl-buffers
    init_plbuf();
}
// Tear the library down: every exchange and transposition still present in
// the handle tables is released (resources first, then the descriptor), the
// tables are emptied, and finally the buffer pool is freed.
// Return codes of the *_opt_free calls are not inspected here.
void ParLib_deinit()
{
    int idx, mode;
    BExchange *bexch;
    Transposition *transp;

    // registered boundary exchanges
    for (idx = 0; idx < bexch_hptr; idx++) {
        get_bexch_handle(&bexch, &mode, idx);
        P_BExchange_opt_free(bexch, mode);
        free(bexch);
    }
    bexch_hptr = 0;

    // registered transpositions
    for (idx = 0; idx < transp_hptr; idx++) {
        get_transp_handle(&transp, &mode, idx);
        P_Transpose_opt_free(transp, mode);
        free(transp);
    }
    transp_hptr = 0;

    // pl-buffers go last, after the descriptors above have been released
    deinit_plbuf();
}
// -------------------------------------------------------------------------- //
// BExchange handle list interface
// -------------------------------------------------------------------------- //
// Append a descriptor and its exchange mode to the BExchange handle table.
// Returns the id (table index) assigned to the new entry, or -1 when the
// table already holds MAX_BEXCH_HANDLES entries. In that case nothing is
// stored; previously the write went past the end of the arrays.
int save_bexch_handle(BExchange *bexchange, int exch_mode)
{
    const int exch_id = bexch_hptr;

    if (exch_id < 0 || exch_id >= MAX_BEXCH_HANDLES) return -1;

    bexch_hlist[exch_id] = bexchange;
    bexch_hmode[exch_id] = exch_mode;
    bexch_hptr = exch_id + 1;
    return exch_id;
}
// Look up the descriptor and exchange mode stored under exch_id.
// For an id outside [0, bexch_hptr) the outputs are set to NULL and -1
// instead of reading outside the table or returning a stale slot.
void get_bexch_handle(BExchange** bexchange, int* exch_mode, int exch_id)
{
    if (exch_id < 0 || exch_id >= bexch_hptr) {
        *bexchange = NULL;
        *exch_mode = -1;
        return;
    }
    *bexchange = bexch_hlist[exch_id];
    *exch_mode = bexch_hmode[exch_id];
}
// Remove the entry stored under exch_id from the BExchange handle table.
// Later entries are shifted down by one, so their ids decrease by one.
// An id outside [0, bexch_hptr) is ignored: previously such a call still
// decremented the count (dropping the last live entry) and a negative id
// indexed before the start of the arrays.
void remove_bexch_handle(int exch_id)
{
    int k;

    if (exch_id < 0 || exch_id >= bexch_hptr) return;

    for (k = exch_id; k < bexch_hptr - 1; k++) {
        bexch_hlist[k] = bexch_hlist[k + 1];
        bexch_hmode[k] = bexch_hmode[k + 1];
    }
    bexch_hptr--;
}
// -------------------------------------------------------------------------- //
// Transposition handle list interface
// -------------------------------------------------------------------------- //
// Append a descriptor and its exchange mode to the Transposition handle table.
// Returns the id (table index) assigned to the new entry, or -1 when the
// table already holds MAX_TRANSP_HANDLES entries. In that case nothing is
// stored; previously the write went past the end of the arrays.
int save_transp_handle(Transposition *transp, int exch_mode)
{
    const int exch_id = transp_hptr;

    if (exch_id < 0 || exch_id >= MAX_TRANSP_HANDLES) return -1;

    transp_hlist[exch_id] = transp;
    transp_hmode[exch_id] = exch_mode;
    transp_hptr = exch_id + 1;
    return exch_id;
}
// Look up the descriptor and exchange mode stored under exch_id.
// For an id outside [0, transp_hptr) the outputs are set to NULL and -1
// instead of reading outside the table or returning a stale slot.
void get_transp_handle(Transposition** transp, int* exch_mode, int exch_id)
{
    if (exch_id < 0 || exch_id >= transp_hptr) {
        *transp = NULL;
        *exch_mode = -1;
        return;
    }
    *transp = transp_hlist[exch_id];
    *exch_mode = transp_hmode[exch_id];
}
// Remove the entry stored under exch_id from the Transposition handle table.
// Later entries are shifted down by one, so their ids decrease by one.
// An id outside [0, transp_hptr) is ignored: previously such a call still
// decremented the count (dropping the last live entry) and a negative id
// indexed before the start of the arrays.
void remove_transp_handle(int exch_id)
{
    int k;

    if (exch_id < 0 || exch_id >= transp_hptr) return;

    for (k = exch_id; k < transp_hptr - 1; k++) {
        transp_hlist[k] = transp_hlist[k + 1];
        transp_hmode[k] = transp_hmode[k + 1];
    }
    transp_hptr--;
}
// -------------------------------------------------------------------------- //
...@@ -2,23 +2,63 @@ ...@@ -2,23 +2,63 @@
#include <mpi.h> #include <mpi.h>
#endif #endif
#include "plutils.h"
// Values for the `exch_mode` argument of the *_opt_* routines and of the
// handle tables. Judging by the names they pick between the typed,
// manual-packing and persistent implementations declared in this header;
// the dispatch itself is not in this part of the file.
#define IS_MPI_TYPED 0
#define IS_MPI_MANUAL_PACK 1
#define IS_MPI_TYPED_PERSISTENT 2
#define IS_MPI_MANUAL_PACK_PERSISTENT 3
// ParLib v1.8 initialization
// -------------------------------------------------------------------------- //
void ParLib_init();
void ParLib_deinit();
// -------------------------- //
typedef struct BExchange { typedef struct BExchange {
int overlap[2], send[2], recv[2], btype[2]; int overlap[2], send[2], recv[2];
int sendproc[2], recvproc[2], sbind[2], rbind[2]; int sendproc[2], recvproc[2], sbind[2], rbind[2];
MPI_Datatype btype[2];
MPI_Comm comm; MPI_Comm comm;
MPI_Request sreq[2], rreq[2]; MPI_Request req[4];
MPI_Aint fsize; MPI_Aint fsize;
// manual packing data
// --------------------------------- //
int ndims;
int stride[MAX_PARLIB_MP_DIMS];
int msize[2];
int mdims[2][MAX_PARLIB_MP_DIMS];
void *sbuf[2], *rbuf[2];
// --------------------------------- //
// memory management
// --------------------------------- //
int buf_id[4];
// --------------------------------- //
} BExchange; } BExchange;
typedef struct Transposition { // BExchange list [declaration]
MPI_Datatype *stype, *rtype; // -------------------------------------------------------------------------- //
int *sbeg, *rbeg; #define MAX_BEXCH_HANDLES 1024
MPI_Comm comm;
int nproc, iproc; extern BExchange* bexch_hlist[MAX_BEXCH_HANDLES]; // list of BExchange handles
MPI_Request *sreq, *rreq; extern int bexch_hmode[MAX_BEXCH_HANDLES]; // Bexchange mode for each handle
MPI_Aint fsize; extern int bexch_hptr; // pointer to available BExchange handle
} Transposition; // -------------------------------------------------------------------------- //
// BExchange handle list interface
// -------------------------------------------------------------------------- //
int save_bexch_handle(BExchange *bexchange, int exch_mode);
void get_bexch_handle(BExchange** bexchange, int* exch_mode, int exch_id);
void remove_bexch_handle(int exch_id);
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_BExchange_init ( int, int*, int*, int, int*, MPI_Datatype, int P_BExchange_init ( int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int, BExchange* ); MPI_Comm, int, BExchange* );
int P_BExchange_start ( void*, BExchange* ); int P_BExchange_start ( void*, BExchange* );
...@@ -26,7 +66,101 @@ int P_BExchange_end ( BExchange* ); ...@@ -26,7 +66,101 @@ int P_BExchange_end ( BExchange* );
int P_BExchange_free ( BExchange* ); int P_BExchange_free ( BExchange* );
int P_BExchange ( void*, int, int*, int*, int, int*, MPI_Datatype, int P_BExchange ( void*, int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int ); MPI_Comm, int );
// -------------------------------------------------------------------------- //
// v.1.3 - persistent exchanges //
// -------------------------------------------------------------------------- //
// Argument order as used by the Fortran bindings:
// init: (a, ndims, stride, blklen, bdim, overlap, datatype, comm, period, bexchange)
// one-call form: the same list without the trailing descriptor.
// All routines return an int status; the bindings treat non-zero as failure.
int PST_BExchange_init(void*, int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int, BExchange*);
int PST_BExchange_start(BExchange*);
int PST_BExchange_end(BExchange*);
int PST_BExchange_free(BExchange*);
int PST_BExchange(void*, int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int);
// -------------------------------------------------------------------------- //
// v.1.4 - manual packing //
// -------------------------------------------------------------------------- //
// init: (ndims, stride, blklen, bdim, overlap, datatype, comm, period, bexchange)
// start/end: (a, bexchange) - the array is supplied at exchange time.
int P_BExchange_mp_init(int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int, BExchange*);
int P_BExchange_mp_start(void*, BExchange*);
int P_BExchange_mp_end(void*, BExchange*);
int P_BExchange_mp_free(BExchange*);
int P_BExchange_mp(void*, int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int);
// -------------------------------------------------------------------------- //
// v.1.4 - persistent exchanges for manual packing //
// -------------------------------------------------------------------------- //
// Same argument layout as the manual-packing group above.
int PST_BExchange_mp_init(int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int, BExchange*);
int PST_BExchange_mp_start(void*, BExchange*);
int PST_BExchange_mp_end(void*, BExchange*);
int PST_BExchange_mp_free(BExchange*);
int PST_BExchange_mp(void*, int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int);
// -------------------------------------------------------------------------- //
// v.1.95 - choice subroutines //
// -------------------------------------------------------------------------- //
// Same argument layout as the persistent group, with a trailing exchange
// mode (one of the IS_MPI_* values) on every routine.
int P_BExchange_opt_init(void*, int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int, BExchange*, int);
int P_BExchange_opt_start(void*, BExchange*, int);
int P_BExchange_opt_end(void*, BExchange*, int);
int P_BExchange_opt_free(BExchange*, int);
int P_BExchange_opt(void*, int, int*, int*, int, int*, MPI_Datatype,
MPI_Comm, int, int);
// -------------------------------------------------------------------------- //
// Descriptor of one transposition exchange. Instances are stored by pointer
// in the Transpose handle table and released with P_Transpose_opt_free
// followed by free() in ParLib_deinit.
// NOTE(review): field meanings below are taken from the existing comments and
// the field names; the routines that fill them are not in this part of the file.
typedef struct Transposition {
void *psrc, *pdest; // used only in persistent-type communications
MPI_Datatype *stype, *rtype; // presumably per-process send/receive datatypes - confirm
int *sbeg, *rbeg;
MPI_Comm comm;
int nproc, iproc;
MPI_Request *req;
MPI_Aint fsize;
// manual packing data
// --------------------------------- //
int ndims;
int sstride[MAX_PARLIB_MP_DIMS], rstride[MAX_PARLIB_MP_DIMS];
int **sdims, **rdims;
int *ssize, *rsize;
void **sbuf, **rbuf; // [nproc] buffers
// --------------------------------- //
// memory management
// --------------------------------- //
int *mem_dims; // [nproc * ndims] pool
void *mem_sbuf, *mem_rbuf; // [nproc * sum(msize)] pools
int buf_id[9]; // presumably ids returned by get_plbuf for the pools above - confirm
// --------------------------------- //
} Transposition;
// Transpose list [declaration]
// -------------------------------------------------------------------------- //
#define MAX_TRANSP_HANDLES 128 // capacity of the Transpose handle table
extern Transposition* transp_hlist[MAX_TRANSP_HANDLES]; // list of Transpose handles
extern int transp_hmode[MAX_TRANSP_HANDLES]; // Transpose mode for each handle
extern int transp_hptr; // pointer to available Transpose handle
// -------------------------------------------------------------------------- //
// Transposition handle list interface
// save: append a descriptor with its mode and return its id (table index)
// get: fetch the descriptor and mode stored under an id
// remove: delete an id; entries stored after it shift down by one
// -------------------------------------------------------------------------- //
int save_transp_handle(Transposition *transp, int exch_mode);
void get_transp_handle(Transposition** transp, int* exch_mode, int exch_id);
void remove_transp_handle(int exch_id);
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_Transpose_init ( int , int, int*, int, int*, int*, int*, int*, int P_Transpose_init ( int , int, int*, int, int*, int*, int*, int*,
MPI_Datatype, MPI_Comm, int, Transposition* ); MPI_Datatype, MPI_Comm, int, Transposition* );
int P_Transpose_start ( void*, void*, Transposition* ); int P_Transpose_start ( void*, void*, Transposition* );
...@@ -34,3 +168,50 @@ int P_Transpose_end ( Transposition* ); ...@@ -34,3 +168,50 @@ int P_Transpose_end ( Transposition* );
int P_Transpose_free ( Transposition* ); int P_Transpose_free ( Transposition* );
int P_Transpose ( int, void*, int, int*, void*, int, int*, int*, int*, int P_Transpose ( int, void*, int, int*, void*, int, int*, int*, int*,
int*, MPI_Datatype, MPI_Comm, int ); int*, MPI_Datatype, MPI_Comm, int );
// -------------------------------------------------------------------------- //
// v.1.95 - persistent exchanges //
// -------------------------------------------------------------------------- //
// NOTE(review): parameters are unnamed here and the definitions are not in
// this part of the file; see the implementation for the meaning of each
// positional argument.
int PST_Transpose_init(int, void*, int, int*, void*, int, int*, int*, int*, int*,
MPI_Datatype, MPI_Comm, int, Transposition*);
int PST_Transpose_start(Transposition*);
int PST_Transpose_end(Transposition*);
int PST_Transpose_free(Transposition*);
int PST_Transpose(int, void*, int, int*, void*, int, int*, int*, int*,
int*, MPI_Datatype, MPI_Comm, int);
// -------------------------------------------------------------------------- //
// v.1.7 - manual packing //
// -------------------------------------------------------------------------- //
// start/end take two void* arguments ahead of the descriptor (presumably the
// source and destination arrays - confirm against the implementation).
int P_Transpose_mp_init(int, int, int*, int, int*, int*, int*, int*,
MPI_Datatype, MPI_Comm, int, Transposition*);
int P_Transpose_mp_start(void*, void*, Transposition*);
int P_Transpose_mp_end(void*, void*, Transposition*);
int P_Transpose_mp_free(Transposition*);
int P_Transpose_mp(int, void*, int, int*, void*, int, int*, int*, int*,
int*, MPI_Datatype, MPI_Comm, int);
// -------------------------------------------------------------------------- //
// v.1.95 - persistent exchanges for manual packing //
// -------------------------------------------------------------------------- //
int PST_Transpose_mp_init(int, void*, int, int*, void*, int, int*, int*, int*, int*,
MPI_Datatype, MPI_Comm, int, Transposition*);
int PST_Transpose_mp_start(void*, void*, Transposition*);
int PST_Transpose_mp_end(void*, void*, Transposition*);
int PST_Transpose_mp_free(Transposition*);
int PST_Transpose_mp(int, void*, int, int*, void*, int, int*, int*, int*,
int*, MPI_Datatype, MPI_Comm, int);
// -------------------------------------------------------------------------- //
// v.1.95 - choice subroutines //
// -------------------------------------------------------------------------- //
// Every routine takes a trailing int exchange mode (one of the IS_MPI_*
// values). P_Transpose_opt_free is the call ParLib_deinit uses for
// transpositions still registered at shutdown.
int P_Transpose_opt_init(int, void*, int, int*, void*, int, int*, int*, int*, int*,
MPI_Datatype, MPI_Comm, int, Transposition*, int);
int P_Transpose_opt_start(void*, void*, Transposition*, int);
int P_Transpose_opt_end(void*, void*, Transposition*, int);
int P_Transpose_opt_free(Transposition*, int);
int P_Transpose_opt(int, void*, int, int*, void*, int, int*, int*, int*,
int*, MPI_Datatype, MPI_Comm, int, int);
// -------------------------------------------------------------------------- //
#include <stdlib.h>
#include "parlib.h" #include "parlib.h"
// Additionall calls to make this version 1.1 compliant with version 2.1 // ParLib v1.8 initialization
// -------------------------------------------------------------------------- // // -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE #ifdef FORTRANUNDERSCORE
void parlib_init_() void parlib_init_()
...@@ -10,6 +11,7 @@ void parlib_init__() ...@@ -10,6 +11,7 @@ void parlib_init__()
void parlib_init() void parlib_init()
#endif #endif
{ {
ParLib_init();
} }
#ifdef FORTRANUNDERSCORE #ifdef FORTRANUNDERSCORE
...@@ -20,5 +22,6 @@ void parlib_deinit__() ...@@ -20,5 +22,6 @@ void parlib_deinit__()
void parlib_deinit() void parlib_deinit()
#endif #endif
{ {
ParLib_deinit();
} }
// -------------------------------------------------------------------------- // // -------------------------------------------------------------------------- //
INTEGER HANDLE_SIZE INTEGER HANDLE_SIZE
PARAMETER (HANDLE_SIZE = 2) PARAMETER (HANDLE_SIZE = 2)
! Dimension limit for manual-packing exchanges; presumably has to match the
! C macro of the same name - confirm against plutils.h.
INTEGER MAX_PARLIB_MP_DIMS
PARAMETER (MAX_PARLIB_MP_DIMS = 6)
! Exchange-mode selectors; the values equal the IS_MPI_* macros in parlib.h.
INTEGER IS_MPI_TYPED
PARAMETER (IS_MPI_TYPED = 0)
INTEGER IS_MPI_MANUAL_PACK
PARAMETER (IS_MPI_MANUAL_PACK = 1)
INTEGER IS_MPI_TYPED_PERSISTENT
PARAMETER (IS_MPI_TYPED_PERSISTENT = 2)
INTEGER IS_MPI_MANUAL_PACK_PERSISTENT
PARAMETER (IS_MPI_MANUAL_PACK_PERSISTENT = 3)
#include "plutils.h"
#include <stdlib.h>
#include <string.h>
// parlib buffers [definition]
// Pool of reusable heap buffers managed by init/deinit/get/free_plbuf.
// -------------------------------------------------------------------------- //
void *plbuf[MAX_PL_BUFS]; // buffer address per slot (meaningful while plbuf_size > 0)
int plbuf_size[MAX_PL_BUFS]; // allocated size of each slot in bytes, 0 = nothing allocated
int plbuf_status[MAX_PL_BUFS]; // 1 while the slot is handed out, 0 when available
int plbuf_ptr; // index where get_plbuf starts searching for an available slot
// -------------------------------------------------------------------------- //
// using loop_count pragma expectations
// Expected trip counts passed to `#pragma loop_count` in the multi-dimensional
// copy routines; the pragmas are compiled only under __INTEL_COMPILER.
// ----------------------------------------------------------------------------------- //
#define CP_EXPECT_5D_J1 21 // number of vertical levels[1]
#define CP_EXPECT_5D_J2 73 // number of vertical levels[2]
#define CP_EXPECT_5D_K1 1 // number of variables[1]
#define CP_EXPECT_5D_K2 2 // number of variables[2]
#define CP_EXPECT_5D_K3 5 // number of variables[3]
#define CP_EXPECT_5D_K4 10 // number of variables[4]
#define CP_EXPECT_5D_Q1 1 // number of time slices[1]
#define CP_EXPECT_5D_Q2 2 // number of time slices[2]
// ----------------------------------------------------------------------------------- //
// buffer memory interface
// ----------------------------------------------------------------------------------- //
// Reset the buffer pool bookkeeping: every slot is recorded as holding no
// memory and being available, and the search position returns to slot 0.
// No memory is allocated or released here.
void init_plbuf()
{
    int slot = 0;

    while (slot < MAX_PL_BUFS) {
        plbuf_status[slot] = 0;
        plbuf_size[slot] = 0;
        slot++;
    }
    plbuf_ptr = 0;
}
// Release every buffer held by the pool and reset the bookkeeping.
// Freed slots are also set to NULL: a slot with size 0 can still be handed
// out by get_plbuf when the requested size is not positive, and must not
// expose a pointer that has already been freed.
void deinit_plbuf()
{
    int k;

    for (k = 0; k < MAX_PL_BUFS; k++) {
        if (plbuf_size[k] > 0) {
            free(plbuf[k]);
            plbuf[k] = NULL;
            plbuf_size[k] = 0;
        }
        plbuf_status[k] = 0;
    }
    plbuf_ptr = 0;
}
// Hand out a buffer of at least msize bytes.
// The first available pool slot at or after plbuf_ptr is used and grown if it
// is too small; *id receives the slot index. When every slot is busy a
// one-off malloc is returned with *id == MAX_PL_BUFS, which free_plbuf
// releases with free().
// If growing a slot fails, the slot is left empty and available, *id is set
// to -1 (ignored by free_plbuf) and NULL is returned. Previously the slot was
// recorded as msize bytes large with a NULL pointer, so later smaller
// requests silently received NULL.
void* get_plbuf(int msize, int* id)
{
    int k;

    for (k = plbuf_ptr; k < MAX_PL_BUFS; k++) {
        if (plbuf_status[k]) continue;

        if (msize > plbuf_size[k]) {
            // contents need not survive, so free + malloc instead of realloc
            if (plbuf_size[k] > 0) free(plbuf[k]);
            plbuf[k] = (void*)malloc(msize);
            if (plbuf[k] == NULL) {
                plbuf_size[k] = 0;
                (*id) = -1;
                return NULL;
            }
            plbuf_size[k] = msize;
        }

        plbuf_status[k] = 1;
        plbuf_ptr = k + 1;
        (*id) = k;
        return plbuf[k];
    }

    // no free buffer found:
    (*id) = MAX_PL_BUFS;
    return (void*)malloc(msize);
}
// Give a buffer obtained from get_plbuf back.
//   id < 0            : nothing to do
//   id >= MAX_PL_BUFS : ptr was a one-off allocation and is freed
//   otherwise         : the pool slot is marked available (its memory is kept
//                       for reuse) and the search position is moved back to
//                       it if it lies before the current one
void free_plbuf(void* ptr, int id)
{
    if (id >= MAX_PL_BUFS) {
        free(ptr);
    }
    else if (id >= 0) {
        plbuf_status[id] = 0;
        if (plbuf_ptr > id) plbuf_ptr = id;
    }
}
// ----------------------------------------------------------------------------------- //
// 1D copy
// ----------------------------------------------------------------------------------- //
// Copy nx bytes from `a` into `buf`. Runs shorter than MIN_MEMCPY_BLOCK are
// copied with a plain loop, longer ones with memcpy.
inline void copy_to_buffer_1d(char* _RESTRICT buf, const char* _RESTRICT const a,
const int nx)
{
    int pos;

    if (nx >= MIN_MEMCPY_BLOCK) {
        memcpy(buf, a, nx * sizeof(char));
        return;
    }
    for (pos = 0; pos < nx; pos++) {
        buf[pos] = a[pos];
    }
}
// Copy nx bytes from `buf` back into `a`. Runs shorter than MIN_MEMCPY_BLOCK
// are copied with a plain loop, longer ones with memcpy.
inline void copy_from_buffer_1d(char* _RESTRICT a, const char* _RESTRICT const buf,
const int nx)
{
    int pos;

    if (nx >= MIN_MEMCPY_BLOCK) {
        memcpy(a, buf, nx * sizeof(char));
        return;
    }
    for (pos = 0; pos < nx; pos++) {
        a[pos] = buf[pos];
    }
}
// ----------------------------------------------------------------------------------- //
// 2D copy
// ----------------------------------------------------------------------------------- //
// Pack ny rows of nx bytes from the strided array `a` (row pitch shx bytes)
// into the contiguous buffer `buf`. Rows shorter than MIN_MEMCPY_BLOCK are
// copied byte by byte, longer rows with one memcpy each.
inline void copy_to_buffer_2d(char* _RESTRICT buf, const char* _RESTRICT const a,
const int nx, const int ny,
const int shx)
{
    int col, row;
    int src = 0, dst = 0;

    if (nx >= MIN_MEMCPY_BLOCK) {
        const int row_bytes = nx * sizeof(char);

        for (row = 0; row < ny; row++) {
            src = row * shx;
            memcpy(&buf[dst], &a[src], row_bytes);
            dst += nx;
        }
        return;
    }

    for (row = 0; row < ny; row++) {
        for (col = 0; col < nx; col++) {
            buf[dst + col] = a[src + col];
        }
        src += shx;
        dst += nx;
    }
}
// Unpack ny rows of nx bytes from the contiguous buffer `buf` into the
// strided array `a` (row pitch shx bytes). Rows shorter than MIN_MEMCPY_BLOCK
// are copied byte by byte, longer rows with one memcpy each.
inline void copy_from_buffer_2d(char* _RESTRICT a, const char* _RESTRICT const buf,
const int nx, const int ny,
const int shx)
{
    int col, row;
    int dst = 0, src = 0;

    if (nx >= MIN_MEMCPY_BLOCK) {
        const int row_bytes = nx * sizeof(char);

        for (row = 0; row < ny; row++) {
            dst = row * shx;
            memcpy(&a[dst], &buf[src], row_bytes);
            src += nx;
        }
        return;
    }

    for (row = 0; row < ny; row++) {
        for (col = 0; col < nx; col++) {
            a[dst + col] = buf[src + col];
        }
        dst += shx;
        src += nx;
    }
}
// ----------------------------------------------------------------------------------- //
// 3D copy
// ----------------------------------------------------------------------------------- //
// Pack an nx * ny * nz block of bytes from the strided array `a` into the
// contiguous buffer `buf`. shx is the pitch between rows and shxy the pitch
// between planes, both in bytes. Rows shorter than MIN_MEMCPY_BLOCK are
// copied byte by byte, longer rows with one memcpy each.
inline void copy_to_buffer_3d(char* _RESTRICT buf, const char* _RESTRICT const a,
const int nx, const int ny, const int nz,
const int shx, const int shxy)
{
    int col, row, plane;
    int src, dst = 0;

    if (nx >= MIN_MEMCPY_BLOCK) {
        const int row_bytes = nx * sizeof(char);

        for (plane = 0; plane < nz; plane++) {
            src = plane * shxy;
            for (row = 0; row < ny; row++) {
                memcpy(&buf[dst], &a[src], row_bytes);
                src += shx;
                dst += nx;
            }
        }
        return;
    }

    for (plane = 0; plane < nz; plane++) {
        src = plane * shxy;
        for (row = 0; row < ny; row++) {
            for (col = 0; col < nx; col++) {
                buf[dst + col] = a[src + col];
            }
            src += shx;
            dst += nx;
        }
    }
}
// Unpack an nx * ny * nz block of bytes from the contiguous buffer `buf` into
// the strided array `a`. shx is the pitch between rows and shxy the pitch
// between planes, both in bytes. Rows shorter than MIN_MEMCPY_BLOCK are
// copied byte by byte, longer rows with one memcpy each.
inline void copy_from_buffer_3d(char* _RESTRICT a, const char* _RESTRICT const buf,
const int nx, const int ny, const int nz,
const int shx, const int shxy)
{
    int col, row, plane;
    int dst, src = 0;

    if (nx >= MIN_MEMCPY_BLOCK) {
        const int row_bytes = nx * sizeof(char);

        for (plane = 0; plane < nz; plane++) {
            dst = plane * shxy;
            for (row = 0; row < ny; row++) {
                memcpy(&a[dst], &buf[src], row_bytes);
                dst += shx;
                src += nx;
            }
        }
        return;
    }

    for (plane = 0; plane < nz; plane++) {
        dst = plane * shxy;
        for (row = 0; row < ny; row++) {
            for (col = 0; col < nx; col++) {
                a[dst + col] = buf[src + col];
            }
            dst += shx;
            src += nx;
        }
    }
}
// ----------------------------------------------------------------------------------- //
// 4D copy
// ----------------------------------------------------------------------------------- //
// Pack a strided 4D byte region into a contiguous buffer.
//
//   buf   - destination buffer; receives np * nz * ny rows of nx bytes
//   a     - source bytes; row (j, k, p) starts at byte offset
//           p * shxyz + k * shxy + j * shx
//   nx    - number of bytes in one row
//   ny, nz, np - extents of the 2nd, 3rd and 4th dimension
//   shx, shxy, shxyz - byte distances between consecutive indices of the
//           2nd, 3rd and 4th dimension in a
//
// Rows shorter than MIN_MEMCPY_BLOCK are copied byte by byte, longer rows
// are copied with memcpy().  The Intel loop_count hints use the
// CP_EXPECT_5D_* macros, exactly as in the 5D helpers.
//
// Declared `static inline`: under C99/C11 a plain `inline` definition does
// not provide an external definition, so any call the compiler decides not
// to inline would be left unresolved at link time.
static inline void copy_to_buffer_4d(char* _RESTRICT buf, const char* _RESTRICT const a,
    const int nx, const int ny, const int nz, const int np,
    const int shx, const int shxy, const int shxyz)
{
    int i, j, k, p, idx, shidx, bidx = 0;

    if (nx < MIN_MEMCPY_BLOCK) {
        for (p = 0; p < np; p++)
        {
            shidx = p * shxyz;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
#endif
            for (k = 0; k < nz; k++, shidx += shxy)
            {
                idx = shidx;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
#endif
                for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                    for (i = 0; i < nx; i++) {
                        buf[bidx + i] = a[idx + i];
                    }
                }
            }
        }
    }
    else {
        // hoisted out of the loops, as the other N-d helpers do
        const size_t nbx = (size_t)nx * sizeof(char);

        for (p = 0; p < np; p++)
        {
            shidx = p * shxyz;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
#endif
            for (k = 0; k < nz; k++, shidx += shxy)
            {
                idx = shidx;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
#endif
                for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                    memcpy(&buf[bidx], &a[idx], nbx);
                }
            }
        }
    }
}
// Unpack a contiguous buffer into a strided 4D byte region.
//
//   a     - destination bytes; row (j, k, p) starts at byte offset
//           p * shxyz + k * shxy + j * shx
//   buf   - source buffer holding np * nz * ny rows of nx bytes back to back
//   nx    - number of bytes in one row
//   ny, nz, np - extents of the 2nd, 3rd and 4th dimension
//   shx, shxy, shxyz - byte distances between consecutive indices of the
//           2nd, 3rd and 4th dimension in a
//
// Rows shorter than MIN_MEMCPY_BLOCK are copied byte by byte, longer rows
// are copied with memcpy().  The Intel loop_count hints use the
// CP_EXPECT_5D_* macros, exactly as in the 5D helpers.
//
// Declared `static inline`: under C99/C11 a plain `inline` definition does
// not provide an external definition, so any call the compiler decides not
// to inline would be left unresolved at link time.
static inline void copy_from_buffer_4d(char* _RESTRICT a, const char* _RESTRICT const buf,
    const int nx, const int ny, const int nz, const int np,
    const int shx, const int shxy, const int shxyz)
{
    int i, j, k, p, idx, shidx, bidx = 0;

    if (nx < MIN_MEMCPY_BLOCK) {
        for (p = 0; p < np; p++)
        {
            shidx = p * shxyz;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
#endif
            for (k = 0; k < nz; k++, shidx += shxy)
            {
                idx = shidx;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
#endif
                for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                    for (i = 0; i < nx; i++) {
                        a[idx + i] = buf[bidx + i];
                    }
                }
            }
        }
    }
    else {
        // hoisted out of the loops, as the other N-d helpers do
        const size_t nbx = (size_t)nx * sizeof(char);

        for (p = 0; p < np; p++)
        {
            shidx = p * shxyz;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
#endif
            for (k = 0; k < nz; k++, shidx += shxy)
            {
                idx = shidx;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
#endif
                for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                    memcpy(&a[idx], &buf[bidx], nbx);
                }
            }
        }
    }
}
// ----------------------------------------------------------------------------------- //
// 5D copy
// ----------------------------------------------------------------------------------- //
// Pack a strided 5D byte region into a contiguous buffer.
//
//   buf    - destination buffer; receives nq * np * nz * ny rows of nx bytes
//   a      - source bytes; row (j, k, p, q) starts at byte offset
//            q * shxyzp + p * shxyz + k * shxy + j * shx
//   nx     - number of bytes in one row
//   ny, nz, np, nq - extents of the 2nd .. 5th dimension
//   shx, shxy, shxyz, shxyzp - byte distances between consecutive indices
//            of the 2nd .. 5th dimension in a
//
// Rows shorter than MIN_MEMCPY_BLOCK are copied byte by byte, longer rows
// are copied with memcpy().
//
// Declared `static inline`: under C99/C11 a plain `inline` definition does
// not provide an external definition, so any call the compiler decides not
// to inline would be left unresolved at link time.
static inline void copy_to_buffer_5d(char* _RESTRICT buf, const char* _RESTRICT const a,
    const int nx, const int ny, const int nz, const int np, const int nq,
    const int shx, const int shxy, const int shxyz, const int shxyzp)
{
    int i, j, k, p, q, shidx_q, shidx_p, idx, bidx = 0;

    if (nx < MIN_MEMCPY_BLOCK) {
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
#endif
        for (q = 0; q < nq; q++)
        {
            shidx_q = q * shxyzp;
            for (p = 0; p < np; p++, shidx_q += shxyz)
            {
                shidx_p = shidx_q;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
#endif
                for (k = 0; k < nz; k++, shidx_p += shxy)
                {
                    idx = shidx_p;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
#endif
                    for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                        for (i = 0; i < nx; i++) {
                            buf[bidx + i] = a[idx + i];
                        }
                    }
                }
            }
        }
    }
    else {
        const size_t nbx = (size_t)nx * sizeof(char);	// bytes per row

#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
#endif
        for (q = 0; q < nq; q++)
        {
            shidx_q = q * shxyzp;
            for (p = 0; p < np; p++, shidx_q += shxyz)
            {
                shidx_p = shidx_q;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
#endif
                for (k = 0; k < nz; k++, shidx_p += shxy)
                {
                    idx = shidx_p;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
#endif
                    for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                        memcpy(&buf[bidx], &a[idx], nbx);
                    }
                }
            }
        }
    }
}
// Unpack a contiguous buffer into a strided 5D byte region.
//
//   a      - destination bytes; row (j, k, p, q) starts at byte offset
//            q * shxyzp + p * shxyz + k * shxy + j * shx
//   buf    - source buffer holding nq * np * nz * ny rows of nx bytes
//   nx     - number of bytes in one row
//   ny, nz, np, nq - extents of the 2nd .. 5th dimension
//   shx, shxy, shxyz, shxyzp - byte distances between consecutive indices
//            of the 2nd .. 5th dimension in a
//
// Rows shorter than MIN_MEMCPY_BLOCK are copied byte by byte, longer rows
// are copied with memcpy().
//
// Declared `static inline`: under C99/C11 a plain `inline` definition does
// not provide an external definition, so any call the compiler decides not
// to inline would be left unresolved at link time.
static inline void copy_from_buffer_5d(char* _RESTRICT a, const char* _RESTRICT const buf,
    const int nx, const int ny, const int nz, const int np, const int nq,
    const int shx, const int shxy, const int shxyz, const int shxyzp)
{
    int i, j, k, p, q, shidx_q, shidx_p, idx, bidx = 0;

    if (nx < MIN_MEMCPY_BLOCK) {
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
#endif
        for (q = 0; q < nq; q++)
        {
            shidx_q = q * shxyzp;
            for (p = 0; p < np; p++, shidx_q += shxyz)
            {
                shidx_p = shidx_q;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
#endif
                for (k = 0; k < nz; k++, shidx_p += shxy)
                {
                    idx = shidx_p;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
#endif
                    for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                        for (i = 0; i < nx; i++) {
                            a[idx + i] = buf[bidx + i];
                        }
                    }
                }
            }
        }
    }
    else {
        const size_t nbx = (size_t)nx * sizeof(char);	// bytes per row

#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2)
#endif
        for (q = 0; q < nq; q++)
        {
            shidx_q = q * shxyzp;
            for (p = 0; p < np; p++, shidx_q += shxyz)
            {
                shidx_p = shidx_q;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4)
#endif
                for (k = 0; k < nz; k++, shidx_p += shxy)
                {
                    idx = shidx_p;
#if defined(__INTEL_COMPILER)
#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2)
#endif
                    for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                        memcpy(&a[idx], &buf[bidx], nbx);
                    }
                }
            }
        }
    }
}
// ----------------------------------------------------------------------------------- //
// 6D copy
// ----------------------------------------------------------------------------------- //
// Pack a strided 6D byte region into a contiguous buffer.
//
//   buf     - destination buffer; receives ns * nq * np * nz * ny rows of
//             nx bytes stored back to back
//   a       - source bytes; row (j, k, p, q, s) starts at byte offset
//             s * shxyzpq + q * shxyzp + p * shxyz + k * shxy + j * shx
//   nx      - number of bytes in one row
//   ny, nz, np, nq, ns - extents of the 2nd .. 6th dimension
//   shx, shxy, shxyz, shxyzp, shxyzpq - byte distances between consecutive
//             indices of the 2nd .. 6th dimension in a
//
// Rows shorter than MIN_MEMCPY_BLOCK are copied byte by byte, longer rows
// are copied with memcpy().
//
// Declared `static inline`: under C99/C11 a plain `inline` definition does
// not provide an external definition, so any call the compiler decides not
// to inline would be left unresolved at link time.
static inline void copy_to_buffer_6d(char* _RESTRICT buf, const char* _RESTRICT const a,
    const int nx, const int ny, const int nz, const int np, const int nq, const int ns,
    const int shx, const int shxy, const int shxyz, const int shxyzp, const int shxyzpq)
{
    int i, j, k, p, q, s, idx, shidx_q, shidx_p, bidx = 0;

    if (nx < MIN_MEMCPY_BLOCK) {
        for (s = 0; s < ns; s++) {
            for (q = 0; q < nq; q++)
            {
                shidx_q = s * shxyzpq + q * shxyzp;
                for (p = 0; p < np; p++, shidx_q += shxyz)
                {
                    shidx_p = shidx_q;
                    for (k = 0; k < nz; k++, shidx_p += shxy)
                    {
                        idx = shidx_p;
                        for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                            for (i = 0; i < nx; i++) {
                                buf[bidx + i] = a[idx + i];
                            }
                        }
                    }
                }
            }
        }
    }
    else {
        const size_t nbx = (size_t)nx * sizeof(char);	// bytes per row

        for (s = 0; s < ns; s++) {
            for (q = 0; q < nq; q++)
            {
                shidx_q = s * shxyzpq + q * shxyzp;
                for (p = 0; p < np; p++, shidx_q += shxyz)
                {
                    shidx_p = shidx_q;
                    for (k = 0; k < nz; k++, shidx_p += shxy)
                    {
                        idx = shidx_p;
                        for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                            memcpy(&buf[bidx], &a[idx], nbx);
                        }
                    }
                }
            }
        }
    }
}
// Unpack a contiguous buffer into a strided 6D byte region.
//
//   a       - destination bytes; row (j, k, p, q, s) starts at byte offset
//             s * shxyzpq + q * shxyzp + p * shxyz + k * shxy + j * shx
//   buf     - source buffer holding ns * nq * np * nz * ny rows of nx bytes
//             stored back to back
//   nx      - number of bytes in one row
//   ny, nz, np, nq, ns - extents of the 2nd .. 6th dimension
//   shx, shxy, shxyz, shxyzp, shxyzpq - byte distances between consecutive
//             indices of the 2nd .. 6th dimension in a
//
// Rows shorter than MIN_MEMCPY_BLOCK are copied byte by byte, longer rows
// are copied with memcpy().
//
// Declared `static inline`: under C99/C11 a plain `inline` definition does
// not provide an external definition, so any call the compiler decides not
// to inline would be left unresolved at link time.
static inline void copy_from_buffer_6d(char* _RESTRICT a, const char* _RESTRICT const buf,
    const int nx, const int ny, const int nz, const int np, const int nq, const int ns,
    const int shx, const int shxy, const int shxyz, const int shxyzp, const int shxyzpq)
{
    int i, j, k, p, q, s, idx, shidx_q, shidx_p, bidx = 0;

    if (nx < MIN_MEMCPY_BLOCK) {
        for (s = 0; s < ns; s++) {
            for (q = 0; q < nq; q++)
            {
                shidx_q = s * shxyzpq + q * shxyzp;
                for (p = 0; p < np; p++, shidx_q += shxyz)
                {
                    shidx_p = shidx_q;
                    for (k = 0; k < nz; k++, shidx_p += shxy)
                    {
                        idx = shidx_p;
                        for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                            for (i = 0; i < nx; i++) {
                                a[idx + i] = buf[bidx + i];
                            }
                        }
                    }
                }
            }
        }
    }
    else {
        const size_t nbx = (size_t)nx * sizeof(char);	// bytes per row

        for (s = 0; s < ns; s++) {
            for (q = 0; q < nq; q++)
            {
                shidx_q = s * shxyzpq + q * shxyzp;
                for (p = 0; p < np; p++, shidx_q += shxyz)
                {
                    shidx_p = shidx_q;
                    for (k = 0; k < nz; k++, shidx_p += shxy)
                    {
                        idx = shidx_p;
                        for (j = 0; j < ny; j++, idx += shx, bidx += nx) {
                            memcpy(&a[idx], &buf[bidx], nbx);
                        }
                    }
                }
            }
        }
    }
}
// ----------------------------------------------------------------------------------- //
// COPY-TO
// ----------------------------------------------------------------------------------- //
// Pack an ndims-dimensional sub-block of `a` into the contiguous buffer `buf`.
//
//   buf    - destination buffer
//   a      - source array, addressed in bytes
//   ndims  - number of dimensions; values outside 1 .. MAX_PARLIB_MP_DIMS
//            make the call a no-op
//   msgdim - extent of the block in each dimension (in elements)
//   stride - stride[d] scales the byte step of dimension d into the byte
//            step of dimension d + 1; only stride[0 .. ndims-2] is read
//   fsize  - factor converting msgdim[0] and stride[0] into bytes
//
// The byte strides are built up one dimension at a time and the matching
// fixed-rank helper is invoked as soon as the requested rank is reached.
void copy_to_buffer(char* _RESTRICT buf, const char* _RESTRICT const a,
    const int ndims,
    const int* _RESTRICT const msgdim,
    const int* _RESTRICT const stride,
    const int fsize)
{
    int row_bytes;			// length of the innermost run, in bytes
    int sh1, sh2, sh3, sh4;	// cumulative byte strides

    if ((ndims < 1) || (ndims > MAX_PARLIB_MP_DIMS)) return;

    row_bytes = msgdim[0] * fsize;
    if (ndims == 1) {
        copy_to_buffer_1d(buf, a, row_bytes);
        return;
    }

    sh1 = stride[0] * fsize;
    if (ndims == 2) {
        copy_to_buffer_2d(buf, a, row_bytes, msgdim[1], sh1);
        return;
    }

    sh2 = stride[1] * sh1;
    if (ndims == 3) {
        copy_to_buffer_3d(buf, a, row_bytes, msgdim[1], msgdim[2], sh1, sh2);
        return;
    }

    sh3 = stride[2] * sh2;
    if (ndims == 4) {
        copy_to_buffer_4d(buf, a, row_bytes, msgdim[1], msgdim[2], msgdim[3],
            sh1, sh2, sh3);
        return;
    }

    sh4 = stride[3] * sh3;
    if (ndims == 5) {
        copy_to_buffer_5d(buf, a, row_bytes, msgdim[1], msgdim[2], msgdim[3], msgdim[4],
            sh1, sh2, sh3, sh4);
        return;
    }

    if (ndims == 6) {
        const int sh5 = stride[4] * sh4;

        copy_to_buffer_6d(buf, a, row_bytes,
            msgdim[1], msgdim[2], msgdim[3], msgdim[4], msgdim[5],
            sh1, sh2, sh3, sh4, sh5);
    }
}
// ----------------------------------------------------------------------------------- //
// COPY-FROM
// ----------------------------------------------------------------------------------- //
// Unpack the contiguous buffer `buf` into an ndims-dimensional sub-block of `a`.
//
//   a      - destination array, addressed in bytes
//   buf    - source buffer
//   ndims  - number of dimensions; values outside 1 .. MAX_PARLIB_MP_DIMS
//            make the call a no-op
//   msgdim - extent of the block in each dimension (in elements)
//   stride - stride[d] scales the byte step of dimension d into the byte
//            step of dimension d + 1; only stride[0 .. ndims-2] is read
//   fsize  - factor converting msgdim[0] and stride[0] into bytes
//
// The byte strides are built up one dimension at a time and the matching
// fixed-rank helper is invoked as soon as the requested rank is reached.
void copy_from_buffer(char* _RESTRICT a, const char* _RESTRICT const buf,
    const int ndims,
    const int* _RESTRICT const msgdim,
    const int* _RESTRICT const stride,
    const int fsize)
{
    int row_bytes;			// length of the innermost run, in bytes
    int sh1, sh2, sh3, sh4;	// cumulative byte strides

    if ((ndims < 1) || (ndims > MAX_PARLIB_MP_DIMS)) return;

    row_bytes = msgdim[0] * fsize;
    if (ndims == 1) {
        copy_from_buffer_1d(a, buf, row_bytes);
        return;
    }

    sh1 = stride[0] * fsize;
    if (ndims == 2) {
        copy_from_buffer_2d(a, buf, row_bytes, msgdim[1], sh1);
        return;
    }

    sh2 = stride[1] * sh1;
    if (ndims == 3) {
        copy_from_buffer_3d(a, buf, row_bytes, msgdim[1], msgdim[2], sh1, sh2);
        return;
    }

    sh3 = stride[2] * sh2;
    if (ndims == 4) {
        copy_from_buffer_4d(a, buf, row_bytes, msgdim[1], msgdim[2], msgdim[3],
            sh1, sh2, sh3);
        return;
    }

    sh4 = stride[3] * sh3;
    if (ndims == 5) {
        copy_from_buffer_5d(a, buf, row_bytes, msgdim[1], msgdim[2], msgdim[3], msgdim[4],
            sh1, sh2, sh3, sh4);
        return;
    }

    if (ndims == 6) {
        const int sh5 = stride[4] * sh4;

        copy_from_buffer_6d(a, buf, row_bytes,
            msgdim[1], msgdim[2], msgdim[3], msgdim[4], msgdim[5],
            sh1, sh2, sh3, sh4, sh5);
    }
}
// ----------------------------------------------------------------------------------- //
#pragma once
#define MAX_PARLIB_MP_DIMS 6 // maximum number of dims for manual packing
#define MIN_MEMCPY_BLOCK 256 // minimum block (in bytes) for memcpy copy (magic number)
#define MAX_PL_BUFS 4096 // maximum number of parlib internal buffers
// _RESTRICT definition
// ------------------------------------------------------------------- //
#if defined(__INTEL_COMPILER)
#define _RESTRICT restrict
#elif defined(__GNUC__) && !defined(_WIN32) && !defined(_CYGWIN32__)
#define _RESTRICT __restrict__
#elif defined(_MSC_VER)
#define _RESTRICT __restrict
#else
#define _RESTRICT
#endif
// ------------------------------------------------------------------- //
// parlib buffers [declaration]
// -------------------------------------------------------------------------- //
extern void *plbuf[MAX_PL_BUFS];
extern int plbuf_size[MAX_PL_BUFS];
extern int plbuf_status[MAX_PL_BUFS];
extern int plbuf_ptr;
// -------------------------------------------------------------------------- //
void init_plbuf();
void deinit_plbuf();
void* get_plbuf(int msize, int* id);
void free_plbuf(void* ptr, int id);
// -------------------------------------------------------------------------- //
void copy_to_buffer(char* _RESTRICT buf, const char* _RESTRICT const a,
const int ndims,
const int* _RESTRICT const msgdim,
const int* _RESTRICT const stride,
const int fsize);
void copy_from_buffer(char* _RESTRICT a, const char* _RESTRICT const buf,
const int ndims,
const int* _RESTRICT const msgdim,
const int* _RESTRICT const stride,
const int fsize);
// -------------------------------------------------------------------------- //
#include <stdlib.h>
#include "parlib.h" #include "parlib.h"
#include "plutils.h"
#include <stdlib.h>
#include <string.h>
/*
* Error codes:
* 0 - success
* 1 - number of dimensions < 2
* 2 - wrong communicated dimension [source]
* 3 - wrong communicated dimension [dest]
* 4 - wrong communicated dimensions [source == dest]
* 5 - nonpositive dimension
* 6 - negative boundary width
* 7 - number of dimensions exceeds maximum value (only for MP - manual packing)
*/
// -------------------------------------------------------------------------- //
int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period, lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp ) transp )
int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
int *blklen, *overlap; int *blklen, *overlap;
MPI_Datatype datatype; MPI_Datatype datatype;
...@@ -14,12 +32,18 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, ...@@ -14,12 +32,18 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
int idim, nproc, iproc, ip, strd, count; int idim, nproc, iproc, ip, strd, count;
int wblka, wblkb, begb; int wblka, wblkb, begb;
int ifsta, ifstb, idir, suma, sumb; int ifsta, ifstb, idir, suma, sumb;
MPI_Aint fsize; MPI_Aint lb, fsize;
MPI_Datatype oldtype, *stype, *rtype; MPI_Datatype oldtype, *stype, *rtype;
int *sbeg, *rbeg; int *sbeg, *rbeg;
/*
* Check input parameters // Setting degenerate-success cases conditions for consistency
*/ // including: nproc=0, iproc=MPI_UNDEFINED
transp->nproc = 0;
transp->iproc = MPI_UNDEFINED;
//
// Check input parameters
//
if (ndims < 2) { return 1; } if (ndims < 2) { return 1; }
if (dim_source < 1 || dim_source > ndims) { return 2; } if (dim_source < 1 || dim_source > ndims) { return 2; }
if (dim_dest < 1 || dim_dest > ndims) { return 3; } if (dim_dest < 1 || dim_dest > ndims) { return 3; }
...@@ -30,9 +54,11 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, ...@@ -30,9 +54,11 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
for (idir = 0; idir < 2; idir++) { for (idir = 0; idir < 2; idir++) {
if (overlap[idir] < 0) { return 6; } if (overlap[idir] < 0) { return 6; }
} }
/* //
* Define the number of processors in the group and the rank // Define the number of processors in the group and the rank
*/ //
if (comm == MPI_COMM_NULL) { return 0; } // empty communicator
MPI_Comm_size(comm, &nproc); MPI_Comm_size(comm, &nproc);
if (nproc == 0) { return 0; } if (nproc == 0) { return 0; }
MPI_Comm_rank(comm, &iproc); MPI_Comm_rank(comm, &iproc);
...@@ -58,19 +84,24 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, ...@@ -58,19 +84,24 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
if (overlap[0] > lblks_dest[0]) { return 12; } if (overlap[0] > lblks_dest[0]) { return 12; }
if (overlap[1] > lblks_dest[nproc - 1]) { return 13; } if (overlap[1] > lblks_dest[nproc - 1]) { return 13; }
MPI_Type_extent ( datatype, &fsize ); MPI_Type_get_extent(datatype, &lb, &fsize);
/* //
* Allocate memory // Allocate memory
*/ //
stype = transp->stype = stype = transp->stype = (MPI_Datatype *)get_plbuf(2 * nproc * sizeof(MPI_Datatype),
(MPI_Datatype *) malloc ( sizeof(MPI_Datatype)*nproc ); &transp->buf_id[0]);
rtype = transp->rtype = rtype = transp->rtype = &transp->stype[nproc];
(MPI_Datatype *) malloc ( sizeof(MPI_Datatype)*nproc );
sbeg = transp->sbeg = (int *) malloc ( sizeof(int)*nproc ); sbeg = transp->sbeg = (int*)get_plbuf(2 * nproc * sizeof(int),
rbeg = transp->rbeg = (int *) malloc ( sizeof(int)*nproc ); &transp->buf_id[1]);
/* rbeg = transp->rbeg = &transp->sbeg[nproc];
* Define data types for the blocks and the beginings of the blocks
*/ transp->req = (MPI_Request *)get_plbuf(2 * nproc * sizeof(MPI_Request),
&transp->buf_id[2]);
//
// Define data types for the blocks and the beginings of the blocks
//
ifsta = ifstb = 1; ifsta = ifstb = 1;
for (ip = 0; ip < nproc; ip++) { for (ip = 0; ip < nproc; ip++) {
wblka = lblks_source[iproc]; wblka = lblks_source[iproc];
...@@ -78,22 +109,41 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, ...@@ -78,22 +109,41 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
if (ip > 0 || period) wblkb += overlap[0]; if (ip > 0 || period) wblkb += overlap[0];
if (ip < nproc - 1 || period) wblkb += overlap[1]; if (ip < nproc - 1 || period) wblkb += overlap[1];
oldtype = datatype; if (dim_source - 1 == 0) {
strd = 1; count = wblka;
for ( idim = 0; idim < ndims; idim++ ) { }
if ( idim == dim_source-1 ) { else if (dim_dest - 1 == 0) {
count = wblkb;
}
else {
count = blklen[0];
}
MPI_Type_contiguous(count, datatype, stype + ip);
oldtype = stype[ip];
if (dim_source - 1 == 0) {
strd = blklen[0];
}
else {
strd = stride[0];
}
for (idim = 1; idim < ndims; idim++) {
if (dim_source - 1 == idim) {
count = wblka; count = wblka;
} else if ( idim == dim_dest-1 ) { }
else if (dim_dest - 1 == idim) {
count = wblkb; count = wblkb;
} else { }
else {
count = blklen[idim]; count = blklen[idim];
} }
MPI_Type_hvector ( count, 1, strd*fsize, oldtype, stype+ip ); MPI_Type_create_hvector(count, 1, strd*fsize, oldtype, stype + ip);
if ( idim > 0 ) { MPI_Type_free ( &oldtype ); } MPI_Type_free(&oldtype);
oldtype = stype[ip]; oldtype = stype[ip];
if (idim == dim_source - 1) { if (idim == dim_source - 1) {
strd *= blklen[idim]; strd *= blklen[idim];
} else { }
else {
strd *= stride[idim]; strd *= stride[idim];
} }
} }
...@@ -103,22 +153,42 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, ...@@ -103,22 +153,42 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
wblkb = lblks_dest[iproc]; wblkb = lblks_dest[iproc];
if (iproc > 0 || period) wblkb += overlap[0]; if (iproc > 0 || period) wblkb += overlap[0];
if (iproc < nproc - 1 || period) wblkb += overlap[1]; if (iproc < nproc - 1 || period) wblkb += overlap[1];
oldtype = datatype;
strd = 1; if (dim_source - 1 == 0) {
for ( idim = 0; idim < ndims; idim++ ) { count = wblka;
if ( idim == dim_source-1 ) { }
else if (dim_dest - 1 == 0) {
count = wblkb;
}
else {
count = blklen[0];
}
MPI_Type_contiguous(count, datatype, rtype + ip);
oldtype = rtype[ip];
if (dim_dest - 1 == 0) {
strd = blklen[0];
}
else {
strd = stride[0];
}
for (idim = 1; idim < ndims; idim++) {
if (dim_source - 1 == idim) {
count = wblka; count = wblka;
} else if ( idim == dim_dest-1 ) { }
else if (dim_dest - 1 == idim) {
count = wblkb; count = wblkb;
} else { }
else {
count = blklen[idim]; count = blklen[idim];
} }
MPI_Type_hvector ( count, 1, strd*fsize, oldtype, rtype+ip ); MPI_Type_create_hvector(count, 1, strd*fsize, oldtype, rtype + ip);
if ( idim > 0 ) { MPI_Type_free ( &oldtype ); } MPI_Type_free(&oldtype);
oldtype = rtype[ip]; oldtype = rtype[ip];
if (idim == dim_dest - 1) { if (idim == dim_dest - 1) {
strd *= blklen[idim]; strd *= blklen[idim];
} else { }
else {
strd *= stride[idim]; strd *= stride[idim];
} }
} }
...@@ -130,7 +200,8 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, ...@@ -130,7 +200,8 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
for (idim = 0; idim < dim_dest - 1; idim++) { for (idim = 0; idim < dim_dest - 1; idim++) {
if (idim == dim_source - 1) { if (idim == dim_source - 1) {
strd *= blklen[idim]; strd *= blklen[idim];
} else { }
else {
strd *= stride[idim]; strd *= stride[idim];
} }
} }
...@@ -143,7 +214,8 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, ...@@ -143,7 +214,8 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
for (idim = 0; idim < dim_source - 1; idim++) { for (idim = 0; idim < dim_source - 1; idim++) {
if (idim == dim_dest - 1) { if (idim == dim_dest - 1) {
strd *= blklen[idim]; strd *= blklen[idim];
} else { }
else {
strd *= stride[idim]; strd *= stride[idim];
} }
} }
...@@ -157,102 +229,84 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, ...@@ -157,102 +229,84 @@ int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
transp->fsize = fsize; transp->fsize = fsize;
return 0; return 0;
} }
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_Transpose_start ( arr_source, arr_dest, transp ) int P_Transpose_start ( arr_source, arr_dest, transp )
void *arr_source, *arr_dest; void *arr_source, *arr_dest;
Transposition *transp; Transposition *transp;
{ {
char *arr_source_ch = (char *) arr_source; char *arr_source_ch = (char *) arr_source;
char *arr_dest_ch = (char *) arr_dest; char *arr_dest_ch = (char *) arr_dest;
int nproc = transp->nproc;
int iproc = transp->iproc;
MPI_Aint fsize = transp->fsize;
int *sbeg = transp->sbeg;
int *rbeg = transp->rbeg;
MPI_Datatype *stype = transp->stype;
MPI_Datatype *rtype = transp->rtype;
MPI_Comm comm = transp->comm;
MPI_Request *sreq, *rreq;
int ip; int ip;
char *src, *dest; char *src, *dest;
if ( nproc == 0 ) { return 0; } if (transp->nproc == 0) { return 0; }
if ( iproc == MPI_UNDEFINED ) { return 0; } if (transp->iproc == MPI_UNDEFINED) { return 0; }
/*
* Allocate memory //
*/ // Start the communication
sreq = transp->sreq = //
(MPI_Request *) malloc ( sizeof(MPI_Request)*nproc ); for (ip = 0; ip < transp->nproc; ip++) {
rreq = transp->rreq = dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize;
(MPI_Request *) malloc ( sizeof(MPI_Request)*nproc ); src = arr_source_ch + transp->sbeg[ip] * transp->fsize;
/* if ((dest == src) && (transp->iproc == ip)) {
* Start the communication transp->req[ip] = MPI_REQUEST_NULL;
*/ transp->req[transp->nproc + ip] = MPI_REQUEST_NULL;
for ( ip = 0; ip < nproc; ip++ ) { }
dest = arr_dest_ch+rbeg[ip]*fsize; else {
src = arr_source_ch+sbeg[ip]*fsize; MPI_Irecv(dest, 1, transp->rtype[ip],
if ( dest == src && iproc == ip ) { ip, 0, transp->comm,
rreq[ip] = sreq[ip] = MPI_REQUEST_NULL; &transp->req[transp->nproc + ip]);
} else {
MPI_Irecv ( dest, 1, rtype[ip], ip, 0, comm, rreq+ip ); MPI_Isend(src, 1, transp->stype[ip],
MPI_Isend ( src, 1, stype[ip], ip, 0, comm, sreq+ip ); ip, 0, transp->comm,
&transp->req[ip]);
} }
} }
return 0; return 0;
} }
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_Transpose_end ( transp ) int P_Transpose_end ( transp )
Transposition *transp; Transposition *transp;
{ {
int nproc = transp->nproc; if (transp->nproc == 0) { return 0; }
int iproc = transp->iproc; if (transp->iproc == MPI_UNDEFINED) { return 0; }
MPI_Request *sreq = transp->sreq;
MPI_Request *rreq = transp->rreq;
int ip;
MPI_Status status;
if ( nproc == 0 ) { return 0; }
if ( iproc == MPI_UNDEFINED ) { return 0; }
for ( ip = 0; ip < nproc; ip++ ) { MPI_Waitall(2 * transp->nproc, transp->req, MPI_STATUSES_IGNORE);
MPI_Wait ( rreq+ip, &status );
MPI_Wait ( sreq+ip, &status );
}
return 0; return 0;
} }
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_Transpose_free ( transp ) int P_Transpose_free ( transp )
Transposition *transp; Transposition *transp;
{ {
int nproc = transp->nproc;
int iproc = transp->iproc;
int *sbeg = transp->sbeg;
int *rbeg = transp->rbeg;
MPI_Datatype *stype = transp->stype;
MPI_Datatype *rtype = transp->rtype;
MPI_Request *sreq = transp->sreq;
MPI_Request *rreq = transp->rreq;
int ip; int ip;
if ( nproc == 0 ) { return 0; } if (transp->nproc == 0) { return 0; }
if ( iproc == MPI_UNDEFINED ) { return 0; } if (transp->iproc == MPI_UNDEFINED) { return 0; }
for ( ip = 0; ip < nproc; ip++ ) { for (ip = 0; ip < transp->nproc; ip++) {
MPI_Type_free ( rtype+ip ); MPI_Type_free(transp->rtype + ip);
MPI_Type_free ( stype+ip ); MPI_Type_free(transp->stype + ip);
} }
free ( stype );
free ( rtype ); free_plbuf(transp->stype, transp->buf_id[0]);
free ( sbeg ); free_plbuf(transp->sbeg, transp->buf_id[1]);
free ( rbeg ); free_plbuf(transp->req, transp->buf_id[2]);
free ( sreq );
free ( rreq );
return 0; return 0;
} }
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, int P_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period ) period )
void *arr_source, *arr_dest; void *arr_source, *arr_dest;
int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
int *blklen, *overlap; int *blklen, *overlap;
...@@ -261,6 +315,7 @@ int P_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, ...@@ -261,6 +315,7 @@ int P_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest,
int period; int period;
{ {
Transposition transp; Transposition transp;
int ierr; int ierr;
if (ierr = P_Transpose_init(ndims, dim_source, lblks_source, if (ierr = P_Transpose_init(ndims, dim_source, lblks_source,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
...@@ -273,3 +328,861 @@ int P_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, ...@@ -273,3 +328,861 @@ int P_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest,
P_Transpose_free(&transp); P_Transpose_free(&transp);
return 0; return 0;
} }
// -------------------------------------------------------------------------- //
// v.1.95 - persistent exchanges //
// -------------------------------------------------------------------------- //
int PST_Transpose_init ( ndims, arr_source, dim_source, lblks_source,
arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp )
void *arr_source, *arr_dest;
int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
int *blklen, *overlap;
MPI_Datatype datatype;
MPI_Comm comm;
int period;
Transposition *transp;
{
char *arr_source_ch = (char *)arr_source;
char *arr_dest_ch = (char *)arr_dest;
int ip;
char *src, *dest;
int ierr = P_Transpose_init(ndims, dim_source, lblks_source,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period, transp);
if (ierr != 0) return ierr;
transp->psrc = arr_source;
transp->pdest = arr_dest;
if (transp->nproc == 0) { return 0; }
if (transp->iproc == MPI_UNDEFINED) { return 0; }
//
// Setup the communication
//
for (ip = 0; ip < transp->nproc; ip++) {
dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize;
src = arr_source_ch + transp->sbeg[ip] * transp->fsize;
if ((dest == src) && (transp->iproc == ip)) {
continue;
}
else {
MPI_Recv_init(dest, 1, transp->rtype[ip],
ip, 0, transp->comm,
&transp->req[transp->nproc + ip]);
MPI_Send_init(src, 1, transp->stype[ip],
ip, 0, transp->comm,
&transp->req[ip]);
}
}
return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int PST_Transpose_start ( transp )
Transposition *transp;
{
char *arr_source_ch = (char *)transp->psrc;
char *arr_dest_ch = (char *)transp->pdest;
int ip;
char *src, *dest;
if (transp->nproc == 0) { return 0; }
if (transp->iproc == MPI_UNDEFINED) { return 0; }
//
// Start the communication
//
for (ip = 0; ip < transp->nproc; ip++) {
dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize;
src = arr_source_ch + transp->sbeg[ip] * transp->fsize;
if ((dest == src) && (transp->iproc == ip)) {
transp->req[ip] = MPI_REQUEST_NULL;
transp->req[transp->nproc + ip] = MPI_REQUEST_NULL;
}
else {
MPI_Start(&transp->req[transp->nproc + ip]);
MPI_Start(&transp->req[ip]);
}
}
return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
//
// Complete a transposition started with PST_Transpose_start().
// Delegates to P_Transpose_end() and passes its result through.
//
int PST_Transpose_end ( transp )
	Transposition *transp;
{
	const int rc = P_Transpose_end(transp);

	return rc;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
//
// Release a transposition created by PST_Transpose_init().
//
// The persistent requests are freed first (receive, then send, for every
// rank except an in-place local block, which never got requests); the rest
// of the clean-up is handed to P_Transpose_free(), whose result is returned.
// Returns 0 immediately for the degenerate nproc == 0 and
// iproc == MPI_UNDEFINED cases.
//
int PST_Transpose_free ( transp )
	Transposition *transp;
{
	char *const src_base = (char *)transp->psrc;
	char *const dst_base = (char *)transp->pdest;
	int rank;

	if (transp->nproc == 0 || transp->iproc == MPI_UNDEFINED) {
		return 0;
	}

	for (rank = 0; rank < transp->nproc; rank++) {
		const char *const rptr = dst_base + transp->rbeg[rank] * transp->fsize;
		const char *const sptr = src_base + transp->sbeg[rank] * transp->fsize;
		const int in_place_local = (rank == transp->iproc) && (rptr == sptr);

		if (!in_place_local) {
			MPI_Request_free(&transp->req[transp->nproc + rank]);
			MPI_Request_free(&transp->req[rank]);
		}
	}

	return P_Transpose_free(transp);
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
//
// One-shot transposition through persistent requests: init, start, end, free.
//
// Returns 0 on success, otherwise the error code reported by
// PST_Transpose_init().  The assignment is kept separate from the test:
// written as `ierr = f(...) != 0`, the comparison binds tighter than the
// assignment and every failure would be reported as 1.
//
int PST_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest,
	dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
	period )
	void *arr_source, *arr_dest;
	int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
	int *blklen, *overlap;
	MPI_Datatype datatype;
	MPI_Comm comm;
	int period;
{
	Transposition transp;
	int ierr;

	ierr = PST_Transpose_init(ndims, arr_source, dim_source, lblks_source,
		arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
		period, &transp);
	if (ierr != 0) {
		return ierr;
	}

	PST_Transpose_start(&transp);
	PST_Transpose_end(&transp);
	PST_Transpose_free(&transp);
	return 0;
}
// -------------------------------------------------------------------------- //
// v.1.7 - manual packing //
// -------------------------------------------------------------------------- //
/*
 * P_Transpose_mp_init: fill a Transposition descriptor for the
 * manual-packing ("mp") transposition.
 *
 * The routine validates its arguments, queries the size of `comm` and the
 * rank of the caller, obtains work arrays through get_plbuf, and computes for
 * every peer rank: the per-dimension extents and element counts of the block
 * to send and to receive, the element offsets of those blocks inside the
 * source and destination arrays, and the position of the peer's slice inside
 * the contiguous send/receive staging buffers.  No send or receive is
 * posted here.
 *
 * Parameters (as used by the code below):
 *   ndims          number of array dimensions, must be >= 2 and
 *                  <= MAX_PARLIB_MP_DIMS
 *   dim_source     1-based index of the dimension split in the source array
 *   lblks_source   per-rank block lengths along dim_source (all > 0)
 *   dim_dest       1-based index of the dimension split in the destination
 *   lblks_dest     per-rank block lengths along dim_dest (all > 0)
 *   stride         per-dimension strides (all > 0)
 *   blklen         per-dimension block lengths
 *   overlap        two non-negative widths: overlap[0] is added on the low
 *                  side and overlap[1] on the high side of a destination block
 *   datatype       MPI element type; its extent becomes transp->fsize
 *   comm           communicator; MPI_COMM_NULL is accepted
 *   period         non-zero: overlaps are also applied at the first and last
 *                  rank
 *   transp         descriptor to fill
 *
 * Returns 0 on success, including the degenerate cases (null communicator,
 * nproc == 0, rank == MPI_UNDEFINED) where the descriptor keeps nproc = 0 and
 * iproc = MPI_UNDEFINED.  Non-zero codes:
 *    1  ndims < 2
 *    2  dim_source outside 1..ndims
 *    3  dim_dest outside 1..ndims
 *    4  dim_source == dim_dest
 *    5  some stride <= 0
 *    6  negative overlap
 *    7  ndims > MAX_PARLIB_MP_DIMS, or blklen > stride in a dimension that is
 *       neither dim_source nor dim_dest
 *    8  lblks_source[iproc] > blklen[dim_source - 1]
 *    9  lblks_dest[iproc]   > blklen[dim_dest - 1]
 *   10  sum of lblks_source > stride[dim_source - 1]
 *   11  sum of lblks_dest   > stride[dim_dest - 1]
 *   12  overlap[0] > lblks_dest[0]
 *   13  overlap[1] > lblks_dest[nproc - 1]
 *   14  some lblks_source entry <= 0
 *   15  some lblks_dest entry <= 0
 *
 * NOTE(review): code 7 is returned for two unrelated conditions.
 * NOTE(review): the local `oldtype` is declared but never used.
 * NOTE(review): the pointers returned by get_plbuf are used without a check.
 */
int P_Transpose_mp_init ( ndims, dim_source, lblks_source, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp )
int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
int *blklen, *overlap;
MPI_Datatype datatype;
MPI_Comm comm;
int period;
Transposition *transp;
{
int idim, nproc, iproc, ip, strd, count;
int wblka, wblkb, begb;
int ifsta, ifstb, idir, suma, sumb;
MPI_Aint lb, fsize;
MPI_Datatype oldtype, *stype, *rtype;
int *sbeg, *rbeg;
int **sdims, **rdims;
int *ssize, *rsize;
void **sbuf, **rbuf;
int sbuf_size, rbuf_size, ssh, rsh;
// Setting degenerate-success cases conditions for consistency
// including: nproc=0, iproc=MPI_UNDEFINED
// (the start/end/free routines return immediately on such a descriptor,
// so every early return below leaves it in that state)
transp->nproc = 0;
transp->iproc = MPI_UNDEFINED;
//
// Check input parameters
//
if (ndims < 2) { return 1; }
if (dim_source < 1 || dim_source > ndims) { return 2; }
if (dim_dest < 1 || dim_dest > ndims) { return 3; }
if (dim_source == dim_dest) { return 4; }
for (idim = 0; idim < ndims; idim++) {
if (stride[idim] <= 0) { return 5; }
}
for (idir = 0; idir < 2; idir++) {
if (overlap[idir] < 0) { return 6; }
}
if (ndims > MAX_PARLIB_MP_DIMS) return 7;
//
// Define the number of processors in the group and the rank
//
if (comm == MPI_COMM_NULL) { return 0; } // empty communicator
MPI_Comm_size(comm, &nproc);
if (nproc == 0) { return 0; }
MPI_Comm_rank(comm, &iproc);
if (iproc == MPI_UNDEFINED) { return 0; }
// Per-rank block lengths must be positive; their sums are checked against
// the strides of the split dimensions further down.
suma = sumb = 0;
for (ip = 0; ip < nproc; ip++) {
suma += lblks_source[ip];
sumb += lblks_dest[ip];
if (lblks_source[ip] <= 0) { return 14; }
if (lblks_dest[ip] <= 0) { return 15; }
}
if (lblks_source[iproc] > blklen[dim_source - 1]) { return 8; }
if (lblks_dest[iproc] > blklen[dim_dest - 1]) { return 9; }
if (suma > stride[dim_source - 1]) { return 10; }
if (sumb > stride[dim_dest - 1]) { return 11; }
// In the dimensions that are not split, a block may not exceed the stride.
for (idim = 0; idim < ndims; idim++) {
if (idim != dim_source - 1 && idim != dim_dest - 1) {
if (blklen[idim] > stride[idim]) { return 7; }
}
}
if (overlap[0] > lblks_dest[0]) { return 12; }
if (overlap[1] > lblks_dest[nproc - 1]) { return 13; }
// fsize is the extent of one element of `datatype`; lb is not used further.
MPI_Type_get_extent(datatype, &lb, &fsize);
//
// Allocate memory
//
// Each get_plbuf block holds 2 * nproc entries: the first nproc entries are
// the send-side array, the second nproc entries the receive-side array.
// The buffer ids are kept in transp->buf_id[0..8] for the later free.
stype = transp->stype = (MPI_Datatype *)get_plbuf(2 * nproc * sizeof(MPI_Datatype),
&transp->buf_id[0]);
rtype = transp->rtype = &transp->stype[nproc];
sbeg = transp->sbeg = (int*)get_plbuf(2 * nproc * sizeof(int),
&transp->buf_id[1]);
rbeg = transp->rbeg = &transp->sbeg[nproc];
// req[0..nproc-1] are the send requests, req[nproc..2*nproc-1] the receive
// requests (see the start/end routines).
transp->req = (MPI_Request *)get_plbuf(2 * nproc * sizeof(MPI_Request),
&transp->buf_id[2]);
sdims = transp->sdims = (int**)get_plbuf(2 * nproc * sizeof(int*),
&transp->buf_id[3]);
rdims = transp->rdims = &transp->sdims[nproc];
ssize = transp->ssize = (int*)get_plbuf(2 * nproc * sizeof(int),
&transp->buf_id[4]);
rsize = transp->rsize = &transp->ssize[nproc];
sbuf = transp->sbuf = (void**)get_plbuf(2 * nproc * sizeof(void*),
&transp->buf_id[5]);
rbuf = transp->rbuf = &transp->sbuf[nproc];
// mem_dims backs all sdims[ip] / rdims[ip] rows: ndims ints per peer,
// send rows first, receive rows after them.
transp->mem_dims = (int*)get_plbuf(2 * nproc * ndims * sizeof(int),
&transp->buf_id[6]);
for (ip = 0; ip < nproc; ip++) {
sdims[ip] = transp->sdims[ip] = &transp->mem_dims[ip * ndims];
rdims[ip] = transp->rdims[ip] = &transp->mem_dims[(nproc + ip) * ndims];
}
//
// Define data types for the blocks and the beginings of the blocks
//
sbuf_size = 0;
rbuf_size = 0;
// ifsta / ifstb: 1-based running start index of peer ip's block along
// dim_source / dim_dest; advanced by the block lengths at the loop end.
ifsta = ifstb = 1;
for (ip = 0; ip < nproc; ip++) {
// Block sent to peer ip: this rank's width along dim_source, peer's width
// (plus the applicable overlaps) along dim_dest.
wblka = lblks_source[iproc];
wblkb = lblks_dest[ip];
if (ip > 0 || period) wblkb += overlap[0];
if (ip < nproc - 1 || period) wblkb += overlap[1];
ssize[ip] = 1;
// The element type is sent as-is; ssize[ip] is the element count.
stype[ip] = datatype;
for (idim = 0; idim < ndims; idim++) {
if (dim_source - 1 == idim) {
count = wblka;
}
else if (dim_dest - 1 == idim) {
count = wblkb;
}
else {
count = blklen[idim];
}
sdims[ip][idim] = count;
ssize[ip] *= count;
}
sbuf_size += ssize[ip];
// Block received from peer ip: peer's width along dim_source, this rank's
// width (plus the applicable overlaps) along dim_dest.
wblka = lblks_source[ip];
wblkb = lblks_dest[iproc];
if (iproc > 0 || period) wblkb += overlap[0];
if (iproc < nproc - 1 || period) wblkb += overlap[1];
rsize[ip] = 1;
rtype[ip] = datatype;
for (idim = 0; idim < ndims; idim++) {
if (dim_source - 1 == idim) {
count = wblka;
}
else if (dim_dest - 1 == idim) {
count = wblkb;
}
else {
count = blklen[idim];
}
rdims[ip][idim] = count;
rsize[ip] *= count;
}
rbuf_size += rsize[ip];
// sbeg[ip]: element offset, inside the source array, of the block sent to
// peer ip.  begb is the 1-based start along dim_dest, moved back by the
// low-side overlap where one applies.
begb = ifstb;
if (ip > 0 || period) begb -= overlap[0];
// strd: number of elements per unit step along dim_dest in the source
// array, where dim_source contributes blklen instead of stride.
strd = 1;
for (idim = 0; idim < dim_dest - 1; idim++) {
if (idim == dim_source - 1) {
strd *= blklen[idim];
}
else {
strd *= stride[idim];
}
}
sbeg[ip] = strd*(begb - 1);
// rbeg[ip]: element offset, inside the destination array, of the block
// received from peer ip.  It starts overlap[0] steps (in the same strd
// units as above) before the array origin when a low-side overlap applies.
rbeg[ip] = 0;
if (iproc > 0 || period) rbeg[ip] -= overlap[0] * strd;
// strd is recomputed for a unit step along dim_source, with dim_dest
// contributing blklen instead of stride.
strd = 1;
for (idim = 0; idim < dim_source - 1; idim++) {
if (idim == dim_dest - 1) {
strd *= blklen[idim];
}
else {
strd *= stride[idim];
}
}
rbeg[ip] += strd*(ifsta - 1);
ifsta += lblks_source[ip];
ifstb += lblks_dest[ip];
}
//
// Define message buffers
//
// One contiguous staging area per direction, sized in bytes as
// (total element count) * fsize.
transp->mem_sbuf = (void*)get_plbuf(sbuf_size * fsize * sizeof(char),
&transp->buf_id[7]);
transp->mem_rbuf = (void*)get_plbuf(rbuf_size * fsize * sizeof(char),
&transp->buf_id[8]);
// sbuf[ip] / rbuf[ip] point at peer ip's slice; ssh / rsh are running
// byte offsets into the two staging areas.
ssh = 0;
rsh = 0;
for (ip = 0; ip < nproc; ip++) {
sbuf[ip] = transp->sbuf[ip] = (void*)((char*)transp->mem_sbuf + ssh);
rbuf[ip] = transp->rbuf[ip] = (void*)((char*)transp->mem_rbuf + rsh);
ssh += ssize[ip] * fsize;
rsh += rsize[ip] * fsize;
}
// Only now is the descriptor marked as non-degenerate.
transp->nproc = nproc;
transp->iproc = iproc;
transp->comm = comm;
transp->fsize = fsize;
transp->ndims = ndims;
memcpy(transp->sstride, stride, ndims * sizeof(int));
memcpy(transp->rstride, stride, ndims * sizeof(int));
// modifying send-recv strides to take into account arr_src != arr_dest
// (sstride uses blklen along dim_source, rstride uses blklen along dim_dest)
for (idim = 0; idim < ndims; idim++) {
if (idim == dim_source - 1) {
transp->sstride[idim] = blklen[idim];
}
if (idim == dim_dest - 1) {
transp->rstride[idim] = blklen[idim];
}
}
return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int P_Transpose_mp_start ( arr_source, arr_dest, transp )
void *arr_source, *arr_dest;
Transposition *transp;
{
char *arr_source_ch = (char *) arr_source;
char *arr_dest_ch = (char *) arr_dest;
int ip;
char *src, *dest;
if (transp->nproc == 0) { return 0; }
if (transp->iproc == MPI_UNDEFINED) { return 0; }
//
// Start the communication
//
for (ip = 0; ip < transp->nproc; ip++) {
dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize;
src = arr_source_ch + transp->sbeg[ip] * transp->fsize;
if ((dest == src) && (transp->iproc == ip)) {
transp->req[ip] = MPI_REQUEST_NULL;
transp->req[transp->nproc + ip] = MPI_REQUEST_NULL;
}
else {
MPI_Irecv(transp->rbuf[ip], transp->rsize[ip], transp->rtype[ip],
ip, 0, transp->comm,
&transp->req[transp->nproc + ip]);
copy_to_buffer((char*)transp->sbuf[ip], src,
transp->ndims, transp->sdims[ip], transp->sstride, transp->fsize);
MPI_Isend(transp->sbuf[ip], transp->ssize[ip], transp->stype[ip],
ip, 0, transp->comm,
&transp->req[ip]);
}
}
return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * P_Transpose_mp_end: complete a manual-packing transposition.
 *
 * Waits for all receive requests (second half of transp->req), unpacks each
 * peer's staging buffer into arr_dest with copy_from_buffer, then waits for
 * all send requests (first half of transp->req).  The in-place self exchange
 * is skipped, as in the start routine.
 *
 * Returns 0; a descriptor with nproc == 0 or iproc == MPI_UNDEFINED makes the
 * call a no-op.
 */
int P_Transpose_mp_end ( arr_source, arr_dest, transp )
    void *arr_source, *arr_dest;
    Transposition *transp;
{
    char *src_base = (char *)arr_source;
    char *dst_base = (char *)arr_dest;
    int peer;

    if (transp->nproc == 0 || transp->iproc == MPI_UNDEFINED) {
        return 0;
    }

    // all incoming blocks must have arrived before unpacking
    MPI_Waitall(transp->nproc, transp->req + transp->nproc,
            MPI_STATUSES_IGNORE);

    for (peer = 0; peer < transp->nproc; peer++) {
        char *to = dst_base + transp->rbeg[peer] * transp->fsize;
        char *from = src_base + transp->sbeg[peer] * transp->fsize;

        if (!(to == from && transp->iproc == peer)) {
            copy_from_buffer(to, (char*)transp->rbuf[peer],
                    transp->ndims, transp->rdims[peer], transp->rstride,
                    transp->fsize);
        }
    }

    // finally make sure the outgoing blocks have been sent
    MPI_Waitall(transp->nproc, transp->req, MPI_STATUSES_IGNORE);
    return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * P_Transpose_mp_free: hand the nine work buffers of a manual-packing
 * descriptor back through free_plbuf, each with the buffer id stored for it
 * in transp->buf_id[0..8].
 *
 * Returns 0; a descriptor with nproc == 0 or iproc == MPI_UNDEFINED owns no
 * buffers and is left untouched.
 */
int P_Transpose_mp_free ( transp )
    Transposition *transp;
{
    if (transp->nproc == 0 || transp->iproc == MPI_UNDEFINED) {
        return 0;
    }

    // per-peer bookkeeping arrays
    free_plbuf(transp->stype,    transp->buf_id[0]);
    free_plbuf(transp->sbeg,     transp->buf_id[1]);
    free_plbuf(transp->req,      transp->buf_id[2]);
    free_plbuf(transp->sdims,    transp->buf_id[3]);
    free_plbuf(transp->ssize,    transp->buf_id[4]);
    free_plbuf(transp->sbuf,     transp->buf_id[5]);
    free_plbuf(transp->mem_dims, transp->buf_id[6]);
    // staging areas for packed messages
    free_plbuf(transp->mem_sbuf, transp->buf_id[7]);
    free_plbuf(transp->mem_rbuf, transp->buf_id[8]);
    return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * P_Transpose_mp: one-shot manual-packing transposition.  Runs
 * P_Transpose_mp_init, P_Transpose_mp_start, P_Transpose_mp_end and
 * P_Transpose_mp_free in that order on a local Transposition descriptor.
 *
 * Returns 0 on success, otherwise the non-zero error code reported by
 * P_Transpose_mp_init (nothing is started or freed in that case).
 */
int P_Transpose_mp ( ndims, arr_source, dim_source, lblks_source, arr_dest,
        dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
        period )
    void *arr_source, *arr_dest;
    int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
    int *blklen, *overlap;
    MPI_Datatype datatype;
    MPI_Comm comm;
    int period;
{
    Transposition transp;
    int ierr;

    // Assign first, compare afterwards: `ierr = f() != 0` would store the
    // result of the comparison (0 or 1) instead of the real error code,
    // because `!=` binds tighter than `=`.
    ierr = P_Transpose_mp_init(ndims, dim_source, lblks_source,
            dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
            period, &transp);
    if (ierr != 0) {
        return ierr;
    }

    P_Transpose_mp_start(arr_source, arr_dest, &transp);
    P_Transpose_mp_end(arr_source, arr_dest, &transp);
    P_Transpose_mp_free(&transp);
    return 0;
}
// -------------------------------------------------------------------------- //
// v.1.95 - persistent exchanges for manual packing //
// -------------------------------------------------------------------------- //
/*
 * PST_Transpose_mp_init: manual-packing initialisation with persistent
 * requests.
 *
 * Delegates the descriptor setup to P_Transpose_mp_init, stores the source
 * and destination array addresses in the descriptor (psrc / pdest), and then
 * creates one persistent receive and one persistent send request per peer on
 * the peer's staging buffers.  The in-place self exchange (peer is the
 * calling rank and source and destination addresses coincide) gets no
 * requests.
 *
 * Returns the error code of P_Transpose_mp_init if that is non-zero,
 * otherwise 0.
 */
int PST_Transpose_mp_init ( ndims, arr_source, dim_source, lblks_source,
        arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period,
        transp )
    void *arr_source, *arr_dest;
    int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
    int *blklen, *overlap;
    MPI_Datatype datatype;
    MPI_Comm comm;
    int period;
    Transposition *transp;
{
    char *src_base = (char *)arr_source;
    char *dst_base = (char *)arr_dest;
    int peer;
    int status;

    status = P_Transpose_mp_init(ndims, dim_source, lblks_source, dim_dest,
            lblks_dest, stride, blklen, overlap, datatype, comm, period,
            transp);
    if (status != 0) {
        return status;
    }

    // remembered so that the free routine can recognise the skipped exchange
    transp->psrc = arr_source;
    transp->pdest = arr_dest;

    if (transp->nproc == 0 || transp->iproc == MPI_UNDEFINED) {
        return 0;
    }

    for (peer = 0; peer < transp->nproc; peer++) {
        char *to = dst_base + transp->rbeg[peer] * transp->fsize;
        char *from = src_base + transp->sbeg[peer] * transp->fsize;

        if (to == from && transp->iproc == peer) {
            continue;
        }

        MPI_Recv_init(transp->rbuf[peer], transp->rsize[peer],
                transp->rtype[peer], peer, 0, transp->comm,
                &transp->req[transp->nproc + peer]);
        MPI_Send_init(transp->sbuf[peer], transp->ssize[peer],
                transp->stype[peer], peer, 0, transp->comm,
                &transp->req[peer]);
    }
    return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
int PST_Transpose_mp_start ( arr_source, arr_dest, transp )
void *arr_source, *arr_dest;
Transposition *transp;
{
char *arr_source_ch = (char *) arr_source;
char *arr_dest_ch = (char *) arr_dest;
int ip;
char *src, *dest;
if (transp->nproc == 0) { return 0; }
if (transp->iproc == MPI_UNDEFINED) { return 0; }
//
// Start the communication
//
for (ip = 0; ip < transp->nproc; ip++) {
dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize;
src = arr_source_ch + transp->sbeg[ip] * transp->fsize;
if ((dest == src) && (transp->iproc == ip)) {
transp->req[ip] = MPI_REQUEST_NULL;
transp->req[transp->nproc + ip] = MPI_REQUEST_NULL;
}
else {
MPI_Start(&transp->req[transp->nproc + ip]);
copy_to_buffer((char*)transp->sbuf[ip], src,
transp->ndims, transp->sdims[ip], transp->sstride, transp->fsize);
MPI_Start(&transp->req[ip]);
}
}
return 0;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * PST_Transpose_mp_end: complete the persistent manual-packing exchange.
 * Completion is identical to the non-persistent case, so the call is
 * forwarded to P_Transpose_mp_end and its result is returned unchanged.
 */
int PST_Transpose_mp_end ( arr_source, arr_dest, transp )
    void *arr_source, *arr_dest;
    Transposition *transp;
{
    int status = P_Transpose_mp_end(arr_source, arr_dest, transp);

    return status;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * PST_Transpose_mp_free: release a persistent manual-packing descriptor.
 *
 * Frees the persistent receive and send request of every peer except the
 * in-place self exchange (recognised from the psrc / pdest addresses stored
 * in the descriptor), then releases the work buffers through
 * P_Transpose_mp_free and returns its result.
 *
 * Returns 0 without doing anything for a descriptor with nproc == 0 or
 * iproc == MPI_UNDEFINED.
 */
int PST_Transpose_mp_free ( transp )
    Transposition *transp;
{
    char *src_base = (char *)transp->psrc;
    char *dst_base = (char *)transp->pdest;
    int peer;

    if (transp->nproc == 0 || transp->iproc == MPI_UNDEFINED) {
        return 0;
    }

    // the requests live in a buffer that P_Transpose_mp_free gives back,
    // so they are released first
    for (peer = 0; peer < transp->nproc; peer++) {
        char *to = dst_base + transp->rbeg[peer] * transp->fsize;
        char *from = src_base + transp->sbeg[peer] * transp->fsize;

        if (to == from && transp->iproc == peer) {
            continue;
        }
        MPI_Request_free(&transp->req[transp->nproc + peer]);
        MPI_Request_free(&transp->req[peer]);
    }

    return P_Transpose_mp_free(transp);
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * PST_Transpose_mp: one-shot manual-packing transposition built on the
 * persistent-request routines.  Runs PST_Transpose_mp_init,
 * PST_Transpose_mp_start, PST_Transpose_mp_end and PST_Transpose_mp_free in
 * that order on a local Transposition descriptor.
 *
 * Returns 0 on success, otherwise the non-zero error code reported by
 * PST_Transpose_mp_init (nothing is started or freed in that case).
 */
int PST_Transpose_mp ( ndims, arr_source, dim_source, lblks_source, arr_dest,
        dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
        period )
    void *arr_source, *arr_dest;
    int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
    int *blklen, *overlap;
    MPI_Datatype datatype;
    MPI_Comm comm;
    int period;
{
    Transposition transp;
    int ierr;

    // Assign first, compare afterwards: `ierr = f() != 0` would store the
    // result of the comparison (0 or 1) instead of the real error code,
    // because `!=` binds tighter than `=`.
    ierr = PST_Transpose_mp_init(ndims, arr_source, dim_source, lblks_source,
            arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype,
            comm, period, &transp);
    if (ierr != 0) {
        return ierr;
    }

    PST_Transpose_mp_start(arr_source, arr_dest, &transp);
    PST_Transpose_mp_end(arr_source, arr_dest, &transp);
    PST_Transpose_mp_free(&transp);
    return 0;
}
// -------------------------------------------------------------------------- //
// v.1.95 - choice subroutines //
// -------------------------------------------------------------------------- //
/*
 * P_Transpose_opt_init: initialise a transposition with the implementation
 * selected by exch_mode:
 *   IS_MPI_TYPED                   -> P_Transpose_init
 *   IS_MPI_MANUAL_PACK             -> P_Transpose_mp_init
 *   IS_MPI_TYPED_PERSISTENT        -> PST_Transpose_init
 *   IS_MPI_MANUAL_PACK_PERSISTENT  -> PST_Transpose_mp_init
 * arr_source / arr_dest are only passed on to the persistent variants.
 *
 * Returns the result of the selected routine, or 999 when exch_mode matches
 * none of the modes above.
 */
int P_Transpose_opt_init ( ndims, arr_source, dim_source, lblks_source,
        arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period,
        transp, exch_mode )
    void *arr_source, *arr_dest;
    int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
    int *blklen, *overlap;
    MPI_Datatype datatype;
    MPI_Comm comm;
    int period;
    Transposition *transp;
    int exch_mode;
{
    int status = 999;   // stays 999 for an unrecognised exch_mode

    if (exch_mode == IS_MPI_TYPED) {
        status = P_Transpose_init(ndims, dim_source, lblks_source,
                dim_dest, lblks_dest, stride, blklen, overlap,
                datatype, comm, period, transp);
    }
    else if (exch_mode == IS_MPI_MANUAL_PACK) {
        status = P_Transpose_mp_init(ndims, dim_source, lblks_source,
                dim_dest, lblks_dest, stride, blklen, overlap,
                datatype, comm, period, transp);
    }
    else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
        status = PST_Transpose_init(ndims, arr_source, dim_source,
                lblks_source, arr_dest, dim_dest, lblks_dest, stride, blklen,
                overlap, datatype, comm, period, transp);
    }
    else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
        status = PST_Transpose_mp_init(ndims, arr_source, dim_source,
                lblks_source, arr_dest, dim_dest, lblks_dest, stride, blklen,
                overlap, datatype, comm, period, transp);
    }
    return status;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * P_Transpose_opt_start: start the exchange with the implementation selected
 * by exch_mode (P_Transpose_start, P_Transpose_mp_start, PST_Transpose_start
 * or PST_Transpose_mp_start).  PST_Transpose_start takes only the descriptor.
 *
 * Returns the result of the selected routine, or 999 when exch_mode matches
 * none of the known modes.
 */
int P_Transpose_opt_start ( arr_source, arr_dest, transp, exch_mode )
    void *arr_source, *arr_dest;
    Transposition *transp;
    int exch_mode;
{
    int status = 999;   // stays 999 for an unrecognised exch_mode

    if (exch_mode == IS_MPI_TYPED) {
        status = P_Transpose_start(arr_source, arr_dest, transp);
    }
    else if (exch_mode == IS_MPI_MANUAL_PACK) {
        status = P_Transpose_mp_start(arr_source, arr_dest, transp);
    }
    else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
        status = PST_Transpose_start(transp);
    }
    else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
        status = PST_Transpose_mp_start(arr_source, arr_dest, transp);
    }
    return status;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * P_Transpose_opt_end: complete the exchange with the implementation selected
 * by exch_mode (P_Transpose_end, P_Transpose_mp_end, PST_Transpose_end or
 * PST_Transpose_mp_end).  The typed variants take only the descriptor; the
 * manual-packing variants also receive the array addresses.
 *
 * Returns the result of the selected routine, or 999 when exch_mode matches
 * none of the known modes.
 */
int P_Transpose_opt_end ( arr_source, arr_dest, transp, exch_mode )
    void *arr_source, *arr_dest;
    Transposition *transp;
    int exch_mode;
{
    int status = 999;   // stays 999 for an unrecognised exch_mode

    if (exch_mode == IS_MPI_TYPED) {
        status = P_Transpose_end(transp);
    }
    else if (exch_mode == IS_MPI_MANUAL_PACK) {
        status = P_Transpose_mp_end(arr_source, arr_dest, transp);
    }
    else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
        status = PST_Transpose_end(transp);
    }
    else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
        status = PST_Transpose_mp_end(arr_source, arr_dest, transp);
    }
    return status;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * P_Transpose_opt_free: release a descriptor with the implementation selected
 * by exch_mode (P_Transpose_free, P_Transpose_mp_free, PST_Transpose_free or
 * PST_Transpose_mp_free).
 *
 * Returns the result of the selected routine, or 999 when exch_mode matches
 * none of the known modes.
 */
int P_Transpose_opt_free ( transp, exch_mode )
    Transposition *transp;
    int exch_mode;
{
    int status = 999;   // stays 999 for an unrecognised exch_mode

    if (exch_mode == IS_MPI_TYPED) {
        status = P_Transpose_free(transp);
    }
    else if (exch_mode == IS_MPI_MANUAL_PACK) {
        status = P_Transpose_mp_free(transp);
    }
    else if (exch_mode == IS_MPI_TYPED_PERSISTENT) {
        status = PST_Transpose_free(transp);
    }
    else if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) {
        status = PST_Transpose_mp_free(transp);
    }
    return status;
}
// -------------------------------------------------------------------------- //
// -------------------------------------------------------------------------- //
/*
 * P_Transpose_opt: one-shot transposition with the implementation selected
 * by exch_mode (IS_MPI_TYPED, IS_MPI_MANUAL_PACK, IS_MPI_TYPED_PERSISTENT or
 * IS_MPI_MANUAL_PACK_PERSISTENT).
 *
 * The init / start / end / free sequence is routed through the
 * P_Transpose_opt_* dispatchers, which call exactly the mode-specific
 * routines with the arguments each of them expects.
 *
 * Returns 0 on success, the non-zero error code of the selected init routine
 * on failure, or 999 when exch_mode matches none of the known modes.
 */
int P_Transpose_opt ( ndims, arr_source, dim_source, lblks_source, arr_dest,
        dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
        period, exch_mode )
    void *arr_source, *arr_dest;
    int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
    int *blklen, *overlap;
    MPI_Datatype datatype;
    MPI_Comm comm;
    int period;
    int exch_mode;
{
    Transposition transp;
    int ierr;

    // Assign first, compare afterwards: `ierr = f() != 0` would store the
    // result of the comparison (0 or 1) instead of the real error code,
    // because `!=` binds tighter than `=`.  An unknown exch_mode comes back
    // from the dispatcher as 999 and is returned like any other error.
    ierr = P_Transpose_opt_init(ndims, arr_source, dim_source, lblks_source,
            arr_dest, dim_dest, lblks_dest, stride, blklen, overlap,
            datatype, comm, period, &transp, exch_mode);
    if (ierr != 0) {
        return ierr;
    }

    P_Transpose_opt_start(arr_source, arr_dest, &transp, exch_mode);
    P_Transpose_opt_end(arr_source, arr_dest, &transp, exch_mode);
    P_Transpose_opt_free(&transp, exch_mode);
    return 0;
}
// -------------------------------------------------------------------------- //
\ No newline at end of file
#include <stdlib.h> #include <stdlib.h>
#include "parlib.h" #include "parlib.h"
// -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE #ifdef FORTRANUNDERSCORE
void p_transpose_init_ ( ndims, dim_source, lblks_source, dim_dest, void p_transpose_init_ ( ndims, dim_source, lblks_source, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period, lblks_dest, stride, blklen, overlap, datatype, comm, period,
...@@ -14,6 +16,7 @@ void p_transpose_init ( ndims, dim_source, lblks_source, dim_dest, ...@@ -14,6 +16,7 @@ void p_transpose_init ( ndims, dim_source, lblks_source, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period, lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr ) transp, ierr )
#endif #endif
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
MPI_Fint **transp, *ierr; MPI_Fint **transp, *ierr;
...@@ -32,6 +35,7 @@ void p_transpose_start__ ( arr_source, arr_dest, transp, ierr ) ...@@ -32,6 +35,7 @@ void p_transpose_start__ ( arr_source, arr_dest, transp, ierr )
#else #else
void p_transpose_start ( arr_source, arr_dest, transp, ierr ) void p_transpose_start ( arr_source, arr_dest, transp, ierr )
#endif #endif
void *arr_source, *arr_dest; void *arr_source, *arr_dest;
MPI_Fint **transp, *ierr; MPI_Fint **transp, *ierr;
{ {
...@@ -46,6 +50,7 @@ void p_transpose_end__ ( transp, ierr ) ...@@ -46,6 +50,7 @@ void p_transpose_end__ ( transp, ierr )
#else #else
void p_transpose_end ( transp, ierr ) void p_transpose_end ( transp, ierr )
#endif #endif
MPI_Fint **transp, *ierr; MPI_Fint **transp, *ierr;
{ {
*ierr = P_Transpose_end((Transposition *)*transp); *ierr = P_Transpose_end((Transposition *)*transp);
...@@ -58,6 +63,7 @@ void p_transpose_free__ ( transp, ierr ) ...@@ -58,6 +63,7 @@ void p_transpose_free__ ( transp, ierr )
#else #else
void p_transpose_free ( transp, ierr ) void p_transpose_free ( transp, ierr )
#endif #endif
MPI_Fint **transp, *ierr; MPI_Fint **transp, *ierr;
{ {
*ierr = P_Transpose_free((Transposition *)*transp); *ierr = P_Transpose_free((Transposition *)*transp);
...@@ -77,6 +83,7 @@ void p_transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, ...@@ -77,6 +83,7 @@ void p_transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr ) period, ierr )
#endif #endif
void *arr_source, *arr_dest; void *arr_source, *arr_dest;
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
...@@ -86,3 +93,504 @@ void p_transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, ...@@ -86,3 +93,504 @@ void p_transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest,
arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap, arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap,
MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period); MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period);
} }
// -------------------------------------------------------------------------- //
// v.1.95 - persistent exchanges //
// -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE
void pst_transpose_init_( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose_init__( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr )
#else
void pst_transpose_init ( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
MPI_Fint **transp, *ierr;
{
*transp = (MPI_Fint *)malloc(sizeof(Transposition));
*ierr = PST_Transpose_init(*ndims, arr_source, *dim_source, lblks_source, arr_dest,
*dim_dest, lblks_dest, stride, blklen, overlap,
MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period,
(Transposition *)*transp);
}
#ifdef FORTRANUNDERSCORE
void pst_transpose_start_ ( transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose_start__ ( transp, ierr )
#else
void pst_transpose_start ( transp, ierr )
#endif
MPI_Fint **transp, *ierr;
{
*ierr = PST_Transpose_start((Transposition *)*transp);
}
#ifdef FORTRANUNDERSCORE
void pst_transpose_end_ ( transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose_end__ ( transp, ierr )
#else
void pst_transpose_end ( transp, ierr )
#endif
MPI_Fint **transp, *ierr;
{
*ierr = PST_Transpose_end((Transposition *)*transp);
}
#ifdef FORTRANUNDERSCORE
void pst_transpose_free_ ( transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose_free__ ( transp, ierr )
#else
void pst_transpose_free ( transp, ierr )
#endif
MPI_Fint **transp, *ierr;
{
*ierr = PST_Transpose_free((Transposition *)*transp);
free(*transp);
}
#ifdef FORTRANUNDERSCORE
void pst_transpose_ ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose__ ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr )
#else
void pst_transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
MPI_Fint *ierr;
{
*ierr = PST_Transpose(*ndims, arr_source, *dim_source, lblks_source,
arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap,
MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period);
}
// -------------------------------------------------------------------------- //
// v.1.7 - manual packing //
// -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE
void p_transpose_mp_init_ ( ndims, dim_source, lblks_source, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_mp_init__ ( ndims, dim_source, lblks_source, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr )
#else
void p_transpose_mp_init ( ndims, dim_source, lblks_source, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr )
#endif
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
MPI_Fint **transp, *ierr;
{
*transp = (MPI_Fint *)malloc(sizeof(Transposition));
*ierr = P_Transpose_mp_init(*ndims, *dim_source, lblks_source,
*dim_dest, lblks_dest, stride, blklen, overlap,
MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period,
(Transposition *)*transp);
}
#ifdef FORTRANUNDERSCORE
void p_transpose_mp_start_ ( arr_source, arr_dest, transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_mp_start__ ( arr_source, arr_dest, transp, ierr )
#else
void p_transpose_mp_start ( arr_source, arr_dest, transp, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint **transp, *ierr;
{
*ierr = P_Transpose_mp_start(arr_source, arr_dest,
(Transposition *)*transp);
}
#ifdef FORTRANUNDERSCORE
void p_transpose_mp_end_ ( arr_source, arr_dest, transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_mp_end__ ( arr_source, arr_dest, transp, ierr )
#else
void p_transpose_mp_end ( arr_source, arr_dest, transp, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint **transp, *ierr;
{
*ierr = P_Transpose_mp_end(arr_source, arr_dest, (Transposition *)*transp);
}
#ifdef FORTRANUNDERSCORE
void p_transpose_mp_free_ ( transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_mp_free__ ( transp, ierr )
#else
void p_transpose_mp_free ( transp, ierr )
#endif
MPI_Fint **transp, *ierr;
{
*ierr = P_Transpose_mp_free((Transposition *)*transp);
free(*transp);
}
#ifdef FORTRANUNDERSCORE
void p_transpose_mp_ ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_mp__ ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr )
#else
void p_transpose_mp ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
MPI_Fint *ierr;
{
*ierr = P_Transpose_mp(*ndims, arr_source, *dim_source, lblks_source,
arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap,
MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period);
}
// -------------------------------------------------------------------------- //
// v.1.95 - persistent exchanges for manual packing //
// -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE
void pst_transpose_mp_init_( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose_mp_init__( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr )
#else
void pst_transpose_mp_init ( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
MPI_Fint **transp, *ierr;
{
*transp = (MPI_Fint *)malloc(sizeof(Transposition));
*ierr = PST_Transpose_mp_init(*ndims, arr_source, *dim_source, lblks_source, arr_dest,
*dim_dest, lblks_dest, stride, blklen, overlap,
MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period,
(Transposition *)*transp);
}
#ifdef FORTRANUNDERSCORE
void pst_transpose_mp_start_ ( arr_source, arr_dest, transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose_mp_start__ ( arr_source, arr_dest, transp, ierr )
#else
void pst_transpose_mp_start ( arr_source, arr_dest, transp, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint **transp, *ierr;
{
*ierr = PST_Transpose_mp_start(arr_source, arr_dest,
(Transposition *)*transp);
}
#ifdef FORTRANUNDERSCORE
void pst_transpose_mp_end_ ( arr_source, arr_dest, transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose_mp_end__ ( arr_source, arr_dest, transp, ierr )
#else
void pst_transpose_mp_end ( arr_source, arr_dest, transp, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint **transp, *ierr;
{
*ierr = PST_Transpose_mp_end(arr_source, arr_dest, (Transposition *)*transp);
}
#ifdef FORTRANUNDERSCORE
void pst_transpose_mp_free_ ( transp, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose_mp_free__ ( transp, ierr )
#else
void pst_transpose_mp_free ( transp, ierr )
#endif
MPI_Fint **transp, *ierr;
{
*ierr = PST_Transpose_mp_free((Transposition *)*transp);
free(*transp);
}
#ifdef FORTRANUNDERSCORE
void pst_transpose_mp_ ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void pst_transpose_mp__ ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr )
#else
void pst_transpose_mp ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
MPI_Fint *ierr;
{
*ierr = PST_Transpose_mp(*ndims, arr_source, *dim_source, lblks_source,
arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap,
MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period);
}
// -------------------------------------------------------------------------- //
// v.1.95 - choice subroutines //
// -------------------------------------------------------------------------- //
#ifdef FORTRANUNDERSCORE
void p_transpose_opt_init_( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_opt_init__( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, exch_mode, ierr)
#else
void p_transpose_opt_init ( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp, exch_mode, ierr)
#endif
void *arr_source, *arr_dest;
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
MPI_Fint **transp, *ierr, *exch_mode;
{
*transp = (MPI_Fint *)malloc(sizeof(Transposition));
*ierr = P_Transpose_opt_init(*ndims, arr_source, *dim_source, lblks_source, arr_dest,
*dim_dest, lblks_dest, stride, blklen, overlap,
MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period,
(Transposition *)*transp, *exch_mode);
}
#ifdef FORTRANUNDERSCORE
void p_transpose_opt_start_ ( arr_source, arr_dest, transp, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_opt_start__ ( arr_source, arr_dest, transp, exch_mode, ierr )
#else
void p_transpose_opt_start ( arr_source, arr_dest, transp, exch_mode, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint **transp, *ierr, *exch_mode;
{
*ierr = P_Transpose_opt_start(arr_source, arr_dest,
(Transposition *)*transp, *exch_mode);
}
#ifdef FORTRANUNDERSCORE
void p_transpose_opt_end_ ( arr_source, arr_dest, transp, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_opt_end__ ( arr_source, arr_dest, transp, exch_mode, ierr )
#else
void p_transpose_opt_end ( arr_source, arr_dest, transp, exch_mode, ierr )
#endif
void *arr_source, *arr_dest;
MPI_Fint **transp, *ierr, *exch_mode;
{
*ierr = P_Transpose_opt_end(arr_source, arr_dest,
(Transposition *)*transp, *exch_mode);
}
#ifdef FORTRANUNDERSCORE
void p_transpose_opt_free_ ( transp, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_opt_free__ ( transp, exch_mode, ierr )
#else
void p_transpose_opt_free ( transp, exch_mode, ierr )
#endif
MPI_Fint **transp, *ierr, *exch_mode;
{
*ierr = P_Transpose_opt_free((Transposition *)*transp, *exch_mode);
free(*transp);
}
/*
 * Fortran-callable wrapper around P_Transpose_opt.
 *
 * Every argument arrives by reference.  ndims, dim_source, dim_dest, period
 * and exch_mode are dereferenced and passed by value; lblks_source,
 * lblks_dest, stride, blklen and overlap are forwarded as pointers; datatype
 * and comm are converted from Fortran handles with MPI_Type_f2c /
 * MPI_Comm_f2c.
 *
 *   ierr - receives the value returned by P_Transpose_opt
 */
#ifdef FORTRANUNDERSCORE
void p_transpose_opt_(ndims, arr_source, dim_source, lblks_source, arr_dest,
                      dim_dest, lblks_dest, stride, blklen, overlap,
                      datatype, comm, period, exch_mode, ierr)
#elif defined(FORTRANDOUBLEUNDERSCORE)
void p_transpose_opt__(ndims, arr_source, dim_source, lblks_source, arr_dest,
                       dim_dest, lblks_dest, stride, blklen, overlap,
                       datatype, comm, period, exch_mode, ierr)
#else
void p_transpose_opt(ndims, arr_source, dim_source, lblks_source, arr_dest,
                     dim_dest, lblks_dest, stride, blklen, overlap,
                     datatype, comm, period, exch_mode, ierr)
#endif
    MPI_Fint *ndims;
    void *arr_source;
    MPI_Fint *dim_source, *lblks_source;
    void *arr_dest;
    MPI_Fint *dim_dest, *lblks_dest;
    MPI_Fint *stride, *blklen, *overlap;
    MPI_Fint *datatype, *comm, *period;
    MPI_Fint *exch_mode;
    MPI_Fint *ierr;
{
    *ierr = P_Transpose_opt(
        *ndims,
        arr_source,
        *dim_source,
        lblks_source,
        arr_dest,
        *dim_dest,
        lblks_dest,
        stride,
        blklen,
        overlap,
        MPI_Type_f2c(*datatype),
        MPI_Comm_f2c(*comm),
        *period,
        *exch_mode);
}
// -------------------------------------------------------------------------- //
// v.2.0 - regular communications [removed only on correct program exit] //
// -------------------------------------------------------------------------- //
/*
 * Fortran-callable registration of a transposition.
 *
 * Allocates a Transposition descriptor, initialises it through
 * P_Transpose_opt_init and, on success, hands it to save_transp_handle.
 * datatype and comm are converted from Fortran handles with MPI_Type_f2c /
 * MPI_Comm_f2c; the remaining arguments are forwarded by value or by pointer
 * exactly as P_Transpose_opt_init receives them below.
 *
 *   exch_id   - on success receives the value returned by save_transp_handle;
 *               left untouched on failure
 *   exch_mode - dereferenced and forwarded to both P_Transpose_opt_init and
 *               save_transp_handle
 *   ierr      - receives the value returned by P_Transpose_opt_init, or -1
 *               when the descriptor could not be allocated
 *
 * On any failure the descriptor is released before returning: it has not been
 * registered at that point, so nothing else could ever free it.
 */
#ifdef FORTRANUNDERSCORE
void reg_transpose_( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
exch_id, exch_mode, ierr )
#elif defined(FORTRANDOUBLEUNDERSCORE)
void reg_transpose__( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
exch_id, exch_mode, ierr)
#else
void reg_transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
exch_id, exch_mode, ierr)
#endif
void *arr_source, *arr_dest;
MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest;
MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period;
MPI_Fint *ierr, *exch_id, *exch_mode;
{
    Transposition *transp;

    transp = (Transposition *)malloc(sizeof *transp);
    if (transp == NULL) {
        /* NOTE(review): -1 is used as an out-of-memory indicator; confirm it
         * does not clash with the library's own error codes. */
        *ierr = -1;
        return;
    }

    *ierr = P_Transpose_opt_init(*ndims, arr_source, *dim_source, lblks_source, arr_dest,
                                 *dim_dest, lblks_dest, stride, blklen, overlap,
                                 MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period,
                                 transp, *exch_mode);
    if ((int)*ierr != 0) {
        /* The descriptor was never registered; release it to avoid a leak. */
        free(transp);
        return;
    }

    *exch_id = save_transp_handle(transp, (int)*exch_mode);
}
/*
 * Fortran-callable start of a registered transposition.
 *
 * Looks up the descriptor and exchange mode for *exch_id through
 * get_transp_handle and passes them to P_Transpose_opt_start.
 *
 *   arr_source, arr_dest - forwarded unchanged
 *   exch_id - identifier handed to get_transp_handle
 *   ierr    - receives the value returned by P_Transpose_opt_start
 */
#ifdef FORTRANUNDERSCORE
void start_transpose_(arr_source, arr_dest, exch_id, ierr)
#elif defined(FORTRANDOUBLEUNDERSCORE)
void start_transpose__(arr_source, arr_dest, exch_id, ierr)
#else
void start_transpose(arr_source, arr_dest, exch_id, ierr)
#endif
    void *arr_source;
    void *arr_dest;
    MPI_Fint *exch_id;
    MPI_Fint *ierr;
{
    Transposition *descr;
    int mode;

    /* Both outputs are filled in by the handle lookup. */
    get_transp_handle(&descr, &mode, (int)*exch_id);

    *ierr = P_Transpose_opt_start(arr_source, arr_dest, descr, mode);
}
/*
 * Fortran-callable completion of a registered transposition.
 *
 * Looks up the descriptor and exchange mode for *exch_id through
 * get_transp_handle and passes them to P_Transpose_opt_end.
 *
 *   arr_source, arr_dest - forwarded unchanged
 *   exch_id - identifier handed to get_transp_handle
 *   ierr    - receives the value returned by P_Transpose_opt_end
 */
#ifdef FORTRANUNDERSCORE
void end_transpose_(arr_source, arr_dest, exch_id, ierr)
#elif defined(FORTRANDOUBLEUNDERSCORE)
void end_transpose__(arr_source, arr_dest, exch_id, ierr)
#else
void end_transpose(arr_source, arr_dest, exch_id, ierr)
#endif
    void *arr_source;
    void *arr_dest;
    MPI_Fint *exch_id;
    MPI_Fint *ierr;
{
    Transposition *descr;
    int mode;

    /* Both outputs are filled in by the handle lookup. */
    get_transp_handle(&descr, &mode, (int)*exch_id);

    *ierr = P_Transpose_opt_end(arr_source, arr_dest, descr, mode);
}
/*
 * Fortran-callable start-and-complete of a registered transposition.
 *
 * Looks up the descriptor and exchange mode for *exch_id through
 * get_transp_handle, calls P_Transpose_opt_start and, only if that returned
 * zero, P_Transpose_opt_end.
 *
 *   arr_source, arr_dest - forwarded unchanged to both calls
 *   exch_id - identifier handed to get_transp_handle
 *   ierr    - receives the code of the start call if it is non-zero,
 *             otherwise the code of the end call
 */
#ifdef FORTRANUNDERSCORE
void run_transpose_(arr_source, arr_dest, exch_id, ierr)
#elif defined(FORTRANDOUBLEUNDERSCORE)
void run_transpose__(arr_source, arr_dest, exch_id, ierr)
#else
void run_transpose(arr_source, arr_dest, exch_id, ierr)
#endif
    void *arr_source;
    void *arr_dest;
    MPI_Fint *exch_id;
    MPI_Fint *ierr;
{
    Transposition *descr;
    int mode;

    get_transp_handle(&descr, &mode, (int)*exch_id);

    *ierr = P_Transpose_opt_start(arr_source, arr_dest, descr, mode);
    if ((int)*ierr == 0) {
        /* Start succeeded: finish the exchange and report its status. */
        *ierr = P_Transpose_opt_end(arr_source, arr_dest, descr, mode);
    }
}
/*
 * Fortran-callable removal of a registered transposition.
 *
 * Looks up the descriptor and exchange mode for *exch_id through
 * get_transp_handle and calls P_Transpose_opt_free.  Only when that returns
 * zero is the descriptor storage released with free() and the identifier
 * dropped via remove_transp_handle; otherwise both are left in place.
 *
 *   exch_id - identifier handed to get_transp_handle / remove_transp_handle
 *   ierr    - receives the value returned by P_Transpose_opt_free
 */
#ifdef FORTRANUNDERSCORE
void unreg_transpose_(exch_id, ierr)
#elif defined(FORTRANDOUBLEUNDERSCORE)
void unreg_transpose__(exch_id, ierr)
#else
void unreg_transpose(exch_id, ierr)
#endif
    MPI_Fint *exch_id;
    MPI_Fint *ierr;
{
    Transposition *descr;
    int mode;

    get_transp_handle(&descr, &mode, (int)*exch_id);

    *ierr = P_Transpose_opt_free(descr, mode);
    if ((int)*ierr == 0) {
        free(descr);
        remove_transp_handle((int)*exch_id);
    }
}
// -------------------------------------------------------------------------- //
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment