diff --git a/ParLib.src/Makefile b/ParLib.src/Makefile index 71c1bfb5dbc3f33c2c15df2b04000f2e15841d6e..c81ae8c1ba1da7017c8c85e3a3bc6badec10d390 100644 --- a/ParLib.src/Makefile +++ b/ParLib.src/Makefile @@ -1,81 +1,83 @@ -.SUFFIXES: .c .o .a - -AR = ar crl -RANLIB = ranlib - -INSTALLDIR = $(HOME)/ParLib.v1.1 -#INSTALLDIR = $(HOME) - - -default: all -all: setvars libparlib.a libparlibf.a - -# -# For INM's HP-cluster -# - CC = mpicc - LIBS = -lmpich -# LIBPATH = -L/opt/mpich-gm/lib -# INCPATH = -I/opt/mpich-gm/include - DEFINES = -DFORTRANUNDERSCORE - -setvars: - -libparlib.a: bexchange.o transpose.o - $(AR) $@ $? - $(RANLIB) $@ - -libparlibf.a: parlibf.o bexchangef.o transposef.o - $(AR) $@ $? - $(RANLIB) $@ - -.c.o: - $(CC) $(INCPATH) -c $(DEFINES) $< - -clean: - rm -f *.o *.a - -rebuild: clean all - -install: libparlib.a libparlibf.a - @if !(test -d $(INSTALLDIR)) ; then \ - echo Creating directory $(INSTALLDIR)...; \ - mkdir $(INSTALLDIR) ; \ - fi - @if !(test -d $(INSTALLDIR)/lib) ; then \ - echo Creating directory $(INSTALLDIR)/lib...; \ - mkdir $(INSTALLDIR)/lib ; \ - fi - cp $? $(INSTALLDIR)/lib - @if !(test -d $(INSTALLDIR)/include) ; then \ - echo Creating directory $(INSTALLDIR)/include...; \ - mkdir $(INSTALLDIR)/include ; \ - fi - cp -r parlib.h parlibf.h $(INSTALLDIR)/include - @if !(test -d $(INSTALLDIR)/man) ; then \ - echo Creating directory $(INSTALLDIR)/man...; \ - mkdir $(INSTALLDIR)/man ; \ - fi - @if !(test -d $(INSTALLDIR)/man/man3) ; then \ - echo Creating directory $(INSTALLDIR)/man/man3...; \ - mkdir $(INSTALLDIR)/man/man3 ; \ - fi - cp -r man/* $(INSTALLDIR)/man/man3 - ln -s $(INSTALLDIR)/man/man3/P_BExchange_init.3 \ - $(INSTALLDIR)/man/man3/P_BExchange_start.3 - ln -s $(INSTALLDIR)/man/man3/P_BExchange_init.3 \ - $(INSTALLDIR)/man/man3/P_BExchange_end.3 - ln -s $(INSTALLDIR)/man/man3/P_BExchange_init.3 \ - $(INSTALLDIR)/man/man3/P_BExchange_free.3 - ln -s $(INSTALLDIR)/man/man3/P_Transpose_init.3 \ - $(INSTALLDIR)/man/man3/P_Transpose_start.3 - ln -s $(INSTALLDIR)/man/man3/P_Transpose_init.3 \ - $(INSTALLDIR)/man/man3/P_Transpose_end.3 - ln -s $(INSTALLDIR)/man/man3/P_Transpose_init.3 \ - $(INSTALLDIR)/man/man3/P_Transpose_free.3 - -parlibf.o: parlib.h -bexchange.o: parlib.h -bexchangef.o: parlib.h -transpose.o: parlib.h -transposef.o: parlib.h +.SUFFIXES: .c .o .a + +AR = ar crl +RANLIB = ranlib + +INSTALLDIR = $(HOME)/ParLib.v2.1 +#INSTALLDIR = $(HOME) + + +default: all +all: setvars libparlib.a libparlibf.a + +# +# For INM's HP-cluster +# + CC = mpicc -restrict -no-ansi-alias + LIBS = -lmpich +# LIBPATH = -L/opt/mpich-gm/lib +# INCPATH = -I/opt/mpich-gm/include + DEFINES = -DFORTRANUNDERSCORE + +setvars: + +libparlib.a: parlib.o plutils.o bexchange.o transpose.o + $(AR) $@ $? + $(RANLIB) $@ + +libparlibf.a: parlibf.o bexchangef.o transposef.o + $(AR) $@ $? + $(RANLIB) $@ + +.c.o: + $(CC) $(INCPATH) -c $(DEFINES) $< + +clean: + rm -f *.o *.a + +rebuild: clean all + +install: libparlib.a libparlibf.a + @if !(test -d $(INSTALLDIR)) ; then \ + echo Creating directory $(INSTALLDIR)...; \ + mkdir $(INSTALLDIR) ; \ + fi + @if !(test -d $(INSTALLDIR)/lib) ; then \ + echo Creating directory $(INSTALLDIR)/lib...; \ + mkdir $(INSTALLDIR)/lib ; \ + fi + cp $? $(INSTALLDIR)/lib + @if !(test -d $(INSTALLDIR)/include) ; then \ + echo Creating directory $(INSTALLDIR)/include...; \ + mkdir $(INSTALLDIR)/include ; \ + fi + cp -r parlib.h parlibf.h $(INSTALLDIR)/include + @if !(test -d $(INSTALLDIR)/man) ; then \ + echo Creating directory $(INSTALLDIR)/man...; \ + mkdir $(INSTALLDIR)/man ; \ + fi + @if !(test -d $(INSTALLDIR)/man/man3) ; then \ + echo Creating directory $(INSTALLDIR)/man/man3...; \ + mkdir $(INSTALLDIR)/man/man3 ; \ + fi + cp -r man/* $(INSTALLDIR)/man/man3 + ln -s $(INSTALLDIR)/man/man3/P_BExchange_init.3 \ + $(INSTALLDIR)/man/man3/P_BExchange_start.3 + ln -s $(INSTALLDIR)/man/man3/P_BExchange_init.3 \ + $(INSTALLDIR)/man/man3/P_BExchange_end.3 + ln -s $(INSTALLDIR)/man/man3/P_BExchange_init.3 \ + $(INSTALLDIR)/man/man3/P_BExchange_free.3 + ln -s $(INSTALLDIR)/man/man3/P_Transpose_init.3 \ + $(INSTALLDIR)/man/man3/P_Transpose_start.3 + ln -s $(INSTALLDIR)/man/man3/P_Transpose_init.3 \ + $(INSTALLDIR)/man/man3/P_Transpose_end.3 + ln -s $(INSTALLDIR)/man/man3/P_Transpose_init.3 \ + $(INSTALLDIR)/man/man3/P_Transpose_free.3 + +plutils.o: plutils.h +parlib.o: parlib.h plutils.h +parlibf.o: parlib.h plutils.h +bexchange.o: parlib.h plutils.h +bexchangef.o: parlib.h plutils.h +transpose.o: parlib.h plutils.h +transposef.o: parlib.h plutils.h diff --git a/ParLib.src/bexchange.c b/ParLib.src/bexchange.c index 4daf989adf5459c29d2503f4fcf51c10a258127a..52e66f75a38440bcb7a6ec47bd8068f205417308 100644 --- a/ParLib.src/bexchange.c +++ b/ParLib.src/bexchange.c @@ -1,209 +1,914 @@ -#include "parlib.h" - -/* - * Error codes: - * 0 - success - * 1 - nonpositive number of dimensions - * 2 - wrong communicated dimension - * 3 - negative boundary width - * 4 - nonpositive dimension - * 5 - boundary width exceeds the array block length - */ -int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype, - comm, period, bexchange ) - int ndims, *stride, *blklen, bdim, overlap[2], period; - MPI_Datatype datatype; - MPI_Comm comm; - BExchange *bexchange; -{ - int nproc, iproc, direct, idim, sendproc[2], recvproc[2]; - int count, strd, sbind[2], rbind[2], send[2], recv[2]; - MPI_Aint fsize; - MPI_Datatype oldtype, btype[2]; - MPI_Request sreq[2], rreq[2]; - -/* - * Check input parameters - */ - if ( ndims < 1 ) {return 1;} - if ( bdim < 1 || bdim > ndims ) {return 2;} - if ( overlap[0] == 0 && overlap[1] == 0 ) {return 0;} /* success */ - for ( idim = 0; idim < ndims; idim++ ) { - if ( stride[idim] <= 0 ) {return 4;} - } - for ( direct = 0; direct < 2; direct++ ) { - if ( overlap[direct] < 0 ) {return 3;} - if ( overlap[direct] > blklen[bdim-1] ) {return 5;} - } -/* - * Define the number of processors in the group and the rank - */ - MPI_Comm_size ( comm, &nproc ); - if ( nproc == 0 ) {return 0;} /* success */ - MPI_Comm_rank ( comm, &iproc ); - if ( iproc == MPI_UNDEFINED ) {return 0;} /* the process does not belong to the group */ - sendproc[0] = ( iproc == 0 ? nproc-1 : iproc-1 ); - recvproc[0] = ( iproc == nproc-1 ? 0 : iproc+1 ); - sendproc[1] = recvproc[0]; - recvproc[1] = sendproc[0]; - send[0] = iproc > 0 || period; - recv[0] = iproc < nproc-1 || period; - send[1] = recv[0]; - recv[1] = send[0]; - MPI_Type_extent ( datatype, &fsize ); -/* - * Define data types for the boundaries - */ - for ( direct = 0; direct < 2; direct++ ) { - if ( overlap[direct] > 0 ) { - oldtype = datatype; - strd = 1; - for ( idim = 0; idim < ndims; idim++ ) { - if ( idim+1 == bdim ) { - count = overlap[direct]; - } else { - count = blklen[idim]; - } - MPI_Type_hvector ( count, 1, strd * fsize, oldtype, - &btype[direct] ); - if ( idim > 0 ) { - MPI_Type_free ( &oldtype ); - } - oldtype = btype[direct]; - strd = strd * stride[idim]; - } - MPI_Type_commit ( &btype[direct] ); - } - } -/* - * Determine the begining of boundaries - */ - strd = 1; - for ( idim = 0; idim < bdim - 1; idim++ ) { - strd = strd * stride[idim]; - } - sbind[0] = 0; - rbind[0] = blklen[bdim-1]*strd; - sbind[1] = (blklen[bdim-1]-overlap[1])*strd; - rbind[1] = -overlap[1]*strd; - - for ( direct = 0; direct < 2; direct++ ) { - bexchange->overlap[direct] = overlap[direct]; - bexchange->send[direct] = send[direct]; - bexchange->recv[direct] = recv[direct]; - bexchange->btype[direct] = btype[direct]; - bexchange->sendproc[direct] = sendproc[direct]; - bexchange->recvproc[direct] = recvproc[direct]; - bexchange->sbind[direct] = sbind[direct]; - bexchange->rbind[direct] = rbind[direct]; - } - bexchange->comm = comm; - bexchange->fsize = fsize; - return 0; -} - -int P_BExchange_start ( a, bexchange ) - void *a; - BExchange *bexchange; -{ - int direct, overlap[2], send[2], recv[2], btype[2]; - int sendproc[2], recvproc[2], sbind[2], rbind[2]; - MPI_Comm comm; - MPI_Request sreq[2], rreq[2]; - MPI_Aint fsize; - char *ach = (char *) a; - - for ( direct = 0; direct < 2; direct++ ) { - overlap[direct] = bexchange->overlap[direct]; - send[direct] = bexchange->send[direct]; - recv[direct] = bexchange->recv[direct]; - btype[direct] = bexchange->btype[direct]; - sendproc[direct] = bexchange->sendproc[direct]; - recvproc[direct] = bexchange->recvproc[direct]; - sbind[direct] = bexchange->sbind[direct]; - rbind[direct] = bexchange->rbind[direct]; - } - comm = bexchange->comm; - fsize = bexchange->fsize; - - for ( direct = 0; direct < 2; direct++ ) { - if ( overlap[direct] > 0 ) { - if ( send[direct] ) { - MPI_Isend ( ach+sbind[direct]*fsize, 1, btype[direct], - sendproc[direct], 0, comm, &sreq[direct] ); - } - if ( recv[direct] ) { - MPI_Irecv ( ach+rbind[direct]*fsize, 1, btype[direct], - recvproc[direct], 0, comm, &rreq[direct] ); - } - } - } - for ( direct = 0; direct < 2; direct++ ) { - bexchange->sreq[direct]=sreq[direct]; - bexchange->rreq[direct]=rreq[direct]; - } - return 0; -} - -int P_BExchange_end ( bexchange ) - BExchange *bexchange; -{ - MPI_Status status; - int direct, overlap[2], send[2], recv[2]; - MPI_Request sreq[2], rreq[2]; - - for ( direct = 0; direct < 2; direct++ ) { - overlap[direct] = bexchange->overlap[direct]; - send[direct] = bexchange->send[direct]; - recv[direct] = bexchange->recv[direct]; - sreq[direct] = bexchange->sreq[direct]; - rreq[direct] = bexchange->rreq[direct]; - } - - for ( direct = 0; direct < 2; direct++ ) { - if ( overlap[direct] > 0 ) { - if ( send[direct] ) { - MPI_Wait ( &sreq[direct], &status ); - } - if ( recv[direct] ) { - MPI_Wait ( &rreq[direct], &status ); - } - } - } - return 0; -} - -int P_BExchange_free ( bexchange ) - BExchange *bexchange; -{ - int direct, overlap[2]; - MPI_Datatype btype[2]; - for ( direct = 0; direct < 2; direct++ ) { - overlap[direct] = bexchange->overlap[direct]; - btype[direct] = bexchange->btype[direct]; - } - for ( direct = 0; direct < 2; direct++ ) { - if ( overlap[direct] > 0 ) { - MPI_Type_free ( &btype[direct] ); - } - } - return 0; -} - -int P_BExchange ( a, ndims, stride, blklen, bdim, overlap, datatype, - comm, period ) - void *a; - MPI_Datatype datatype; - int ndims, *stride, *blklen, bdim, overlap[2]; - int period; - MPI_Comm comm; -{ - BExchange bexchange; - int ierr; - if ( ierr = P_BExchange_init ( ndims, stride, blklen, bdim, overlap, - datatype, comm, period, &bexchange ) != 0 ) { return ierr; } - P_BExchange_start ( a, &bexchange ); - P_BExchange_end ( &bexchange ); - P_BExchange_free ( &bexchange ); - return 0; -} +#include "parlib.h" +#include "plutils.h" + +#include <stdlib.h> +#include <string.h> + +/* + * Error codes: + * 0 - success + * 1 - nonpositive number of dimensions + * 2 - wrong communicated dimension + * 3 - negative boundary width + * 4 - nonpositive dimension + * 5 - boundary width exceeds the array block length + * 6 - number of dimensions exceeds maximum value (only for MP - manual packing) + * 999 - incorrect exchange mode (only for generic calls) + */ + +int P_BExchange_init ( ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange ) + int ndims, *stride, *blklen, bdim, overlap[2]; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + BExchange *bexchange; +{ + int nproc, iproc, direct, idim, sendproc[2], recvproc[2]; + int count, strd, sbind[2], rbind[2], send[2], recv[2]; + MPI_Aint fsize, lb; + MPI_Datatype oldtype, btype[2]; + + +// Setting degenerate-success cases conditions for consistency +// including: overlap[]=0, nproc=0, iproc=MPI_UNDEFINED + for (direct = 0; direct < 2; direct++) { + bexchange->overlap[direct] = 0; + } + +// +// Check input parameters +// + if (ndims < 1) { return 1; } + if (bdim < 1 || bdim > ndims) { return 2; } + if (overlap[0] == 0 && overlap[1] == 0) { return 0; } /* success */ + for (idim = 0; idim < ndims; idim++) { + if (stride[idim] <= 0) { return 4; } + } + for (direct = 0; direct < 2; direct++) { + if (overlap[direct] < 0) { return 3; } + if (overlap[direct] > blklen[bdim - 1]) { return 5; } + } + +// +// Define the number of processors in the group and the rank +// + if (comm == MPI_COMM_NULL) { return 0; } // empty communicator + + MPI_Comm_size(comm, &nproc); + if (nproc == 0) { return 0; } /* success */ + MPI_Comm_rank(comm, &iproc); + if (iproc == MPI_UNDEFINED) { return 0; } /* the process does not belong to the group */ + sendproc[0] = (iproc == 0 ? nproc - 1 : iproc - 1); + recvproc[0] = (iproc == nproc - 1 ? 0 : iproc + 1); + sendproc[1] = recvproc[0]; + recvproc[1] = sendproc[0]; + send[0] = iproc > 0 || period; + recv[0] = iproc < nproc - 1 || period; + send[1] = recv[0]; + recv[1] = send[0]; + MPI_Type_get_extent(datatype, &lb, &fsize); +// +// Define data types for the boundaries +// + // checking if one data type will suffice ... + const int ndsize = (overlap[0] == overlap[1]) ? 1 : 2; + + for (direct = 0; direct < ndsize; direct++) { + if (overlap[direct] > 0) { + + if (bdim == 1) { + count = overlap[direct]; + } + else { + count = blklen[0]; + } + MPI_Type_contiguous(count, datatype, &btype[direct]); + oldtype = btype[direct]; + strd = stride[0]; + + for (idim = 1; idim < ndims; idim++) { + if (bdim == idim + 1) { + count = overlap[direct]; + } + else { + count = blklen[idim]; + } + MPI_Type_create_hvector(count, 1, strd * fsize, oldtype, + &btype[direct]); + + MPI_Type_free(&oldtype); + oldtype = btype[direct]; + strd = strd * stride[idim]; + } + MPI_Type_commit(&btype[direct]); + } + } + if (ndsize == 1) btype[1] = btype[0]; // using same MPI-datatype + + +// +// Determine the begining of boundaries +// + strd = 1; + for (idim = 0; idim < bdim - 1; idim++) { + strd = strd * stride[idim]; + } + sbind[0] = 0; + rbind[0] = blklen[bdim - 1] * strd; + sbind[1] = (blklen[bdim - 1] - overlap[1])*strd; + rbind[1] = -overlap[1] * strd; + + for (direct = 0; direct < 2; direct++) { + bexchange->overlap[direct] = overlap[direct]; + bexchange->send[direct] = send[direct]; + bexchange->recv[direct] = recv[direct]; + bexchange->btype[direct] = btype[direct]; + bexchange->sendproc[direct] = sendproc[direct]; + bexchange->recvproc[direct] = recvproc[direct]; + bexchange->sbind[direct] = sbind[direct]; + bexchange->rbind[direct] = rbind[direct]; + } + bexchange->comm = comm; + bexchange->fsize = fsize; + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_start ( a, bexchange ) + void *a; + BExchange *bexchange; +{ + int direct; + char *ach = (char *) a; + + for (direct = 0; direct < 2; direct++) { + + if (bexchange->overlap[direct] > 0) { + if (bexchange->send[direct]) { + MPI_Isend(ach + bexchange->sbind[direct] * bexchange->fsize, 1, bexchange->btype[direct], + bexchange->sendproc[direct], 0, bexchange->comm, + &bexchange->req[direct]); + } + else + { + bexchange->req[direct] = MPI_REQUEST_NULL; + } + if (bexchange->recv[direct]) { + MPI_Irecv(ach + bexchange->rbind[direct] * bexchange->fsize, 1, bexchange->btype[direct], + bexchange->recvproc[direct], 0, bexchange->comm, + &bexchange->req[2 + direct]); + } + else + { + bexchange->req[2 + direct] = MPI_REQUEST_NULL; + } + } + else + { + bexchange->req[direct] = MPI_REQUEST_NULL; + bexchange->req[2 + direct] = MPI_REQUEST_NULL; + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_end ( bexchange ) + BExchange *bexchange; +{ + MPI_Status status[4]; + + MPI_Waitall(4, bexchange->req, status); + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_free ( bexchange ) + BExchange *bexchange; +{ + int direct; + + // checking if one data type sufficed at init ... + const int ndsize = (bexchange->overlap[0] == bexchange->overlap[1]) ? 1 : 2; + + for (direct = 0; direct < ndsize; direct++) { + if (bexchange->overlap[direct] > 0) { + MPI_Type_free(&bexchange->btype[direct]); + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period ) + + void *a; + int ndims, *stride, *blklen, bdim, overlap[2]; + MPI_Datatype datatype; + MPI_Comm comm; + int period; +{ + BExchange bexchange; + int ierr; + if (ierr = P_BExchange_init(ndims, stride, blklen, bdim, overlap, + datatype, comm, period, &bexchange) != 0) { + return ierr; + } + P_BExchange_start(a, &bexchange); + P_BExchange_end(&bexchange); + P_BExchange_free(&bexchange); + return 0; +} +// -------------------------------------------------------------------------- // + + +// v.1.3 - persistent exchanges // +// -------------------------------------------------------------------------- // +int PST_BExchange_init ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange ) + + void *a; + int ndims, *stride, *blklen, bdim, overlap[2]; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + BExchange *bexchange; +{ + int direct; + char *ach = (char *)a; + + int ierr = P_BExchange_init(ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange); + if (ierr != 0) return ierr; + + for (direct = 0; direct < 2; direct++) { + if (bexchange->overlap[direct] > 0) { + if (bexchange->send[direct]) { + MPI_Send_init(ach + bexchange->sbind[direct] * bexchange->fsize, 1, bexchange->btype[direct], + bexchange->sendproc[direct], 0, bexchange->comm, + &bexchange->req[direct]); + } + if (bexchange->recv[direct]) { + MPI_Recv_init(ach + bexchange->rbind[direct] * bexchange->fsize, 1, bexchange->btype[direct], + bexchange->recvproc[direct], 0, bexchange->comm, + &bexchange->req[2 + direct]); + } + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_BExchange_start ( bexchange ) + BExchange *bexchange; +{ + int direct; + + for (direct = 0; direct < 2; direct++) { + if (bexchange->overlap[direct] > 0) { + + if (bexchange->send[direct]) { + MPI_Start(&bexchange->req[direct]); + } + else + { + bexchange->req[direct] = MPI_REQUEST_NULL; + } + + if (bexchange->recv[direct]) { + MPI_Start(&bexchange->req[2 + direct]); + } + else + { + bexchange->req[2 + direct] = MPI_REQUEST_NULL; + } + } + else + { + bexchange->req[direct] = MPI_REQUEST_NULL; + bexchange->req[2 + direct] = MPI_REQUEST_NULL; + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_BExchange_end(bexchange) + BExchange *bexchange; +{ + return P_BExchange_end(bexchange); +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_BExchange_free(bexchange) + BExchange *bexchange; +{ + int direct; + + P_BExchange_free(bexchange); + + for (direct = 0; direct < 2; direct++) { + if (bexchange->overlap[direct] > 0) { + if (bexchange->send[direct]) { + MPI_Request_free(&bexchange->req[direct]); + } + if (bexchange->recv[direct]) { + MPI_Request_free(&bexchange->req[2 + direct]); + } + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_BExchange( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period ) + + void *a; + int ndims, *stride, *blklen, bdim, overlap[2]; + MPI_Datatype datatype; + MPI_Comm comm; + int period; +{ + BExchange bexchange; + int ierr; + if (ierr = PST_BExchange_init(a, ndims, stride, blklen, bdim, overlap, + datatype, comm, period, &bexchange) != 0) { + return ierr; + } + PST_BExchange_start(&bexchange); + PST_BExchange_end(&bexchange); + PST_BExchange_free(&bexchange); + return 0; +} +// -------------------------------------------------------------------------- // + +// v.1.4 - manual packing // +// -------------------------------------------------------------------------- // +int P_BExchange_mp_init ( ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange ) + + int ndims, *stride, *blklen, bdim, overlap[2]; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + BExchange *bexchange; +{ + int nproc, iproc, direct, idim, sendproc[2], recvproc[2]; + int count, strd, sbind[2], rbind[2], send[2], recv[2]; + void *sbuf[2], *rbuf[2]; + int buf_id[4]; + int mdims[2][MAX_PARLIB_MP_DIMS]; + int msize[2]; + MPI_Aint lb, fsize; + MPI_Datatype btype[2]; + +// Setting degenerate-success cases conditions for consistency +// including: overlap[]=0, nproc=0, iproc=MPI_UNDEFINED + for (direct = 0; direct < 2; direct++) { + bexchange->overlap[direct] = 0; + } + +// +// Check input parameters +// + if (ndims < 1) { return 1; } + if (bdim < 1 || bdim > ndims) { return 2; } + if (overlap[0] == 0 && overlap[1] == 0) { return 0; } /* success */ + for (idim = 0; idim < ndims; idim++) { + if (stride[idim] <= 0) { return 4; } + } + for (direct = 0; direct < 2; direct++) { + if (overlap[direct] < 0) { return 3; } + if (overlap[direct] > blklen[bdim - 1]) { return 5; } + } + if (ndims > MAX_PARLIB_MP_DIMS) { return 6; } +// +// Define the number of processors in the group and the rank +// + if (comm == MPI_COMM_NULL) { return 0; } // empty communicator + + MPI_Comm_size(comm, &nproc); + if (nproc == 0) { return 0; } /* success */ + MPI_Comm_rank(comm, &iproc); + if (iproc == MPI_UNDEFINED) { return 0; } /* the process does not belong to the group */ + sendproc[0] = (iproc == 0 ? nproc - 1 : iproc - 1); + recvproc[0] = (iproc == nproc - 1 ? 0 : iproc + 1); + sendproc[1] = recvproc[0]; + recvproc[1] = sendproc[0]; + send[0] = iproc > 0 || period; + recv[0] = iproc < nproc - 1 || period; + send[1] = recv[0]; + recv[1] = send[0]; + MPI_Type_get_extent(datatype, &lb, &fsize); +// +// Define data type, message sizes and buffers for the boundaries +// + + for (direct = 0; direct < 2; direct++) { + if (overlap[direct] > 0) { + + msize[direct] = 1; + btype[direct] = datatype; + + for (idim = 0; idim < ndims; idim++) { + if (bdim == idim + 1) { + count = overlap[direct]; + } + else { + count = blklen[idim]; + } + mdims[direct][idim] = count; + msize[direct] *= count; + } + + if (send[direct]) { + sbuf[direct] = get_plbuf(msize[direct] * fsize * sizeof(char), + &buf_id[direct]); + } + else + { + sbuf[direct] = NULL; + buf_id[direct] = -1; + } + + if (recv[direct]) { + rbuf[direct] = get_plbuf(msize[direct] * fsize * sizeof(char), + &buf_id[2 + direct]); + } + else + { + rbuf[direct] = NULL; + buf_id[2 + direct] = -1; + } + } + else + { + msize[direct] = 0; + for (idim = 0; idim < ndims; idim++) { + mdims[direct][idim] = 0; + } + + sbuf[direct] = NULL; + rbuf[direct] = NULL; + + buf_id[direct] = -1; + buf_id[2 + direct] = -1; + } + } +// +// Determine the begining of boundaries +// + strd = 1; + for (idim = 0; idim < bdim - 1; idim++) { + strd = strd * stride[idim]; + } + sbind[0] = 0; + rbind[0] = blklen[bdim - 1] * strd; + sbind[1] = (blklen[bdim - 1] - overlap[1])*strd; + rbind[1] = -overlap[1] * strd; + + for (direct = 0; direct < 2; direct++) { + bexchange->overlap[direct] = overlap[direct]; + bexchange->send[direct] = send[direct]; + bexchange->recv[direct] = recv[direct]; + bexchange->btype[direct] = btype[direct]; + bexchange->sendproc[direct] = sendproc[direct]; + bexchange->recvproc[direct] = recvproc[direct]; + bexchange->sbind[direct] = sbind[direct]; + bexchange->rbind[direct] = rbind[direct]; + + bexchange->sbuf[direct] = sbuf[direct]; + bexchange->rbuf[direct] = rbuf[direct]; + bexchange->buf_id[direct] = buf_id[direct]; + bexchange->buf_id[2 + direct] = buf_id[2 + direct]; + + memcpy(bexchange->mdims[direct], mdims[direct], ndims * sizeof(int)); + bexchange->msize[direct] = msize[direct]; + } + bexchange->comm = comm; + bexchange->fsize = fsize; + + bexchange->ndims = ndims; + memcpy(bexchange->stride, stride, ndims * sizeof(int)); + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_mp_start( a, bexchange ) + void *a; + BExchange *bexchange; +{ + int direct; + char *ach = (char *)a; + + for (direct = 0; direct < 2; direct++) { + + if (bexchange->overlap[direct] > 0) { + if (bexchange->recv[direct]) { + MPI_Irecv(bexchange->rbuf[direct], bexchange->msize[direct], bexchange->btype[direct], + bexchange->recvproc[direct], 0, bexchange->comm, + &bexchange->req[2 + direct]); + } + else + { + bexchange->req[2 + direct] = MPI_REQUEST_NULL; + } + if (bexchange->send[direct]) { + copy_to_buffer( + (char*)bexchange->sbuf[direct], + ach + bexchange->sbind[direct] * bexchange->fsize, + bexchange->ndims, bexchange->mdims[direct], bexchange->stride, bexchange->fsize); + + MPI_Isend(bexchange->sbuf[direct], bexchange->msize[direct], bexchange->btype[direct], + bexchange->sendproc[direct], 0, bexchange->comm, + &bexchange->req[direct]); + } + else + { + bexchange->req[direct] = MPI_REQUEST_NULL; + } + } + else + { + bexchange->req[direct] = MPI_REQUEST_NULL; + bexchange->req[2 + direct] = MPI_REQUEST_NULL; + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_mp_end ( a, bexchange ) + void *a; + BExchange *bexchange; +{ + MPI_Status status[4]; + + int direct; + char *ach = (char *)a; + + MPI_Waitall(2, &bexchange->req[2], &status[2]); + for (direct = 0; direct < 2; direct++) { + if (bexchange->overlap[direct] > 0) { + if (bexchange->recv[direct]) { + copy_from_buffer(ach + bexchange->rbind[direct] * bexchange->fsize, + (char*)bexchange->rbuf[direct], + bexchange->ndims, bexchange->mdims[direct], bexchange->stride, bexchange->fsize); + } + } + } + + MPI_Waitall(2, bexchange->req, status); + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_mp_free ( bexchange ) + BExchange *bexchange; +{ + int direct; + + for (direct = 0; direct < 2; direct++) { + if (bexchange->overlap[direct] > 0) { + if (bexchange->send[direct]) { + free_plbuf(bexchange->sbuf[direct], + bexchange->buf_id[direct]); + } + if (bexchange->recv[direct]) { + free_plbuf(bexchange->rbuf[direct], + bexchange->buf_id[2 + direct]); + } + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_mp(a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period) + + void *a; + int ndims, *stride, *blklen, bdim, overlap[2]; + MPI_Datatype datatype; + MPI_Comm comm; + int period; +{ + BExchange bexchange; + int ierr; + if (ierr = P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, + datatype, comm, period, &bexchange) != 0) { + return ierr; + } + P_BExchange_mp_start(a, &bexchange); + P_BExchange_mp_end(a, &bexchange); + P_BExchange_mp_free(&bexchange); + return 0; +} +// -------------------------------------------------------------------------- // + + +// v.1.5 - persistent exchanges for manual packing // +// -------------------------------------------------------------------------- // +int PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange) + + int ndims, *stride, *blklen, bdim, overlap[2]; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + BExchange *bexchange; +{ + int direct; + + int ierr = P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange); + if (ierr != 0) return ierr; + + for (direct = 0; direct < 2; direct++) { + if (bexchange->overlap[direct] > 0) { + if (bexchange->send[direct]) { + MPI_Send_init(bexchange->sbuf[direct], bexchange->msize[direct], bexchange->btype[direct], + bexchange->sendproc[direct], 0, bexchange->comm, + &bexchange->req[direct]); + } + if (bexchange->recv[direct]) { + MPI_Recv_init(bexchange->rbuf[direct], bexchange->msize[direct], bexchange->btype[direct], + bexchange->recvproc[direct], 0, bexchange->comm, + &bexchange->req[2 + direct]); + } + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_BExchange_mp_start( a, bexchange ) + void *a; + BExchange *bexchange; +{ + int direct; + char *ach = (char *)a; + + for (direct = 0; direct < 2; direct++) { + if (bexchange->overlap[direct] > 0) { + + if (bexchange->recv[direct]) { + MPI_Start(&bexchange->req[2 + direct]); + } + else + bexchange->req[2 + direct] = MPI_REQUEST_NULL; + + if (bexchange->send[direct]) { + copy_to_buffer((char*)bexchange->sbuf[direct], + ach + bexchange->sbind[direct] * bexchange->fsize, + bexchange->ndims, bexchange->mdims[direct], bexchange->stride, bexchange->fsize); + + MPI_Start(&bexchange->req[direct]); + } + else + bexchange->req[direct] = MPI_REQUEST_NULL; + } + else + { + bexchange->req[direct] = MPI_REQUEST_NULL; + bexchange->req[2 + direct] = MPI_REQUEST_NULL; + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_BExchange_mp_end( a, bexchange ) + void* a; + BExchange *bexchange; +{ + return P_BExchange_mp_end(a, bexchange); +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_BExchange_mp_free( bexchange ) + BExchange *bexchange; +{ + int direct; + + P_BExchange_mp_free(bexchange); + + for (direct = 0; direct < 2; direct++) { + if (bexchange->overlap[direct] > 0) { + if (bexchange->send[direct]) { + MPI_Request_free(&bexchange->req[direct]); + } + if (bexchange->recv[direct]) { + MPI_Request_free(&bexchange->req[2 + direct]); + } + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_BExchange_mp( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period ) + + void *a; + MPI_Datatype datatype; + int ndims, *stride, *blklen, bdim, overlap[2]; + int period; + MPI_Comm comm; +{ + BExchange bexchange; + int ierr; + if (ierr = PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, + datatype, comm, period, &bexchange) != 0) { + return ierr; + } + PST_BExchange_mp_start(a, &bexchange); + PST_BExchange_mp_end(a, &bexchange); + PST_BExchange_mp_free(&bexchange); + return 0; +} +// -------------------------------------------------------------------------- // + +// v.1.95 - choice subroutines // +// -------------------------------------------------------------------------- // +int P_BExchange_opt_init ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, exch_mode ) + + void *a; + int ndims, *stride, *blklen, bdim, overlap[2]; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + BExchange *bexchange; + int exch_mode; +{ + if (exch_mode == IS_MPI_TYPED) { + return P_BExchange_init(ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange); + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + return P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange); + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + return PST_BExchange_init(a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange); + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + return PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange); + } + + return 999; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_opt_start ( a, bexchange, exch_mode ) + void *a; + BExchange *bexchange; + int exch_mode; +{ + if (exch_mode == IS_MPI_TYPED) { + return P_BExchange_start(a, bexchange); + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + return P_BExchange_mp_start(a, bexchange); + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + return PST_BExchange_start(bexchange); + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + return PST_BExchange_mp_start(a, bexchange); + } + + return 999; +} +// -------------------------------------------------------------------------- // + + +// -------------------------------------------------------------------------- // +int P_BExchange_opt_end ( a, bexchange, exch_mode ) + void *a; + BExchange *bexchange; + int exch_mode; +{ + if (exch_mode == IS_MPI_TYPED) { + return P_BExchange_end(bexchange); + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + return P_BExchange_mp_end(a, bexchange); + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + return PST_BExchange_end(bexchange); + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + return PST_BExchange_mp_end(a, bexchange); + } + + return 999; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_opt_free ( bexchange, exch_mode ) + BExchange *bexchange; + int exch_mode; +{ + if (exch_mode == IS_MPI_TYPED) { + return P_BExchange_free(bexchange); + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + return P_BExchange_mp_free(bexchange); + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + return PST_BExchange_free(bexchange); + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + return PST_BExchange_mp_free(bexchange); + } + + return 999; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_BExchange_opt ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, exch_mode ) + + void *a; + int ndims, *stride, *blklen, bdim, overlap[2]; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + int exch_mode; +{ + BExchange bexchange; + int ierr; + + if (exch_mode == IS_MPI_TYPED) { + if (ierr = P_BExchange_init(ndims, stride, blklen, bdim, overlap, datatype, + comm, period, &bexchange) != 0) { + return ierr; + } + + P_BExchange_start(a, &bexchange); + P_BExchange_end(&bexchange); + P_BExchange_free(&bexchange); + return 0; + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + if (ierr = P_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype, + comm, period, &bexchange) != 0) { + return ierr; + } + + P_BExchange_mp_start(a, &bexchange); + P_BExchange_mp_end(a, &bexchange); + P_BExchange_mp_free(&bexchange); + return 0; + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + if (ierr = PST_BExchange_init(a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, &bexchange) != 0) { + return ierr; + } + + PST_BExchange_start(&bexchange); + PST_BExchange_end(&bexchange); + PST_BExchange_free(&bexchange); + return 0; + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + if (ierr = PST_BExchange_mp_init(ndims, stride, blklen, bdim, overlap, datatype, + comm, period, &bexchange) != 0) { + return ierr; + } + PST_BExchange_mp_start(a, &bexchange); + PST_BExchange_mp_end(a, &bexchange); + PST_BExchange_mp_free(&bexchange); + return 0; + } + + return 999; +} +// -------------------------------------------------------------------------- // \ No newline at end of file diff --git a/ParLib.src/bexchangef.c b/ParLib.src/bexchangef.c index 5acbf5e24914b4f0a56e837ce54e09219fa10918..9129931bd53566c30bc68fb2175bec98a690e8c3 100644 --- a/ParLib.src/bexchangef.c +++ b/ParLib.src/bexchangef.c @@ -1,6 +1,7 @@ #include <stdlib.h> #include "parlib.h" +// -------------------------------------------------------------------------- // #ifdef FORTRANUNDERSCORE void p_bexchange_init_ ( ndims, stride, blklen, bdim, overlap, datatype, comm, period, bexchange, ierr ) @@ -12,14 +13,14 @@ void p_bexchange_init ( ndims, stride, blklen, bdim, overlap, datatype, comm, period, bexchange, ierr ) #endif - MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap, - *period, *ierr, *comm; + MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap, + *period, *ierr, *comm; BExchange **bexchange; { - *bexchange = (BExchange *) malloc ( sizeof (BExchange) ); - *ierr = P_BExchange_init ( (int)*ndims, (int *)stride, (int *)blklen, - (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), - MPI_Comm_f2c(*comm), (int)*period, *bexchange ); + *bexchange = (BExchange *) malloc ( sizeof (BExchange) ); + *ierr = P_BExchange_init ( (int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period, *bexchange ); } #ifdef FORTRANUNDERSCORE @@ -30,11 +31,11 @@ void p_bexchange_start__ ( a, bexchange, ierr ) void p_bexchange_start ( a, bexchange, ierr ) #endif - void *a; - BExchange **bexchange; + void *a; + BExchange **bexchange; MPI_Fint *ierr; { - *ierr = P_BExchange_start ( a, *bexchange ); + *ierr = P_BExchange_start(a, *bexchange); } #ifdef FORTRANUNDERSCORE @@ -45,10 +46,10 @@ void p_bexchange_end__ ( bexchange, ierr ) void p_bexchange_end ( bexchange, ierr ) #endif - BExchange **bexchange; - MPI_Fint *ierr; + BExchange **bexchange; + MPI_Fint *ierr; { - *ierr = P_BExchange_end ( *bexchange ); + *ierr = P_BExchange_end(*bexchange); } #ifdef FORTRANUNDERSCORE @@ -58,11 +59,12 @@ void p_bexchange_free__ ( bexchange, ierr ) #else void p_bexchange_free ( bexchange, ierr ) #endif - BExchange **bexchange; - MPI_Fint *ierr; + + BExchange **bexchange; + MPI_Fint *ierr; { - P_BExchange_free ( *bexchange ); - free ( *bexchange ); + P_BExchange_free(*bexchange); + free(*bexchange); } #ifdef FORTRANUNDERSCORE @@ -75,12 +77,487 @@ void p_bexchange__ ( a, ndims, stride, blklen, bdim, overlap, datatype, void p_bexchange ( a, ndims, stride, blklen, bdim, overlap, datatype, comm, period, ierr ) #endif + + void *a; + MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr; + MPI_Fint *datatype; + MPI_Fint *comm; +{ + *ierr = P_BExchange(a, (int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period); +} +// -------------------------------------------------------------------------- // + +// v.1.3 - persistent exchanges // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void pst_bexchange_init_ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange_init__ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, ierr ) +#else +void pst_bexchange_init(a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, ierr) +#endif + + void *a; + MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap, + *period, *ierr, *comm; + BExchange **bexchange; +{ + *bexchange = (BExchange *) malloc ( sizeof (BExchange) ); + *ierr = PST_BExchange_init ( a, (int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period, *bexchange ); +} + +#ifdef FORTRANUNDERSCORE +void pst_bexchange_start_ ( bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange_start__ ( bexchange, ierr ) +#else +void pst_bexchange_start ( bexchange, ierr ) +#endif + + BExchange **bexchange; + MPI_Fint *ierr; +{ + *ierr = PST_BExchange_start(*bexchange); +} + +#ifdef FORTRANUNDERSCORE +void pst_bexchange_end_ ( bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange_end__ ( bexchange, ierr ) +#else +void pst_bexchange_end ( bexchange, ierr ) +#endif + + BExchange **bexchange; + MPI_Fint *ierr; +{ + *ierr = PST_BExchange_end(*bexchange); +} + +#ifdef FORTRANUNDERSCORE +void pst_bexchange_free_ ( bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange_free__ ( bexchange, ierr ) +#else +void pst_bexchange_free ( bexchange, ierr ) +#endif + + BExchange **bexchange; + MPI_Fint *ierr; +{ + PST_BExchange_free(*bexchange); + free(*bexchange); +} + +#ifdef FORTRANUNDERSCORE +void pst_bexchange_ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange__ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, ierr ) +#else +void pst_bexchange ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, ierr ) +#endif + + void *a; + MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr; + MPI_Fint *datatype; + MPI_Fint *comm; +{ + *ierr = PST_BExchange(a, (int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period); +} +// -------------------------------------------------------------------------- // + +// v.1.4 - manual packing // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void p_bexchange_mp_init_ ( ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_mp_init__ ( ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, ierr ) +#else +void p_bexchange_mp_init ( ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, ierr ) +#endif + + MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap, + *period, *ierr, *comm; + BExchange **bexchange; +{ + *bexchange = (BExchange *)malloc(sizeof (BExchange)); + *ierr = P_BExchange_mp_init((int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period, *bexchange); +} + +#ifdef FORTRANUNDERSCORE +void p_bexchange_mp_start_ ( a, bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_mp_start__ ( a, bexchange, ierr ) +#else +void p_bexchange_mp_start ( a, bexchange, ierr ) +#endif + + void *a; + BExchange **bexchange; + MPI_Fint *ierr; +{ + *ierr = P_BExchange_mp_start(a, *bexchange); +} + +#ifdef FORTRANUNDERSCORE +void p_bexchange_mp_end_ ( a, bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_mp_end__ ( a, bexchange, ierr ) +#else +void p_bexchange_mp_end(a, bexchange, ierr) +#endif + + void *a; + BExchange **bexchange; + MPI_Fint *ierr; +{ + *ierr = P_BExchange_mp_end(a, *bexchange); +} + +#ifdef FORTRANUNDERSCORE +void p_bexchange_mp_free_ ( bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_mp_free__ ( bexchange, ierr ) +#else +void p_bexchange_mp_free ( bexchange, ierr ) +#endif + + BExchange **bexchange; + MPI_Fint *ierr; +{ + P_BExchange_mp_free(*bexchange); + free(*bexchange); +} + +#ifdef FORTRANUNDERSCORE +void p_bexchange_mp_ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_mp__ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, ierr ) +#else +void p_bexchange_mp ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, ierr ) +#endif + + void *a; + MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr; + MPI_Fint *datatype; + MPI_Fint *comm; +{ + *ierr = P_BExchange_mp(a, (int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period); +} +// -------------------------------------------------------------------------- // + +// v.1.4 - persistent exchanges for manual packing // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void pst_bexchange_mp_init_ ( ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange_mp_init__ ( ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, ierr ) +#else +void pst_bexchange_mp_init ( ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, ierr ) +#endif + + MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap, + *period, *ierr, *comm; + BExchange **bexchange; +{ + *bexchange = (BExchange *)malloc(sizeof (BExchange)); + *ierr = PST_BExchange_mp_init((int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period, *bexchange); +} + +#ifdef FORTRANUNDERSCORE +void pst_bexchange_mp_start_ ( a, bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange_mp_start__ ( a, bexchange, ierr ) +#else +void pst_bexchange_mp_start ( a, bexchange, ierr ) +#endif + + void *a; + BExchange **bexchange; + MPI_Fint *ierr; +{ + *ierr = PST_BExchange_mp_start(a, *bexchange); +} + +#ifdef FORTRANUNDERSCORE +void pst_bexchange_mp_end_ ( a, bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange_mp_end__ ( a, bexchange, ierr ) +#else +void pst_bexchange_mp_end(a, bexchange, ierr) +#endif + + void *a; + BExchange **bexchange; + MPI_Fint *ierr; +{ + *ierr = PST_BExchange_mp_end(a, *bexchange); +} + +#ifdef FORTRANUNDERSCORE +void pst_bexchange_mp_free_ ( bexchange, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange_mp_free__ ( bexchange, ierr ) +#else +void pst_bexchange_mp_free ( bexchange, ierr ) +#endif + + BExchange **bexchange; + MPI_Fint *ierr; +{ + PST_BExchange_mp_free(*bexchange); + free(*bexchange); +} + +#ifdef FORTRANUNDERSCORE +void pst_bexchange_mp_ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_bexchange_mp__ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, ierr ) +#else +void pst_bexchange_mp ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, ierr ) +#endif + + void *a; + MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr; + MPI_Fint *datatype; + MPI_Fint *comm; +{ + *ierr = PST_BExchange_mp(a, (int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period); +} +// -------------------------------------------------------------------------- // + +// v.1.95 - choice subroutines // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void p_bexchange_opt_init_ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_opt_init__ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, exch_mode, ierr ) +#else +void p_bexchange_opt_init ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, bexchange, exch_mode, ierr ) +#endif + + void *a; + MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap, + *period, *ierr, *comm, *exch_mode; + BExchange **bexchange; +{ + *bexchange = (BExchange *) malloc ( sizeof (BExchange) ); + *ierr = P_BExchange_opt_init ( a, (int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period, *bexchange, (int)*exch_mode ); +} + +#ifdef FORTRANUNDERSCORE +void p_bexchange_opt_start_ ( a, bexchange, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_opt_start__ ( a, bexchange, exch_mode, ierr ) +#else +void p_bexchange_opt_start ( a, bexchange, exch_mode, ierr ) +#endif + + void *a; + BExchange **bexchange; + MPI_Fint *ierr, *exch_mode; +{ + *ierr = P_BExchange_opt_start(a, *bexchange, (int)*exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void p_bexchange_opt_end_ ( a, bexchange, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_opt_end__ ( a, bexchange, exch_mode, ierr ) +#else +void p_bexchange_opt_end ( a, bexchange, exch_mode, ierr ) +#endif + + void *a; + BExchange **bexchange; + MPI_Fint *ierr, *exch_mode; +{ + *ierr = P_BExchange_opt_end(a, *bexchange, (int)*exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void p_bexchange_opt_free_ ( bexchange, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_opt_free__ ( bexchange, exch_mode, ierr ) +#else +void p_bexchange_opt_free ( bexchange, exch_mode, ierr ) +#endif + + BExchange **bexchange; + MPI_Fint *ierr, *exch_mode; +{ + P_BExchange_opt_free(*bexchange, (int)*exch_mode); + free(*bexchange); +} + +#ifdef FORTRANUNDERSCORE +void p_bexchange_opt_ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_bexchange_opt__ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, exch_mode, ierr ) +#else +void p_bexchange_opt ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, exch_mode, ierr ) +#endif + void *a; - int *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr; - MPI_Datatype *datatype; - MPI_Comm *comm; + MPI_Fint *ndims, *stride, *blklen, *bdim, *overlap, *period, *ierr; + MPI_Fint *datatype; + MPI_Fint *comm; + MPI_Fint *exch_mode; { - *ierr = P_BExchange ( a, (int)*ndims, (int *)stride, (int *)blklen, + *ierr = P_BExchange_opt(a, (int)*ndims, (int *)stride, (int *)blklen, + (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), + MPI_Comm_f2c(*comm), (int)*period, (int)*exch_mode); +} +// -------------------------------------------------------------------------- // + + +// v.2.0 - regular communications [removed only on correct program exit] // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void reg_bexchange_ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, exch_id, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void reg_bexchange__ ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, exch_id, exch_mode, ierr ) +#else +void reg_bexchange ( a, ndims, stride, blklen, bdim, overlap, datatype, + comm, period, exch_id, exch_mode, ierr ) +#endif + + void *a; + MPI_Fint *datatype, *ndims, *stride, *blklen, *bdim, *overlap, + *period, *ierr, *comm, *exch_id, *exch_mode; +{ + BExchange *bexchange; + bexchange = (BExchange *) malloc ( sizeof (BExchange) ); + + *ierr = P_BExchange_opt_init ( a, (int)*ndims, (int *)stride, (int *)blklen, (int)*bdim, (int *)overlap, MPI_Type_f2c(*datatype), - MPI_Comm_f2c(*comm), (int)*period ); + MPI_Comm_f2c(*comm), (int)*period, bexchange, (int)*exch_mode ); + if ((int)*ierr != 0) return; + + *exch_id = save_bexch_handle(bexchange, (int)*exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void start_bexchange_ ( a, exch_id, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void start_bexchange__ ( a, exch_id, ierr ) +#else +void start_bexchange ( a, exch_id, ierr ) +#endif + + void *a; + MPI_Fint *ierr, *exch_id; +{ + int exch_mode; + BExchange *bexchange; + + get_bexch_handle(&bexchange, &exch_mode, (int)*exch_id); + + *ierr = P_BExchange_opt_start(a, bexchange, exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void end_bexchange_ ( a, exch_id, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void end_bexchange__ ( a, exch_id, ierr ) +#else +void end_bexchange ( a, exch_id, ierr ) +#endif + + void *a; + MPI_Fint *ierr, *exch_id; +{ + int exch_mode; + BExchange *bexchange; + + get_bexch_handle(&bexchange, &exch_mode, (int)*exch_id); + + *ierr = P_BExchange_opt_end(a, bexchange, exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void run_bexchange_ ( a, exch_id, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void run_bexchange__ ( a, exch_id, ierr ) +#else +void run_bexchange ( a, exch_id, ierr ) +#endif + + void *a; + MPI_Fint *ierr, *exch_id; +{ + int exch_mode; + BExchange *bexchange; + + get_bexch_handle(&bexchange, &exch_mode, (int)*exch_id); + + *ierr = P_BExchange_opt_start(a, bexchange, exch_mode); + if ((int)*ierr != 0) return; + + *ierr = P_BExchange_opt_end(a, bexchange, exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void unreg_bexchange_ ( exch_id, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void unreg_bexchange__ ( exch_id, ierr ) +#else +void unreg_bexchange ( exch_id, ierr ) +#endif + + MPI_Fint *ierr, *exch_id; +{ + int exch_mode; + BExchange *bexchange; + + get_bexch_handle(&bexchange, &exch_mode, (int)*exch_id); + + *ierr = P_BExchange_opt_free(bexchange, exch_mode); + if ((int)*ierr != 0) return; + free(bexchange); + + remove_bexch_handle((int)*exch_id); } +// -------------------------------------------------------------------------- // diff --git a/ParLib.src/parlib.c b/ParLib.src/parlib.c new file mode 100644 index 0000000000000000000000000000000000000000..bf06244ba0b241f258c915c01e23b56378703a0e --- /dev/null +++ b/ParLib.src/parlib.c @@ -0,0 +1,116 @@ +#include "parlib.h" +#include "plutils.h" +#include <stdlib.h> + + +// BExchange list [definition] +// -------------------------------------------------------------------------- // +BExchange* bexch_hlist[MAX_BEXCH_HANDLES]; // list of BExchange handles +int bexch_hmode[MAX_BEXCH_HANDLES]; // Bexchange mode for each handle +int bexch_hptr; // pointer to available BExchange handle +// -------------------------------------------------------------------------- // + +// Transpose list [definition] +// -------------------------------------------------------------------------- // +Transposition* transp_hlist[MAX_TRANSP_HANDLES]; // list of Transpose handles +int transp_hmode[MAX_TRANSP_HANDLES]; // Transpose mode for each handle +int transp_hptr; // pointer to available Transpose handle +// -------------------------------------------------------------------------- // + + +// ParLib init-deinit +// -------------------------------------------------------------------------- // +void ParLib_init() +{ + init_plbuf(); // pl-buffers + + bexch_hptr = 0; // BExchange list + transp_hptr = 0; // Transpose list +} + +void ParLib_deinit() +{ + int k, exch_mode; + + // BExchange list + BExchange *bexchange; + for (k = 0; k < bexch_hptr; k++) { + get_bexch_handle(&bexchange, &exch_mode, k); + + P_BExchange_opt_free(bexchange, exch_mode); + free(bexchange); + } + bexch_hptr = 0; + + // Transp list + Transposition *transp; + for (k = 0; k < transp_hptr; k++) { + get_transp_handle(&transp, &exch_mode, k); + + P_Transpose_opt_free(transp, exch_mode); + free(transp); + } + transp_hptr = 0; + + deinit_plbuf(); // pl-buffers +} +// -------------------------------------------------------------------------- // + + +// BExchange handle list interface +// -------------------------------------------------------------------------- // +int save_bexch_handle(BExchange *bexchange, int exch_mode) +{ + // saving handle + bexch_hlist[bexch_hptr] = bexchange; + bexch_hmode[bexch_hptr] = exch_mode; + bexch_hptr++; + + return bexch_hptr - 1; +} + +void get_bexch_handle(BExchange** bexchange, int* exch_mode, int exch_id) +{ + *bexchange = bexch_hlist[exch_id]; + *exch_mode = bexch_hmode[exch_id]; +} + +void remove_bexch_handle(int exch_id) +{ + int k; + for (k = exch_id; k < bexch_hptr - 1; k++) { + bexch_hlist[k] = bexch_hlist[k + 1]; + bexch_hmode[k] = bexch_hmode[k + 1]; + } + if (bexch_hptr > 0) bexch_hptr--; +} +// -------------------------------------------------------------------------- // + +// Transposition handle list interface +// -------------------------------------------------------------------------- // +int save_transp_handle(Transposition *transp, int exch_mode) +{ + // saving handle + transp_hlist[transp_hptr] = transp; + transp_hmode[transp_hptr] = exch_mode; + transp_hptr++; + + return transp_hptr - 1; +} + +void get_transp_handle(Transposition** transp, int* exch_mode, int exch_id) +{ + *transp = transp_hlist[exch_id]; + *exch_mode = transp_hmode[exch_id]; +} + +void remove_transp_handle(int exch_id) +{ + int k; + for (k = exch_id; k < transp_hptr - 1; k++) { + transp_hlist[k] = transp_hlist[k + 1]; + transp_hmode[k] = transp_hmode[k + 1]; + } + if (transp_hptr > 0) transp_hptr--; +} +// -------------------------------------------------------------------------- // diff --git a/ParLib.src/parlib.h b/ParLib.src/parlib.h index 52f27c30eb0a03ccf373a62aa62e60081cefd98a..f02678a5956a2149c40668482a82fd3ccca593e9 100644 --- a/ParLib.src/parlib.h +++ b/ParLib.src/parlib.h @@ -1,36 +1,217 @@ -#ifndef _MPI_INCLUDE -#include <mpi.h> -#endif - -typedef struct BExchange { - int overlap[2], send[2], recv[2], btype[2]; - int sendproc[2], recvproc[2], sbind[2], rbind[2]; - MPI_Comm comm; - MPI_Request sreq[2], rreq[2]; - MPI_Aint fsize; -} BExchange; - -typedef struct Transposition { - MPI_Datatype *stype, *rtype; - int *sbeg, *rbeg; - MPI_Comm comm; - int nproc, iproc; - MPI_Request *sreq, *rreq; - MPI_Aint fsize; -} Transposition; - -int P_BExchange_init ( int, int*, int*, int, int*, MPI_Datatype, - MPI_Comm, int, BExchange* ); -int P_BExchange_start ( void*, BExchange* ); -int P_BExchange_end ( BExchange* ); -int P_BExchange_free ( BExchange* ); -int P_BExchange ( void*, int, int*, int*, int, int*, MPI_Datatype, - MPI_Comm, int ); - -int P_Transpose_init ( int , int, int*, int, int*, int*, int*, int*, - MPI_Datatype, MPI_Comm, int, Transposition* ); -int P_Transpose_start ( void*, void*, Transposition* ); -int P_Transpose_end ( Transposition* ); -int P_Transpose_free ( Transposition* ); -int P_Transpose ( int, void*, int, int*, void*, int, int*, int*, int*, - int*, MPI_Datatype, MPI_Comm, int ); +#ifndef _MPI_INCLUDE +#include <mpi.h> +#endif + +#include "plutils.h" + + +#define IS_MPI_TYPED 0 +#define IS_MPI_MANUAL_PACK 1 +#define IS_MPI_TYPED_PERSISTENT 2 +#define IS_MPI_MANUAL_PACK_PERSISTENT 3 + +// ParLib v1.8 initialization +// -------------------------------------------------------------------------- // +void ParLib_init(); +void ParLib_deinit(); +// -------------------------- // + +typedef struct BExchange { + int overlap[2], send[2], recv[2]; + int sendproc[2], recvproc[2], sbind[2], rbind[2]; + MPI_Datatype btype[2]; + + MPI_Comm comm; + MPI_Request req[4]; + MPI_Aint fsize; + + // manual packing data + // --------------------------------- // + int ndims; + int stride[MAX_PARLIB_MP_DIMS]; + + int msize[2]; + int mdims[2][MAX_PARLIB_MP_DIMS]; + + void *sbuf[2], *rbuf[2]; + // --------------------------------- // + + // memory management + // --------------------------------- // + int buf_id[4]; + // --------------------------------- // +} BExchange; + +// BExchange list [declaration] +// -------------------------------------------------------------------------- // +#define MAX_BEXCH_HANDLES 1024 + +extern BExchange* bexch_hlist[MAX_BEXCH_HANDLES]; // list of BExchange handles +extern int bexch_hmode[MAX_BEXCH_HANDLES]; // Bexchange mode for each handle +extern int bexch_hptr; // pointer to available BExchange handle +// -------------------------------------------------------------------------- // +// BExchange handle list interface +// -------------------------------------------------------------------------- // +int save_bexch_handle(BExchange *bexchange, int exch_mode); +void get_bexch_handle(BExchange** bexchange, int* exch_mode, int exch_id); +void remove_bexch_handle(int exch_id); +// -------------------------------------------------------------------------- // + + +// -------------------------------------------------------------------------- // +int P_BExchange_init ( int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int, BExchange* ); +int P_BExchange_start ( void*, BExchange* ); +int P_BExchange_end ( BExchange* ); +int P_BExchange_free ( BExchange* ); +int P_BExchange ( void*, int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int ); +// -------------------------------------------------------------------------- // + +// v.1.3 - persistent exchanges // +// -------------------------------------------------------------------------- // +int PST_BExchange_init(void*, int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int, BExchange*); +int PST_BExchange_start(BExchange*); +int PST_BExchange_end(BExchange*); +int PST_BExchange_free(BExchange*); +int PST_BExchange(void*, int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int); +// -------------------------------------------------------------------------- // + +// v.1.4 - manual packing // +// -------------------------------------------------------------------------- // +int P_BExchange_mp_init(int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int, BExchange*); +int P_BExchange_mp_start(void*, BExchange*); +int P_BExchange_mp_end(void*, BExchange*); +int P_BExchange_mp_free(BExchange*); +int P_BExchange_mp(void*, int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int); +// -------------------------------------------------------------------------- // + +// v.1.4 - persistent exchanges for manual packing // +// -------------------------------------------------------------------------- // +int PST_BExchange_mp_init(int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int, BExchange*); +int PST_BExchange_mp_start(void*, BExchange*); +int PST_BExchange_mp_end(void*, BExchange*); +int PST_BExchange_mp_free(BExchange*); +int PST_BExchange_mp(void*, int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int); +// -------------------------------------------------------------------------- // + + +// v.1.95 - choice subroutines // +// -------------------------------------------------------------------------- // +int P_BExchange_opt_init(void*, int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int, BExchange*, int); +int P_BExchange_opt_start(void*, BExchange*, int); +int P_BExchange_opt_end(void*, BExchange*, int); +int P_BExchange_opt_free(BExchange*, int); +int P_BExchange_opt(void*, int, int*, int*, int, int*, MPI_Datatype, + MPI_Comm, int, int); +// -------------------------------------------------------------------------- // + + +typedef struct Transposition { + void *psrc, *pdest; // used only in persistent-type communications + + MPI_Datatype *stype, *rtype; + int *sbeg, *rbeg; + MPI_Comm comm; + + int nproc, iproc; + MPI_Request *req; + MPI_Aint fsize; + + // manual packing data + // --------------------------------- // + int ndims; + int sstride[MAX_PARLIB_MP_DIMS], rstride[MAX_PARLIB_MP_DIMS]; + + int **sdims, **rdims; + int *ssize, *rsize; + + void **sbuf, **rbuf; // [nproc] buffers + // --------------------------------- // + + // memory management + // --------------------------------- // + int *mem_dims; // [nproc * ndims] pool + void *mem_sbuf, *mem_rbuf; // [nproc * sum(msize)] pools + + int buf_id[9]; + // --------------------------------- // +} Transposition; + +// Transpose list [declaration] +// -------------------------------------------------------------------------- // +#define MAX_TRANSP_HANDLES 128 + +extern Transposition* transp_hlist[MAX_TRANSP_HANDLES]; // list of Transpose handles +extern int transp_hmode[MAX_TRANSP_HANDLES]; // Transpose mode for each handle +extern int transp_hptr; // pointer to available Transpose handle +// -------------------------------------------------------------------------- // +// Transposition handle list interface +// -------------------------------------------------------------------------- // +int save_transp_handle(Transposition *transp, int exch_mode); +void get_transp_handle(Transposition** transp, int* exch_mode, int exch_id); +void remove_transp_handle(int exch_id); +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_init ( int , int, int*, int, int*, int*, int*, int*, + MPI_Datatype, MPI_Comm, int, Transposition* ); +int P_Transpose_start ( void*, void*, Transposition* ); +int P_Transpose_end ( Transposition* ); +int P_Transpose_free ( Transposition* ); +int P_Transpose ( int, void*, int, int*, void*, int, int*, int*, int*, + int*, MPI_Datatype, MPI_Comm, int ); +// -------------------------------------------------------------------------- // + +// v.1.95 - persistent exchanges // +// -------------------------------------------------------------------------- // +int PST_Transpose_init(int, void*, int, int*, void*, int, int*, int*, int*, int*, + MPI_Datatype, MPI_Comm, int, Transposition*); +int PST_Transpose_start(Transposition*); +int PST_Transpose_end(Transposition*); +int PST_Transpose_free(Transposition*); +int PST_Transpose(int, void*, int, int*, void*, int, int*, int*, int*, + int*, MPI_Datatype, MPI_Comm, int); +// -------------------------------------------------------------------------- // + +// v.1.7 - manual packing // +// -------------------------------------------------------------------------- // +int P_Transpose_mp_init(int, int, int*, int, int*, int*, int*, int*, + MPI_Datatype, MPI_Comm, int, Transposition*); +int P_Transpose_mp_start(void*, void*, Transposition*); +int P_Transpose_mp_end(void*, void*, Transposition*); +int P_Transpose_mp_free(Transposition*); +int P_Transpose_mp(int, void*, int, int*, void*, int, int*, int*, int*, + int*, MPI_Datatype, MPI_Comm, int); +// -------------------------------------------------------------------------- // + +// v.1.95 - persistent exchanges for manual packing // +// -------------------------------------------------------------------------- // +int PST_Transpose_mp_init(int, void*, int, int*, void*, int, int*, int*, int*, int*, + MPI_Datatype, MPI_Comm, int, Transposition*); +int PST_Transpose_mp_start(void*, void*, Transposition*); +int PST_Transpose_mp_end(void*, void*, Transposition*); +int PST_Transpose_mp_free(Transposition*); +int PST_Transpose_mp(int, void*, int, int*, void*, int, int*, int*, int*, + int*, MPI_Datatype, MPI_Comm, int); +// -------------------------------------------------------------------------- // + + +// v.1.95 - choice subroutines // +// -------------------------------------------------------------------------- // +int P_Transpose_opt_init(int, void*, int, int*, void*, int, int*, int*, int*, int*, + MPI_Datatype, MPI_Comm, int, Transposition*, int); +int P_Transpose_opt_start(void*, void*, Transposition*, int); +int P_Transpose_opt_end(void*, void*, Transposition*, int); +int P_Transpose_opt_free(Transposition*, int); +int P_Transpose_opt(int, void*, int, int*, void*, int, int*, int*, int*, + int*, MPI_Datatype, MPI_Comm, int, int); +// -------------------------------------------------------------------------- // + diff --git a/ParLib.src/parlibf.c b/ParLib.src/parlibf.c index d6b42ea0d92ccf16f19fdb3eadcb3ed2201068a2..620ad39b539c8672d0ac5b5e59bd07c4020a3629 100644 --- a/ParLib.src/parlibf.c +++ b/ParLib.src/parlibf.c @@ -1,6 +1,7 @@ +#include <stdlib.h> #include "parlib.h" -// Additionall calls to make this version 1.1 compliant with version 2.1 +// ParLib v1.8 initialization // -------------------------------------------------------------------------- // #ifdef FORTRANUNDERSCORE void parlib_init_() @@ -10,6 +11,7 @@ void parlib_init__() void parlib_init() #endif { + ParLib_init(); } #ifdef FORTRANUNDERSCORE @@ -20,5 +22,6 @@ void parlib_deinit__() void parlib_deinit() #endif { + ParLib_deinit(); } // -------------------------------------------------------------------------- // diff --git a/ParLib.src/parlibf.h b/ParLib.src/parlibf.h index 5305b1ab4f5bd2592f8f9674194713e46e14fb8b..f1be3578118265310db6692cbd9f5ec642a2f41d 100644 --- a/ParLib.src/parlibf.h +++ b/ParLib.src/parlibf.h @@ -1,2 +1,17 @@ INTEGER HANDLE_SIZE PARAMETER (HANDLE_SIZE = 2) + + INTEGER MAX_PARLIB_MP_DIMS + PARAMETER (MAX_PARLIB_MP_DIMS = 6) + + INTEGER IS_MPI_TYPED + PARAMETER (IS_MPI_TYPED = 0) + + INTEGER IS_MPI_MANUAL_PACK + PARAMETER (IS_MPI_MANUAL_PACK = 1) + + INTEGER IS_MPI_TYPED_PERSISTENT + PARAMETER (IS_MPI_TYPED_PERSISTENT = 2) + + INTEGER IS_MPI_MANUAL_PACK_PERSISTENT + PARAMETER (IS_MPI_MANUAL_PACK_PERSISTENT = 3) diff --git a/ParLib.src/plutils.c b/ParLib.src/plutils.c new file mode 100644 index 0000000000000000000000000000000000000000..453b4a329266bf85884ccfbe5f0166548777032d --- /dev/null +++ b/ParLib.src/plutils.c @@ -0,0 +1,719 @@ +#include "plutils.h" + +#include <stdlib.h> +#include <string.h> + +// parlib buffers [definition] +// -------------------------------------------------------------------------- // +void *plbuf[MAX_PL_BUFS]; +int plbuf_size[MAX_PL_BUFS]; +int plbuf_status[MAX_PL_BUFS]; + +int plbuf_ptr; +// -------------------------------------------------------------------------- // + +// using loop_count pragma expectations +// ----------------------------------------------------------------------------------- // +#define CP_EXPECT_5D_J1 21 // number of vertical levels[1] +#define CP_EXPECT_5D_J2 73 // number of vertical levels[2] +#define CP_EXPECT_5D_K1 1 // number of variables[1] +#define CP_EXPECT_5D_K2 2 // number of variables[2] +#define CP_EXPECT_5D_K3 5 // number of variables[3] +#define CP_EXPECT_5D_K4 10 // number of variables[4] +#define CP_EXPECT_5D_Q1 1 // number of time scices[1] +#define CP_EXPECT_5D_Q2 2 // number of time scices[2] +// ----------------------------------------------------------------------------------- // + + +// buffer memory interface +// ----------------------------------------------------------------------------------- // +void init_plbuf() +{ + int k; + for (k = 0; k < MAX_PL_BUFS; k++) { + plbuf_size[k] = 0; + plbuf_status[k] = 0; + } + plbuf_ptr = 0; +} + +void deinit_plbuf() +{ + int k; + for (k = 0; k < MAX_PL_BUFS; k++) { + if (plbuf_size[k] > 0) { + free(plbuf[k]); + plbuf_size[k] = 0; + } + plbuf_status[k] = 0; + } + plbuf_ptr = 0; +} + + +void* get_plbuf(int msize, int* id) +{ + int k, kbeg = plbuf_ptr; + + for (k = kbeg; k < MAX_PL_BUFS; k++) { + if (!plbuf_status[k]) { + if (msize > plbuf_size[k]) { + if (plbuf_size[k] > 0) free(plbuf[k]); + plbuf_size[k] = msize; + plbuf[k] = (void*)malloc(plbuf_size[k]); + } + + plbuf_status[k] = 1; + plbuf_ptr = k + 1; + + (*id) = k; + return plbuf[k]; + } + } + + // no free buffer found: + (*id) = MAX_PL_BUFS; + return (void*)malloc(msize); +} + +void free_plbuf(void* ptr, int id) +{ + if (id < 0) return; + if (id >= MAX_PL_BUFS) { + free(ptr); + return; + } + + plbuf_status[id] = 0; + if (id < plbuf_ptr) plbuf_ptr = id; +} +// ----------------------------------------------------------------------------------- // + +// 1D copy +// ----------------------------------------------------------------------------------- // +inline void copy_to_buffer_1d(char* _RESTRICT buf, const char* _RESTRICT const a, + const int nx) +{ + int i; + + if (nx < MIN_MEMCPY_BLOCK) + for (i = 0; i < nx; i++) + buf[i] = a[i]; + else + memcpy(buf, a, nx * sizeof(char)); +} + +inline void copy_from_buffer_1d(char* _RESTRICT a, const char* _RESTRICT const buf, + const int nx) +{ + int i; + + if (nx < MIN_MEMCPY_BLOCK) + for (i = 0; i < nx; i++) + a[i] = buf[i]; + else + memcpy(a, buf, nx * sizeof(char)); +} +// ----------------------------------------------------------------------------------- // + +// 2D copy +// ----------------------------------------------------------------------------------- // +inline void copy_to_buffer_2d(char* _RESTRICT buf, const char* _RESTRICT const a, + const int nx, const int ny, + const int shx) +{ + int i, j, idx = 0, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + buf[bidx + i] = a[idx + i]; + } + else + { + const int nbx = nx * sizeof(char); + for (j = 0; j < ny; j++, bidx += nx) { + idx = j * shx; + memcpy(&buf[bidx], &a[idx], nbx); + } + } +} + +inline void copy_from_buffer_2d(char* _RESTRICT a, const char* _RESTRICT const buf, + const int nx, const int ny, + const int shx) +{ + int i, j, idx = 0, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + a[idx + i] = buf[bidx + i]; + } + else + { + const int nbx = nx * sizeof(char); + for (j = 0; j < ny; j++, bidx += nx) { + idx = j * shx; + memcpy(&a[idx], &buf[bidx], nbx); + } + } +} +// ----------------------------------------------------------------------------------- // + +// 3D copy +// ----------------------------------------------------------------------------------- // +inline void copy_to_buffer_3d(char* _RESTRICT buf, const char* _RESTRICT const a, + const int nx, const int ny, const int nz, + const int shx, const int shxy) +{ + int i, j, k, idx, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) + for (k = 0; k < nz; k++) + { + idx = k * shxy; + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + buf[bidx + i] = a[idx + i]; + } + } + else + { + const int nbx = nx * sizeof(char); + + for (k = 0; k < nz; k++) + { + idx = k * shxy; + for (j = 0; j < ny; j++, idx += shx, bidx += nx) { + memcpy(&buf[bidx], &a[idx], nbx); + } + } + } +} + +inline void copy_from_buffer_3d(char* _RESTRICT a, const char* _RESTRICT const buf, + const int nx, const int ny, const int nz, + const int shx, const int shxy) +{ + int i, j, k, idx, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) + for (k = 0; k < nz; k++) + { + idx = k * shxy; + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + a[idx + i] = buf[bidx + i]; + } + } + else + { + const int nbx = nx * sizeof(char); + + for (k = 0; k < nz; k++) + { + idx = k * shxy; + for (j = 0; j < ny; j++, idx += shx, bidx += nx) { + memcpy(&a[idx], &buf[bidx], nbx); + } + } + } +} +// ----------------------------------------------------------------------------------- // + +// 4D copy +// ----------------------------------------------------------------------------------- // +inline void copy_to_buffer_4d(char* _RESTRICT buf, const char* _RESTRICT const a, + const int nx, const int ny, const int nz, const int np, + const int shx, const int shxy, const int shxyz) +{ + int i, j, k, p, idx, shidx, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) + for (p = 0; p < np; p++) + { + shidx = p * shxyz; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4) +#endif + for (k = 0; k < nz; k++, shidx += shxy) + { + idx = shidx; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2) +#endif + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + buf[bidx + i] = a[idx + i]; + } + } + } + else + for (p = 0; p < np; p++) + { + shidx = p * shxyz; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4) +#endif + for (k = 0; k < nz; k++, shidx += shxy) + { + idx = shidx; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2) +#endif + for (j = 0; j < ny; j++, idx += shx, bidx += nx) { + memcpy(&buf[bidx], &a[idx], nx * sizeof(char)); + } + } + } +} + +inline void copy_from_buffer_4d(char* _RESTRICT a, const char* _RESTRICT const buf, + const int nx, const int ny, const int nz, const int np, + const int shx, const int shxy, const int shxyz) +{ + int i, j, k, p, idx, shidx, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) + for (p = 0; p < np; p++) + { + shidx = p * shxyz; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4) +#endif + for (k = 0; k < nz; k++, shidx += shxy) + { + idx = shidx; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2) +#endif + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + a[idx + i] = buf[bidx + i]; + } + } + } + else + for (p = 0; p < np; p++) + { + shidx = p * shxyz; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4) +#endif + for (k = 0; k < nz; k++, shidx += shxy) + { + idx = shidx; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2) +#endif + for (j = 0; j < ny; j++, idx += shx, bidx += nx) { + memcpy(&a[idx], &buf[bidx], nx * sizeof(char)); + } + } + } +} +// ----------------------------------------------------------------------------------- // + +// 5D copy +// ----------------------------------------------------------------------------------- // +inline void copy_to_buffer_5d(char* _RESTRICT buf, const char* _RESTRICT const a, + const int nx, const int ny, const int nz, const int np, const int nq, + const int shx, const int shxy, const int shxyz, const int shxyzp) +{ + int i, j, k, p, q, shidx_q, shidx_p, idx, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2) +#endif + for (q = 0; q < nq; q++) + { + shidx_q = q * shxyzp; + for (p = 0; p < np; p++, shidx_q += shxyz) + { + shidx_p = shidx_q; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4) +#endif + for (k = 0; k < nz; k++, shidx_p += shxy) + { + idx = shidx_p; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2) +#endif + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + buf[bidx + i] = a[idx + i]; + } + } + } + } + else + { + const int nbx = nx * sizeof(char); + +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2) +#endif + for (q = 0; q < nq; q++) + { + shidx_q = q * shxyzp; + for (p = 0; p < np; p++, shidx_q += shxyz) + { + shidx_p = shidx_q; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4) +#endif + for (k = 0; k < nz; k++, shidx_p += shxy) + { + idx = shidx_p; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2) +#endif + for (j = 0; j < ny; j++, idx += shx, bidx += nx) { + memcpy(&buf[bidx], &a[idx], nbx); + } + } + } + } + } +} + +inline void copy_from_buffer_5d(char* _RESTRICT a, const char* _RESTRICT const buf, + const int nx, const int ny, const int nz, const int np, const int nq, + const int shx, const int shxy, const int shxyz, const int shxyzp) +{ + int i, j, k, p, q, shidx_q, shidx_p, idx, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2) +#endif + for (q = 0; q < nq; q++) + { + shidx_q = q * shxyzp; + for (p = 0; p < np; p++, shidx_q += shxyz) + { + shidx_p = shidx_q; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4) +#endif + for (k = 0; k < nz; k++, shidx_p += shxy) + { + idx = shidx_p; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2) +#endif + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + a[idx + i] = buf[bidx + i]; + } + } + } + } + else + { + const int nbx = nx * sizeof(char); + +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_Q1, CP_EXPECT_5D_Q2) +#endif + for (q = 0; q < nq; q++) + { + shidx_q = q * shxyzp; + for (p = 0; p < np; p++, shidx_q += shxyz) + { + shidx_p = shidx_q; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_K1, CP_EXPECT_5D_K2, CP_EXPECT_5D_K3, CP_EXPECT_5D_K4) +#endif + for (k = 0; k < nz; k++, shidx_p += shxy) + { + idx = shidx_p; +#if defined(__INTEL_COMPILER) +#pragma loop_count (CP_EXPECT_5D_J1, CP_EXPECT_5D_J2) +#endif + for (j = 0; j < ny; j++, idx += shx, bidx += nx) { + memcpy(&a[idx], &buf[bidx], nbx); + } + } + } + } + } +} +// ----------------------------------------------------------------------------------- // + + +// 6D copy +// ----------------------------------------------------------------------------------- // +inline void copy_to_buffer_6d(char* _RESTRICT buf, const char* _RESTRICT const a, + const int nx, const int ny, const int nz, const int np, const int nq, const int ns, + const int shx, const int shxy, const int shxyz, const int shxyzp, const int shxyzpq) +{ + int i, j, k, p, q, s, idx, shidx_q, shidx_p, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) + for (s = 0; s < ns; s++) + for (q = 0; q < nq; q++) + { + shidx_q = s * shxyzpq + q * shxyzp; + for (p = 0; p < np; p++, shidx_q += shxyz) + { + shidx_p = shidx_q; + for (k = 0; k < nz; k++, shidx_p += shxy) + { + idx = shidx_p; + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + buf[bidx + i] = a[idx + i]; + } + } + } + } + else + { + const int nbx = nx * sizeof(char); + + for (s = 0; s < ns; s++) + for (q = 0; q < nq; q++) + { + shidx_q = s * shxyzpq + q * shxyzp; + for (p = 0; p < np; p++, shidx_q += shxyz) + { + shidx_p = shidx_q; + for (k = 0; k < nz; k++, shidx_p += shxy) + { + idx = shidx_p; + for (j = 0; j < ny; j++, idx += shx, bidx += nx) { + memcpy(&buf[bidx], &a[idx], nbx); + } + } + } + } + } +} + +inline void copy_from_buffer_6d(char* _RESTRICT a, const char* _RESTRICT const buf, + const int nx, const int ny, const int nz, const int np, const int nq, const int ns, + const int shx, const int shxy, const int shxyz, const int shxyzp, const int shxyzpq) +{ + int i, j, k, p, q, s, idx, shidx_q, shidx_p, bidx = 0; + + if (nx < MIN_MEMCPY_BLOCK) + for (s = 0; s < ns; s++) + for (q = 0; q < nq; q++) + { + shidx_q = s * shxyzpq + q * shxyzp; + for (p = 0; p < np; p++, shidx_q += shxyz) + { + shidx_p = shidx_q; + for (k = 0; k < nz; k++, shidx_p += shxy) + { + idx = shidx_p; + for (j = 0; j < ny; j++, idx += shx, bidx += nx) + for (i = 0; i < nx; i++) { + a[idx + i] = buf[bidx + i]; + } + } + } + } + else + { + const int nbx = nx * sizeof(char); + + for (s = 0; s < ns; s++) + for (q = 0; q < nq; q++) + { + shidx_q = s * shxyzpq + q * shxyzp; + for (p = 0; p < np; p++, shidx_q += shxyz) + { + shidx_p = shidx_q; + for (k = 0; k < nz; k++, shidx_p += shxy) + { + idx = shidx_p; + for (j = 0; j < ny; j++, idx += shx, bidx += nx) { + memcpy(&a[idx], &buf[bidx], nbx); + } + } + } + } + } +} +// ----------------------------------------------------------------------------------- // + + +// COPY-TO +// ----------------------------------------------------------------------------------- // +void copy_to_buffer(char* _RESTRICT buf, const char* _RESTRICT const a, + const int ndims, + const int* _RESTRICT const msgdim, + const int* _RESTRICT const stride, + const int fsize) +{ + if ((ndims < 1) || (ndims > MAX_PARLIB_MP_DIMS)) return; + + if (ndims == 1) { + const int nx = msgdim[0] * fsize; + + copy_to_buffer_1d(buf, a, nx); + return; + } + + if (ndims == 2) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1]; + + const int shx = stride[0] * fsize; + + copy_to_buffer_2d(buf, a, nx, ny, + shx); + return; + } + + if (ndims == 3) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1], nz = msgdim[2]; + + const int shx = stride[0] * fsize; + const int shxy = stride[1] * shx; + + copy_to_buffer_3d(buf, a, nx, ny, nz, + shx, shxy); + return; + } + + if (ndims == 4) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1], nz = msgdim[2], + np = msgdim[3]; + + const int shx = stride[0] * fsize; + const int shxy = stride[1] * shx; + const int shxyz = stride[2] * shxy; + + copy_to_buffer_4d(buf, a, nx, ny, nz, np, + shx, shxy, shxyz); + return; + } + + if (ndims == 5) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1], nz = msgdim[2], + np = msgdim[3], nq = msgdim[4]; + + const int shx = stride[0] * fsize; + const int shxy = stride[1] * shx; + const int shxyz = stride[2] * shxy; + const int shxyzp = stride[3] * shxyz; + + copy_to_buffer_5d(buf, a, nx, ny, nz, np, nq, + shx, shxy, shxyz, shxyzp); + return; + } + + if (ndims == 6) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1], nz = msgdim[2], + np = msgdim[3], nq = msgdim[4], + ns = msgdim[5]; + + const int shx = stride[0] * fsize; + const int shxy = stride[1] * shx; + const int shxyz = stride[2] * shxy; + const int shxyzp = stride[3] * shxyz; + const int shxyzpq = stride[4] * shxyzp; + + copy_to_buffer_6d(buf, a, nx, ny, nz, np, nq, ns, + shx, shxy, shxyz, shxyzp, shxyzpq); + return; + } +} +// ----------------------------------------------------------------------------------- // + +// COPY-FROM +// ----------------------------------------------------------------------------------- // +void copy_from_buffer(char* _RESTRICT a, const char* _RESTRICT const buf, + const int ndims, + const int* _RESTRICT const msgdim, + const int* _RESTRICT const stride, + const int fsize) +{ + if ((ndims < 1) || (ndims > MAX_PARLIB_MP_DIMS)) return; + + if (ndims == 1) { + const int nx = msgdim[0] * fsize; + + copy_from_buffer_1d(a, buf, nx); + return; + } + + if (ndims == 2) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1]; + + const int shx = stride[0] * fsize; + + copy_from_buffer_2d(a, buf, nx, ny, + shx); + return; + } + + if (ndims == 3) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1], nz = msgdim[2]; + + const int shx = stride[0] * fsize; + const int shxy = stride[1] * shx; + + copy_from_buffer_3d(a, buf, nx, ny, nz, + shx, shxy); + return; + } + + if (ndims == 4) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1], nz = msgdim[2], + np = msgdim[3]; + + const int shx = stride[0] * fsize; + const int shxy = stride[1] * shx; + const int shxyz = stride[2] * shxy; + + copy_from_buffer_4d(a, buf, nx, ny, nz, np, + shx, shxy, shxyz); + return; + } + + if (ndims == 5) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1], nz = msgdim[2], + np = msgdim[3], nq = msgdim[4]; + + const int shx = stride[0] * fsize; + const int shxy = stride[1] * shx; + const int shxyz = stride[2] * shxy; + const int shxyzp = stride[3] * shxyz; + + copy_from_buffer_5d(a, buf, nx, ny, nz, np, nq, + shx, shxy, shxyz, shxyzp); + return; + } + + if (ndims == 6) { + const int nx = msgdim[0] * fsize, + ny = msgdim[1], nz = msgdim[2], + np = msgdim[3], nq = msgdim[4], ns = msgdim[5]; + + const int shx = stride[0] * fsize; + const int shxy = stride[1] * shx; + const int shxyz = stride[2] * shxy; + const int shxyzp = stride[3] * shxyz; + const int shxyzpq = stride[4] * shxyzp; + + copy_from_buffer_6d(a, buf, nx, ny, nz, np, nq, ns, + shx, shxy, shxyz, shxyzp, shxyzpq); + return; + } +} +// ----------------------------------------------------------------------------------- // diff --git a/ParLib.src/plutils.h b/ParLib.src/plutils.h new file mode 100644 index 0000000000000000000000000000000000000000..2a493fb931affbb39d735995793711518f1b8da6 --- /dev/null +++ b/ParLib.src/plutils.h @@ -0,0 +1,51 @@ +#pragma once + +#define MAX_PARLIB_MP_DIMS 6 // maximum number of dims for manual packing + +#define MIN_MEMCPY_BLOCK 256 // minimum block (in bytes) for memcpy copy (magic number) + +#define MAX_PL_BUFS 4096 // maximum number of parlib internal buffers + +// _RESTRICT definition +// ------------------------------------------------------------------- // +#if defined(__INTEL_COMPILER) +#define _RESTRICT restrict +#elif defined(__GNUC__) && !defined(_WIN32) && !defined(_CYGWIN32__) +#define _RESTRICT __restrict__ +#elif defined(_MSC_VER) +#define _RESTRICT __restrict +#else +#define _RESTRICT +#endif +// ------------------------------------------------------------------- // + + +// parlib buffers [declaration] +// -------------------------------------------------------------------------- // +extern void *plbuf[MAX_PL_BUFS]; +extern int plbuf_size[MAX_PL_BUFS]; +extern int plbuf_status[MAX_PL_BUFS]; + +extern int plbuf_ptr; +// -------------------------------------------------------------------------- // + + +void init_plbuf(); +void deinit_plbuf(); + +void* get_plbuf(int msize, int* id); +void free_plbuf(void* ptr, int id); +// -------------------------------------------------------------------------- // + +void copy_to_buffer(char* _RESTRICT buf, const char* _RESTRICT const a, + const int ndims, + const int* _RESTRICT const msgdim, + const int* _RESTRICT const stride, + const int fsize); + +void copy_from_buffer(char* _RESTRICT a, const char* _RESTRICT const buf, + const int ndims, + const int* _RESTRICT const msgdim, + const int* _RESTRICT const stride, + const int fsize); +// -------------------------------------------------------------------------- // diff --git a/ParLib.src/transpose.c b/ParLib.src/transpose.c index 7f6a9619ba5e5ba29af91dd2a216c15acbec8497..1d536aacddc450860654b79d8578722b5d379435 100644 --- a/ParLib.src/transpose.c +++ b/ParLib.src/transpose.c @@ -1,275 +1,1188 @@ -#include <stdlib.h> -#include "parlib.h" - -int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, - lblks_dest, stride, blklen, overlap, datatype, comm, period, - transp ) - int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; - int *blklen, *overlap; - MPI_Datatype datatype; - MPI_Comm comm; - int period; - Transposition *transp; -{ - int idim, nproc, iproc, ip, strd, count; - int wblka, wblkb, begb; - int ifsta, ifstb, idir, suma, sumb; - MPI_Aint fsize; - MPI_Datatype oldtype, *stype, *rtype; - int *sbeg, *rbeg; -/* - * Check input parameters - */ - if ( ndims < 2) { return 1; } - if ( dim_source < 1 || dim_source > ndims ) { return 2; } - if ( dim_dest < 1 || dim_dest > ndims ) { return 3; } - if ( dim_source == dim_dest ) { return 4; } - for ( idim = 0; idim < ndims; idim++ ) { - if ( stride[idim] <= 0) { return 5; } - } - for (idir = 0; idir < 2; idir++ ) { - if ( overlap[idir] < 0 ) { return 6; } - } -/* - * Define the number of processors in the group and the rank - */ - MPI_Comm_size ( comm, &nproc ); - if ( nproc == 0 ) { return 0; } - MPI_Comm_rank ( comm, &iproc ); - if ( iproc == MPI_UNDEFINED ) { return 0; } - - suma = sumb = 0; - for ( ip = 0; ip < nproc; ip++ ) { - suma += lblks_source[ip]; - sumb += lblks_dest[ip]; - if ( lblks_source[ip] <= 0 ) { return 14; } - if ( lblks_dest[ip] <= 0 ) { return 15; } - } - - if ( lblks_source[iproc] > blklen[dim_source-1] ) { return 8; } - if ( lblks_dest[iproc] > blklen[dim_dest-1] ) { return 9; } - if ( suma > stride[dim_source-1] ) { return 10; } - if ( sumb > stride[dim_dest-1] ) { return 11; } - for ( idim = 0; idim < ndims; idim++ ) { - if ( idim != dim_source-1 && idim != dim_dest-1 ) { - if ( blklen[idim] > stride[idim] ) { return 7; } - } - } - if ( overlap[0] > lblks_dest[0] ) { return 12; } - if ( overlap[1] > lblks_dest[nproc-1] ) { return 13; } - - MPI_Type_extent ( datatype, &fsize ); -/* - * Allocate memory - */ - stype = transp->stype = - (MPI_Datatype *) malloc ( sizeof(MPI_Datatype)*nproc ); - rtype = transp->rtype = - (MPI_Datatype *) malloc ( sizeof(MPI_Datatype)*nproc ); - sbeg = transp->sbeg = (int *) malloc ( sizeof(int)*nproc ); - rbeg = transp->rbeg = (int *) malloc ( sizeof(int)*nproc ); -/* - * Define data types for the blocks and the beginings of the blocks - */ - ifsta = ifstb = 1; - for ( ip = 0; ip < nproc; ip++ ) { - wblka = lblks_source[iproc]; - wblkb = lblks_dest[ip]; - if ( ip > 0 || period ) wblkb += overlap[0]; - if ( ip < nproc-1 || period ) wblkb += overlap[1]; - - oldtype = datatype; - strd = 1; - for ( idim = 0; idim < ndims; idim++ ) { - if ( idim == dim_source-1 ) { - count = wblka; - } else if ( idim == dim_dest-1 ) { - count = wblkb; - } else { - count = blklen[idim]; - } - MPI_Type_hvector ( count, 1, strd*fsize, oldtype, stype+ip ); - if ( idim > 0 ) { MPI_Type_free ( &oldtype ); } - oldtype = stype[ip]; - if ( idim == dim_source-1 ) { - strd *= blklen[idim]; - } else { - strd *= stride[idim]; - } - } - MPI_Type_commit ( stype+ip ); - - wblka = lblks_source[ip]; - wblkb = lblks_dest[iproc]; - if ( iproc > 0 || period ) wblkb += overlap[0]; - if ( iproc < nproc-1 || period ) wblkb += overlap[1]; - oldtype = datatype; - strd = 1; - for ( idim = 0; idim < ndims; idim++ ) { - if ( idim == dim_source-1 ) { - count = wblka; - } else if ( idim == dim_dest-1 ) { - count = wblkb; - } else { - count = blklen[idim]; - } - MPI_Type_hvector ( count, 1, strd*fsize, oldtype, rtype+ip ); - if ( idim > 0 ) { MPI_Type_free ( &oldtype ); } - oldtype = rtype[ip]; - if ( idim == dim_dest-1 ) { - strd *= blklen[idim]; - } else { - strd *= stride[idim]; - } - } - MPI_Type_commit ( rtype+ip ); - - begb = ifstb; - if ( ip > 0 || period ) begb -= overlap[0]; - strd = 1; - for ( idim = 0; idim < dim_dest-1; idim++ ) { - if ( idim == dim_source-1 ) { - strd *= blklen[idim]; - } else { - strd *= stride[idim]; - } - } - sbeg[ip] = strd*(begb-1); - - rbeg[ip] = 0; - if ( iproc > 0 || period ) rbeg[ip] -= overlap[0]*strd; - - strd = 1; - for ( idim = 0; idim < dim_source-1; idim++ ) { - if ( idim == dim_dest-1 ) { - strd *= blklen[idim]; - } else { - strd *= stride[idim]; - } - } - rbeg[ip] += strd*(ifsta-1); - ifsta += lblks_source[ip]; - ifstb += lblks_dest[ip]; - } - transp->nproc = nproc; - transp->iproc = iproc; - transp->comm = comm; - transp->fsize = fsize; - return 0; -} - -int P_Transpose_start ( arr_source, arr_dest, transp ) - void *arr_source, *arr_dest; - Transposition *transp; -{ - char *arr_source_ch = (char *) arr_source; - char *arr_dest_ch = (char *) arr_dest; - int nproc = transp->nproc; - int iproc = transp->iproc; - MPI_Aint fsize = transp->fsize; - int *sbeg = transp->sbeg; - int *rbeg = transp->rbeg; - MPI_Datatype *stype = transp->stype; - MPI_Datatype *rtype = transp->rtype; - MPI_Comm comm = transp->comm; - MPI_Request *sreq, *rreq; - int ip; - char *src, *dest; - - if ( nproc == 0 ) { return 0; } - if ( iproc == MPI_UNDEFINED ) { return 0; } -/* - * Allocate memory - */ - sreq = transp->sreq = - (MPI_Request *) malloc ( sizeof(MPI_Request)*nproc ); - rreq = transp->rreq = - (MPI_Request *) malloc ( sizeof(MPI_Request)*nproc ); -/* - * Start the communication - */ - for ( ip = 0; ip < nproc; ip++ ) { - dest = arr_dest_ch+rbeg[ip]*fsize; - src = arr_source_ch+sbeg[ip]*fsize; - if ( dest == src && iproc == ip ) { - rreq[ip] = sreq[ip] = MPI_REQUEST_NULL; - } else { - MPI_Irecv ( dest, 1, rtype[ip], ip, 0, comm, rreq+ip ); - MPI_Isend ( src, 1, stype[ip], ip, 0, comm, sreq+ip ); - } - } - return 0; -} - -int P_Transpose_end ( transp ) - Transposition *transp; -{ - int nproc = transp->nproc; - int iproc = transp->iproc; - MPI_Request *sreq = transp->sreq; - MPI_Request *rreq = transp->rreq; - int ip; - MPI_Status status; - - if ( nproc == 0 ) { return 0; } - if ( iproc == MPI_UNDEFINED ) { return 0; } - - for ( ip = 0; ip < nproc; ip++ ) { - MPI_Wait ( rreq+ip, &status ); - MPI_Wait ( sreq+ip, &status ); - } - return 0; -} - -int P_Transpose_free ( transp ) - Transposition *transp; -{ - int nproc = transp->nproc; - int iproc = transp->iproc; - int *sbeg = transp->sbeg; - int *rbeg = transp->rbeg; - MPI_Datatype *stype = transp->stype; - MPI_Datatype *rtype = transp->rtype; - MPI_Request *sreq = transp->sreq; - MPI_Request *rreq = transp->rreq; - int ip; - - if ( nproc == 0 ) { return 0; } - if ( iproc == MPI_UNDEFINED ) { return 0; } - - for ( ip = 0; ip < nproc; ip++ ) { - MPI_Type_free ( rtype+ip ); - MPI_Type_free ( stype+ip ); - } - free ( stype ); - free ( rtype ); - free ( sbeg ); - free ( rbeg ); - free ( sreq ); - free ( rreq ); - return 0; -} - -int P_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, - dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, - period ) - void *arr_source, *arr_dest; - int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; - int *blklen, *overlap; - MPI_Datatype datatype; - MPI_Comm comm; - int period; -{ - Transposition transp; - int ierr; - if ( ierr = P_Transpose_init ( ndims, dim_source, lblks_source, - dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, - period, &transp ) != 0 ) - { - return ierr; - } - P_Transpose_start ( arr_source, arr_dest, &transp ); - P_Transpose_end ( &transp ); - P_Transpose_free ( &transp ); - return 0; -} +#include "parlib.h" +#include "plutils.h" + +#include <stdlib.h> +#include <string.h> + + +/* +* Error codes: +* 0 - success +* 1 - number of dimensions < 2 +* 2 - wrong communicated dimension [source] +* 3 - wrong communicated dimension [dest] +* 4 - wrong communicated dimensions [source == dest] +* 5 - nonpositive dimension +* 6 - negative boundary width +* 7 - number of dimensions exceeds maximum value (only for MP - manual packing) +*/ + +// -------------------------------------------------------------------------- // +int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp ) + + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + Transposition *transp; +{ + int idim, nproc, iproc, ip, strd, count; + int wblka, wblkb, begb; + int ifsta, ifstb, idir, suma, sumb; + MPI_Aint lb, fsize; + MPI_Datatype oldtype, *stype, *rtype; + int *sbeg, *rbeg; + +// Setting degenerate-success cases conditions for consistency +// including: nproc=0, iproc=MPI_UNDEFINED + transp->nproc = 0; + transp->iproc = MPI_UNDEFINED; + +// +// Check input parameters +// + if (ndims < 2) { return 1; } + if (dim_source < 1 || dim_source > ndims) { return 2; } + if (dim_dest < 1 || dim_dest > ndims) { return 3; } + if (dim_source == dim_dest) { return 4; } + for (idim = 0; idim < ndims; idim++) { + if (stride[idim] <= 0) { return 5; } + } + for (idir = 0; idir < 2; idir++) { + if (overlap[idir] < 0) { return 6; } + } +// +// Define the number of processors in the group and the rank +// + if (comm == MPI_COMM_NULL) { return 0; } // empty communicator + + MPI_Comm_size(comm, &nproc); + if (nproc == 0) { return 0; } + MPI_Comm_rank(comm, &iproc); + if (iproc == MPI_UNDEFINED) { return 0; } + + suma = sumb = 0; + for (ip = 0; ip < nproc; ip++) { + suma += lblks_source[ip]; + sumb += lblks_dest[ip]; + if (lblks_source[ip] <= 0) { return 14; } + if (lblks_dest[ip] <= 0) { return 15; } + } + + if (lblks_source[iproc] > blklen[dim_source - 1]) { return 8; } + if (lblks_dest[iproc] > blklen[dim_dest - 1]) { return 9; } + if (suma > stride[dim_source - 1]) { return 10; } + if (sumb > stride[dim_dest - 1]) { return 11; } + for (idim = 0; idim < ndims; idim++) { + if (idim != dim_source - 1 && idim != dim_dest - 1) { + if (blklen[idim] > stride[idim]) { return 7; } + } + } + if (overlap[0] > lblks_dest[0]) { return 12; } + if (overlap[1] > lblks_dest[nproc - 1]) { return 13; } + + MPI_Type_get_extent(datatype, &lb, &fsize); +// +// Allocate memory +// + stype = transp->stype = (MPI_Datatype *)get_plbuf(2 * nproc * sizeof(MPI_Datatype), + &transp->buf_id[0]); + rtype = transp->rtype = &transp->stype[nproc]; + + sbeg = transp->sbeg = (int*)get_plbuf(2 * nproc * sizeof(int), + &transp->buf_id[1]); + rbeg = transp->rbeg = &transp->sbeg[nproc]; + + transp->req = (MPI_Request *)get_plbuf(2 * nproc * sizeof(MPI_Request), + &transp->buf_id[2]); + +// +// Define data types for the blocks and the beginings of the blocks +// + ifsta = ifstb = 1; + for (ip = 0; ip < nproc; ip++) { + wblka = lblks_source[iproc]; + wblkb = lblks_dest[ip]; + if (ip > 0 || period) wblkb += overlap[0]; + if (ip < nproc - 1 || period) wblkb += overlap[1]; + + if (dim_source - 1 == 0) { + count = wblka; + } + else if (dim_dest - 1 == 0) { + count = wblkb; + } + else { + count = blklen[0]; + } + MPI_Type_contiguous(count, datatype, stype + ip); + oldtype = stype[ip]; + if (dim_source - 1 == 0) { + strd = blklen[0]; + } + else { + strd = stride[0]; + } + + for (idim = 1; idim < ndims; idim++) { + if (dim_source - 1 == idim) { + count = wblka; + } + else if (dim_dest - 1 == idim) { + count = wblkb; + } + else { + count = blklen[idim]; + } + MPI_Type_create_hvector(count, 1, strd*fsize, oldtype, stype + ip); + MPI_Type_free(&oldtype); + oldtype = stype[ip]; + if (idim == dim_source - 1) { + strd *= blklen[idim]; + } + else { + strd *= stride[idim]; + } + } + MPI_Type_commit(stype + ip); + + wblka = lblks_source[ip]; + wblkb = lblks_dest[iproc]; + if (iproc > 0 || period) wblkb += overlap[0]; + if (iproc < nproc - 1 || period) wblkb += overlap[1]; + + if (dim_source - 1 == 0) { + count = wblka; + } + else if (dim_dest - 1 == 0) { + count = wblkb; + } + else { + count = blklen[0]; + } + MPI_Type_contiguous(count, datatype, rtype + ip); + oldtype = rtype[ip]; + if (dim_dest - 1 == 0) { + strd = blklen[0]; + } + else { + strd = stride[0]; + } + + for (idim = 1; idim < ndims; idim++) { + if (dim_source - 1 == idim) { + count = wblka; + } + else if (dim_dest - 1 == idim) { + count = wblkb; + } + else { + count = blklen[idim]; + } + MPI_Type_create_hvector(count, 1, strd*fsize, oldtype, rtype + ip); + MPI_Type_free(&oldtype); + oldtype = rtype[ip]; + if (idim == dim_dest - 1) { + strd *= blklen[idim]; + } + else { + strd *= stride[idim]; + } + } + MPI_Type_commit(rtype + ip); + + begb = ifstb; + if (ip > 0 || period) begb -= overlap[0]; + strd = 1; + for (idim = 0; idim < dim_dest - 1; idim++) { + if (idim == dim_source - 1) { + strd *= blklen[idim]; + } + else { + strd *= stride[idim]; + } + } + sbeg[ip] = strd*(begb - 1); + + rbeg[ip] = 0; + if (iproc > 0 || period) rbeg[ip] -= overlap[0] * strd; + + strd = 1; + for (idim = 0; idim < dim_source - 1; idim++) { + if (idim == dim_dest - 1) { + strd *= blklen[idim]; + } + else { + strd *= stride[idim]; + } + } + rbeg[ip] += strd*(ifsta - 1); + ifsta += lblks_source[ip]; + ifstb += lblks_dest[ip]; + } + transp->nproc = nproc; + transp->iproc = iproc; + transp->comm = comm; + transp->fsize = fsize; + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_start ( arr_source, arr_dest, transp ) + void *arr_source, *arr_dest; + Transposition *transp; +{ + char *arr_source_ch = (char *) arr_source; + char *arr_dest_ch = (char *) arr_dest; + + int ip; + char *src, *dest; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + +// +// Start the communication +// + for (ip = 0; ip < transp->nproc; ip++) { + dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize; + src = arr_source_ch + transp->sbeg[ip] * transp->fsize; + if ((dest == src) && (transp->iproc == ip)) { + transp->req[ip] = MPI_REQUEST_NULL; + transp->req[transp->nproc + ip] = MPI_REQUEST_NULL; + } + else { + MPI_Irecv(dest, 1, transp->rtype[ip], + ip, 0, transp->comm, + &transp->req[transp->nproc + ip]); + + MPI_Isend(src, 1, transp->stype[ip], + ip, 0, transp->comm, + &transp->req[ip]); + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_end ( transp ) + Transposition *transp; +{ + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + + MPI_Waitall(2 * transp->nproc, transp->req, MPI_STATUSES_IGNORE); + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_free ( transp ) + Transposition *transp; +{ + int ip; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + + for (ip = 0; ip < transp->nproc; ip++) { + MPI_Type_free(transp->rtype + ip); + MPI_Type_free(transp->stype + ip); + } + + free_plbuf(transp->stype, transp->buf_id[0]); + free_plbuf(transp->sbeg, transp->buf_id[1]); + free_plbuf(transp->req, transp->buf_id[2]); + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period ) + + void *arr_source, *arr_dest; + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; +{ + Transposition transp; + + int ierr; + if (ierr = P_Transpose_init(ndims, dim_source, lblks_source, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, &transp) != 0) + { + return ierr; + } + P_Transpose_start(arr_source, arr_dest, &transp); + P_Transpose_end(&transp); + P_Transpose_free(&transp); + return 0; +} +// -------------------------------------------------------------------------- // + + +// v.1.95 - persistent exchanges // +// -------------------------------------------------------------------------- // +int PST_Transpose_init ( ndims, arr_source, dim_source, lblks_source, + arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp ) + + void *arr_source, *arr_dest; + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + Transposition *transp; +{ + char *arr_source_ch = (char *)arr_source; + char *arr_dest_ch = (char *)arr_dest; + + int ip; + char *src, *dest; + + int ierr = P_Transpose_init(ndims, dim_source, lblks_source, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period, transp); + if (ierr != 0) return ierr; + + transp->psrc = arr_source; + transp->pdest = arr_dest; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + +// +// Setup the communication +// + for (ip = 0; ip < transp->nproc; ip++) { + dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize; + src = arr_source_ch + transp->sbeg[ip] * transp->fsize; + if ((dest == src) && (transp->iproc == ip)) { + continue; + } + else { + MPI_Recv_init(dest, 1, transp->rtype[ip], + ip, 0, transp->comm, + &transp->req[transp->nproc + ip]); + + MPI_Send_init(src, 1, transp->stype[ip], + ip, 0, transp->comm, + &transp->req[ip]); + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_Transpose_start ( transp ) + Transposition *transp; +{ + char *arr_source_ch = (char *)transp->psrc; + char *arr_dest_ch = (char *)transp->pdest; + + int ip; + char *src, *dest; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + +// +// Start the communication +// + for (ip = 0; ip < transp->nproc; ip++) { + dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize; + src = arr_source_ch + transp->sbeg[ip] * transp->fsize; + if ((dest == src) && (transp->iproc == ip)) { + transp->req[ip] = MPI_REQUEST_NULL; + transp->req[transp->nproc + ip] = MPI_REQUEST_NULL; + } + else { + MPI_Start(&transp->req[transp->nproc + ip]); + MPI_Start(&transp->req[ip]); + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_Transpose_end ( transp ) + Transposition *transp; +{ + return P_Transpose_end(transp); +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_Transpose_free ( transp ) + Transposition *transp; +{ + char *arr_source_ch = (char *)transp->psrc; + char *arr_dest_ch = (char *)transp->pdest; + + int ip; + char *src, *dest; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + + // we have to free persistent requests first... + for (ip = 0; ip < transp->nproc; ip++) { + dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize; + src = arr_source_ch + transp->sbeg[ip] * transp->fsize; + if ((dest == src) && (transp->iproc == ip)) { + continue; + } + else { + MPI_Request_free(&transp->req[transp->nproc + ip]); + MPI_Request_free(&transp->req[ip]); + } + } + + return P_Transpose_free(transp); +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period ) + + void *arr_source, *arr_dest; + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; +{ + Transposition transp; + + int ierr; + if (ierr = PST_Transpose_init(ndims, arr_source, dim_source, lblks_source, + arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, &transp) != 0) + { + return ierr; + } + PST_Transpose_start(&transp); + PST_Transpose_end(&transp); + PST_Transpose_free(&transp); + return 0; +} +// -------------------------------------------------------------------------- // + + +// v.1.7 - manual packing // +// -------------------------------------------------------------------------- // +int P_Transpose_mp_init ( ndims, dim_source, lblks_source, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp ) + + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + Transposition *transp; +{ + int idim, nproc, iproc, ip, strd, count; + int wblka, wblkb, begb; + int ifsta, ifstb, idir, suma, sumb; + MPI_Aint lb, fsize; + MPI_Datatype oldtype, *stype, *rtype; + int *sbeg, *rbeg; + + int **sdims, **rdims; + int *ssize, *rsize; + void **sbuf, **rbuf; + + int sbuf_size, rbuf_size, ssh, rsh; + +// Setting degenerate-success cases conditions for consistency +// including: nproc=0, iproc=MPI_UNDEFINED + transp->nproc = 0; + transp->iproc = MPI_UNDEFINED; + +// +// Check input parameters +// + if (ndims < 2) { return 1; } + if (dim_source < 1 || dim_source > ndims) { return 2; } + if (dim_dest < 1 || dim_dest > ndims) { return 3; } + if (dim_source == dim_dest) { return 4; } + for (idim = 0; idim < ndims; idim++) { + if (stride[idim] <= 0) { return 5; } + } + for (idir = 0; idir < 2; idir++) { + if (overlap[idir] < 0) { return 6; } + } + if (ndims > MAX_PARLIB_MP_DIMS) return 7; +// +// Define the number of processors in the group and the rank +// + if (comm == MPI_COMM_NULL) { return 0; } // empty communicator + + MPI_Comm_size(comm, &nproc); + if (nproc == 0) { return 0; } + MPI_Comm_rank(comm, &iproc); + if (iproc == MPI_UNDEFINED) { return 0; } + + suma = sumb = 0; + for (ip = 0; ip < nproc; ip++) { + suma += lblks_source[ip]; + sumb += lblks_dest[ip]; + if (lblks_source[ip] <= 0) { return 14; } + if (lblks_dest[ip] <= 0) { return 15; } + } + + if (lblks_source[iproc] > blklen[dim_source - 1]) { return 8; } + if (lblks_dest[iproc] > blklen[dim_dest - 1]) { return 9; } + if (suma > stride[dim_source - 1]) { return 10; } + if (sumb > stride[dim_dest - 1]) { return 11; } + for (idim = 0; idim < ndims; idim++) { + if (idim != dim_source - 1 && idim != dim_dest - 1) { + if (blklen[idim] > stride[idim]) { return 7; } + } + } + if (overlap[0] > lblks_dest[0]) { return 12; } + if (overlap[1] > lblks_dest[nproc - 1]) { return 13; } + + MPI_Type_get_extent(datatype, &lb, &fsize); +// +// Allocate memory +// + stype = transp->stype = (MPI_Datatype *)get_plbuf(2 * nproc * sizeof(MPI_Datatype), + &transp->buf_id[0]); + rtype = transp->rtype = &transp->stype[nproc]; + + sbeg = transp->sbeg = (int*)get_plbuf(2 * nproc * sizeof(int), + &transp->buf_id[1]); + rbeg = transp->rbeg = &transp->sbeg[nproc]; + + transp->req = (MPI_Request *)get_plbuf(2 * nproc * sizeof(MPI_Request), + &transp->buf_id[2]); + + + sdims = transp->sdims = (int**)get_plbuf(2 * nproc * sizeof(int*), + &transp->buf_id[3]); + rdims = transp->rdims = &transp->sdims[nproc]; + + ssize = transp->ssize = (int*)get_plbuf(2 * nproc * sizeof(int), + &transp->buf_id[4]); + rsize = transp->rsize = &transp->ssize[nproc]; + + sbuf = transp->sbuf = (void**)get_plbuf(2 * nproc * sizeof(void*), + &transp->buf_id[5]); + rbuf = transp->rbuf = &transp->sbuf[nproc]; + + + transp->mem_dims = (int*)get_plbuf(2 * nproc * ndims * sizeof(int), + &transp->buf_id[6]); + for (ip = 0; ip < nproc; ip++) { + sdims[ip] = transp->sdims[ip] = &transp->mem_dims[ip * ndims]; + rdims[ip] = transp->rdims[ip] = &transp->mem_dims[(nproc + ip) * ndims]; + } + +// +// Define data types for the blocks and the beginings of the blocks +// + sbuf_size = 0; + rbuf_size = 0; + + ifsta = ifstb = 1; + for (ip = 0; ip < nproc; ip++) { + wblka = lblks_source[iproc]; + wblkb = lblks_dest[ip]; + if (ip > 0 || period) wblkb += overlap[0]; + if (ip < nproc - 1 || period) wblkb += overlap[1]; + + ssize[ip] = 1; + stype[ip] = datatype; + + for (idim = 0; idim < ndims; idim++) { + if (dim_source - 1 == idim) { + count = wblka; + } + else if (dim_dest - 1 == idim) { + count = wblkb; + } + else { + count = blklen[idim]; + } + + sdims[ip][idim] = count; + ssize[ip] *= count; + } + sbuf_size += ssize[ip]; + + wblka = lblks_source[ip]; + wblkb = lblks_dest[iproc]; + if (iproc > 0 || period) wblkb += overlap[0]; + if (iproc < nproc - 1 || period) wblkb += overlap[1]; + + rsize[ip] = 1; + rtype[ip] = datatype; + + for (idim = 0; idim < ndims; idim++) { + if (dim_source - 1 == idim) { + count = wblka; + } + else if (dim_dest - 1 == idim) { + count = wblkb; + } + else { + count = blklen[idim]; + } + + rdims[ip][idim] = count; + rsize[ip] *= count; + } + rbuf_size += rsize[ip]; + + begb = ifstb; + if (ip > 0 || period) begb -= overlap[0]; + strd = 1; + for (idim = 0; idim < dim_dest - 1; idim++) { + if (idim == dim_source - 1) { + strd *= blklen[idim]; + } + else { + strd *= stride[idim]; + } + } + sbeg[ip] = strd*(begb - 1); + + rbeg[ip] = 0; + if (iproc > 0 || period) rbeg[ip] -= overlap[0] * strd; + + strd = 1; + for (idim = 0; idim < dim_source - 1; idim++) { + if (idim == dim_dest - 1) { + strd *= blklen[idim]; + } + else { + strd *= stride[idim]; + } + } + rbeg[ip] += strd*(ifsta - 1); + ifsta += lblks_source[ip]; + ifstb += lblks_dest[ip]; + } + +// +// Define message buffers +// + transp->mem_sbuf = (void*)get_plbuf(sbuf_size * fsize * sizeof(char), + &transp->buf_id[7]); + transp->mem_rbuf = (void*)get_plbuf(rbuf_size * fsize * sizeof(char), + &transp->buf_id[8]); + + ssh = 0; + rsh = 0; + for (ip = 0; ip < nproc; ip++) { + sbuf[ip] = transp->sbuf[ip] = (void*)((char*)transp->mem_sbuf + ssh); + rbuf[ip] = transp->rbuf[ip] = (void*)((char*)transp->mem_rbuf + rsh); + + ssh += ssize[ip] * fsize; + rsh += rsize[ip] * fsize; + } + transp->nproc = nproc; + transp->iproc = iproc; + transp->comm = comm; + transp->fsize = fsize; + + transp->ndims = ndims; + memcpy(transp->sstride, stride, ndims * sizeof(int)); + memcpy(transp->rstride, stride, ndims * sizeof(int)); + + // modifying send-recv strides to take into account arr_src != arr_dest + for (idim = 0; idim < ndims; idim++) { + if (idim == dim_source - 1) { + transp->sstride[idim] = blklen[idim]; + } + if (idim == dim_dest - 1) { + transp->rstride[idim] = blklen[idim]; + } + } + + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_mp_start ( arr_source, arr_dest, transp ) + void *arr_source, *arr_dest; + Transposition *transp; +{ + char *arr_source_ch = (char *) arr_source; + char *arr_dest_ch = (char *) arr_dest; + + int ip; + char *src, *dest; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + +// +// Start the communication +// + for (ip = 0; ip < transp->nproc; ip++) { + dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize; + src = arr_source_ch + transp->sbeg[ip] * transp->fsize; + if ((dest == src) && (transp->iproc == ip)) { + transp->req[ip] = MPI_REQUEST_NULL; + transp->req[transp->nproc + ip] = MPI_REQUEST_NULL; + } + else { + MPI_Irecv(transp->rbuf[ip], transp->rsize[ip], transp->rtype[ip], + ip, 0, transp->comm, + &transp->req[transp->nproc + ip]); + + copy_to_buffer((char*)transp->sbuf[ip], src, + transp->ndims, transp->sdims[ip], transp->sstride, transp->fsize); + + MPI_Isend(transp->sbuf[ip], transp->ssize[ip], transp->stype[ip], + ip, 0, transp->comm, + &transp->req[ip]); + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_mp_end ( arr_source, arr_dest, transp ) + void *arr_source, *arr_dest; + Transposition *transp; +{ + char *arr_source_ch = (char *)arr_source; + char *arr_dest_ch = (char *)arr_dest; + + int ip; + char *src, *dest; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + + MPI_Waitall(transp->nproc, &transp->req[transp->nproc], MPI_STATUSES_IGNORE); + + for (ip = 0; ip < transp->nproc; ip++) { + dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize; + src = arr_source_ch + transp->sbeg[ip] * transp->fsize; + if ((dest == src) && (transp->iproc == ip)) { + continue; + } + else { + copy_from_buffer(dest, (char*)transp->rbuf[ip], + transp->ndims, transp->rdims[ip], transp->rstride, transp->fsize); + } + } + + MPI_Waitall(transp->nproc, transp->req, MPI_STATUSES_IGNORE); + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_mp_free ( transp ) + Transposition *transp; +{ + int ip; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + + free_plbuf(transp->stype, transp->buf_id[0]); + free_plbuf(transp->sbeg, transp->buf_id[1]); + free_plbuf(transp->req, transp->buf_id[2]); + + + free_plbuf(transp->sdims, transp->buf_id[3]); + free_plbuf(transp->ssize, transp->buf_id[4]); + free_plbuf(transp->sbuf, transp->buf_id[5]); + + + free_plbuf(transp->mem_dims, transp->buf_id[6]); + free_plbuf(transp->mem_sbuf, transp->buf_id[7]); + free_plbuf(transp->mem_rbuf, transp->buf_id[8]); + + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_mp ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period ) + + void *arr_source, *arr_dest; + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; +{ + Transposition transp; + int ierr; + if (ierr = P_Transpose_mp_init(ndims, dim_source, lblks_source, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, &transp) != 0) + { + return ierr; + } + P_Transpose_mp_start(arr_source, arr_dest, &transp); + P_Transpose_mp_end(arr_source, arr_dest, &transp); + P_Transpose_mp_free(&transp); + return 0; +} +// -------------------------------------------------------------------------- // + + +// v.1.95 - persistent exchanges for manual packing // +// -------------------------------------------------------------------------- // +int PST_Transpose_mp_init ( ndims, arr_source, dim_source, lblks_source, + arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp ) + + void *arr_source, *arr_dest; + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + Transposition *transp; +{ + char *arr_source_ch = (char *)arr_source; + char *arr_dest_ch = (char *)arr_dest; + + int ip; + char *src, *dest; + + int ierr = P_Transpose_mp_init(ndims, dim_source, lblks_source, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, transp); + if (ierr != 0) return ierr; + + transp->psrc = arr_source; + transp->pdest = arr_dest; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + +// +// Setup the communication +// + for (ip = 0; ip < transp->nproc; ip++) { + dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize; + src = arr_source_ch + transp->sbeg[ip] * transp->fsize; + if ((dest == src) && (transp->iproc == ip)) { + continue; + } + else { + MPI_Recv_init(transp->rbuf[ip], transp->rsize[ip], transp->rtype[ip], + ip, 0, transp->comm, + &transp->req[transp->nproc + ip]); + + MPI_Send_init(transp->sbuf[ip], transp->ssize[ip], transp->stype[ip], + ip, 0, transp->comm, + &transp->req[ip]); + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_Transpose_mp_start ( arr_source, arr_dest, transp ) + void *arr_source, *arr_dest; + Transposition *transp; +{ + char *arr_source_ch = (char *) arr_source; + char *arr_dest_ch = (char *) arr_dest; + + int ip; + char *src, *dest; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + +// +// Start the communication +// + for (ip = 0; ip < transp->nproc; ip++) { + dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize; + src = arr_source_ch + transp->sbeg[ip] * transp->fsize; + if ((dest == src) && (transp->iproc == ip)) { + transp->req[ip] = MPI_REQUEST_NULL; + transp->req[transp->nproc + ip] = MPI_REQUEST_NULL; + } + else { + MPI_Start(&transp->req[transp->nproc + ip]); + + copy_to_buffer((char*)transp->sbuf[ip], src, + transp->ndims, transp->sdims[ip], transp->sstride, transp->fsize); + + MPI_Start(&transp->req[ip]); + } + } + return 0; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_Transpose_mp_end ( arr_source, arr_dest, transp ) + void *arr_source, *arr_dest; + Transposition *transp; +{ + return P_Transpose_mp_end(arr_source, arr_dest, transp); +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_Transpose_mp_free ( transp ) + Transposition *transp; +{ + char *arr_source_ch = (char *)transp->psrc; + char *arr_dest_ch = (char *)transp->pdest; + + int ip; + char *src, *dest; + + if (transp->nproc == 0) { return 0; } + if (transp->iproc == MPI_UNDEFINED) { return 0; } + + // we have to free persistent requests first... + for (ip = 0; ip < transp->nproc; ip++) { + dest = arr_dest_ch + transp->rbeg[ip] * transp->fsize; + src = arr_source_ch + transp->sbeg[ip] * transp->fsize; + if ((dest == src) && (transp->iproc == ip)) { + continue; + } + else { + MPI_Request_free(&transp->req[transp->nproc + ip]); + MPI_Request_free(&transp->req[ip]); + } + } + + return P_Transpose_mp_free(transp); +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int PST_Transpose_mp ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period ) + + void *arr_source, *arr_dest; + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; +{ + Transposition transp; + int ierr; + if (ierr = PST_Transpose_mp_init(ndims, arr_source, dim_source, lblks_source, + arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, &transp) != 0) + { + return ierr; + } + PST_Transpose_mp_start(arr_source, arr_dest, &transp); + PST_Transpose_mp_end(arr_source, arr_dest, &transp); + PST_Transpose_mp_free(&transp); + return 0; +} +// -------------------------------------------------------------------------- // + + +// v.1.95 - choice subroutines // +// -------------------------------------------------------------------------- // +int P_Transpose_opt_init ( ndims, arr_source, dim_source, lblks_source, + arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, exch_mode ) + + void *arr_source, *arr_dest; + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + Transposition *transp; + int exch_mode; +{ + if (exch_mode == IS_MPI_TYPED) { + return P_Transpose_init(ndims, dim_source, lblks_source, + dim_dest, lblks_dest, stride, blklen, overlap, + datatype, comm, period, transp); + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + return P_Transpose_mp_init(ndims, dim_source, lblks_source, + dim_dest, lblks_dest, stride, blklen, overlap, + datatype, comm, period, transp); + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + return PST_Transpose_init(ndims, arr_source, dim_source, lblks_source, + arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, + datatype, comm, period, transp); + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + return PST_Transpose_mp_init(ndims, arr_source, dim_source, lblks_source, + arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, + datatype, comm, period, transp); + } + + return 999; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_opt_start ( arr_source, arr_dest, transp, exch_mode ) + void *arr_source, *arr_dest; + Transposition *transp; + int exch_mode; +{ + if (exch_mode == IS_MPI_TYPED) { + return P_Transpose_start(arr_source, arr_dest, transp); + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + return P_Transpose_mp_start(arr_source, arr_dest, transp); + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + return PST_Transpose_start(transp); + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + return PST_Transpose_mp_start(arr_source, arr_dest, transp); + } + + return 999; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_opt_end ( arr_source, arr_dest, transp, exch_mode ) + void *arr_source, *arr_dest; + Transposition *transp; + int exch_mode; +{ + if (exch_mode == IS_MPI_TYPED) { + return P_Transpose_end(transp); + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + return P_Transpose_mp_end(arr_source, arr_dest, transp); + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + return PST_Transpose_end(transp); + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + return PST_Transpose_mp_end(arr_source, arr_dest, transp); + } + + return 999; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_opt_free ( transp, exch_mode ) + Transposition *transp; + int exch_mode; +{ + if (exch_mode == IS_MPI_TYPED) { + return P_Transpose_free(transp); + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + return P_Transpose_mp_free(transp); + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + return PST_Transpose_free(transp); + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + return PST_Transpose_mp_free(transp); + } + + return 999; +} +// -------------------------------------------------------------------------- // + +// -------------------------------------------------------------------------- // +int P_Transpose_opt ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, exch_mode ) + + void *arr_source, *arr_dest; + int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride; + int *blklen, *overlap; + MPI_Datatype datatype; + MPI_Comm comm; + int period; + int exch_mode; +{ + Transposition transp; + int ierr; + + if (exch_mode == IS_MPI_TYPED) { + if (ierr = P_Transpose_init(ndims, dim_source, lblks_source, + dim_dest, lblks_dest, stride, blklen, overlap, + datatype, comm, period, &transp) != 0) + { + return ierr; + } + + P_Transpose_start(arr_source, arr_dest, &transp); + P_Transpose_end(&transp); + P_Transpose_free(&transp); + return 0; + } + if (exch_mode == IS_MPI_MANUAL_PACK) { + if (ierr = P_Transpose_mp_init(ndims, dim_source, lblks_source, + dim_dest, lblks_dest, stride, blklen, overlap, + datatype, comm, period, &transp) != 0) + { + return ierr; + } + + P_Transpose_mp_start(arr_source, arr_dest, &transp); + P_Transpose_mp_end(arr_source, arr_dest, &transp); + P_Transpose_mp_free(&transp); + return 0; + } + if (exch_mode == IS_MPI_TYPED_PERSISTENT) { + if (ierr = PST_Transpose_init(ndims, arr_source, dim_source, lblks_source, + arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, + datatype, comm, period, &transp) != 0) + { + return ierr; + } + + PST_Transpose_start(&transp); + PST_Transpose_end(&transp); + PST_Transpose_free(&transp); + return 0; + } + if (exch_mode == IS_MPI_MANUAL_PACK_PERSISTENT) { + if (ierr = PST_Transpose_mp_init(ndims, arr_source, dim_source, lblks_source, + arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, + datatype, comm, period, &transp) != 0) + { + return ierr; + } + + PST_Transpose_mp_start(arr_source, arr_dest, &transp); + PST_Transpose_mp_end(arr_source, arr_dest, &transp); + PST_Transpose_mp_free(&transp); + return 0; + } + + return 999; +} +// -------------------------------------------------------------------------- // \ No newline at end of file diff --git a/ParLib.src/transposef.c b/ParLib.src/transposef.c index b45cc0bcd6be94c67d14a3a0020b4f9c86bfbf8b..dc6c42a471fa3ebb5443333de5a05eef35daa458 100644 --- a/ParLib.src/transposef.c +++ b/ParLib.src/transposef.c @@ -1,6 +1,8 @@ #include <stdlib.h> #include "parlib.h" + +// -------------------------------------------------------------------------- // #ifdef FORTRANUNDERSCORE void p_transpose_init_ ( ndims, dim_source, lblks_source, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period, @@ -14,15 +16,16 @@ void p_transpose_init ( ndims, dim_source, lblks_source, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period, transp, ierr ) #endif + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; - MPI_Fint **transp, *ierr; + MPI_Fint **transp, *ierr; { - *transp = (MPI_Fint *) malloc ( sizeof(Transposition) ); - *ierr = P_Transpose_init ( *ndims, *dim_source, lblks_source, - *dim_dest, lblks_dest, stride, blklen, overlap, + *transp = (MPI_Fint *)malloc(sizeof(Transposition)); + *ierr = P_Transpose_init(*ndims, *dim_source, lblks_source, + *dim_dest, lblks_dest, stride, blklen, overlap, MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period, - (Transposition *) *transp ); + (Transposition *)*transp); } #ifdef FORTRANUNDERSCORE @@ -32,11 +35,12 @@ void p_transpose_start__ ( arr_source, arr_dest, transp, ierr ) #else void p_transpose_start ( arr_source, arr_dest, transp, ierr ) #endif + void *arr_source, *arr_dest; MPI_Fint **transp, *ierr; { - *ierr = P_Transpose_start ( arr_source, arr_dest, - (Transposition *) *transp ); + *ierr = P_Transpose_start(arr_source, arr_dest, + (Transposition *)*transp); } #ifdef FORTRANUNDERSCORE @@ -46,9 +50,10 @@ void p_transpose_end__ ( transp, ierr ) #else void p_transpose_end ( transp, ierr ) #endif + MPI_Fint **transp, *ierr; { - *ierr = P_Transpose_end ( (Transposition *) *transp ); + *ierr = P_Transpose_end((Transposition *)*transp); } #ifdef FORTRANUNDERSCORE @@ -58,10 +63,11 @@ void p_transpose_free__ ( transp, ierr ) #else void p_transpose_free ( transp, ierr ) #endif + MPI_Fint **transp, *ierr; { - *ierr = P_Transpose_free ( (Transposition *) *transp ); - free ( *transp ); + *ierr = P_Transpose_free((Transposition *)*transp); + free(*transp); } #ifdef FORTRANUNDERSCORE @@ -77,12 +83,514 @@ void p_transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, period, ierr ) #endif + + void *arr_source, *arr_dest; + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; + MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; + MPI_Fint *ierr; +{ + *ierr = P_Transpose(*ndims, arr_source, *dim_source, lblks_source, + arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period); +} +// -------------------------------------------------------------------------- // + +// v.1.95 - persistent exchanges // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void pst_transpose_init_( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose_init__( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, ierr ) +#else +void pst_transpose_init ( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; + MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; + MPI_Fint **transp, *ierr; +{ + *transp = (MPI_Fint *)malloc(sizeof(Transposition)); + *ierr = PST_Transpose_init(*ndims, arr_source, *dim_source, lblks_source, arr_dest, + *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period, + (Transposition *)*transp); +} + +#ifdef FORTRANUNDERSCORE +void pst_transpose_start_ ( transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose_start__ ( transp, ierr ) +#else +void pst_transpose_start ( transp, ierr ) +#endif + + MPI_Fint **transp, *ierr; +{ + *ierr = PST_Transpose_start((Transposition *)*transp); +} + +#ifdef FORTRANUNDERSCORE +void pst_transpose_end_ ( transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose_end__ ( transp, ierr ) +#else +void pst_transpose_end ( transp, ierr ) +#endif + + MPI_Fint **transp, *ierr; +{ + *ierr = PST_Transpose_end((Transposition *)*transp); +} + +#ifdef FORTRANUNDERSCORE +void pst_transpose_free_ ( transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose_free__ ( transp, ierr ) +#else +void pst_transpose_free ( transp, ierr ) +#endif + + MPI_Fint **transp, *ierr; +{ + *ierr = PST_Transpose_free((Transposition *)*transp); + free(*transp); +} + +#ifdef FORTRANUNDERSCORE +void pst_transpose_ ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose__ ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, ierr ) +#else +void pst_transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, ierr ) +#endif + void *arr_source, *arr_dest; MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; MPI_Fint *ierr; { - *ierr = P_Transpose ( *ndims, arr_source, *dim_source, lblks_source, - arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap, - MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period ); + *ierr = PST_Transpose(*ndims, arr_source, *dim_source, lblks_source, + arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period); +} +// -------------------------------------------------------------------------- // + +// v.1.7 - manual packing // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void p_transpose_mp_init_ ( ndims, dim_source, lblks_source, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_mp_init__ ( ndims, dim_source, lblks_source, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, ierr ) +#else +void p_transpose_mp_init ( ndims, dim_source, lblks_source, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, ierr ) +#endif + + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; + MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; + MPI_Fint **transp, *ierr; +{ + *transp = (MPI_Fint *)malloc(sizeof(Transposition)); + *ierr = P_Transpose_mp_init(*ndims, *dim_source, lblks_source, + *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period, + (Transposition *)*transp); +} + +#ifdef FORTRANUNDERSCORE +void p_transpose_mp_start_ ( arr_source, arr_dest, transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_mp_start__ ( arr_source, arr_dest, transp, ierr ) +#else +void p_transpose_mp_start ( arr_source, arr_dest, transp, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint **transp, *ierr; +{ + *ierr = P_Transpose_mp_start(arr_source, arr_dest, + (Transposition *)*transp); +} + +#ifdef FORTRANUNDERSCORE +void p_transpose_mp_end_ ( arr_source, arr_dest, transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_mp_end__ ( arr_source, arr_dest, transp, ierr ) +#else +void p_transpose_mp_end ( arr_source, arr_dest, transp, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint **transp, *ierr; +{ + *ierr = P_Transpose_mp_end(arr_source, arr_dest, (Transposition *)*transp); +} + +#ifdef FORTRANUNDERSCORE +void p_transpose_mp_free_ ( transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_mp_free__ ( transp, ierr ) +#else +void p_transpose_mp_free ( transp, ierr ) +#endif + + MPI_Fint **transp, *ierr; +{ + *ierr = P_Transpose_mp_free((Transposition *)*transp); + free(*transp); +} + +#ifdef FORTRANUNDERSCORE +void p_transpose_mp_ ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_mp__ ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, ierr ) +#else +void p_transpose_mp ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; + MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; + MPI_Fint *ierr; +{ + *ierr = P_Transpose_mp(*ndims, arr_source, *dim_source, lblks_source, + arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period); +} +// -------------------------------------------------------------------------- // + + +// v.1.95 - persistent exchanges for manual packing // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void pst_transpose_mp_init_( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose_mp_init__( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, ierr ) +#else +void pst_transpose_mp_init ( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; + MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; + MPI_Fint **transp, *ierr; +{ + *transp = (MPI_Fint *)malloc(sizeof(Transposition)); + *ierr = PST_Transpose_mp_init(*ndims, arr_source, *dim_source, lblks_source, arr_dest, + *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period, + (Transposition *)*transp); +} + +#ifdef FORTRANUNDERSCORE +void pst_transpose_mp_start_ ( arr_source, arr_dest, transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose_mp_start__ ( arr_source, arr_dest, transp, ierr ) +#else +void pst_transpose_mp_start ( arr_source, arr_dest, transp, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint **transp, *ierr; +{ + *ierr = PST_Transpose_mp_start(arr_source, arr_dest, + (Transposition *)*transp); +} + +#ifdef FORTRANUNDERSCORE +void pst_transpose_mp_end_ ( arr_source, arr_dest, transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose_mp_end__ ( arr_source, arr_dest, transp, ierr ) +#else +void pst_transpose_mp_end ( arr_source, arr_dest, transp, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint **transp, *ierr; +{ + *ierr = PST_Transpose_mp_end(arr_source, arr_dest, (Transposition *)*transp); +} + +#ifdef FORTRANUNDERSCORE +void pst_transpose_mp_free_ ( transp, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose_mp_free__ ( transp, ierr ) +#else +void pst_transpose_mp_free ( transp, ierr ) +#endif + + MPI_Fint **transp, *ierr; +{ + *ierr = PST_Transpose_mp_free((Transposition *)*transp); + free(*transp); +} + +#ifdef FORTRANUNDERSCORE +void pst_transpose_mp_ ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void pst_transpose_mp__ ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, ierr ) +#else +void pst_transpose_mp ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; + MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; + MPI_Fint *ierr; +{ + *ierr = PST_Transpose_mp(*ndims, arr_source, *dim_source, lblks_source, + arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period); +} +// -------------------------------------------------------------------------- // + + +// v.1.95 - choice subroutines // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void p_transpose_opt_init_( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_opt_init__( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, exch_mode, ierr) +#else +void p_transpose_opt_init ( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + transp, exch_mode, ierr) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; + MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; + MPI_Fint **transp, *ierr, *exch_mode; +{ + *transp = (MPI_Fint *)malloc(sizeof(Transposition)); + *ierr = P_Transpose_opt_init(*ndims, arr_source, *dim_source, lblks_source, arr_dest, + *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period, + (Transposition *)*transp, *exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void p_transpose_opt_start_ ( arr_source, arr_dest, transp, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_opt_start__ ( arr_source, arr_dest, transp, exch_mode, ierr ) +#else +void p_transpose_opt_start ( arr_source, arr_dest, transp, exch_mode, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint **transp, *ierr, *exch_mode; +{ + *ierr = P_Transpose_opt_start(arr_source, arr_dest, + (Transposition *)*transp, *exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void p_transpose_opt_end_ ( arr_source, arr_dest, transp, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_opt_end__ ( arr_source, arr_dest, transp, exch_mode, ierr ) +#else +void p_transpose_opt_end ( arr_source, arr_dest, transp, exch_mode, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint **transp, *ierr, *exch_mode; +{ + *ierr = P_Transpose_opt_end(arr_source, arr_dest, + (Transposition *)*transp, *exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void p_transpose_opt_free_ ( transp, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_opt_free__ ( transp, exch_mode, ierr ) +#else +void p_transpose_opt_free ( transp, exch_mode, ierr ) +#endif + + MPI_Fint **transp, *ierr, *exch_mode; +{ + *ierr = P_Transpose_opt_free((Transposition *)*transp, *exch_mode); + free(*transp); +} + +#ifdef FORTRANUNDERSCORE +void p_transpose_opt_ ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, exch_mode, ierr) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void p_transpose_opt__ (ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, exch_mode, ierr) +#else +void p_transpose_opt ( ndims, arr_source, dim_source, lblks_source, arr_dest, + dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm, + period, exch_mode, ierr) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; + MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; + MPI_Fint *ierr, *exch_mode; +{ + *ierr = P_Transpose_opt(*ndims, arr_source, *dim_source, lblks_source, + arr_dest, *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period, *exch_mode); +} +// -------------------------------------------------------------------------- // + + +// v.2.0 - regular communications [removed only on correct program exit] // +// -------------------------------------------------------------------------- // +#ifdef FORTRANUNDERSCORE +void reg_transpose_( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + exch_id, exch_mode, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void reg_transpose__( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + exch_id, exch_mode, ierr) +#else +void reg_transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest, dim_dest, + lblks_dest, stride, blklen, overlap, datatype, comm, period, + exch_id, exch_mode, ierr) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ndims, *dim_source, *lblks_source, *dim_dest, *lblks_dest; + MPI_Fint *stride, *blklen, *overlap, *datatype, *comm, *period; + MPI_Fint *ierr, *exch_id, *exch_mode; +{ + Transposition *transp; + transp = (Transposition *)malloc(sizeof(Transposition)); + + *ierr = P_Transpose_opt_init(*ndims, arr_source, *dim_source, lblks_source, arr_dest, + *dim_dest, lblks_dest, stride, blklen, overlap, + MPI_Type_f2c(*datatype), MPI_Comm_f2c(*comm), *period, + transp, *exch_mode); + if ((int)*ierr != 0) return; + + *exch_id = save_transp_handle(transp, (int)*exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void start_transpose_ ( arr_source, arr_dest, exch_id, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void start_transpose__ ( arr_source, arr_dest, exch_id, ierr ) +#else +void start_transpose ( arr_source, arr_dest, exch_id, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ierr, *exch_id; +{ + int exch_mode; + Transposition *transp; + + get_transp_handle(&transp, &exch_mode, (int)*exch_id); + + *ierr = P_Transpose_opt_start(arr_source, arr_dest, transp, exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void end_transpose_ ( arr_source, arr_dest, exch_id, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void end_transpose__ ( arr_source, arr_dest, exch_id, ierr ) +#else +void end_transpose ( arr_source, arr_dest, exch_id, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ierr, *exch_id; +{ + int exch_mode; + Transposition *transp; + + get_transp_handle(&transp, &exch_mode, (int)*exch_id); + + *ierr = P_Transpose_opt_end(arr_source, arr_dest, transp, exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void run_transpose_ ( arr_source, arr_dest, exch_id, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void run_transpose__ ( arr_source, arr_dest, exch_id, ierr ) +#else +void run_transpose ( arr_source, arr_dest, exch_id, ierr ) +#endif + + void *arr_source, *arr_dest; + MPI_Fint *ierr, *exch_id; +{ + int exch_mode; + Transposition *transp; + + get_transp_handle(&transp, &exch_mode, (int)*exch_id); + + *ierr = P_Transpose_opt_start(arr_source, arr_dest, transp, exch_mode); + if ((int)*ierr != 0) return; + + *ierr = P_Transpose_opt_end(arr_source, arr_dest, transp, exch_mode); +} + +#ifdef FORTRANUNDERSCORE +void unreg_transpose_ ( exch_id, ierr ) +#elif defined(FORTRANDOUBLEUNDERSCORE) +void unreg_transpose__ ( exch_id, ierr ) +#else +void unreg_transpose ( exch_id, ierr ) +#endif + + MPI_Fint *ierr, *exch_id; +{ + int exch_mode; + Transposition *transp; + + get_transp_handle(&transp, &exch_mode, (int)*exch_id); + + *ierr = P_Transpose_opt_free(transp, exch_mode); + if ((int)*ierr != 0) return; + free(transp); + + remove_transp_handle((int)*exch_id); } +// -------------------------------------------------------------------------- //