Select Git revision
transpose.c
Forked from
Debolskiy Andrey / parlib
Source project has a limited visibility.
transpose.c 7.48 KiB
#include <stdlib.h>
#include "parlib.h"
int P_Transpose_init ( ndims, dim_source, lblks_source, dim_dest,
lblks_dest, stride, blklen, overlap, datatype, comm, period,
transp )
int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
int *blklen, *overlap;
MPI_Datatype datatype;
MPI_Comm comm;
int period;
Transposition *transp;
{
int idim, nproc, iproc, ip, strd, count;
int wblka, wblkb, begb;
int ifsta, ifstb, idir, suma, sumb;
MPI_Aint fsize;
MPI_Datatype oldtype, *stype, *rtype;
int *sbeg, *rbeg;
/*
* Check input parameters
*/
if ( ndims < 2) { return 1; }
if ( dim_source < 1 || dim_source > ndims ) { return 2; }
if ( dim_dest < 1 || dim_dest > ndims ) { return 3; }
if ( dim_source == dim_dest ) { return 4; }
for ( idim = 0; idim < ndims; idim++ ) {
if ( stride[idim] <= 0) { return 5; }
}
for (idir = 0; idir < 2; idir++ ) {
if ( overlap[idir] < 0 ) { return 6; }
}
/*
* Define the number of processors in the group and the rank
*/
MPI_Comm_size ( comm, &nproc );
if ( nproc == 0 ) { return 0; }
MPI_Comm_rank ( comm, &iproc );
if ( iproc == MPI_UNDEFINED ) { return 0; }
suma = sumb = 0;
for ( ip = 0; ip < nproc; ip++ ) {
suma += lblks_source[ip];
sumb += lblks_dest[ip];
if ( lblks_source[ip] <= 0 ) { return 14; }
if ( lblks_dest[ip] <= 0 ) { return 15; }
}
if ( lblks_source[iproc] > blklen[dim_source-1] ) { return 8; }
if ( lblks_dest[iproc] > blklen[dim_dest-1] ) { return 9; }
if ( suma > stride[dim_source-1] ) { return 10; }
if ( sumb > stride[dim_dest-1] ) { return 11; }
for ( idim = 0; idim < ndims; idim++ ) {
if ( idim != dim_source-1 && idim != dim_dest-1 ) {
if ( blklen[idim] > stride[idim] ) { return 7; }
}
}
if ( overlap[0] > lblks_dest[0] ) { return 12; }
if ( overlap[1] > lblks_dest[nproc-1] ) { return 13; }
MPI_Type_extent ( datatype, &fsize );
/*
* Allocate memory
*/
stype = transp->stype =
(MPI_Datatype *) malloc ( sizeof(MPI_Datatype)*nproc );
rtype = transp->rtype =
(MPI_Datatype *) malloc ( sizeof(MPI_Datatype)*nproc );
sbeg = transp->sbeg = (int *) malloc ( sizeof(int)*nproc );
rbeg = transp->rbeg = (int *) malloc ( sizeof(int)*nproc ); /*
* Define data types for the blocks and the beginings of the blocks
*/
ifsta = ifstb = 1;
for ( ip = 0; ip < nproc; ip++ ) {
wblka = lblks_source[iproc];
wblkb = lblks_dest[ip];
if ( ip > 0 || period ) wblkb += overlap[0];
if ( ip < nproc-1 || period ) wblkb += overlap[1];
oldtype = datatype;
strd = 1;
for ( idim = 0; idim < ndims; idim++ ) {
if ( idim == dim_source-1 ) {
count = wblka;
} else if ( idim == dim_dest-1 ) {
count = wblkb;
} else {
count = blklen[idim];
}
MPI_Type_hvector ( count, 1, strd*fsize, oldtype, stype+ip );
if ( idim > 0 ) { MPI_Type_free ( &oldtype ); }
oldtype = stype[ip];
if ( idim == dim_source-1 ) {
strd *= blklen[idim];
} else {
strd *= stride[idim];
}
}
MPI_Type_commit ( stype+ip );
wblka = lblks_source[ip];
wblkb = lblks_dest[iproc];
if ( iproc > 0 || period ) wblkb += overlap[0];
if ( iproc < nproc-1 || period ) wblkb += overlap[1];
oldtype = datatype;
strd = 1;
for ( idim = 0; idim < ndims; idim++ ) {
if ( idim == dim_source-1 ) {
count = wblka;
} else if ( idim == dim_dest-1 ) {
count = wblkb;
} else {
count = blklen[idim];
}
MPI_Type_hvector ( count, 1, strd*fsize, oldtype, rtype+ip );
if ( idim > 0 ) { MPI_Type_free ( &oldtype ); }
oldtype = rtype[ip];
if ( idim == dim_dest-1 ) {
strd *= blklen[idim];
} else {
strd *= stride[idim];
}
}
MPI_Type_commit ( rtype+ip );
begb = ifstb;
if ( ip > 0 || period ) begb -= overlap[0];
strd = 1;
for ( idim = 0; idim < dim_dest-1; idim++ ) {
if ( idim == dim_source-1 ) {
strd *= blklen[idim];
} else {
strd *= stride[idim];
}
}
sbeg[ip] = strd*(begb-1);
rbeg[ip] = 0;
if ( iproc > 0 || period ) rbeg[ip] -= overlap[0]*strd;
strd = 1;
for ( idim = 0; idim < dim_source-1; idim++ ) {
if ( idim == dim_dest-1 ) {
strd *= blklen[idim];
} else {
strd *= stride[idim];
}
}
rbeg[ip] += strd*(ifsta-1);
ifsta += lblks_source[ip];
ifstb += lblks_dest[ip];
}
transp->nproc = nproc;
transp->iproc = iproc;
transp->comm = comm;
transp->fsize = fsize;
return 0;
}
int P_Transpose_start ( arr_source, arr_dest, transp )
void *arr_source, *arr_dest;
Transposition *transp;
{
char *arr_source_ch = (char *) arr_source;
char *arr_dest_ch = (char *) arr_dest;
int nproc = transp->nproc;
int iproc = transp->iproc;
MPI_Aint fsize = transp->fsize;
int *sbeg = transp->sbeg;
int *rbeg = transp->rbeg;
MPI_Datatype *stype = transp->stype;
MPI_Datatype *rtype = transp->rtype;
MPI_Comm comm = transp->comm;
MPI_Request *sreq, *rreq;
int ip;
char *src, *dest;
if ( nproc == 0 ) { return 0; }
if ( iproc == MPI_UNDEFINED ) { return 0; }
/*
* Allocate memory
*/
sreq = transp->sreq =
(MPI_Request *) malloc ( sizeof(MPI_Request)*nproc );
rreq = transp->rreq =
(MPI_Request *) malloc ( sizeof(MPI_Request)*nproc );
/*
* Start the communication
*/
for ( ip = 0; ip < nproc; ip++ ) {
dest = arr_dest_ch+rbeg[ip]*fsize;
src = arr_source_ch+sbeg[ip]*fsize;
if ( dest == src && iproc == ip ) {
rreq[ip] = sreq[ip] = MPI_REQUEST_NULL;
} else {
MPI_Irecv ( dest, 1, rtype[ip], ip, 0, comm, rreq+ip );
MPI_Isend ( src, 1, stype[ip], ip, 0, comm, sreq+ip );
}
}
return 0;
}
int P_Transpose_end ( transp )
Transposition *transp;
{
int nproc = transp->nproc;
int iproc = transp->iproc;
MPI_Request *sreq = transp->sreq;
MPI_Request *rreq = transp->rreq; int ip;
MPI_Status status;
if ( nproc == 0 ) { return 0; }
if ( iproc == MPI_UNDEFINED ) { return 0; }
for ( ip = 0; ip < nproc; ip++ ) {
MPI_Wait ( rreq+ip, &status );
MPI_Wait ( sreq+ip, &status );
}
return 0;
}
int P_Transpose_free ( transp )
Transposition *transp;
{
int nproc = transp->nproc;
int iproc = transp->iproc;
int *sbeg = transp->sbeg;
int *rbeg = transp->rbeg;
MPI_Datatype *stype = transp->stype;
MPI_Datatype *rtype = transp->rtype;
MPI_Request *sreq = transp->sreq;
MPI_Request *rreq = transp->rreq;
int ip;
if ( nproc == 0 ) { return 0; }
if ( iproc == MPI_UNDEFINED ) { return 0; }
for ( ip = 0; ip < nproc; ip++ ) {
MPI_Type_free ( rtype+ip );
MPI_Type_free ( stype+ip );
}
free ( stype );
free ( rtype );
free ( sbeg );
free ( rbeg );
free ( sreq );
free ( rreq );
return 0;
}
int P_Transpose ( ndims, arr_source, dim_source, lblks_source, arr_dest,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period )
void *arr_source, *arr_dest;
int ndims, dim_source, *lblks_source, dim_dest, *lblks_dest, *stride;
int *blklen, *overlap;
MPI_Datatype datatype;
MPI_Comm comm;
int period;
{
Transposition transp;
int ierr;
if ( ierr = P_Transpose_init ( ndims, dim_source, lblks_source,
dim_dest, lblks_dest, stride, blklen, overlap, datatype, comm,
period, &transp ) != 0 )
{
return ierr;
}
P_Transpose_start ( arr_source, arr_dest, &transp );
P_Transpose_end ( &transp );
P_Transpose_free ( &transp );
return 0;
}