/*$Id: mmdense.c,v 1.28 2000/04/09 04:35:58 bsmith Exp bsmith $*/
/*
   Support for the parallel dense matrix-vector multiply and for
   extracting submatrices (MatGetSubMatrices) from a parallel dense matrix
*/
#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"
#undef __FUNC__
#define __FUNC__ /**/"MatSetUpMultiply_MPIDense"
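/*
   MatSetUpMultiply_MPIDense - builds the scatter needed by the parallel
   dense matrix-vector product. Since a dense matrix couples every column,
   each processor needs a complete local copy (lvec, of length N) of the
   distributed input vector; the scatter context Mvctx created here gathers
   all N entries into lvec and is reused on every multiply.
*/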
int MatSetUpMultiply_MPIDense(Mat mat)
{
Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
int ierr;
IS tofrom;
Vec gvec;
PetscFunctionBegin;
/* Create local vector that is used to scatter into */
ierr = VecCreateSeq(PETSC_COMM_SELF,mdn->N,&mdn->lvec);CHKERRQ(ierr);
/* Create temporary index set for building scatter gather */
ierr = ISCreateStride(PETSC_COMM_SELF,mdn->N,0,1,&tofrom);CHKERRQ(ierr);
/* Create temporary global vector to generate scatter context */
/* n = mdn->cowners[mdn->rank+1] - mdn->cowners[mdn->rank]; */
ierr = VecCreateMPI(mat->comm,mdn->nvec,mdn->N,&gvec);CHKERRQ(ierr);
/* Generate the scatter context */
ierr = VecScatterCreate(gvec,tofrom,mdn->lvec,tofrom,&mdn->Mvctx);CHKERRQ(ierr);
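  /* The scatter built above is used by the multiply roughly as follows
     (a sketch of MatMult_MPIDense, which lives elsewhere in this directory;
     illustrative only, error checking omitted):

        VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
        VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
        MatMult(mdn->A,mdn->lvec,yy);

     i.e. gather the entire input vector, then do a sequential dense multiply. */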
PLogObjectParent(mat,mdn->Mvctx);
PLogObjectParent(mat,mdn->lvec);
PLogObjectParent(mat,tofrom);
PLogObjectParent(mat,gvec);
ierr = ISDestroy(tofrom);CHKERRQ(ierr);
ierr = VecDestroy(gvec);CHKERRQ(ierr);
PetscFunctionReturn(0);
}
extern int MatGetSubMatrices_MPIDense_Local(Mat,int,IS*,IS*,MatReuse,Mat*);
#undef __FUNC__
#define __FUNC__ /**/"MatGetSubMatrices_MPIDense"
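/*
   MatGetSubMatrices_MPIDense - extracts the submatrices in stages rather
   than all at once, so that the integer index buffers exchanged between
   processors stay bounded (roughly 20 MB per stage; see nmax below).
   Every rank must execute the same number of stages, because each stage
   performs collective communication.
*/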
int MatGetSubMatrices_MPIDense(Mat C,int ismax,IS *isrow,IS *iscol,MatReuse scall,Mat **submat)
{
Mat_MPIDense *c = (Mat_MPIDense*)C->data;
int nmax,nstages_local,nstages,i,pos,max_no,ierr;
PetscFunctionBegin;
/* Allocate memory to hold all the submatrices */
if (scall != MAT_REUSE_MATRIX) {
*submat = (Mat *)PetscMalloc((ismax+1)*sizeof(Mat));CHKPTRQ(*submat);
}
/* Determine the number of stages through which submatrices are done */
nmax = 20*1000000 / (c->N * sizeof(int));
if (!nmax) nmax = 1;
nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
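  /* Example of the staging arithmetic (values are illustrative only): with
     c->N = 5000 global columns and 4-byte ints, nmax = 20000000/20000 = 1000
     index sets per stage, so ismax = 2500 local index sets would give
     nstages_local = 2500/1000 + 1 = 3. */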
/* Make sure every processor loops through the nstages */
ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);CHKERRQ(ierr);
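  /* Ranks that run out of local index sets early still enter the remaining
     stages with max_no = 0, so that they participate in the collective calls
     inside MatGetSubMatrices_MPIDense_Local; this is why the MAX of
     nstages_local is taken above. */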
  for (i=0,pos=0; i<nstages; i++) {
    if (pos+nmax <= ismax) max_no = nmax;
    else if (pos == ismax) max_no = 0;
    else                   max_no = ismax-pos;
    ierr = MatGetSubMatrices_MPIDense_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);CHKERRQ(ierr);
    pos += max_no;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ /**/"MatGetSubMatrices_MPIDense_Local"
int MatGetSubMatrices_MPIDense_Local(Mat C,int ismax,IS *isrow,IS *iscol,MatReuse scall,Mat *submats)
{
Mat_MPIDense *c = (Mat_MPIDense*)C->data;
Mat A = c->A;
Mat_SeqDense *a = (Mat_SeqDense*)A->data,*mat;
int N = c->N,rstart = c->rstart,count;
int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
int **sbuf1,rank,m,i,j,k,l,ct1,ierr,**rbuf1,row,proc;
int nrqs,msz,**ptr,index,*ctr,*pa,*tmp,bsz,nrqr;
int is_no,jmax,*irow_i,**rmap,*rmap_i;
int len,ctr_j,*sbuf1_j,*rbuf1_i;
int tag0,tag1;
MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2;
MPI_Status *r_status1,*r_status2,*s_status1,*s_status2;
MPI_Comm comm;
Scalar **rbuf2,**sbuf2;
PetscFunctionBegin;
comm = C->comm;
tag0 = C->tag;
size = c->size;
rank = c->rank;
m = c->M;
/* Get some new tags to keep the communication clean */
ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
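  /* Two tags keep the two phases of traffic separate: tag0 carries the
     requested row indices (integer messages), tag1 carries the replies
     holding the row values (Scalar messages). */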
  /* Check if the row and col indices are sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(isrow[i],(PetscTruth*)&j);CHKERRQ(ierr);
    if (!j) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"ISrow is not sorted");
    ierr = ISSorted(iscol[i],(PetscTruth*)&j);CHKERRQ(ierr);
    if (!j) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"IScol is not sorted");
  }

  len    = (2*ismax+1)*(sizeof(int*) + sizeof(int)) + (m+1)*sizeof(int);
  irow   = (int**)PetscMalloc(len);CHKPTRQ(irow);
  icol   = irow + ismax;
  nrow   = (int*)(icol + ismax);
  ncol   = nrow + ismax;
  rtable = ncol + ismax;

  for (i=0; i<ismax; i++) {
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

  /* Create hash table for the mapping :row -> proc */
  for (i=0,j=0; i<size; i++) {
    jmax = c->rowners[i+1];
    for (; j<jmax; j++) {
      rtable[j] = i;
    }
  }

  /* evaluate communication - mesg to whom, length of mesg, and buffer space
     required. Based on this, buffers are allocated, and data copied into them */
  w1   = (int*)PetscMalloc(size*4*sizeof(int));CHKPTRQ(w1); /* mesg size */
  w2   = w1 + size;    /* if w2[i] marked, then a message to proc i */
  w3   = w2 + size;    /* no of IS that needs to be sent to proc i */
  w4   = w3 + size;    /* temp work space used in determining w1, w2, w3 */
  ierr = PetscMemzero(w1,size*3*sizeof(int));CHKERRQ(ierr);

  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(w4,size*sizeof(int));CHKERRQ(ierr);
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row  = irow_i[j];
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[j] += w4[j]; w3[j]++; }
    }
  }

  nrqs     = 0;  /* no of outgoing messages */
  msz      = 0;  /* total mesg length (for all procs) */
  w1[rank] = 0;  /* no mesg sent to self */
  w3[rank] = 0;
  for (i=0; i<size; i++) {
    if (w1[i]) { w2[i] = 1; nrqs++; }  /* there exists a message to proc i */
  }
  pa = (int*)PetscMalloc((nrqs+1)*sizeof(int));CHKPTRQ(pa); /* (proc-array) */
  for (i=0,j=0; i<size; i++) {
    if (w1[i]) { pa[j] = i; j++; }
  }

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i=0; i<nrqs; i++) {
    j      = pa[i];
    w1[j] += w2[j] + 2*w3[j];
    msz   += w1[j];
  }

  /* Do a global reduction to determine how many messages to expect */
  {
    int *rw1,*rw2;
    rw1  = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(rw1);
    rw2  = rw1 + size;
    ierr = MPI_Allreduce(w1,rw1,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
    bsz  = rw1[rank];   /* largest message anyone sends to this proc */
    ierr = MPI_Allreduce(w2,rw2,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    nrqr = rw2[rank];   /* no of incoming messages */
    ierr = PetscFree(rw1);CHKERRQ(ierr);
  }

  /* Allocate memory for recv buffers. Make sure rbuf1[0] exists by adding 1 to the buffer length */
  len      = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
  rbuf1    = (int**)PetscMalloc(len);CHKPTRQ(rbuf1);
  rbuf1[0] = (int*)(rbuf1 + nrqr);
  for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

  /* Post the receives for the row-index requests */
  r_waits1 = (MPI_Request*)PetscMalloc((nrqr+1)*sizeof(MPI_Request));CHKPTRQ(r_waits1);
  for (i=0; i<nrqr; ++i) {
    ierr = MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr);
  }

  /* Allocate memory for outgoing messages */
  len   = 2*size*sizeof(int*) + 2*msz*sizeof(int) + size*sizeof(int);
  sbuf1 = (int**)PetscMalloc(len);CHKPTRQ(sbuf1);
  ptr   = sbuf1 + size;  /* pointers to the data in outgoing buffers */
  ierr  = PetscMemzero(sbuf1,2*size*sizeof(int*));CHKERRQ(ierr);
  tmp   = (int*)(sbuf1 + 2*size);
  ctr   = tmp + 2*msz;

  {
    int *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j        = pa[i];
      iptr    += ict;
      sbuf1[j] = iptr;
      ict      = w1[j];
    }
  }

  /* Form the outgoing messages; initialize the header space */
  for (i=0; i<nrqs; i++) {
    j           = pa[i];
    sbuf1[j][0] = 0;
    ierr        = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));CHKERRQ(ierr);
    ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
  }

  /* Parse the isrow and copy data into outbuf */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(ctr,size*sizeof(int));CHKERRQ(ierr);
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
      row  = irow_i[j];
      proc = rtable[row];
      if (proc != rank) {     /* copy to the outgoing buffer */
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
    }
    /* Update the headers for the current IS */
    for (j=0; j<size; j++) {
      if ((ctr_j = ctr[j])) {
        sbuf1_j        = sbuf1[j];
        k              = ++sbuf1_j[0];
        sbuf1_j[2*k]   = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /* Now post the sends */
  s_waits1 = (MPI_Request*)PetscMalloc((nrqs+1)*sizeof(MPI_Request));CHKPTRQ(s_waits1);
  for (i=0; i<nrqs; ++i) {
    j    = pa[i];
    ierr = MPI_Isend(sbuf1[j],w1[j],MPI_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
  }

  /* Post the receives for the replies; their sizes are known from the requests */
  r_waits2 = (MPI_Request*)PetscMalloc((nrqs+1)*sizeof(MPI_Request));CHKPTRQ(r_waits2);
  rbuf2    = (Scalar**)PetscMalloc((nrqs+1)*sizeof(Scalar*));CHKPTRQ(rbuf2);
  for (i=0; i<nrqs; ++i) {
    j        = pa[i];
    count    = (w1[j] - (2*sbuf1[j][0] + 1))*N;  /* no of requested rows times N cols */
    rbuf2[i] = (Scalar*)PetscMalloc((count+1)*sizeof(Scalar));CHKPTRQ(rbuf2[i]);
    ierr     = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
  }

  /* Receive the row-index requests and reply with the corresponding row values */
  r_status1 = (MPI_Status*)PetscMalloc((nrqr+1)*sizeof(MPI_Status));CHKPTRQ(r_status1);
  s_waits2  = (MPI_Request*)PetscMalloc((nrqr+1)*sizeof(MPI_Request));CHKPTRQ(s_waits2);
  sbuf2     = (Scalar**)PetscMalloc((nrqr+1)*sizeof(Scalar*));CHKPTRQ(sbuf2);

  {
    Scalar *sbuf2_i,*v_start;
    int    s_proc;
    for (i=0; i<nrqr; ++i) {
      ierr = MPI_Waitany(nrqr,r_waits1,&index,r_status1+i);CHKERRQ(ierr);
      s_proc  = r_status1[i].MPI_SOURCE;  /* requesting processor */
      rbuf1_i = rbuf1[index];             /* actual message from s_proc */
      /* no of rows = end - start; start is the index where the row numbers
         begin, end is the length of the incoming message in ints */
      start = 2*rbuf1_i[0] + 1;
      ierr  = MPI_Get_count(r_status1+i,MPI_INT,&end);CHKERRQ(ierr);
      /* allocate memory sufficient to hold all the row values */
      sbuf2[index] = (Scalar*)PetscMalloc((end-start)*N*sizeof(Scalar));CHKPTRQ(sbuf2[index]);
      sbuf2_i      = sbuf2[index];
      /* Now pack the data */
      for (j=start; j<end; j++) {
        row     = rbuf1_i[j] - rstart;
        v_start = a->v + row;
        for (k=0; k<N; k++) {
          sbuf2_i[0] = v_start[0];
          sbuf2_i++; v_start += a->m;
        }
      }
      /* Now send off the data */
      ierr = MPI_Isend(sbuf2[index],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr);
    }
  }
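  /* Each reply packs the requested rows densely: for every requested row all
     N column entries are copied out of the column-major local array a->v
     (stride a->m between columns), so a reply is (number of rows)*N Scalars. */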
/* End Send-Recv of IS + row_numbers */
ierr = PetscFree(r_status1);CHKERRQ(ierr);
ierr = PetscFree(r_waits1);CHKERRQ(ierr);
s_status1 = (MPI_Status*)PetscMalloc((nrqs+1)*sizeof(MPI_Status));CHKPTRQ(s_status1);
ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);
ierr = PetscFree(s_status1);CHKERRQ(ierr);
ierr = PetscFree(s_waits1);CHKERRQ(ierr);
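  /* The index-request buffers may only be freed once MPI_Waitall above
     confirms those sends have completed; the replies themselves are still
     in flight and are waited on during assembly below. */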
/* Create the submatrices */
if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense*)(submats[i]->data);
if ((mat->m != nrow[i]) || (mat->n != ncol[i])) {
SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong size");
}
ierr = PetscMemzero(mat->v,mat->m*mat->n*sizeof(Scalar));CHKERRQ(ierr);
submats[i]->factor = C->factor;
}
} else {
    for (i=0; i<ismax; i++) {
      ierr = MatCreateSeqDense(PETSC_COMM_SELF,nrow[i],ncol[i],PETSC_NULL,submats+i);CHKERRQ(ierr);
    }
  }

  /* Assemble the matrices */
  /* First assemble the local rows */
  {
    int    *icol_i;
    Scalar *mat_v,*imat_v,*mat_vi,*imat_vi;

    for (i=0; i<ismax; i++) {
      mat    = (Mat_SeqDense*)submats[i]->data;
mat_v = a->v;
imat_v = mat->v;
irow_i = irow[i];
m = nrow[i];
      for (j=0; j<m; j++) {
        row  = irow_i[j];
        proc = rtable[row];
        if (proc == rank) {  /* local row; copy the values directly */
          row     = row - rstart;
          mat_vi  = mat_v + row;
          imat_vi = imat_v + j;
          icol_i  = icol[i];
          for (k=0; k<ncol[i]; k++) {
            imat_vi[k*m] = mat_vi[icol_i[k]*a->m];
}
}
}
}
}
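  /* Rows owned locally have now been copied straight out of a->v; the
     remaining rows arrive in the rbuf2 replies and are placed into the
     submatrices using the row map built below. */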
/* Create row map. This maps c->row to submat->row for each submat*/
/* this is a very expensive operation wrt memory usage */
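  /* For example (numbers are illustrative only): ismax = 10 index sets and
     c->M = 100000 global rows cost 10*100000*sizeof(int), about 4 MB, for
     this map alone, which is why the comment above calls it expensive. */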
len = (1+ismax)*sizeof(int*)+ ismax*c->M*sizeof(int);
rmap = (int **)PetscMalloc(len);CHKPTRQ(rmap);
rmap[0] = (int *)(rmap + ismax);
ierr = PetscMemzero(rmap[0],ismax*c->M*sizeof(int));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + c->M;}
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {
      rmap_i[irow_i[j]] = j;
    }
  }

  /* Now receive the row values and assemble the rest of the matrix */
  r_status2 = (MPI_Status*)PetscMalloc((nrqs+1)*sizeof(MPI_Status));CHKPTRQ(r_status2);
  {
    Scalar *rbuf2_i,*imat_v,*imat_vi;
    int    is_max,tmp1,col,*sbuf1_i,is_sz;

    for (tmp1=0; tmp1<nrqs; tmp1++) {  /* for each message */
      ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr);
      /* Now dig out the corresponding sbuf1, which records which IS rows were requested */
      sbuf1_i = sbuf1[pa[i]];
      is_max  = sbuf1_i[0];
      ct1     = 2*is_max + 1;
      rbuf2_i = rbuf2[i];
      for (j=1; j<=is_max; j++) {  /* for each IS in the message */
        is_no = sbuf1_i[2*j-1];
        is_sz = sbuf1_i[2*j];
        mat   = (Mat_SeqDense*)submats[is_no]->data;
imat_v = mat->v;
rmap_i = rmap[is_no];
m = nrow[is_no];
        for (k=0; k<is_sz; k++) {  /* for each row of this IS in the message */