/*$Id: mmdense.c,v 1.28 2000/04/09 04:35:58 bsmith Exp bsmith $*/

/* Support for the parallel dense matrix vector multiply */

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNC__
#define __FUNC__ "MatSetUpMultiply_MPIDense"
int MatSetUpMultiply_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  int          ierr;
  IS           tofrom;
  Vec          gvec;

  PetscFunctionBegin;
  /* Create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,mdn->N,&mdn->lvec);CHKERRQ(ierr);

  /* Create temporary index set for building scatter-gather */
  ierr = ISCreateStride(PETSC_COMM_SELF,mdn->N,0,1,&tofrom);CHKERRQ(ierr);

  /* Create temporary global vector to generate scatter context */
  /* n = mdn->cowners[mdn->rank+1] - mdn->cowners[mdn->rank]; */
  ierr = VecCreateMPI(mat->comm,mdn->nvec,mdn->N,&gvec);CHKERRQ(ierr);

  /* Generate the scatter context */
  ierr = VecScatterCreate(gvec,tofrom,mdn->lvec,tofrom,&mdn->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,mdn->Mvctx);
  PLogObjectParent(mat,mdn->lvec);
  PLogObjectParent(mat,tofrom);
  PLogObjectParent(mat,gvec);

  ierr = ISDestroy(tofrom);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatGetSubMatrices_MPIDense_Local(Mat,int,IS*,IS*,MatReuse,Mat*);

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrices_MPIDense"
int MatGetSubMatrices_MPIDense(Mat C,int ismax,IS *isrow,IS *iscol,MatReuse scall,Mat **submat)
{
  Mat_MPIDense *c = (Mat_MPIDense*)C->data;
  int          nmax,nstages_local,nstages,i,pos,max_no,ierr;

  PetscFunctionBegin;
  /* Allocate memory to hold all the submatrices */
  if (scall != MAT_REUSE_MATRIX) {
    *submat = (Mat*)PetscMalloc((ismax+1)*sizeof(Mat));CHKPTRQ(*submat);
  }
  /* Determine the number of stages through which submatrices are done */
  nmax = 20*1000000 / (c->N * sizeof(int));
  if (!nmax) nmax = 1;
  nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0);

  /* Make sure every processor loops through the nstages */
  ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);CHKERRQ(ierr);

  /* [Reconstructed: the body of this staging loop was lost in the source;
     it hands at most nmax index sets at a time to the Local routine, as in
     the other MatGetSubMatrices implementations] */
  for (i=0,pos=0; i<nstages; i++) {
    if (pos+nmax <= ismax) max_no = nmax;
    else if (pos == ismax) max_no = 0;
    else                   max_no = ismax-pos;
    ierr = MatGetSubMatrices_MPIDense_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);CHKERRQ(ierr);
    pos += max_no;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrices_MPIDense_Local"
int MatGetSubMatrices_MPIDense_Local(Mat C,int ismax,IS *isrow,IS *iscol,MatReuse scall,Mat *submats)
{
  Mat_MPIDense *c = (Mat_MPIDense*)C->data;
  Mat          A = c->A;
  Mat_SeqDense *a = (Mat_SeqDense*)A->data,*mat;
  int          N = c->N,rstart = c->rstart,count;
  int          **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size;
  int          **sbuf1,rank,m,i,j,k,l,ct1,ierr,**rbuf1,row,proc;
  int          nrqs,msz,**ptr,index,*ctr,*pa,*tmp,bsz,nrqr;
  int          is_no,jmax,*irow_i,**rmap,*rmap_i;
  int          len,ctr_j,*sbuf1_j,*rbuf1_i;
  int          tag0,tag1;
  MPI_Request  *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status   *r_status1,*r_status2,*s_status1,*s_status2;
  MPI_Comm     comm;
  Scalar       **rbuf2,**sbuf2;

  PetscFunctionBegin;
  comm = C->comm;
  tag0 = C->tag;
  size = c->size;
  rank = c->rank;
  m    = c->M;

  /* Get some new tags to keep the communication clean */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);

  /* Check if the col indices are sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(isrow[i],(PetscTruth*)&j);CHKERRQ(ierr);
    if (!j) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"ISrow is not sorted");
    ierr = ISSorted(iscol[i],(PetscTruth*)&j);CHKERRQ(ierr);
    if (!j) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"IScol is not sorted");
  }

  /* [Reconstructed: fetch the indices and sizes of all the index sets] */
  len    = (2*ismax+1)*(sizeof(int*) + sizeof(int)) + (m+1)*sizeof(int);
  irow   = (int**)PetscMalloc(len);CHKPTRQ(irow);
  icol   = irow + ismax;
  nrow   = (int*)(icol + ismax);
  ncol   = nrow + ismax;
  rtable = ncol + ismax;
  for (i=0; i<ismax; i++) {
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

  /* Create hash table for the mapping :row -> proc */
  for (i=0,j=0; i<size; i++) {
    jmax = c->rowners[i+1];
    for (; j<jmax; j++) {
      rtable[j] = i;
    }
  }

  /* [A stretch of the original is lost here.  Judging from the surviving
     variables, it evaluates the communication:  counts, per process, the
     rows each index set needs (w1-w4), builds the list pa of processes to
     talk to, fills the outgoing integer messages sbuf1 (a header of IS
     numbers and row counts, followed by the row numbers themselves),
     exchanges them with MPI_Isend/MPI_Irecv on tag0 via s_waits1/r_waits1
     and rbuf1, and posts the receives rbuf2/r_waits2 for the row values
     that come back on tag1.] */

  /* Receive the requests and form the replies:  each wanted row of the
     local dense block is copied into sbuf2, column by column, and shipped
     back on tag1.  [Partly reconstructed around the surviving fragments] */
  r_status1 = (MPI_Status*)PetscMalloc((nrqr+1)*sizeof(MPI_Status));CHKPTRQ(r_status1);
  sbuf2     = (Scalar**)PetscMalloc((nrqr+1)*sizeof(Scalar*));CHKPTRQ(sbuf2);
  s_waits2  = (MPI_Request*)PetscMalloc((nrqr+1)*sizeof(MPI_Request));CHKPTRQ(s_waits2);
  {
    Scalar *vals,*v;
    int    s_proc;

    for (i=0; i<nrqr; ++i) {
      ierr    = MPI_Waitany(nrqr,r_waits1,&index,r_status1+i);CHKERRQ(ierr);
      s_proc  = r_status1[i].MPI_SOURCE;   /* requesting processor */
      rbuf1_i = rbuf1[index];
      start   = 2*rbuf1_i[0] + 1;          /* the row numbers follow the header */
      end     = start;
      for (j=1; j<=rbuf1_i[0]; j++) end += rbuf1_i[2*j];
      sbuf2[index] = (Scalar*)PetscMalloc(((end-start)*N+1)*sizeof(Scalar));CHKPTRQ(sbuf2[index]);
      vals = sbuf2[index];
      for (j=start; j<end; j++) {
        row = rbuf1_i[j] - rstart;         /* global to local row number */
        v   = a->v + row;
        for (k=0; k<N; k++) {
          *vals++ = *v; v += a->m;         /* column-major storage: stride by the local row count */
        }
      }
      /* Now send off the data */
      ierr = MPI_Isend(sbuf2[index],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr);
    }
  }
  /* End Send-Recv of IS + row_numbers */
  ierr      = PetscFree(r_status1);CHKERRQ(ierr);
  ierr      = PetscFree(r_waits1);CHKERRQ(ierr);
  s_status1 = (MPI_Status*)PetscMalloc((nrqs+1)*sizeof(MPI_Status));CHKPTRQ(s_status1);
  ierr      = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);
  ierr      = PetscFree(s_status1);CHKERRQ(ierr);
  ierr      = PetscFree(s_waits1);CHKERRQ(ierr);
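
  /* Added note (not in the original):  at this point the communication
     protocol is fully in flight.  Each process has told the owners, in the
     integer messages on tag0, which of their rows it needs; the owners
     have shipped those rows back as blocks of Scalars on tag1; and the
     integer request buffers have been retired.  Everything that follows is
     local work plus draining the tag1 receives:  create (or reuse) the
     sequential dense submatrices, copy in the locally owned rows, and
     unpack each incoming value message through the row map built below. */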

  /* Create the submatrices */
  if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense*)(submats[i]->data);
      if ((mat->m != nrow[i]) || (mat->n != ncol[i])) {
        SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong size");
      }
      ierr = PetscMemzero(mat->v,mat->m*mat->n*sizeof(Scalar));CHKERRQ(ierr);
      submats[i]->factor = C->factor;
    }
  } else {
    /* [Reconstructed: create new sequential dense matrices of the requested sizes] */
    for (i=0; i<ismax; i++) {
      ierr = MatCreateSeqDense(PETSC_COMM_SELF,nrow[i],ncol[i],PETSC_NULL,submats+i);CHKERRQ(ierr);
    }
  }

  /* Assemble the matrices:  first copy over the locally owned rows */
  {
    Scalar *mat_v,*imat_v,*mat_vi,*imat_vi;
    int    *icol_i;

    for (i=0; i<ismax; i++) {
      mat    = (Mat_SeqDense*)submats[i]->data;
      mat_v  = a->v;
      imat_v = mat->v;
      irow_i = irow[i];
      m      = nrow[i];
      for (j=0; j<m; j++) {
        row  = irow_i[j];
        proc = rtable[row];
        if (proc == rank) {              /* this row lives on this processor */
          row     = row - rstart;
          mat_vi  = mat_v + row;
          imat_vi = imat_v + j;
          icol_i  = icol[i];
          for (k=0; k<ncol[i]; k++) {    /* both arrays are stored column-major */
            imat_vi[k*m] = mat_vi[icol_i[k]*c->m];
          }
        }
      }
    }
  }

  /* Create row map. This maps c->row to submat->row for each submat */
  /* this is a very expensive operation wrt memory usage */
  len     = (1+ismax)*sizeof(int*) + ismax*c->M*sizeof(int);
  rmap    = (int**)PetscMalloc(len);CHKPTRQ(rmap);
  rmap[0] = (int*)(rmap + ismax);
  ierr    = PetscMemzero(rmap[0],ismax*c->M*sizeof(int));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + c->M; }
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) {
      rmap_i[irow_i[j]] = j;
    }
  }

  /* Now Receive the row_values and assemble the rest of the matrix */
  {
    Scalar *imat_v,*imat_vi,*rbuf2_i;

    r_status2 = (MPI_Status*)PetscMalloc((nrqs+1)*sizeof(MPI_Status));CHKPTRQ(r_status2);
    for (count=0; count<nrqs; count++) {
      ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+count);CHKERRQ(ierr);
      /* Dig out the corresponding sbuf1, which holds the IS headers */
      sbuf1_j = sbuf1[pa[i]];
      jmax    = sbuf1_j[0];
      ct1     = 2*jmax + 1;
      rbuf2_i = rbuf2[i];
      for (j=1; j<=jmax; j++) {
        is_no  = sbuf1_j[2*j-1];
        mat    = (Mat_SeqDense*)submats[is_no]->data;
        imat_v = mat->v;
        rmap_i = rmap[is_no];
        m      = nrow[is_no];
        /* [The surviving text breaks off at this loop; reconstructed:
           copy each received row into the submatrix through the row map] */
        for (k=0; k<sbuf1_j[2*j]; k++) {
          row     = rmap_i[sbuf1_j[ct1]]; ct1++;
          imat_vi = imat_v + row;
          for (l=0; l<N; l++) {
            imat_vi[l*m] = *rbuf2_i++;
          }
        }
      }
    }

    /* [Lost in the original:  wait on the outstanding tag1 sends, restore
       the IS indices, free rmap, sbuf1/rbuf1, sbuf2/rbuf2 and the other
       work arrays, and MatAssemblyBegin/MatAssemblyEnd each submatrix
       before returning] */
  }
  PetscFunctionReturn(0);
}
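
/*
   Usage sketch (not part of the original source):  the routines above are
   reached through the public MatGetSubMatrices() interface.  On the
   PETSc 2.0-era API a caller would extract sequential dense submatrices
   from a parallel dense matrix along these lines; the 8x8 size and the
   index sets are made up for illustration, and both index sets must be
   sorted, as checked in MatGetSubMatrices_MPIDense_Local():

     Mat A,*submat;
     IS  isrow,iscol;
     int ierr,rows[3] = {0,2,4},cols[2] = {1,3};

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                              8,8,PETSC_NULL,&A);CHKERRQ(ierr);
     ... set entries with MatSetValues(), then MatAssemblyBegin/End ...

     ierr = ISCreateGeneral(PETSC_COMM_SELF,3,rows,&isrow);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,cols,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrices(A,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);

   after which submat[0] is a 3 x 2 sequential dense matrix on each process,
   eventually freed with MatDestroyMatrices() on this era's API.
*/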