/* Support for the parallel dense matrix vector multiply */

#include "src/mat/impls/dense/mpi/mpidense.h"

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIDense"
/*
   MatSetUpMultiply_MPIDense - Builds the communication machinery needed by
   MatMult for a parallel dense matrix: a sequential work vector mdn->lvec
   of full global length mat->N, and a scatter context mdn->Mvctx that
   gathers the ENTIRE distributed vector onto every process before the
   local (dense) mat-vec.  Because the whole vector is gathered, both
   index sets are simple unit strides over [0, mat->N).

   Collective on Mat (creates MPI objects on mat->comm).

   Input Parameter:
.  mat - the parallel dense matrix (Mat_MPIDense)

   Note: the IS and Vec temporaries exist only to drive VecScatterCreate()
   and are destroyed before returning; only lvec and Mvctx persist.
*/
PetscErrorCode MatSetUpMultiply_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  IS             from,to;
  Vec            gvec;

  PetscFunctionBegin;
  /* Create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,mat->N,&mdn->lvec);CHKERRQ(ierr);

  /* Create temporary index set for building scatter gather */
  ierr = ISCreateStride(mat->comm,mat->N,0,1,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,mat->N,0,1,&to);CHKERRQ(ierr);

  /* Create temporary global vector to generate scatter context */
  /* n = mdn->cowners[mdn->rank+1] - mdn->cowners[mdn->rank]; */
  ierr = VecCreateMPI(mat->comm,mdn->nvec,mat->N,&gvec);CHKERRQ(ierr);

  /* Generate the scatter context */
  ierr = VecScatterCreate(gvec,from,mdn->lvec,to,&mdn->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,mdn->Mvctx);
  PetscLogObjectParent(mat,mdn->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  PetscLogObjectParent(mat,gvec);

  /* The temporaries are no longer needed once the scatter context exists */
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat,int,const IS[],const IS[],MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPIDense"
/*
   MatGetSubMatrices_MPIDense - Extracts ismax sequential dense submatrices
   from the parallel dense matrix C, staging the extraction so that the
   scratch memory per stage stays below roughly 20 MB (see nmax below).
   The real work per stage is done by MatGetSubMatrices_MPIDense_Local().

   Collective on Mat.

   Input Parameters:
+  C      - the parallel dense matrix
.  ismax  - number of submatrices requested on this process
.  isrow  - row index sets, one per submatrix
.  iscol  - column index sets, one per submatrix
-  scall  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  submat - array of sequential dense submatrices
*/
PetscErrorCode MatGetSubMatrices_MPIDense(Mat C,int ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
{
  PetscErrorCode ierr;
  int            nmax,nstages_local,nstages,i,pos,max_no;

  PetscFunctionBegin;
  /* Allocate memory to hold all the submatrices */
  if (scall != MAT_REUSE_MATRIX) {
    ierr = PetscMalloc((ismax+1)*sizeof(Mat),submat);CHKERRQ(ierr);
  }
  /* Determine the number of stages through which submatrices are done */
  nmax = 20*1000000 / (C->N * sizeof(int));
  if (!nmax) nmax = 1;
  nstages_local = ismax/nmax + ((ismax % nmax)?1:0);

  /* Make sure every
processor loops through the nstages */ /* NOTE(review): this region is corrupted -- the text between every '<' and the following '>' has been stripped (e.g. "for (i=0,pos=0; idata;" has lost its loop condition, increments, and several statements), so the remainder of MatGetSubMatrices_MPIDense and the entire body of MatGetSubMatrices_MPIDense_Local below are incomplete and will not compile. The surviving fragments show the intended structure: an MPI_Allreduce to agree on the stage count, a staged loop calling MatGetSubMatrices_MPIDense_Local, then the _Local routine which (a) evaluates row/column requests per owning process, (b) exchanges index sets and row data via MPI_Isend/MPI_Irecv with tags tag0/tag1 and MPI_Waitall, (c) creates or reuses the sequential dense submatrices, and (d) assembles them through an ismax*C->M row map. Do not edit further from this copy; restore the full text from the PETSc repository first. */ ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);CHKERRQ(ierr); for (i=0,pos=0; idata; Mat A = c->A; Mat_SeqDense *a = (Mat_SeqDense*)A->data,*mat; PetscErrorCode ierr; int N = C->N,rstart = c->rstart,count; int **irow,**icol,*nrow,*ncol,*w1,*w3,*w4,*rtable,start,end,size; int **sbuf1,rank,m,i,j,k,l,ct1,**rbuf1,row,proc; int nrqs,msz,**ptr,idex,*ctr,*pa,*tmp,bsz,nrqr; int is_no,jmax,*irow_i,**rmap,*rmap_i; int len,ctr_j,*sbuf1_j,*rbuf1_i; int tag0,tag1; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; MPI_Status *r_status1,*r_status2,*s_status1,*s_status2; MPI_Comm comm; PetscScalar **rbuf2,**sbuf2; PetscTruth sorted; PetscFunctionBegin; comm = C->comm; tag0 = C->tag; size = c->size; rank = c->rank; m = C->M; /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); /* Check if the col indices are sorted */ for (i=0; i proc*/ for (i=0,j=0; irowners[i+1]; for (; jv + row; for (k=0; km; } } /* Now send off the data */ ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr); } } /* End Send-Recv of IS + row_numbers */ ierr = PetscFree(r_status1);CHKERRQ(ierr); ierr = PetscFree(r_waits1);CHKERRQ(ierr); ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr); ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr); ierr = PetscFree(s_status1);CHKERRQ(ierr); ierr = PetscFree(s_waits1);CHKERRQ(ierr); /* Create the submatrices */ if (scall == MAT_REUSE_MATRIX) { for (i=0; idata); if ((submats[i]->m != nrow[i]) || (submats[i]->n != ncol[i])) { SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
wrong size"); } ierr = PetscMemzero(mat->v,submats[i]->m*submats[i]->n*sizeof(PetscScalar));CHKERRQ(ierr); submats[i]->factor = C->factor; } } else { for (i=0; itype_name);CHKERRQ(ierr); ierr = MatSeqDenseSetPreallocation(submats[i],PETSC_NULL);CHKERRQ(ierr); } } /* Assemble the matrices */ { int col; PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi; for (i=0; idata; mat_v = a->v; imat_v = mat->v; irow_i = irow[i]; m = nrow[i]; for (j=0; jm]; } } } } } /* Create row map. This maps c->row to submat->row for each submat*/ /* this is a very expensive operation wrt memory usage */ len = (1+ismax)*sizeof(int*)+ ismax*C->M*sizeof(int); ierr = PetscMalloc(len,&rmap);CHKERRQ(ierr); rmap[0] = (int*)(rmap + ismax); ierr = PetscMemzero(rmap[0],ismax*C->M*sizeof(int));CHKERRQ(ierr); for (i=1; iM;} for (i=0; idata; imat_v = mat->v; rmap_i = rmap[is_no]; m = nrow[is_no]; for (k=0; k