/*
   Support for the parallel dense matrix vector multiply
*/
#include <../src/mat/impls/dense/mpi/mpidense.h>
#include <petscblaslapack.h>

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIDense"
PetscErrorCode MatSetUpMultiply_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  IS             from,to;
  Vec            gvec;

  PetscFunctionBegin;
  /* Create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,mat->cmap->N,&mdn->lvec);CHKERRQ(ierr);

  /* Create temporary index sets for building scatter-gather */
  ierr = ISCreateStride(((PetscObject)mat)->comm,mat->cmap->N,0,1,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,mat->cmap->N,0,1,&to);CHKERRQ(ierr);

  /* Create temporary global vector to generate scatter context */
  /* n = mdn->cowners[mdn->rank+1] - mdn->cowners[mdn->rank]; */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mdn->nvec,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* Generate the scatter context */
  ierr = VecScatterCreate(gvec,from,mdn->lvec,to,&mdn->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,mdn->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,mdn->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,gvec);CHKERRQ(ierr);

  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPIDense"
PetscErrorCode MatGetSubMatrices_MPIDense(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
{
  PetscErrorCode ierr;
  PetscInt       nmax,nstages_local,nstages,i,pos,max_no;

  PetscFunctionBegin;
  /* Allocate memory to hold all the submatrices */
  if (scall != MAT_REUSE_MATRIX) {
    ierr = PetscMalloc((ismax+1)*sizeof(Mat),submat);CHKERRQ(ierr);
  }
  /* Determine the number of stages through which submatrices are done */
  nmax = 20*1000000 / (C->cmap->N * sizeof(PetscInt));
  if (!nmax) nmax = 1;
  nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0);

  /* Make sure every processor loops through the nstages */
  ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);CHKERRQ(ierr);

  for (i=0,pos=0; i<nstages; i++) {
    if (pos+nmax <= ismax) max_no = nmax;
    else if (pos == ismax) max_no = 0;
    else                   max_no = ismax-pos;
    ierr = MatGetSubMatrices_MPIDense_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);CHKERRQ(ierr);
    pos += max_no;
  }
  PetscFunctionReturn(0);
}
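/*
   Illustrative usage sketch (not part of this file): a caller reaches the staged
   driver above through the generic MatGetSubMatrices() interface; the names C,
   nloc, and ierr below are assumed context, not definitions from this file.  The
   index sets must be sorted, which the worker routine below checks explicitly.

      IS  isrow,iscol;
      Mat *submat;

      ierr = ISCreateStride(PETSC_COMM_SELF,nloc,0,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nloc,0,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrices(C,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
      ... submat[0] is now a sequential dense nloc-by-nloc copy of the selected block ...
      ierr = MatDestroyMatrices(1,&submat);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/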
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPIDense_Local"
PetscErrorCode MatGetSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
{
  Mat_MPIDense   *c = (Mat_MPIDense*)C->data;
  Mat            A = c->A;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->data,*mat;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag0,tag1,idex,end,i;
  PetscInt       N = C->cmap->N,rstart = C->rmap->rstart,count;
  const PetscInt **irow,**icol,*irow_i;
  PetscInt       *nrow,*ncol,*w1,*w3,*w4,*rtable,start;
  PetscInt       **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc;
  PetscInt       nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr;
  PetscInt       is_no,jmax,**rmap,*rmap_i;
  PetscInt       ctr_j,*sbuf1_j,*rbuf1_i;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status     *r_status1,*r_status2,*s_status1,*s_status2;
  MPI_Comm       comm;
  PetscScalar    **rbuf2,**sbuf2;
  PetscBool      sorted;

  PetscFunctionBegin;
  comm = ((PetscObject)C)->comm;
  tag0 = ((PetscObject)C)->tag;
  size = c->size;
  rank = c->rank;
  m    = C->rmap->N;

  /* Get some new tags to keep the communication clean */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);

  /* Check if the col indices are sorted */
  for (i=0; i<ismax; i++) {
    ierr = ISSorted(isrow[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
    ierr = ISSorted(iscol[i],&sorted);CHKERRQ(ierr);
    if (!sorted) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
  }

  ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,m,PetscInt,&rtable);CHKERRQ(ierr);
  for (i=0; i<ismax; i++) {
    ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
    ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
    ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
  }

  /* Create hash table for the mapping :row -> proc */
  for (i=0,j=0; i<size; i++) {
    jmax = C->rmap->range[i+1];
    for (; j<jmax; j++) rtable[j] = i;
  }

  /* Evaluate communication: to whom messages go, their lengths, and the buffer
     space required; based on this, buffers are allocated and data copied in */
  ierr = PetscMalloc3(2*size,PetscInt,&w1,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr);
  ierr = PetscMemzero(w1,2*size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector */
  ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr);   /* initialize work vector */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialize work vector */
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row  = irow_i[j];
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[2*j] += w4[j]; w3[j]++; }
    }
  }

  nrqs       = 0;  /* number of outgoing messages */
  msz        = 0;  /* total message length (for all procs) */
  w1[2*rank] = 0;  /* no message sent to self */
  w3[rank]   = 0;
  for (i=0; i<size; i++) {
    if (w1[2*i]) { w1[2*i+1] = 1; nrqs++; } /* there exists a message to proc i */
  }
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /* (proc-array) */
  for (i=0,j=0; i<size; i++) {
    if (w1[2*i]) { pa[j] = i; j++; }
  }

  /* Each message has a header = 1 + 2*(number of ISs) + data */
  for (i=0; i<nrqs; i++) {
    j        = pa[i];
    w1[2*j] += w1[2*j+1] + 2*w3[j];
    msz     += w1[2*j];
  }
  /* Do a global reduction to determine how many messages to expect */
  ierr = PetscMaxSum(comm,w1,&bsz,&nrqr);CHKERRQ(ierr);

  /* Allocate memory for the recv buffers */
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&rbuf1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr*bsz+1)*sizeof(PetscInt),&rbuf1[0]);CHKERRQ(ierr);
  for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

  /* Post the receives */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqr; ++i) {
    ierr = MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i);CHKERRQ(ierr);
  }

  /* Allocate memory for the outgoing messages */
  ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr);
  ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr);
  ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
  {
    PetscInt *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j        = pa[i];
      iptr    += ict;
      sbuf1[j] = iptr;
      ict      = w1[2*j];
    }
  }

  /* Form the outgoing messages: initialize the header space */
  for (i=0; i<nrqs; i++) {
    j           = pa[i];
    sbuf1[j][0] = 0;
    ierr        = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
    ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
  }

  /* Parse the isrow and copy data into the outgoing buffers */
  for (i=0; i<ismax; i++) {
    ierr   = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) { /* parse the indices of each IS */
      row  = irow_i[j];
      proc = rtable[row];
      if (proc != rank) { /* copy to the outgoing buffer */
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
    }
    /* Update the headers for the current IS */
    for (j=0; j<size; j++) {
      if ((ctr_j = ctr[j])) {
        sbuf1_j        = sbuf1[j];
        k              = ++sbuf1_j[0];
        sbuf1_j[2*k]   = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /* Now post the sends */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
  for (i=0; i<nrqs; ++i) {
    j    = pa[i];
    ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
  }

  /* Post receives to capture the row data from the other processors */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr);
  for (i=0; i<nrqs; i++) {
    j     = pa[i];
    count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N;
    ierr  = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr);
    ierr  = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
  }

  /* Receive the messages (row numbers), then pack and send off the requested
     row values to the correct processors */
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(PetscScalar*),&sbuf2);CHKERRQ(ierr);
  {
    PetscScalar *sbuf2_i,*v_start;
    PetscInt    s_proc;
    for (i=0; i<nrqr; ++i) {
      ierr    = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr);
      s_proc  = r_status1[i].MPI_SOURCE; /* requesting processor */
      rbuf1_i = rbuf1[idex];             /* actual message from s_proc */
      /* number of rows = end - start; start is the end of the header, end is
         the length of the received buffer */
      start = 2*rbuf1_i[0] + 1;
      ierr  = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr);
      /* allocate memory sufficient to hold all the row values */
      ierr    = PetscMalloc((end-start)*N*sizeof(PetscScalar),&sbuf2[idex]);CHKERRQ(ierr);
      sbuf2_i = sbuf2[idex];
      /* now pack the data */
      for (j=start; j<end; j++) {
        row     = rbuf1_i[j] - rstart;
        v_start = a->v + row;
        for (k=0; k<N; k++) {
          sbuf2_i[0] = v_start[0];
          sbuf2_i++;
          v_start += C->rmap->n;
        }
      }
      /* Now send off the data */
      ierr = MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i);CHKERRQ(ierr);
    }
  }
  /* End Send-Recv of IS + row_numbers */
  ierr = PetscFree(r_status1);CHKERRQ(ierr);
  ierr = PetscFree(r_waits1);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr);
  if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);}
  ierr = PetscFree(s_status1);CHKERRQ(ierr);
  ierr = PetscFree(s_waits1);CHKERRQ(ierr);

  /* Create the submatrices */
  if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense*)(submats[i]->data);
      if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
      ierr = PetscMemzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
      submats[i]->factortype = C->factortype;
    }
  } else {
    for (i=0; i<ismax; i++) {
      ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr);
      ierr = MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]);CHKERRQ(ierr);
      ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr);
      ierr = MatSeqDenseSetPreallocation(submats[i],PETSC_NULL);CHKERRQ(ierr);
    }
  }

  /* Assemble the matrices: first the locally owned rows */
  {
    PetscInt    col;
    PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi;

    for (i=0; i<ismax; i++) {
      mat    = (Mat_SeqDense*)submats[i]->data;
      mat_v  = a->v;
      imat_v = mat->v;
      irow_i = irow[i];
      m      = nrow[i];
      for (j=0; j<m; j++) {
        row  = irow_i[j];
        proc = rtable[row];
        if (proc == rank) { /* this row is local */
          row     = row - rstart;
          mat_vi  = mat_v + row;
          imat_vi = imat_v + j;
          for (k=0; k<ncol[i]; k++) {
            col          = icol[i][k];
            imat_vi[k*m] = mat_vi[col*C->rmap->n];
          }
        }
      }
    }
  }

  /* Create row map: maps c->row to submat->row for each submat */
  /* this is a very expensive operation wrt memory usage */
  ierr = PetscMalloc(ismax*sizeof(PetscInt*),&rmap);CHKERRQ(ierr);
  ierr = PetscMalloc(ismax*C->rmap->N*sizeof(PetscInt),&rmap[0]);CHKERRQ(ierr);
  ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) rmap_i[irow_i[j]] = j;
  }

  /* Now receive the row values and assemble the rest of the matrix */
  ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr);
  {
    PetscInt    is_max,tmp1,col,*sbuf1_i,is_sz;
    PetscScalar *rbuf2_i,*imat_v,*imat_vi;

    for (tmp1=0; tmp1<nrqs; tmp1++) { /* for each message */
      ierr = MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1);CHKERRQ(ierr);
      /* dig out the corresponding sbuf1, which holds the IS data structure */
      sbuf1_i = sbuf1[pa[i]];
      is_max  = sbuf1_i[0];
      ct1     = 2*is_max + 1;
      rbuf2_i = rbuf2[i];
      for (j=1; j<=is_max; j++) { /* for each IS belonging to the message */
        is_no  = sbuf1_i[2*j-1];
        is_sz  = sbuf1_i[2*j];
        mat    = (Mat_SeqDense*)submats[is_no]->data;
        imat_v = mat->v;
        rmap_i = rmap[is_no];
        m      = nrow[is_no];
        for (k=0; k<is_sz; k++,rbuf2_i+=N) { /* for each row */
          row     = sbuf1_i[ct1]; ct1++;
          row     = rmap_i[row];
          imat_vi = imat_v + row;
          for (l=0; l<ncol[is_no]; l++) { /* for each col */
            col          = icol[is_no][l];
            imat_vi[l*m] = rbuf2_i[col];
          }
        }
      }
    }
  }
  /* End Send-Recv of row values */
  ierr = PetscFree(r_status2);CHKERRQ(ierr);
  ierr = PetscFree(r_waits2);CHKERRQ(ierr);
  ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr);
  if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);}
  ierr = PetscFree(s_status2);CHKERRQ(ierr);
  ierr = PetscFree(s_waits2);CHKERRQ(ierr);

  /* Restore the indices */
  for (i=0; i<ismax; i++) {
    ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr);
    ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr);
  }

  /* Destroy allocated memory */
  ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr);
  ierr = PetscFree3(w1,w3,w4);CHKERRQ(ierr);
  ierr = PetscFree(pa);CHKERRQ(ierr);
  for (i=0; i<nrqs; ++i) {
    ierr = PetscFree(rbuf2[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(rbuf2);CHKERRQ(ierr);
  ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr);
  ierr = PetscFree(rbuf1);CHKERRQ(ierr);
  for (i=0; i<nrqr; ++i) {
    ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr);
  }
  ierr = PetscFree(sbuf2);CHKERRQ(ierr);
  ierr = PetscFree(rmap[0]);CHKERRQ(ierr);
  ierr = PetscFree(rmap);CHKERRQ(ierr);

  for (i=0; i<ismax; i++) {
    ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(inA->rmap->n*inA->cmap->N,&nz);CHKERRQ(ierr);
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
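/*
   Note on MatScale_MPIDense() above: each process stores its rows of an MPIDENSE
   matrix as one contiguous column-major local array (a->v), so scaling requires
   no communication; it is a single BLAS scal over the inA->rmap->n * inA->cmap->N
   locally owned entries.  Illustrative call (A is a placeholder MPIDENSE matrix,
   not a name defined in this file):

      ierr = MatScale(A,2.0);CHKERRQ(ierr);   /+ multiplies every local entry by 2 +/
*/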