/*
   Support for the parallel dense matrix vector multiply
*/
#include <../src/mat/impls/dense/mpi/mpidense.h>
#include <petscblaslapack.h> /* NOTE(review): original include directive was destroyed by extraction; upstream PETSc uses this header here — confirm against repository */

/*
   MatSetUpMultiply_MPIDense - builds the communication objects needed by the
   parallel dense MatMult: a sequential work vector sized like the local block,
   and an allgather PetscSF that collects the full input vector on every rank.
*/
PetscErrorCode MatSetUpMultiply_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  /* Create local vector that is used to scatter into */
  PetscCall(VecDestroy(&mdn->lvec));
  if (mdn->A) {
    PetscCall(MatCreateVecs(mdn->A,&mdn->lvec,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mdn->lvec));
  }
  if (!mdn->Mvctx) {
    /* The SF with the ALLGATHER pattern gathers every rank's slice of the
       column layout onto all ranks; it is created lazily and reused */
    PetscCall(PetscLayoutSetUp(mat->cmap));
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)mat),&mdn->Mvctx));
    PetscCall(PetscSFSetGraphWithPattern(mdn->Mvctx,mat->cmap,PETSCSF_PATTERN_ALLGATHER));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mdn->Mvctx));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrices_MPIDense_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*);

/*
   MatCreateSubMatrices_MPIDense - extracts ismax sequential dense submatrices,
   processing the index sets in stages so the per-stage communication buffers
   stay below roughly 20 MB (see nmax computation below).
*/
PetscErrorCode MatCreateSubMatrices_MPIDense(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
{
  PetscInt nmax,nstages_local,nstages,i,pos,max_no;

  PetscFunctionBegin;
  /* Allocate memory to hold all the submatrices */
  if (scall != MAT_REUSE_MATRIX) {
    PetscCall(PetscCalloc1(ismax+1,submat));
  }
  /* Determine the number of stages through which submatrices are done:
     cap each stage so the index buffers stay near 20 MB */
  nmax = 20*1000000 / (C->cmap->N * sizeof(PetscInt));
  if (!nmax) nmax = 1;
  nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0);

  /* Make sure every processor loops through the global maximum number of
     stages; ranks with fewer local stages call the worker with max_no = 0
     so the collective communication inside stays matched */
  PetscCall(MPIU_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C)));

  for (i=0,pos=0; i<nstages; i++) {
    if (pos+nmax <= ismax) max_no = nmax;
    else if (pos == ismax) max_no = 0;
    else                   max_no = ismax-pos;
    PetscCall(MatCreateSubMatrices_MPIDense_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos));
    pos += max_no;
  }
  PetscFunctionReturn(0);
}

/*
   MatCreateSubMatrices_MPIDense_Local - worker for one stage of submatrix
   extraction.  Protocol:
     1. tag0: each rank sends the (IS id, row count, row numbers) it needs
        from every owning rank;
     2. tag1: owners reply with the full N-wide rows, packed row by row;
     3. local rows are copied directly, remote rows are scattered into the
        submatrices via a row map.

   NOTE(review): the body below was reconstructed from surviving fragments
   (the declaration list, string literals, and the MPI_Isend/PetscCheckFalse
   calls are verbatim from SOURCE); the statements whose text was destroyed by
   the angle-bracket-stripping extraction follow the upstream PETSc
   implementation of this file — diff against the repository before merging.
*/
static PetscErrorCode MatCreateSubMatrices_MPIDense_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
{
  Mat_MPIDense   *c = (Mat_MPIDense*)C->data;
  Mat            A  = c->A;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->data,*mat;
  PetscMPIInt    rank,size,tag0,tag1,idex,end,i;
  PetscInt       N = C->cmap->N,rstart = C->rmap->rstart,count;
  const PetscInt **irow,**icol,*irow_i;
  PetscInt       *nrow,*ncol,*w1,*w3,*w4,*rtable,start;
  PetscInt       **sbuf1,m,j,k,l,ct1,**rbuf1,row,proc;
  PetscInt       nrqs,msz,**ptr,*ctr,*pa,*tmp,bsz,nrqr;
  PetscInt       is_no,jmax,**rmap,*rmap_i;
  PetscInt       ctr_j,*sbuf1_j,*rbuf1_i;
  MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
  MPI_Status     *r_status1,*r_status2,*s_status1,*s_status2;
  MPI_Comm       comm;
  PetscScalar    **rbuf2,**sbuf2;
  PetscBool      sorted;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  tag0 = ((PetscObject)C)->tag;
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  m    = C->rmap->N;

  /* Get some new tags to keep the communication clean */
  PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag1));

  /* Check if the col indices are sorted */
  for (i=0; i<ismax; i++) {
    PetscCall(ISSorted(isrow[i],&sorted));
    PetscCheckFalse(!sorted,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"ISrow is not sorted");
    PetscCall(ISSorted(iscol[i],&sorted));
    PetscCheckFalse(!sorted,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"IScol is not sorted");
  }

  PetscCall(PetscMalloc5(ismax,(PetscInt***)&irow,ismax,(PetscInt***)&icol,ismax,&nrow,ismax,&ncol,m,&rtable));
  for (i=0; i<ismax; i++) {
    PetscCall(ISGetIndices(isrow[i],&irow[i]));
    PetscCall(ISGetIndices(iscol[i],&icol[i]));
    PetscCall(ISGetLocalSize(isrow[i],&nrow[i]));
    PetscCall(ISGetLocalSize(iscol[i],&ncol[i]));
  }

  /* Create hash table for the mapping :row -> proc */
  for (i=0,j=0; i<size; i++) {
    jmax = C->rmap->range[i+1];
    for (; j<jmax; j++) rtable[j] = i;
  }

  /* evaluate communication - mesg to whom, length of mesg, and buffer space
     required. Based on this, buffers are allocated, and data copied into them */
  PetscCall(PetscMalloc3(2*size,&w1,size,&w3,size,&w4));
  PetscCall(PetscArrayzero(w1,size*2)); /* initialize work vector */
  PetscCall(PetscArrayzero(w3,size));   /* initialize work vector */
  for (i=0; i<ismax; i++) {
    PetscCall(PetscArrayzero(w4,size)); /* initialize work vector */
    jmax   = nrow[i];
    irow_i = irow[i];
    for (j=0; j<jmax; j++) {
      row  = irow_i[j];
      proc = rtable[row];
      w4[proc]++;
    }
    for (j=0; j<size; j++) {
      if (w4[j]) { w1[2*j] += w4[j]; w3[j]++; }
    }
  }

  nrqs       = 0; /* no of outgoing messages */
  msz        = 0; /* total mesg length (for all procs) */
  w1[2*rank] = 0; /* no mesg sent to self */
  w3[rank]   = 0;
  for (i=0; i<size; i++) {
    if (w1[2*i]) { w1[2*i+1] = 1; nrqs++; } /* there exists a message to proc i */
  }
  PetscCall(PetscMalloc1(nrqs+1,&pa)); /* (proc -array) */
  for (i=0,j=0; i<size; i++) {
    if (w1[2*i]) { pa[j] = i; j++; }
  }

  /* Each message would have a header = 1 + 2*(no of IS) + data */
  for (i=0; i<nrqs; i++) {
    j        = pa[i];
    w1[2*j] += w1[2*j+1] + 2*w3[j];
    msz     += w1[2*j];
  }
  /* Do a global reduction to determine how many messages to expect */
  PetscCall(PetscMaxSum(comm,w1,&bsz,&nrqr));

  /* Allocate memory for recv buffers. Make sure rbuf1[0] exists by adding 1 to the buffer length */
  PetscCall(PetscMalloc1(nrqr+1,&rbuf1));
  PetscCall(PetscMalloc1(nrqr*bsz,&rbuf1[0]));
  for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

  /* Post the receives */
  PetscCall(PetscMalloc1(nrqr+1,&r_waits1));
  for (i=0; i<nrqr; ++i) {
    PetscCallMPI(MPI_Irecv(rbuf1[i],bsz,MPIU_INT,MPI_ANY_SOURCE,tag0,comm,r_waits1+i));
  }

  /* Allocate Memory for outgoing messages */
  PetscCall(PetscMalloc4(size,&sbuf1,size,&ptr,2*msz,&tmp,size,&ctr));
  PetscCall(PetscArrayzero(sbuf1,size));
  PetscCall(PetscArrayzero(ptr,size));
  {
    PetscInt *iptr = tmp,ict = 0;
    for (i=0; i<nrqs; i++) {
      j        = pa[i];
      iptr    += ict;
      sbuf1[j] = iptr;
      ict      = w1[2*j];
    }
  }

  /* Form the outgoing messages: initialize the header space */
  for (i=0; i<nrqs; i++) {
    j           = pa[i];
    sbuf1[j][0] = 0;
    PetscCall(PetscArrayzero(sbuf1[j]+1,2*w3[j]));
    ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
  }

  /* Parse the isrow and copy data into outbuf */
  for (i=0; i<ismax; i++) {
    PetscCall(PetscArrayzero(ctr,size));
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) { /* parse the indices of each IS */
      row  = irow_i[j];
      proc = rtable[row];
      if (proc != rank) { /* copy to the outgoing buf */
        ctr[proc]++;
        *ptr[proc] = row;
        ptr[proc]++;
      }
    }
    /* Update the headers for the current IS */
    for (j=0; j<size; j++) {
      if ((ctr_j = ctr[j])) {
        sbuf1_j        = sbuf1[j];
        k              = ++sbuf1_j[0];
        sbuf1_j[2*k]   = ctr_j;
        sbuf1_j[2*k-1] = i;
      }
    }
  }

  /* Now post the sends */
  PetscCall(PetscMalloc1(nrqs+1,&s_waits1));
  for (i=0; i<nrqs; ++i) {
    j = pa[i];
    PetscCallMPI(MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i));
  }

  /* Post receives for the row values: each reply carries N scalars per requested row */
  PetscCall(PetscMalloc1(nrqs+1,&r_waits2));
  PetscCall(PetscMalloc1(nrqs+1,&rbuf2));
  for (i=0; i<nrqs; i++) {
    j     = pa[i];
    count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N; /* payload rows times full row width */
    PetscCall(PetscMalloc1(count+1,&rbuf2[i]));
    PetscCallMPI(MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i));
  }

  /* Receive messages (row_nos) and then pack and send off the row values */
  PetscCall(PetscMalloc1(nrqr+1,&sbuf2));
  PetscCall(PetscMalloc1(nrqr+1,&s_waits2));
  PetscCall(PetscMalloc1(nrqr+1,&r_status1));
  {
    PetscMPIInt s_proc;
    PetscScalar *sbuf2_i,*v_start;

    for (i=0; i<nrqr; ++i) {
      PetscCallMPI(MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i));
      s_proc  = r_status1[i].MPI_SOURCE; /* requesting processor */
      rbuf1_i = rbuf1[idex];             /* actual message from s_proc */
      /* header occupies 1 + 2*(no of IS) entries; the rest are row numbers */
      start = 2*rbuf1_i[0] + 1;
      PetscCallMPI(MPI_Get_count(r_status1+i,MPIU_INT,&end));
      /* allocate memory sufficient to hold all the row values */
      PetscCall(PetscMalloc1((end-start)*N,&sbuf2[idex]));
      sbuf2_i = sbuf2[idex];
      /* pack the requested rows; local storage is column-major with leading dimension lda */
      for (j=start; j<end; j++) {
        row     = rbuf1_i[j] - rstart;
        v_start = a->v + row;
        for (k=0; k<N; k++) {
          sbuf2_i[0] = v_start[0];
          sbuf2_i++;
          v_start += a->lda;
        }
      }
      /* Now send off the data */
      PetscCallMPI(MPI_Isend(sbuf2[idex],(end-start)*N,MPIU_SCALAR,s_proc,tag1,comm,s_waits2+i));
    }
  }

  /* End Send-Recv of IS + row_numbers */
  PetscCall(PetscFree(r_status1));
  PetscCall(PetscFree(r_waits1));
  PetscCall(PetscMalloc1(nrqs+1,&s_status1));
  if (nrqs) PetscCallMPI(MPI_Waitall(nrqs,s_waits1,s_status1));
  PetscCall(PetscFree(s_status1));
  PetscCall(PetscFree(s_waits1));

  /* Create the submatrices */
  if (scall == MAT_REUSE_MATRIX) {
    for (i=0; i<ismax; i++) {
      mat = (Mat_SeqDense*)(submats[i]->data);
      PetscCheckFalse((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i]),PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
      PetscCall(PetscArrayzero(mat->v,submats[i]->rmap->n*submats[i]->cmap->n));

      submats[i]->factortype = C->factortype;
    }
  } else {
    for (i=0; i<ismax; i++) {
      PetscCall(MatCreate(PETSC_COMM_SELF,submats+i));
      PetscCall(MatSetSizes(submats[i],nrow[i],ncol[i],nrow[i],ncol[i]));
      PetscCall(MatSetType(submats[i],((PetscObject)A)->type_name));
      PetscCall(MatSeqDenseSetPreallocation(submats[i],NULL));
    }
  }

  /* Assemble the matrices: first the locally owned rows */
  {
    PetscInt    col;
    PetscScalar *imat_v,*mat_v,*imat_vi,*mat_vi;

    for (i=0; i<ismax; i++) {
      mat    = (Mat_SeqDense*)submats[i]->data;
      mat_v  = a->v;
      imat_v = mat->v;
      irow_i = irow[i];
      m      = nrow[i];
      for (j=0; j<m; j++) {
        row  = irow_i[j];
        proc = rtable[row];
        if (proc == rank) { /* copy local rows directly, no communication */
          row     = row - rstart;
          mat_vi  = mat_v + row;
          imat_vi = imat_v + j;
          for (k=0; k<ncol[i]; k++) {
            col          = icol[i][k];
            imat_vi[k*m] = mat_vi[col*a->lda];
          }
        }
      }
    }
  }

  /* Create row map. This maps c->row to submat->row for each submat */
  /* this is a very expensive operation wrt memory usage */
  PetscCall(PetscMalloc1(ismax,&rmap));
  PetscCall(PetscCalloc1(ismax*C->rmap->N,&rmap[0]));
  for (i=1; i<ismax; i++) rmap[i] = rmap[i-1] + C->rmap->N;
  for (i=0; i<ismax; i++) {
    rmap_i = rmap[i];
    irow_i = irow[i];
    jmax   = nrow[i];
    for (j=0; j<jmax; j++) rmap_i[irow_i[j]] = j;
  }

  /* Now receive the row values and assemble the rest of the matrix */
  PetscCall(PetscMalloc1(nrqs+1,&r_status2));
  {
    PetscInt    is_max,tmp1,col,*sbuf1_i,is_sz;
    PetscScalar *rbuf2_i,*imat_v,*imat_vi;

    for (tmp1=0; tmp1<nrqs; tmp1++) { /* For each message */
      PetscCallMPI(MPI_Waitany(nrqs,r_waits2,&i,r_status2+tmp1));
      /* Dig out the corresponding sbuf1, which holds the IS data structure */
      sbuf1_i = sbuf1[pa[i]];
      is_max  = sbuf1_i[0];
      ct1     = 2*is_max + 1;
      rbuf2_i = rbuf2[i];
      for (j=1; j<=is_max; j++) { /* For each IS belonging to the message */
        is_no  = sbuf1_i[2*j-1];
        is_sz  = sbuf1_i[2*j];
        mat    = (Mat_SeqDense*)submats[is_no]->data;
        imat_v = mat->v;
        rmap_i = rmap[is_no];
        m      = nrow[is_no];
        for (k=0; k<is_sz; k++,ct1++) { /* For each row */
          row     = sbuf1_i[ct1];
          row     = rmap_i[row];
          imat_vi = imat_v + row;
          for (l=0; l<ncol[is_no]; l++) { /* For each col */
            col          = icol[is_no][l];
            imat_vi[l*m] = rbuf2_i[col];
          }
          rbuf2_i += N; /* advance to the next packed row */
        }
      }
    }
  }
  /* End Send-Recv of row values */
  PetscCall(PetscFree(r_status2));
  PetscCall(PetscFree(r_waits2));
  PetscCall(PetscMalloc1(nrqs+1,&s_status2));
  if (nrqr) PetscCallMPI(MPI_Waitall(nrqr,s_waits2,s_status2));
  PetscCall(PetscFree(s_status2));
  PetscCall(PetscFree(s_waits2));

  /* Restore the indices */
  for (i=0; i<ismax; i++) {
    PetscCall(ISRestoreIndices(isrow[i],irow+i));
    PetscCall(ISRestoreIndices(iscol[i],icol+i));
  }

  /* Destroy allocated memory */
  PetscCall(PetscFree5(*(PetscInt***)&irow,*(PetscInt***)&icol,nrow,ncol,rtable));
  PetscCall(PetscFree3(w1,w3,w4));
  PetscCall(PetscFree(pa));

  for (i=0; i<nrqs; ++i) {
    PetscCall(PetscFree(rbuf2[i]));
  }
  PetscCall(PetscFree(rbuf2));
  PetscCall(PetscFree4(sbuf1,ptr,tmp,ctr));
  PetscCall(PetscFree(rbuf1[0]));
  PetscCall(PetscFree(rbuf1));

  for (i=0; i<nrqr; ++i) {
    PetscCall(PetscFree(sbuf2[i]));
  }
  PetscCall(PetscFree(sbuf2));

  PetscCall(PetscFree(rmap[0]));
  PetscCall(PetscFree(rmap));

  for (i=0; i<ismax; i++) {
    PetscCall(MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(0);
}

/*
   MatScale_MPIDense - scales the matrix by alpha; simply forwards to the
   local sequential block since every stored entry lives there.
*/
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense *A = (Mat_MPIDense*)inA->data;

  PetscFunctionBegin;
  PetscCall(MatScale(A->A,alpha));
  PetscFunctionReturn(0);
}