/* Routines to compute overlapping regions of a parallel MPI matrix and to find submatrices that were shared across processors. */ #include <../src/mat/impls/aij/mpi/mpiaij.h> #include static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS*); static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**); static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*); extern PetscErrorCode MatGetRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); extern PetscErrorCode MatRestoreRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); #undef __FUNCT__ #define __FUNCT__ "MatIncreaseOverlap_MPIAIJ" PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov) { PetscErrorCode ierr; PetscInt i; PetscFunctionBegin; if (ov < 0) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); for (i=0; i is[1] mesg [2] = sizeof(is[1]); ----------- mesg [3] = 5 => is[5] mesg [4] = sizeof(is[5]); ----------- mesg [5] mesg [n] datas[1] ----------- mesg[n+1] mesg[m] data(is[5]) ----------- Notes: nrqs - no of requests sent (or to be sent out) nrqr - no of requests recieved (which have to be or which have been processed */ #undef __FUNCT__ #define __FUNCT__ "MatIncreaseOverlap_MPIAIJ_Once" static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[]) { Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; PetscMPIInt *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2; const PetscInt **idx,*idx_i; PetscInt *n,**data,len; PetscErrorCode ierr; PetscMPIInt size,rank,tag1,tag2; PetscInt M,i,j,k,**rbuf,row,proc = 0,nrqs,msz,**outdat,**ptr; PetscInt *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p; PetscBT *table; MPI_Comm comm; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; MPI_Status *s_status,*recv_status; char *t_p; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); size = c->size; rank = c->rank; M = C->rmap->N; ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscMalloc2(imax,&idx,imax,&n);CHKERRQ(ierr); for (i=0; irmap,row,&proc);CHKERRQ(ierr); w4[proc]++; } for (j=0; jrmap,row,&proc);CHKERRQ(ierr); if (proc != rank) { /* copy to the outgoing buffer */ ctr[proc]++; *ptr[proc] = row; ptr[proc]++; } else if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; /* Update the local table */ } /* Update the headers for the current IS */ for (j=0; jdata; Mat A = c->A,B = c->B; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; PetscInt start,end,val,max,rstart,cstart,*ai,*aj; PetscInt *bi,*bj,*garray,i,j,k,row,*data_i,isz_i; PetscBT table_i; PetscFunctionBegin; rstart = C->rmap->rstart; cstart = C->cmap->rstart; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for (i=0; idata; Mat A = c->A,B = c->B; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; PetscErrorCode ierr; PetscInt rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k; PetscInt row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end; PetscInt val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr; PetscInt *rbuf_i,kmax,rbuf_0; PetscBT xtable; PetscFunctionBegin; m = C->rmap->N; rstart = C->rmap->rstart; cstart = C->cmap->rstart; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for (i=0,ct=0,total_sz=0; irmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n; else max1 = 1; mem_estimate = 3*((total_sz > max1 ? 
total_sz : max1)+1); ierr = PetscMalloc1(mem_estimate,&xdata[0]);CHKERRQ(ierr); ++no_malloc; ierr = PetscBTCreate(m,&xtable);CHKERRQ(ierr); ierr = PetscMemzero(isz1,nrqr*sizeof(PetscInt));CHKERRQ(ierr); ct3 = 0; for (i=0; idata; Mat_SeqAIJ *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data; PetscErrorCode ierr; PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j; PetscInt m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; MatScalar *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf; PetscFunctionBegin; ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); if (scall == MAT_INITIAL_MATRIX) { /* ---------------------------------------------------------------- Tell every processor the number of nonzeros per row */ ierr = PetscMalloc1(A->rmap->N,&lens);CHKERRQ(ierr); for (i=A->rmap->rstart; irmap->rend; i++) { lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart]; } sendcount = A->rmap->rend - A->rmap->rstart; ierr = PetscMalloc2(size,&recvcounts,size,&displs);CHKERRQ(ierr); for (i=0; irmap->range[i+1] - A->rmap->range[i]; displs[i] = A->rmap->range[i]; } #if defined(PETSC_HAVE_MPI_IN_PLACE) ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #else ierr = MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #endif /* --------------------------------------------------------------- Create the sequential matrix of the same type as the local block diagonal */ ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); ierr = MatSetType(B,((PetscObject)a->A)->type_name);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); ierr = PetscMalloc1(1,Bin);CHKERRQ(ierr); **Bin = B; b = (Mat_SeqAIJ*)B->data; /*-------------------------------------------------------------------- Copy my part of matrix column indices over */ sendcount = ad->nz + bd->nz; jsendbuf = b->j + b->i[rstarts[rank]]; a_jsendbuf = ad->j; b_jsendbuf = bd->j; n = A->rmap->rend - A->rmap->rstart; cnt = 0; for (i=0; ii[i+1] - bd->i[i]; while (m > 0) { /* is it above diagonal (in bd (compressed) numbering) */ if (garray[*b_jsendbuf] > A->rmap->rstart + i) break; jsendbuf[cnt++] = garray[*b_jsendbuf++]; m--; } /* put in diagonal portion */ for (j=ad->i[i]; ji[i+1]; j++) { jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++; } /* put in upper diagonal portion */ while (m-- > 0) { jsendbuf[cnt++] = garray[*b_jsendbuf++]; } } if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); /*-------------------------------------------------------------------- Gather all column indices to all processors */ for (i=0; irmap->range[i]; jrmap->range[i+1]; j++) { recvcounts[i] += lens[j]; } } displs[0] = 0; for (i=1; ij,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #else ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #endif /*-------------------------------------------------------------------- Assemble the matrix into useable form 
(note numerical values not yet set) */ /* set the b->ilen (length of each row) values */ ierr = PetscMemcpy(b->ilen,lens,A->rmap->N*sizeof(PetscInt));CHKERRQ(ierr); /* set the b->i indices */ b->i[0] = 0; for (i=1; i<=A->rmap->N; i++) { b->i[i] = b->i[i-1] + lens[i-1]; } ierr = PetscFree(lens);CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } else { B = **Bin; b = (Mat_SeqAIJ*)B->data; } /*-------------------------------------------------------------------- Copy my part of matrix numerical values into the values location */ if (flag == MAT_GET_VALUES) { sendcount = ad->nz + bd->nz; sendbuf = b->a + b->i[rstarts[rank]]; a_sendbuf = ad->a; b_sendbuf = bd->a; b_sendj = bd->j; n = A->rmap->rend - A->rmap->rstart; cnt = 0; for (i=0; ii[i+1] - bd->i[i]; while (m > 0) { /* is it above diagonal (in bd (compressed) numbering) */ if (garray[*b_sendj] > A->rmap->rstart + i) break; sendbuf[cnt++] = *b_sendbuf++; m--; b_sendj++; } /* put in diagonal portion */ for (j=ad->i[i]; ji[i+1]; j++) { sendbuf[cnt++] = *a_sendbuf++; } /* put in upper diagonal portion */ while (m-- > 0) { sendbuf[cnt++] = *b_sendbuf++; b_sendj++; } } if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); /* ----------------------------------------------------------------- Gather all numerical values to all processors */ if (!recvcounts) { ierr = PetscMalloc2(size,&recvcounts,size,&displs);CHKERRQ(ierr); } for (i=0; ii[rstarts[i+1]] - b->i[rstarts[i]]; } displs[0] = 0; for (i=1; ia; #if defined(PETSC_HAVE_MPI_IN_PLACE) ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #else ierr = MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #endif } /* endof (flag == MAT_GET_VALUES) */ ierr = PetscFree2(recvcounts,displs);CHKERRQ(ierr); if (A->symmetric) { ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); } else if (A->hermitian) { ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); } else if (A->structurally_symmetric) { ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetSubMatrices_MPIAIJ" PetscErrorCode MatGetSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[]) { PetscErrorCode ierr; PetscInt nmax,nstages_local,nstages,i,pos,max_no,nrow,ncol; PetscBool rowflag,colflag,wantallmatrix=PETSC_FALSE,twantallmatrix,*allcolumns; PetscFunctionBegin; /* Check for special case: each processor gets entire matrix */ if (ismax == 1 && C->rmap->N == C->cmap->N) { ierr = ISIdentity(*isrow,&rowflag);CHKERRQ(ierr); ierr = ISIdentity(*iscol,&colflag);CHKERRQ(ierr); ierr = ISGetLocalSize(*isrow,&nrow);CHKERRQ(ierr); ierr = ISGetLocalSize(*iscol,&ncol);CHKERRQ(ierr); if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) { wantallmatrix = PETSC_TRUE; ierr = PetscOptionsGetBool(((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,NULL);CHKERRQ(ierr); } } ierr = MPI_Allreduce(&wantallmatrix,&twantallmatrix,1,MPIU_BOOL,MPI_MIN,PetscObjectComm((PetscObject)C));CHKERRQ(ierr); if (twantallmatrix) { ierr = MatGetSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);CHKERRQ(ierr); PetscFunctionReturn(0); } /* Allocate memory to hold all the submatrices 
*/ if (scall != MAT_REUSE_MATRIX) { ierr = PetscMalloc1(ismax+1,submat);CHKERRQ(ierr); } /* Check for special case: each processor gets entire matrix columns */ ierr = PetscMalloc1(ismax+1,&allcolumns);CHKERRQ(ierr); for (i=0; icmap->N) { allcolumns[i] = PETSC_TRUE; } else { allcolumns[i] = PETSC_FALSE; } } /* Determine the number of stages through which submatrices are done */ nmax = 20*1000000 / (C->cmap->N * sizeof(PetscInt)); /* Each stage will extract nmax submatrices. nmax is determined by the matrix column dimension. If the original matrix has 20M columns, only one submatrix per stage is allowed, etc. */ if (!nmax) nmax = 1; nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0); /* Make sure every processor loops through the nstages */ ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));CHKERRQ(ierr); for (i=0,pos=0; idata; Mat A = c->A; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*mat; const PetscInt **icol,**irow; PetscInt *nrow,*ncol,start; PetscErrorCode ierr; PetscMPIInt rank,size,tag0,tag1,tag2,tag3,*w1,*w2,*w3,*w4,nrqr; PetscInt **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc; PetscInt nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol; PetscInt **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2; PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax; #if defined(PETSC_USE_CTABLE) PetscTable *cmap,cmap_i=NULL,*rmap,rmap_i; #else PetscInt **cmap,*cmap_i=NULL,**rmap,*rmap_i; #endif const PetscInt *irow_i; PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; MPI_Request *r_waits4,*s_waits3,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; MPI_Status *r_status3,*r_status4,*s_status4; MPI_Comm comm; PetscScalar **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i; PetscMPIInt *onodes1,*olengths1; PetscMPIInt idex,idex2,end; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); tag0 = ((PetscObject)C)->tag; size = c->size; rank = c->rank; /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); ierr = PetscMalloc4(ismax,&irow,ismax,&icol,ismax,&nrow,ismax,&ncol);CHKERRQ(ierr); for (i=0; icmap->N; } else { ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr); } } /* evaluate communication - mesg to who, length of mesg, and buffer space required. 
Based on this, buffers are allocated, and data copied into them*/ ierr = PetscMalloc4(size,&w1,size,&w2,size,&w3,size,&w4);CHKERRQ(ierr); /* mesg size */ ierr = PetscMemzero(w1,size*sizeof(PetscMPIInt));CHKERRQ(ierr); /* initialize work vector*/ ierr = PetscMemzero(w2,size*sizeof(PetscMPIInt));CHKERRQ(ierr); /* initialize work vector*/ ierr = PetscMemzero(w3,size*sizeof(PetscMPIInt));CHKERRQ(ierr); /* initialize work vector*/ for (i=0; i= C->rmap->range[l+1]) l++; proc = l; w4[proc]++; } for (j=0; j= C->rmap->range[l+1]) l++; proc = l; if (proc != rank) { /* copy to the outgoing buf*/ ctr[proc]++; *ptr[proc] = row; ptr[proc]++; } } /* Update the headers for the current IS */ for (j=0; jA->data,*sB = (Mat_SeqAIJ*)c->B->data; PetscInt *sAi = sA->i,*sBi = sB->i,id,rstart = C->rmap->rstart; PetscInt *sbuf2_i; for (i=0; ij, and send them off */ ierr = PetscMalloc1(nrqr+1,&sbuf_aj);CHKERRQ(ierr); for (i=0,j=0; ii,*b_i = b->i,lwrite; PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray; PetscInt cend = C->cmap->rend; PetscInt *a_j = a->j,*b_j = b->j,ctmp; for (i=0; i= cend) cols[lwrite++] = ctmp; } ct2 += ncols; } } ierr = MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);CHKERRQ(ierr); } } ierr = PetscMalloc1(nrqs+1,&r_status3);CHKERRQ(ierr); ierr = PetscMalloc1(nrqr+1,&s_status3);CHKERRQ(ierr); /* Allocate buffers for a->a, and send them off */ ierr = PetscMalloc1(nrqr+1,&sbuf_aa);CHKERRQ(ierr); for (i=0,j=0; ii,*b_i = b->i, *cworkB,lwrite; PetscInt cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray; PetscInt cend = C->cmap->rend; PetscInt *b_j = b->j; PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a; for (i=0; i= cend) vals[lwrite++] = vworkB[l]; } ct2 += ncols; } } ierr = MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source[i],tag3,comm,s_waits4+i);CHKERRQ(ierr); } } ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr); ierr = PetscFree(rbuf1);CHKERRQ(ierr); ierr = PetscMalloc1(nrqs+1,&r_status4);CHKERRQ(ierr); ierr = PetscMalloc1(nrqr+1,&s_status4);CHKERRQ(ierr); /* Form the matrix */ /* create col map: global col of C -> local col of submatrices */ { const PetscInt *icol_i; #if defined(PETSC_USE_CTABLE) ierr = PetscMalloc1(1+ismax,&cmap);CHKERRQ(ierr); for (i=0; icmap->N+1,&cmap[i]);CHKERRQ(ierr); jmax = ncol[i]; icol_i = icol[i]; cmap_i = cmap[i]; for (j=0; jcmap->N,&cmap[i]);CHKERRQ(ierr); ierr = PetscMemzero(cmap[i],C->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); jmax = ncol[i]; icol_i = icol[i]; cmap_i = cmap[i]; for (j=0; j= C->rmap->range[l+1]) l++; proc = l; if (proc == rank) { ierr = MatGetRow_MPIAIJ(C,row,&ncols,&cols,0);CHKERRQ(ierr); if (!allcolumns[i]) { for (k=0; k local row of submatrices */ #if defined(PETSC_USE_CTABLE) ierr = PetscMalloc1(1+ismax,&rmap);CHKERRQ(ierr); for (i=0; irmap->N+1,&rmap[i]);CHKERRQ(ierr); rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; jrmap->N,&rmap[0]);CHKERRQ(ierr); ierr = PetscMemzero(rmap[0],ismax*C->rmap->N*sizeof(PetscInt));CHKERRQ(ierr); } for (i=1; irmap->N; for (i=0; idata); if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); ierr = PetscMemcmp(mat->ilen,lens[i],submats[i]->rmap->n*sizeof(PetscInt),&flag);CHKERRQ(ierr); if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
wrong no of nonzeros"); /* Initial matrix as if empty */ ierr = PetscMemzero(mat->ilen,submats[i]->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); submats[i]->factortype = C->factortype; } } else { for (i=0; itype_name);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(submats[i],0,lens[i]);CHKERRQ(ierr); } } /* Assemble the matrices */ /* First assemble the local rows */ { PetscInt ilen_row,*imat_ilen,*imat_j,*imat_i,old_row; PetscScalar *imat_a; for (i=0; idata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; if (!allcolumns[i]) cmap_i = cmap[i]; rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; j= C->rmap->range[l+1]) l++; proc = l; if (proc == rank) { old_row = row; #if defined(PETSC_USE_CTABLE) ierr = PetscTableFind(rmap_i,row+1,&row);CHKERRQ(ierr); row--; #else row = rmap_i[row]; #endif ilen_row = imat_ilen[row]; ierr = MatGetRow_MPIAIJ(C,old_row,&ncols,&cols,&vals);CHKERRQ(ierr); mat_i = imat_i[row]; mat_a = imat_a + mat_i; mat_j = imat_j + mat_i; if (!allcolumns[i]) { for (k=0; kdata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; max1 = sbuf1_i[2*j]; for (k=0; kdata; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; imat_ilen = mat->ilen; if (allcolumns[i]) continue; jmax = nrow[i]; for (j=0; jrmap->n) != PetscAbs(B->rmap->n) || PetscAbs(A->rmap->bs) != PetscAbs(B->rmap->bs) || PetscAbs(A->cmap->bs) != PetscAbs(B->cmap->bs)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix"); ierr = MatCreate(comm, C);CHKERRQ(ierr); ierr = MatSetSizes(*C,A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr); ierr = MatSetBlockSizesFromMats(*C,A,A);CHKERRQ(ierr); ierr = PetscLayoutSetUp((*C)->rmap);CHKERRQ(ierr); ierr = PetscLayoutSetUp((*C)->cmap);CHKERRQ(ierr); if ((*C)->cmap->N != A->cmap->n + B->cmap->n) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incompatible component matrices of an MPIAIJ matrix"); ierr = MatSetType(*C, MATMPIAIJ);CHKERRQ(ierr); aij = (Mat_MPIAIJ*)((*C)->data); aij->A = A; aij->B = B; ierr = PetscLogObjectParent((PetscObject)*C,(PetscObject)A);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)*C,(PetscObject)B);CHKERRQ(ierr); (*C)->preallocated = (PetscBool)(A->preallocated && B->preallocated); (*C)->assembled = (PetscBool)(A->assembled && B->assembled); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMPIAIJExtractSeqMatrices_Private" PetscErrorCode MatMPIAIJExtractSeqMatrices_Private(Mat C, Mat *A, Mat *B) { Mat_MPIAIJ *aij = (Mat_MPIAIJ*) (C->data); PetscFunctionBegin; PetscValidPointer(A,2); PetscValidPointer(B,3); *A = aij->A; *B = aij->B; /* Note that we don't incref *A and *B, so be careful! 
*/
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatricesParallel_MPIXAIJ"
PetscErrorCode MatGetSubMatricesParallel_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[],
                                                 PetscErrorCode(*getsubmats_seq)(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat**),
                                                 PetscErrorCode(*makefromseq)(MPI_Comm,Mat,Mat,Mat*),
                                                 PetscErrorCode(*extractseq)(Mat,Mat*,Mat*))
{
  PetscErrorCode ierr;
  PetscMPIInt    size,flag;
  PetscInt       i,ii;
  PetscInt       ismax_c;

  PetscFunctionBegin;
  if (!ismax) PetscFunctionReturn(0);

  for (i = 0, ismax_c = 0; i < ismax; ++i) {
    ierr = MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm,&flag);CHKERRQ(ierr);
    if (flag != MPI_IDENT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column index sets must have the same communicator");
    ierr = MPI_Comm_size(((PetscObject)isrow[i])->comm,&size);CHKERRQ(ierr);
    if (size > 1) ++ismax_c;
  }
  if (!ismax_c) {
    /* Sequential ISs only, so we can call the sequential matrix extraction subroutine. */
    ierr = (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);CHKERRQ(ierr);
  } else { /* if (ismax_c) */
    Mat         *A,*B;
    IS          *isrow_c,*iscol_c;
    PetscMPIInt size;
    /*
       Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate
       array of sequential matrices underlying the resulting parallel matrices.
       Which arrays to allocate is based on the value of MatReuse scall.
       There are as many diag matrices as there are original index sets.
       There are only as many parallel and off-diag matrices as there are parallel (comm size > 1) index sets.

       Sequential matrix arrays are allocated in any event: even if the array of parallel matrices already
       exists, we need to consolidate the underlying seq matrices into a single array to serve as placeholders
       into which getsubmats_seq will deposit the extracted diag and off-diag parts.  However, if reuse is
       taking place, we have to allocate the seq matrix arrays here.  If reuse is NOT taking place, then the
       seq matrix arrays are allocated by getsubmats_seq.
    */
    /* Parallel matrix array is allocated only if no reuse is taking place. */
    if (scall != MAT_REUSE_MATRIX) {
      ierr = PetscMalloc1(ismax,submat);CHKERRQ(ierr);
    } else {
      ierr = PetscMalloc1(ismax,&A);CHKERRQ(ierr);
      ierr = PetscMalloc1(ismax_c,&B);CHKERRQ(ierr);
      /* If parallel matrices are being reused, then simply reuse the underlying seq matrices as well. */
      for (i = 0, ii = 0; i < ismax; ++i) {
        ierr = MPI_Comm_size(((PetscObject)isrow[i])->comm,&size);CHKERRQ(ierr);
        if (size > 1) {
          ierr = (*extractseq)((*submat)[i],A+i,B+ii);CHKERRQ(ierr);
          ++ii;
        } else A[i] = (*submat)[i];
      }
    }
    /*
       Construct the complements of the iscol ISs for parallel ISs only.
       These are used to extract the off-diag portion of the resulting parallel matrix.
       The row IS for the off-diag portion is the same as for the diag portion,
       so we merely alias the row IS, while skipping those that are sequential.
    */
    ierr = PetscMalloc2(ismax_c,&isrow_c,ismax_c,&iscol_c);CHKERRQ(ierr);
    for (i = 0, ii = 0; i < ismax; ++i) {
      ierr = MPI_Comm_size(((PetscObject)isrow[i])->comm,&size);CHKERRQ(ierr);
      if (size > 1) {
        isrow_c[ii] = isrow[i];
        ierr       = ISGetNonlocalIS(iscol[i],&(iscol_c[ii]));CHKERRQ(ierr);
        ++ii;
      }
    }
    /* Now obtain the sequential A and B submatrices separately.
    */
    ierr = (*getsubmats_seq)(C,ismax,isrow,iscol,scall,&A);CHKERRQ(ierr);
    ierr = (*getsubmats_seq)(C,ismax_c,isrow_c,iscol_c,scall,&B);CHKERRQ(ierr);
    for (ii = 0; ii < ismax_c; ++ii) {
      ierr = ISDestroy(&iscol_c[ii]);CHKERRQ(ierr);
    }
    ierr = PetscFree2(isrow_c,iscol_c);CHKERRQ(ierr);
    /*
       If scall == MAT_REUSE_MATRIX, we are done, since the sequential matrices A & B have been extracted
       directly into the parallel matrices containing them, or simply into the sequential matrix identical
       with the corresponding A (if size == 1).

       Otherwise, make sure that parallel matrices are constructed from A & B, or the A is put into the
       correct submat slot (if size == 1).
    */
    if (scall != MAT_REUSE_MATRIX) {
      for (i = 0, ii = 0; i < ismax; ++i) {
        ierr = MPI_Comm_size(((PetscObject)isrow[i])->comm,&size);CHKERRQ(ierr);
        if (size > 1) {
          /* For each parallel isrow[i], create parallel matrices from the extracted sequential matrices. */
          /* Construct submat[i] from the Seq pieces A and B. */
          ierr = (*makefromseq)(((PetscObject)isrow[i])->comm,A[i],B[ii],(*submat)+i);CHKERRQ(ierr);
          ++ii;
        } else (*submat)[i] = A[i];
      }
    }
    ierr = PetscFree(A);CHKERRQ(ierr);
    ierr = PetscFree(B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
} /* MatGetSubMatricesParallel_MPIXAIJ() */

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatricesParallel_MPIAIJ"
PetscErrorCode MatGetSubMatricesParallel_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetSubMatricesParallel_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatGetSubMatrices_MPIAIJ,MatCreateMPIAIJFromSeqMatrices_Private,MatMPIAIJExtractSeqMatrices_Private);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
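
/*
   Illustrative usage sketch (kept out of the build by the #if 0 guard): roughly how the routines in this
   file are driven from user-level code through the public MatIncreaseOverlap()/MatGetSubMatrices()
   interface.  For an MPIAIJ matrix these dispatch to MatIncreaseOverlap_MPIAIJ() and
   MatGetSubMatrices_MPIAIJ() above.  The helper name and the choice of starting index set are
   assumptions made purely for the example.
*/
#if 0
static PetscErrorCode ExampleExtractOverlappingBlock(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  IS             is;
  Mat            *submat;

  PetscFunctionBegin;
  /* start from the locally owned rows ... */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&is);CHKERRQ(ierr);
  /* ... grow the index set by one level of overlap ... */
  ierr = MatIncreaseOverlap(A,1,&is,1);CHKERRQ(ierr);
  /* ... and extract the corresponding sequential submatrix on each process */
  ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
  /* use submat[0], then clean up */
  ierr = MatDestroyMatrices(1,&submat);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif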
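
/*
   Minimal sketch (illustrative only) of the PetscBT marking used by the overlap routines above: a bit
   table over the global row indices lets a row be appended to the growing index set only the first time
   it is seen.  The function and parameter names (nglobal, candidates, added) are assumptions for the
   sketch, not identifiers from the routines themselves.
*/
#if 0
static PetscErrorCode ExampleMarkRowsOnce(PetscInt nglobal,PetscInt ncand,const PetscInt candidates[],PetscInt *nadded,PetscInt added[])
{
  PetscErrorCode ierr;
  PetscBT        table;
  PetscInt       i,cnt = 0;

  PetscFunctionBegin;
  ierr = PetscBTCreate(nglobal,&table);CHKERRQ(ierr);
  for (i=0; i<ncand; i++) {
    /* PetscBTLookupSet() returns the previous value of the bit and then sets it */
    if (!PetscBTLookupSet(table,candidates[i])) added[cnt++] = candidates[i];
  }
  *nadded = cnt;
  ierr = PetscBTDestroy(&table);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif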
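
/*
   Minimal sketch (illustrative only) of the gather pattern used by MatGetSubMatrix_MPIAIJ_All() above:
   every process contributes the nonzero counts of the rows it owns and receives the counts for all rows
   of the matrix, using MPI_IN_PLACE when available.  The parameter names (ranges, lens) are assumptions
   for the sketch, standing in for A->rmap->range and the lens[] work array of that routine.
*/
#if 0
static PetscErrorCode ExampleGatherRowLengths(MPI_Comm comm,const PetscInt ranges[],PetscInt lens[])
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,i,*recvcounts,*displs;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&recvcounts,size,&displs);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    recvcounts[i] = (PetscMPIInt)(ranges[i+1] - ranges[i]); /* rows owned by process i          */
    displs[i]     = (PetscMPIInt)ranges[i];                 /* where those rows start in lens[] */
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  /* each process has already filled lens[ranges[rank]] .. lens[ranges[rank+1]-1] with its own counts */
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,comm);CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(lens+ranges[rank],recvcounts[rank],MPIU_INT,lens,recvcounts,displs,MPIU_INT,comm);CHKERRQ(ierr);
#endif
  ierr = PetscFree2(recvcounts,displs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif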