/* Routines to compute overlapping regions of a parallel MPI matrix and to find submatrices that were shared across processors. */ #include <../src/mat/impls/aij/seq/aij.h> #include <../src/mat/impls/aij/mpi/mpiaij.h> #include #include static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat,PetscInt,IS*); static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**,PetscTable*); static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*); extern PetscErrorCode MatGetRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); extern PetscErrorCode MatRestoreRow_MPIAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once_Scalable(Mat,PetscInt,IS*); static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Local_Scalable(Mat,PetscInt,IS*); static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Send_Scalable(Mat,PetscInt,PetscMPIInt,PetscMPIInt *,PetscInt *, PetscInt *,PetscInt **,PetscInt **); static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Receive_Scalable(Mat,PetscInt,IS*,PetscInt,PetscInt *); PetscErrorCode MatIncreaseOverlap_MPIAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov) { PetscErrorCode ierr; PetscInt i; PetscFunctionBegin; if (ov < 0) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified"); for (i=0; i */ ierr = PetscCalloc2(2*nrrows,&todata,nrecvrows,&fromdata);CHKERRQ(ierr); for (i=0; imax_lsize ? lsize:max_lsize; isz[i] = lsize; } ierr = PetscMalloc2((max_lsize+nrecvs)*nidx,&indices_temp,nidx,&iscomms);CHKERRQ(ierr); for (i=0; imax_fszs ? fromsizes[2*i]:max_fszs; /* better way to estimate number of nonzero in the mat??? */ ierr = PetscCalloc5(max_fszs*nidx,&rows_data_ptr,nidx,&rows_data,nidx,&rows_pos_i,nfrom*nidx,&indv_counts,tnz,&indices_tmp);CHKERRQ(ierr); for (i=0; i */ ierr = PetscCalloc2(totalrows+nidx*nfrom*2,&sbdata,2*nfrom,&sbsizes);CHKERRQ(ierr); totalrows = 0; rows_pos = 0; /* use this code again */ for (i=0;i is[1] mesg [2] = sizeof(is[1]); ----------- mesg [3] = 5 => is[5] mesg [4] = sizeof(is[5]); ----------- mesg [5] mesg [n] datas[1] ----------- mesg[n+1] mesg[m] data(is[5]) ----------- Notes: nrqs - no of requests sent (or to be sent out) nrqr - no of requests received (which have to be or which have been processed) */ static PetscErrorCode MatIncreaseOverlap_MPIAIJ_Once(Mat C,PetscInt imax,IS is[]) { Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; PetscMPIInt *w1,*w2,nrqr,*w3,*w4,*onodes1,*olengths1,*onodes2,*olengths2; const PetscInt **idx,*idx_i; PetscInt *n,**data,len; #if defined(PETSC_USE_CTABLE) PetscTable *table_data,table_data_i; PetscInt *tdata,tcount,tcount_max; #else PetscInt *data_i,*d_p; #endif PetscErrorCode ierr; PetscMPIInt size,rank,tag1,tag2,proc = 0; PetscInt M,i,j,k,**rbuf,row,nrqs,msz,**outdat,**ptr; PetscInt *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2; PetscBT *table; MPI_Comm comm; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; MPI_Status *s_status,*recv_status; MPI_Comm *iscomms; char *t_p; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); size = c->size; rank = c->rank; M = C->rmap->N; ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscMalloc2(imax,(PetscInt***)&idx,imax,&n);CHKERRQ(ierr); for (i=0; irmap,row,&proc);CHKERRQ(ierr); w4[proc]++; } for (j=0; jrmap,row,&proc);CHKERRQ(ierr); if (proc != rank) { /* copy to the outgoing buffer */ ctr[proc]++; *ptr[proc] = row; 
ptr[proc]++; } else if (!PetscBTLookupSet(table_i,row)) { #if defined(PETSC_USE_CTABLE) ierr = PetscTableAdd(table_data_i,row+1,isz_i+1,INSERT_VALUES);CHKERRQ(ierr); #else data_i[isz_i] = row; /* Update the local table */ #endif isz_i++; } } /* Update the headers for the current IS */ for (j=0; jdata; Mat A = c->A,B = c->B; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; PetscInt start,end,val,max,rstart,cstart,*ai,*aj; PetscInt *bi,*bj,*garray,i,j,k,row,isz_i; PetscBT table_i; #if defined(PETSC_USE_CTABLE) PetscTable table_data_i; PetscErrorCode ierr; PetscTablePosition tpos; PetscInt tcount,*tdata; #else PetscInt *data_i; #endif PetscFunctionBegin; rstart = C->rmap->rstart; cstart = C->cmap->rstart; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for (i=0; i tcount - 1) SETERRQ2(PETSC_COMM_SELF,0," j %d >= tcount %d",j,tcount); } #else data_i = data[i]; #endif table_i = table[i]; isz_i = isz[i]; max = isz[i]; for (j=0; jdata; Mat A = c->A,B = c->B; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; PetscErrorCode ierr; PetscInt rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k; PetscInt row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end; PetscInt val,max1,max2,m,no_malloc =0,*tmp,new_estimate,ctr; PetscInt *rbuf_i,kmax,rbuf_0; PetscBT xtable; PetscFunctionBegin; m = C->rmap->N; rstart = C->rmap->rstart; cstart = C->cmap->rstart; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for (i=0,ct=0,total_sz=0; irmap->n) max1 = ct*(a->nz + b->nz)/C->rmap->n; else max1 = 1; mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1); ierr = PetscMalloc1(mem_estimate,&xdata[0]);CHKERRQ(ierr); ++no_malloc; ierr = PetscBTCreate(m,&xtable);CHKERRQ(ierr); ierr = PetscArrayzero(isz1,nrqr);CHKERRQ(ierr); ct3 = 0; for (i=0; idata; Mat_SeqAIJ *b,*ad = (Mat_SeqAIJ*)a->A->data,*bd = (Mat_SeqAIJ*)a->B->data; PetscErrorCode ierr; PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j; PetscInt m,*b_sendj,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; MatScalar *sendbuf,*recvbuf,*a_sendbuf,*b_sendbuf; PetscFunctionBegin; ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); if (scall == MAT_INITIAL_MATRIX) { /* ---------------------------------------------------------------- Tell every processor the number of nonzeros per row */ ierr = PetscMalloc1(A->rmap->N,&lens);CHKERRQ(ierr); for (i=A->rmap->rstart; irmap->rend; i++) { lens[i] = ad->i[i-A->rmap->rstart+1] - ad->i[i-A->rmap->rstart] + bd->i[i-A->rmap->rstart+1] - bd->i[i-A->rmap->rstart]; } ierr = PetscMalloc2(size,&recvcounts,size,&displs);CHKERRQ(ierr); for (i=0; irmap->range[i+1] - A->rmap->range[i]; displs[i] = A->rmap->range[i]; } #if defined(PETSC_HAVE_MPI_IN_PLACE) ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #else sendcount = A->rmap->rend - A->rmap->rstart; ierr = MPI_Allgatherv(lens+A->rmap->rstart,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #endif /* --------------------------------------------------------------- Create the sequential matrix of the same type as the local block diagonal */ ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); ierr = MatSetSizes(B,A->rmap->N,A->cmap->N,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); ierr = 
MatSetType(B,((PetscObject)a->A)->type_name);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); ierr = PetscCalloc1(2,Bin);CHKERRQ(ierr); **Bin = B; b = (Mat_SeqAIJ*)B->data; /*-------------------------------------------------------------------- Copy my part of matrix column indices over */ sendcount = ad->nz + bd->nz; jsendbuf = b->j + b->i[rstarts[rank]]; a_jsendbuf = ad->j; b_jsendbuf = bd->j; n = A->rmap->rend - A->rmap->rstart; cnt = 0; for (i=0; ii[i+1] - bd->i[i]; while (m > 0) { /* is it above diagonal (in bd (compressed) numbering) */ if (garray[*b_jsendbuf] > A->rmap->rstart + i) break; jsendbuf[cnt++] = garray[*b_jsendbuf++]; m--; } /* put in diagonal portion */ for (j=ad->i[i]; ji[i+1]; j++) { jsendbuf[cnt++] = A->rmap->rstart + *a_jsendbuf++; } /* put in upper diagonal portion */ while (m-- > 0) { jsendbuf[cnt++] = garray[*b_jsendbuf++]; } } if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); /*-------------------------------------------------------------------- Gather all column indices to all processors */ for (i=0; irmap->range[i]; jrmap->range[i+1]; j++) { recvcounts[i] += lens[j]; } } displs[0] = 0; for (i=1; ij,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #else ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #endif /*-------------------------------------------------------------------- Assemble the matrix into useable form (note numerical values not yet set) */ /* set the b->ilen (length of each row) values */ ierr = PetscArraycpy(b->ilen,lens,A->rmap->N);CHKERRQ(ierr); /* set the b->i indices */ b->i[0] = 0; for (i=1; i<=A->rmap->N; i++) { b->i[i] = b->i[i-1] + lens[i-1]; } ierr = PetscFree(lens);CHKERRQ(ierr); ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } else { B = **Bin; b = (Mat_SeqAIJ*)B->data; } /*-------------------------------------------------------------------- Copy my part of matrix numerical values into the values location */ if (flag == MAT_GET_VALUES) { sendcount = ad->nz + bd->nz; sendbuf = b->a + b->i[rstarts[rank]]; a_sendbuf = ad->a; b_sendbuf = bd->a; b_sendj = bd->j; n = A->rmap->rend - A->rmap->rstart; cnt = 0; for (i=0; ii[i+1] - bd->i[i]; while (m > 0) { /* is it above diagonal (in bd (compressed) numbering) */ if (garray[*b_sendj] > A->rmap->rstart + i) break; sendbuf[cnt++] = *b_sendbuf++; m--; b_sendj++; } /* put in diagonal portion */ for (j=ad->i[i]; ji[i+1]; j++) { sendbuf[cnt++] = *a_sendbuf++; } /* put in upper diagonal portion */ while (m-- > 0) { sendbuf[cnt++] = *b_sendbuf++; b_sendj++; } } if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); /* ----------------------------------------------------------------- Gather all numerical values to all processors */ if (!recvcounts) { ierr = PetscMalloc2(size,&recvcounts,size,&displs);CHKERRQ(ierr); } for (i=0; ii[rstarts[i+1]] - b->i[rstarts[i]]; } displs[0] = 0; for (i=1; ia; #if defined(PETSC_HAVE_MPI_IN_PLACE) ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #else ierr = MPI_Allgatherv(sendbuf,sendcount,MPIU_SCALAR,recvbuf,recvcounts,displs,MPIU_SCALAR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); #endif } /* endof (flag == MAT_GET_VALUES) */ 
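/* Value gathering (MAT_GET_VALUES) is complete at this point: free the Allgatherv work arrays and carry the symmetry/Hermitian/structural-symmetry flags of the parallel matrix A over to the gathered sequential matrix B. */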
ierr = PetscFree2(recvcounts,displs);CHKERRQ(ierr); if (A->symmetric) { ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); } else if (A->hermitian) { ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); } else if (A->structurally_symmetric) { ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); } PetscFunctionReturn(0); } PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,PetscBool allcolumns,Mat *submats) { Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; Mat submat,A = c->A,B = c->B; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data,*subc; PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,nzA,nzB; PetscInt cstart = C->cmap->rstart,cend = C->cmap->rend,rstart = C->rmap->rstart,*bmap = c->garray; const PetscInt *icol,*irow; PetscInt nrow,ncol,start; PetscErrorCode ierr; PetscMPIInt rank,size,tag1,tag2,tag3,tag4,*w1,*w2,nrqr; PetscInt **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,ct3,**rbuf1,row,proc; PetscInt nrqs=0,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol,*iptr; PetscInt **rbuf3,*req_source1,*req_source2,**sbuf_aj,**rbuf2,max1,nnz; PetscInt *lens,rmax,ncols,*cols,Crow; #if defined(PETSC_USE_CTABLE) PetscTable cmap,rmap; PetscInt *cmap_loc,*rmap_loc; #else PetscInt *cmap,*rmap; #endif PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*sbuf1_i,*rbuf2_i,*rbuf3_i; PetscInt *cworkB,lwrite,*subcols,*row2proc; PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a,*subvals=NULL; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; MPI_Request *r_waits4,*s_waits3 = NULL,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3 = NULL,*s_status2; MPI_Status *r_status3 = NULL,*r_status4,*s_status4; MPI_Comm comm; PetscScalar **rbuf4,**sbuf_aa,*vals,*sbuf_aa_i,*rbuf4_i; PetscMPIInt *onodes1,*olengths1,idex,end; Mat_SubSppt *smatis1; PetscBool isrowsorted,iscolsorted; PetscFunctionBegin; if (ismax != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This routine only works when all processes have ismax=1"); ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); size = c->size; rank = c->rank; ierr = ISSorted(iscol[0],&iscolsorted);CHKERRQ(ierr); ierr = ISSorted(isrow[0],&isrowsorted);CHKERRQ(ierr); ierr = ISGetIndices(isrow[0],&irow);CHKERRQ(ierr); ierr = ISGetLocalSize(isrow[0],&nrow);CHKERRQ(ierr); if (allcolumns) { icol = NULL; ncol = C->cmap->N; } else { ierr = ISGetIndices(iscol[0],&icol);CHKERRQ(ierr); ierr = ISGetLocalSize(iscol[0],&ncol);CHKERRQ(ierr); } if (scall == MAT_INITIAL_MATRIX) { PetscInt *sbuf2_i,*cworkA,lwrite,ctmp; /* Get some new tags to keep the communication clean */ tag1 = ((PetscObject)C)->tag; ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); /* evaluate communication - mesg to who, length of mesg, and buffer space required. 
Based on this, buffers are allocated, and data copied into them */ ierr = PetscCalloc2(size,&w1,size,&w2);CHKERRQ(ierr); ierr = PetscMalloc1(nrow,&row2proc);CHKERRQ(ierr); /* w1[proc] = num of rows owned by proc -- to be requested */ proc = 0; nrqs = 0; /* num of outgoing messages */ for (j=0; j= C->rmap->range[proc+1]) proc++; w1[proc]++; row2proc[j] = proc; /* map row index to proc */ if (proc != rank && !w2[proc]) { w2[proc] = 1; nrqs++; } } w1[rank] = 0; /* rows owned by self will not be requested */ ierr = PetscMalloc1(nrqs+1,&pa);CHKERRQ(ierr); /*(proc -array)*/ for (proc=0,j=0; procj */ ierr = MPI_Waitall(nrqs,r_waits2,r_status2);CHKERRQ(ierr); ierr = PetscMalloc4(nrqs+1,&r_waits3,nrqr+1,&s_waits3,nrqs+1,&r_status3,nrqr+1,&s_status3);CHKERRQ(ierr); for (i=0; ij, and send them off */ ierr = PetscMalloc1(nrqr+1,&sbuf_aj);CHKERRQ(ierr); for (i=0,j=0; i= cend) cols[lwrite++] = ctmp; } ct2 += ncols; } ierr = MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source1[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); } /* create column map (cmap): global col of C -> local col of submat */ #if defined(PETSC_USE_CTABLE) if (!allcolumns) { ierr = PetscTableCreate(ncol+1,C->cmap->N+1,&cmap);CHKERRQ(ierr); ierr = PetscCalloc1(C->cmap->n,&cmap_loc);CHKERRQ(ierr); for (j=0; j= cstart && icol[j] rmap->n,&rmap_loc);CHKERRQ(ierr); #else if (!allcolumns) { ierr = PetscCalloc1(C->cmap->N,&cmap);CHKERRQ(ierr); for (j=0; jA */ ncols = ai[row-rstart+1] - ai[row-rstart]; cols = aj + ai[row-rstart]; if (!allcolumns) { for (k=0; kB */ ncols = bi[row-rstart+1] - bi[row-rstart]; cols = bj + bi[row-rstart]; if (!allcolumns) { for (k=0; k local row of submat */ #if defined(PETSC_USE_CTABLE) ierr = PetscTableCreate(nrow+1,C->rmap->N+1,&rmap);CHKERRQ(ierr); for (j=0; jrmap->N,&rmap);CHKERRQ(ierr); for (j=0; jj is done */ ierr = MPI_Waitall(nrqs,r_waits3,r_status3);CHKERRQ(ierr); for (i=0; ij */ rbuf3_i = rbuf3[i]; /* received C->j */ /* is_no = sbuf1_i[2*j-1]; if (is_no != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_no !=0"); */ max1 = sbuf1_i[2]; for (k=0; k= cstart && rbuf3_i[ct2] type_name);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(submat,0,lens);CHKERRQ(ierr); /* create struct Mat_SubSppt and attached it to submat */ ierr = PetscNew(&smatis1);CHKERRQ(ierr); subc = (Mat_SeqAIJ*)submat->data; subc->submatis1 = smatis1; smatis1->id = 0; smatis1->nrqs = nrqs; smatis1->nrqr = nrqr; smatis1->rbuf1 = rbuf1; smatis1->rbuf2 = rbuf2; smatis1->rbuf3 = rbuf3; smatis1->sbuf2 = sbuf2; smatis1->req_source2 = req_source2; smatis1->sbuf1 = sbuf1; smatis1->ptr = ptr; smatis1->tmp = tmp; smatis1->ctr = ctr; smatis1->pa = pa; smatis1->req_size = req_size; smatis1->req_source1 = req_source1; smatis1->allcolumns = allcolumns; smatis1->singleis = PETSC_TRUE; smatis1->row2proc = row2proc; smatis1->rmap = rmap; smatis1->cmap = cmap; #if defined(PETSC_USE_CTABLE) smatis1->rmap_loc = rmap_loc; smatis1->cmap_loc = cmap_loc; #endif smatis1->destroy = submat->ops->destroy; submat->ops->destroy = MatDestroySubMatrix_SeqAIJ; submat->factortype = C->factortype; /* compute rmax */ rmax = 0; for (i=0; irmap->n != nrow || submat->cmap->n != ncol) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
wrong size"); subc = (Mat_SeqAIJ*)submat->data; rmax = subc->rmax; smatis1 = subc->submatis1; nrqs = smatis1->nrqs; nrqr = smatis1->nrqr; rbuf1 = smatis1->rbuf1; rbuf2 = smatis1->rbuf2; rbuf3 = smatis1->rbuf3; req_source2 = smatis1->req_source2; sbuf1 = smatis1->sbuf1; sbuf2 = smatis1->sbuf2; ptr = smatis1->ptr; tmp = smatis1->tmp; ctr = smatis1->ctr; pa = smatis1->pa; req_size = smatis1->req_size; req_source1 = smatis1->req_source1; allcolumns = smatis1->allcolumns; row2proc = smatis1->row2proc; rmap = smatis1->rmap; cmap = smatis1->cmap; #if defined(PETSC_USE_CTABLE) rmap_loc = smatis1->rmap_loc; cmap_loc = smatis1->cmap_loc; #endif } /* Post recv matrix values */ ierr = PetscMalloc3(nrqs+1,&rbuf4, rmax,&subcols, rmax,&subvals);CHKERRQ(ierr); ierr = PetscMalloc4(nrqs+1,&r_waits4,nrqr+1,&s_waits4,nrqs+1,&r_status4,nrqr+1,&s_status4);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag4);CHKERRQ(ierr); for (i=0; ia, and send them off */ ierr = PetscMalloc1(nrqr+1,&sbuf_aa);CHKERRQ(ierr); for (i=0,j=0; i= cend) vals[lwrite++] = vworkB[l]; } ct2 += ncols; } ierr = MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source1[i],tag4,comm,s_waits4+i);CHKERRQ(ierr); } /* Assemble submat */ /* First assemble the local rows */ for (j=0; jA */ ncols = ai[Crow+1] - ai[Crow]; cols = aj + ai[Crow]; vals = a->a + ai[Crow]; i = 0; for (k=0; kB */ ncols = bi[Crow+1] - bi[Crow]; cols = bj + bi[Crow]; vals = b->a + bi[Crow]; for (k=0; kA */ ncols = ai[Crow+1] - ai[Crow]; cols = aj + ai[Crow]; vals = a->a + ai[Crow]; i = 0; for (k=0; kB */ ncols = bi[Crow+1] - bi[Crow]; cols = bj + bi[Crow]; vals = b->a + bi[Crow]; for (k=0; kA */ ncols = ai[Crow+1] - ai[Crow]; cols = aj + ai[Crow]; vals = a->a + ai[Crow]; i = 0; for (k=0; kB */ ncols = bi[Crow+1] - bi[Crow]; cols = bj + bi[Crow]; vals = b->a + bi[Crow]; for (k=0; kj */ ct3 = 0; /* count of received C->j that will be inserted into submat */ rbuf2_i = rbuf2[idex]; /* int** received length of C->j from other processes */ rbuf3_i = rbuf3[idex]; /* int** received C->j from other processes */ rbuf4_i = rbuf4[idex]; /* scalar** received C->a from other processes */ /* is_no = sbuf1_i[2*j-1]; if (is_no != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"is_no !=0"); */ max1 = sbuf1_i[2]; /* num of rows */ for (k=0; k= cstart && rbuf3_i[ct2] j with index that should be inserted to submat */ if (iscolsorted) rbuf3_i[ct3++] = ct2; } } ierr = MatSetValues_SeqAIJ(submat,1,&row,idex,subcols,subvals,INSERT_VALUES);CHKERRQ(ierr); } else { /* scall == MAT_REUSE_MATRIX */ submat = submats[0]; subc = (Mat_SeqAIJ*)submat->data; nnz = subc->i[row+1] - subc->i[row]; /* num of submat entries in this row */ for (l=0; lj + subc->i[row]; /* sorted column indices */ ierr = MatSetValues_SeqAIJ(submat,1,&row,nnz,bj,subvals,INSERT_VALUES);CHKERRQ(ierr); } } else { /* allcolumns */ nnz = rbuf2_i[ct1]; /* num of C entries in this row */ ierr = MatSetValues_SeqAIJ(submat,1,&row,nnz,rbuf3_i+ct2,rbuf4_i+ct2,INSERT_VALUES);CHKERRQ(ierr); ct2 += nnz; } } } /* sending a->a are done */ ierr = MPI_Waitall(nrqr,s_waits4,s_status4);CHKERRQ(ierr); ierr = PetscFree4(r_waits4,s_waits4,r_status4,s_status4);CHKERRQ(ierr); ierr = MatAssemblyBegin(submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); submats[0] = submat; /* Restore the indices */ ierr = ISRestoreIndices(isrow[0],&irow);CHKERRQ(ierr); if (!allcolumns) { ierr = ISRestoreIndices(iscol[0],&icol);CHKERRQ(ierr); } /* Destroy allocated memory */ for (i=0; icmap->N) allcolumns = PETSC_TRUE; 
ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(C,ismax,isrow,iscol,scall,allcolumns,*submat);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatCreateSubMatrices_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[]) { PetscErrorCode ierr; PetscInt nmax,nstages=0,i,pos,max_no,nrow,ncol,in[2],out[2]; PetscBool rowflag,colflag,wantallmatrix=PETSC_FALSE; Mat_SeqAIJ *subc; Mat_SubSppt *smat; PetscFunctionBegin; /* Check for special case: each processor has a single IS */ if (C->submat_singleis) { /* flag is set in PCSetUp_ASM() to skip MPIU_Allreduce() */ ierr = MatCreateSubMatrices_MPIAIJ_SingleIS(C,ismax,isrow,iscol,scall,submat);CHKERRQ(ierr); C->submat_singleis = PETSC_FALSE; /* resume its default value in case C will be used for non-single IS */ PetscFunctionReturn(0); } /* Collect global wantallmatrix and nstages */ if (!C->cmap->N) nmax=20*1000000/sizeof(PetscInt); else nmax = 20*1000000 / (C->cmap->N * sizeof(PetscInt)); if (!nmax) nmax = 1; if (scall == MAT_INITIAL_MATRIX) { /* Collect global wantallmatrix and nstages */ if (ismax == 1 && C->rmap->N == C->cmap->N) { ierr = ISIdentity(*isrow,&rowflag);CHKERRQ(ierr); ierr = ISIdentity(*iscol,&colflag);CHKERRQ(ierr); ierr = ISGetLocalSize(*isrow,&nrow);CHKERRQ(ierr); ierr = ISGetLocalSize(*iscol,&ncol);CHKERRQ(ierr); if (rowflag && colflag && nrow == C->rmap->N && ncol == C->cmap->N) { wantallmatrix = PETSC_TRUE; ierr = PetscOptionsGetBool(((PetscObject)C)->options,((PetscObject)C)->prefix,"-use_fast_submatrix",&wantallmatrix,NULL);CHKERRQ(ierr); } } /* Determine the number of stages through which submatrices are done Each stage will extract nmax submatrices. nmax is determined by the matrix column dimension. If the original matrix has 20M columns, only one submatrix per stage is allowed, etc. */ nstages = ismax/nmax + ((ismax % nmax) ? 1 : 0); /* local nstages */ in[0] = -1*(PetscInt)wantallmatrix; in[1] = nstages; ierr = MPIU_Allreduce(in,out,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));CHKERRQ(ierr); wantallmatrix = (PetscBool)(-out[0]); nstages = out[1]; /* Make sure every processor loops through the global nstages */ } else { /* MAT_REUSE_MATRIX */ if (ismax) { subc = (Mat_SeqAIJ*)(*submat)[0]->data; smat = subc->submatis1; } else { /* (*submat)[0] is a dummy matrix */ smat = (Mat_SubSppt*)(*submat)[0]->data; } if (!smat) { /* smat is not generated by MatCreateSubMatrix_MPIAIJ_All(...,MAT_INITIAL_MATRIX,...) 
*/ wantallmatrix = PETSC_TRUE; } else if (smat->singleis) { ierr = MatCreateSubMatrices_MPIAIJ_SingleIS(C,ismax,isrow,iscol,scall,submat);CHKERRQ(ierr); PetscFunctionReturn(0); } else { nstages = smat->nstages; } } if (wantallmatrix) { ierr = MatCreateSubMatrix_MPIAIJ_All(C,MAT_GET_VALUES,scall,submat);CHKERRQ(ierr); PetscFunctionReturn(0); } /* Allocate memory to hold all the submatrices and dummy submatrices */ if (scall == MAT_INITIAL_MATRIX) { ierr = PetscCalloc1(ismax+nstages,submat);CHKERRQ(ierr); } for (i=0,pos=0; i= ismax) max_no = 0; else max_no = ismax-pos; ierr = MatCreateSubMatrices_MPIAIJ_Local(C,max_no,isrow+pos,iscol+pos,scall,*submat+pos);CHKERRQ(ierr); if (!max_no && scall == MAT_INITIAL_MATRIX) { /* submat[pos] is a dummy matrix */ smat = (Mat_SubSppt*)(*submat)[pos]->data; pos++; smat->nstages = nstages; } pos += max_no; } if (ismax && scall == MAT_INITIAL_MATRIX) { /* save nstages for reuse */ subc = (Mat_SeqAIJ*)(*submat)[0]->data; smat = subc->submatis1; smat->nstages = nstages; } PetscFunctionReturn(0); } /* -------------------------------------------------------------------------*/ PetscErrorCode MatCreateSubMatrices_MPIAIJ_Local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats) { Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; Mat A = c->A; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)c->B->data,*subc; const PetscInt **icol,**irow; PetscInt *nrow,*ncol,start; PetscErrorCode ierr; PetscMPIInt rank,size,tag0,tag2,tag3,tag4,*w1,*w2,*w3,*w4,nrqr; PetscInt **sbuf1,**sbuf2,i,j,k,l,ct1,ct2,**rbuf1,row,proc=-1; PetscInt nrqs=0,msz,**ptr=NULL,*req_size=NULL,*ctr=NULL,*pa,*tmp=NULL,tcol; PetscInt **rbuf3=NULL,*req_source1=NULL,*req_source2,**sbuf_aj,**rbuf2=NULL,max1,max2; PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax; #if defined(PETSC_USE_CTABLE) PetscTable *cmap,cmap_i=NULL,*rmap,rmap_i; #else PetscInt **cmap,*cmap_i=NULL,**rmap,*rmap_i; #endif const PetscInt *irow_i; PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; MPI_Request *r_waits4,*s_waits3,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; MPI_Status *r_status3,*r_status4,*s_status4; MPI_Comm comm; PetscScalar **rbuf4,*rbuf4_i,**sbuf_aa,*vals,*mat_a,*imat_a,*sbuf_aa_i; PetscMPIInt *onodes1,*olengths1,end; PetscInt **row2proc,*row2proc_i,ilen_row,*imat_ilen,*imat_j,*imat_i,old_row; Mat_SubSppt *smat_i; PetscBool *issorted,*allcolumns,colflag,iscsorted=PETSC_TRUE; PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,ilen; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); size = c->size; rank = c->rank; ierr = PetscMalloc4(ismax,&row2proc,ismax,&cmap,ismax,&rmap,ismax+1,&allcolumns);CHKERRQ(ierr); ierr = PetscMalloc5(ismax,(PetscInt***)&irow,ismax,(PetscInt***)&icol,ismax,&nrow,ismax,&ncol,ismax,&issorted);CHKERRQ(ierr); for (i=0; icmap->N) { allcolumns[i] = PETSC_TRUE; icol[i] = NULL; } else { allcolumns[i] = PETSC_FALSE; ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); } } if (scall == MAT_REUSE_MATRIX) { /* Assumes new rows are same length as the old rows */ for (i=0; idata; if ((submats[i]->rmap->n != nrow[i]) || (submats[i]->cmap->n != ncol[i])) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
wrong size"); /* Initial matrix as if empty */ ierr = PetscArrayzero(subc->ilen,submats[i]->rmap->n);CHKERRQ(ierr); smat_i = subc->submatis1; nrqs = smat_i->nrqs; nrqr = smat_i->nrqr; rbuf1 = smat_i->rbuf1; rbuf2 = smat_i->rbuf2; rbuf3 = smat_i->rbuf3; req_source2 = smat_i->req_source2; sbuf1 = smat_i->sbuf1; sbuf2 = smat_i->sbuf2; ptr = smat_i->ptr; tmp = smat_i->tmp; ctr = smat_i->ctr; pa = smat_i->pa; req_size = smat_i->req_size; req_source1 = smat_i->req_source1; allcolumns[i] = smat_i->allcolumns; row2proc[i] = smat_i->row2proc; rmap[i] = smat_i->rmap; cmap[i] = smat_i->cmap; } if (!ismax){ /* Get dummy submatrices and retrieve struct submatis1 */ if (!submats[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"submats are null, cannot reuse"); smat_i = (Mat_SubSppt*)submats[0]->data; nrqs = smat_i->nrqs; nrqr = smat_i->nrqr; rbuf1 = smat_i->rbuf1; rbuf2 = smat_i->rbuf2; rbuf3 = smat_i->rbuf3; req_source2 = smat_i->req_source2; sbuf1 = smat_i->sbuf1; sbuf2 = smat_i->sbuf2; ptr = smat_i->ptr; tmp = smat_i->tmp; ctr = smat_i->ctr; pa = smat_i->pa; req_size = smat_i->req_size; req_source1 = smat_i->req_source1; allcolumns[0] = PETSC_FALSE; } } else { /* scall == MAT_INITIAL_MATRIX */ /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); /* evaluate communication - mesg to who, length of mesg, and buffer space required. Based on this, buffers are allocated, and data copied into them*/ ierr = PetscCalloc4(size,&w1,size,&w2,size,&w3,size,&w4);CHKERRQ(ierr); /* mesg size, initialize work vectors */ for (i=0; i= C->rmap->range[proc+1]) proc++; w4[proc]++; row2proc_i[j] = proc; /* map row index to proc */ } for (j=0; jtag; ierr = PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);CHKERRQ(ierr); ierr = PetscFree(onodes1);CHKERRQ(ierr); ierr = PetscFree(olengths1);CHKERRQ(ierr); /* Allocate Memory for outgoing messages */ ierr = PetscMalloc4(size,&sbuf1,size,&ptr,2*msz,&tmp,size,&ctr);CHKERRQ(ierr); ierr = PetscArrayzero(sbuf1,size);CHKERRQ(ierr); ierr = PetscArrayzero(ptr,size);CHKERRQ(ierr); { PetscInt *iptr = tmp; k = 0; for (i=0; ii,*sBi = b->i,id,rstart = C->rmap->rstart; PetscInt *sbuf2_i; ierr = MPI_Waitall(nrqr,r_waits1,r_status1);CHKERRQ(ierr); for (i=0; ij, and send them off */ ierr = PetscMalloc1(nrqr+1,&sbuf_aj);CHKERRQ(ierr); for (i=0,j=0; ii,*b_i = b->i,lwrite; PetscInt *cworkA,*cworkB,cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray; PetscInt cend = C->cmap->rend; PetscInt *a_j = a->j,*b_j = b->j,ctmp; for (i=0; i= cend) cols[lwrite++] = ctmp; } ct2 += ncols; } } ierr = MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source1[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); } } ierr = PetscMalloc2(nrqs+1,&r_status3,nrqr+1,&s_status3);CHKERRQ(ierr); /* create col map: global col of C -> local col of submatrices */ { const PetscInt *icol_i; #if defined(PETSC_USE_CTABLE) for (i=0; icmap->N+1,&cmap[i]);CHKERRQ(ierr); jmax = ncol[i]; icol_i = icol[i]; cmap_i = cmap[i]; for (j=0; jcmap->N,&cmap[i]);CHKERRQ(ierr); jmax = ncol[i]; icol_i = icol[i]; cmap_i = cmap[i]; for (j=0; j local row of submatrices */ #if defined(PETSC_USE_CTABLE) for (i=0; irmap->N+1,&rmap[i]);CHKERRQ(ierr); irow_i = irow[i]; jmax = nrow[i]; for (j=0; jrmap->N,&rmap[i]);CHKERRQ(ierr); rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; jtype_name);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(submats[i],0,lens[i]);CHKERRQ(ierr); /* create struct 
Mat_SubSppt and attached it to submat */ ierr = PetscNew(&smat_i);CHKERRQ(ierr); subc = (Mat_SeqAIJ*)submats[i]->data; subc->submatis1 = smat_i; smat_i->destroy = submats[i]->ops->destroy; submats[i]->ops->destroy = MatDestroySubMatrix_SeqAIJ; submats[i]->factortype = C->factortype; smat_i->id = i; smat_i->nrqs = nrqs; smat_i->nrqr = nrqr; smat_i->rbuf1 = rbuf1; smat_i->rbuf2 = rbuf2; smat_i->rbuf3 = rbuf3; smat_i->sbuf2 = sbuf2; smat_i->req_source2 = req_source2; smat_i->sbuf1 = sbuf1; smat_i->ptr = ptr; smat_i->tmp = tmp; smat_i->ctr = ctr; smat_i->pa = pa; smat_i->req_size = req_size; smat_i->req_source1 = req_source1; smat_i->allcolumns = allcolumns[i]; smat_i->singleis = PETSC_FALSE; smat_i->row2proc = row2proc[i]; smat_i->rmap = rmap[i]; smat_i->cmap = cmap[i]; } if (!ismax) { /* Create dummy submats[0] for reuse struct subc */ ierr = MatCreate(PETSC_COMM_SELF,&submats[0]);CHKERRQ(ierr); ierr = MatSetSizes(submats[0],0,0,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetType(submats[0],MATDUMMY);CHKERRQ(ierr); /* create struct Mat_SubSppt and attached it to submat */ ierr = PetscNewLog(submats[0],&smat_i);CHKERRQ(ierr); submats[0]->data = (void*)smat_i; smat_i->destroy = submats[0]->ops->destroy; submats[0]->ops->destroy = MatDestroySubMatrix_Dummy; submats[0]->factortype = C->factortype; smat_i->id = 0; smat_i->nrqs = nrqs; smat_i->nrqr = nrqr; smat_i->rbuf1 = rbuf1; smat_i->rbuf2 = rbuf2; smat_i->rbuf3 = rbuf3; smat_i->sbuf2 = sbuf2; smat_i->req_source2 = req_source2; smat_i->sbuf1 = sbuf1; smat_i->ptr = ptr; smat_i->tmp = tmp; smat_i->ctr = ctr; smat_i->pa = pa; smat_i->req_size = req_size; smat_i->req_source1 = req_source1; smat_i->allcolumns = PETSC_FALSE; smat_i->singleis = PETSC_FALSE; smat_i->row2proc = NULL; smat_i->rmap = NULL; smat_i->cmap = NULL; } if (ismax) {ierr = PetscFree(lens[0]);CHKERRQ(ierr);} ierr = PetscFree(lens);CHKERRQ(ierr); ierr = PetscFree(sbuf_aj[0]);CHKERRQ(ierr); ierr = PetscFree(sbuf_aj);CHKERRQ(ierr); } /* endof scall == MAT_INITIAL_MATRIX */ /* Post recv matrix values */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag4);CHKERRQ(ierr); ierr = PetscMalloc1(nrqs+1,&rbuf4);CHKERRQ(ierr); ierr = PetscMalloc1(nrqs+1,&r_waits4);CHKERRQ(ierr); ierr = PetscMalloc1(nrqs+1,&r_status4);CHKERRQ(ierr); ierr = PetscMalloc1(nrqr+1,&s_status4);CHKERRQ(ierr); for (i=0; ia, and send them off */ ierr = PetscMalloc1(nrqr+1,&sbuf_aa);CHKERRQ(ierr); for (i=0,j=0; ii,*b_i = b->i, *cworkB,lwrite; PetscInt cstart = C->cmap->rstart,rstart = C->rmap->rstart,*bmap = c->garray; PetscInt cend = C->cmap->rend; PetscInt *b_j = b->j; PetscScalar *vworkA,*vworkB,*a_a = a->a,*b_a = b->a; for (i=0; i= cend) vals[lwrite++] = vworkB[l]; } ct2 += ncols; } } ierr = MPI_Isend(sbuf_aa_i,req_size[i],MPIU_SCALAR,req_source1[i],tag4,comm,s_waits4+i);CHKERRQ(ierr); } } /* Assemble the matrices */ /* First assemble the local rows */ for (i=0; idata; imat_ilen = subc->ilen; imat_j = subc->j; imat_i = subc->i; imat_a = subc->a; if (!allcolumns[i]) cmap_i = cmap[i]; rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; jdata; imat_ilen = subc->ilen; imat_j = subc->j; imat_i = subc->i; imat_a = subc->a; max1 = sbuf1_i[2*j]; for (k=0; kdata; imat_j = subc->j; imat_i = subc->i; imat_a = subc->a; imat_ilen = subc->ilen; if (allcolumns[i]) continue; jmax = nrow[i]; for (j=0; jA & C->B have been created, even if empty. 
*/ PetscErrorCode MatSetSeqMats_MPIAIJ(Mat C,IS rowemb,IS dcolemb,IS ocolemb,MatStructure pattern,Mat A,Mat B) { /* If making this function public, change the error returned in this function away from _PLIB. */ PetscErrorCode ierr; Mat_MPIAIJ *aij; Mat_SeqAIJ *Baij; PetscBool seqaij,Bdisassembled; PetscInt m,n,*nz,i,j,ngcol,col,rstart,rend,shift,count; PetscScalar v; const PetscInt *rowindices,*colindices; PetscFunctionBegin; /* Check to make sure the component matrices (and embeddings) are compatible with C. */ if (A) { ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&seqaij);CHKERRQ(ierr); if (!seqaij) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Diagonal matrix is of wrong type"); if (rowemb) { ierr = ISGetLocalSize(rowemb,&m);CHKERRQ(ierr); if (m != A->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Row IS of size %D is incompatible with diag matrix row size %D",m,A->rmap->n); } else { if (C->rmap->n != A->rmap->n) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Diag seq matrix is row-incompatible with the MPIAIJ matrix"); } } if (dcolemb) { ierr = ISGetLocalSize(dcolemb,&n);CHKERRQ(ierr); if (n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Diag col IS of size %D is incompatible with diag matrix col size %D",n,A->cmap->n); } else { if (C->cmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Diag seq matrix is col-incompatible with the MPIAIJ matrix"); } } if (B) { ierr = PetscObjectBaseTypeCompare((PetscObject)B,MATSEQAIJ,&seqaij);CHKERRQ(ierr); if (!seqaij) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Off-diagonal matrix is of wrong type"); if (rowemb) { ierr = ISGetLocalSize(rowemb,&m);CHKERRQ(ierr); if (m != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Row IS of size %D is incompatible with off-diag matrix row size %D",m,A->rmap->n); } else { if (C->rmap->n != B->rmap->n) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Off-diag seq matrix is row-incompatible with the MPIAIJ matrix"); } } if (ocolemb) { ierr = ISGetLocalSize(ocolemb,&n);CHKERRQ(ierr); if (n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Off-diag col IS of size %D is incompatible with off-diag matrix col size %D",n,B->cmap->n); } else { if (C->cmap->N - C->cmap->n != B->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Off-diag seq matrix is col-incompatible with the MPIAIJ matrix"); } } aij = (Mat_MPIAIJ*)C->data; if (!aij->A) { /* Mimic parts of MatMPIAIJSetPreallocation() */ ierr = MatCreate(PETSC_COMM_SELF,&aij->A);CHKERRQ(ierr); ierr = MatSetSizes(aij->A,C->rmap->n,C->cmap->n,C->rmap->n,C->cmap->n);CHKERRQ(ierr); ierr = MatSetBlockSizesFromMats(aij->A,C,C);CHKERRQ(ierr); ierr = MatSetType(aij->A,MATSEQAIJ);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)C,(PetscObject)aij->A);CHKERRQ(ierr); } if (A) { ierr = MatSetSeqMat_SeqAIJ(aij->A,rowemb,dcolemb,pattern,A);CHKERRQ(ierr); } else { ierr = MatSetUp(aij->A);CHKERRQ(ierr); } if (B) { /* Destroy the old matrix or the column map, depending on the sparsity pattern. */ /* If pattern == DIFFERENT_NONZERO_PATTERN, we reallocate B and need to "disassemble" B -- convert it to using C's global indices. To insert the values we take the safer, albeit more expensive, route of MatSetValues(). If pattern == SUBSET_NONZERO_PATTERN, we do not "disassemble" B and do not reallocate; we MatZeroValues(B) first, so there may be a bunch of zeros that, perhaps, could be compacted out. TODO: Put B's values into aij->B's aij structure in place using the embedding ISs? At least avoid calling MatSetValues() and the implied searches? 
*/ if (B && pattern == DIFFERENT_NONZERO_PATTERN) { #if defined(PETSC_USE_CTABLE) ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); #else ierr = PetscFree(aij->colmap);CHKERRQ(ierr); /* A bit of a HACK: ideally we should deal with case aij->B all in one code block below. */ if (aij->B) { ierr = PetscLogObjectMemory((PetscObject)C,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); } #endif ngcol = 0; if (aij->lvec) { ierr = VecGetSize(aij->lvec,&ngcol);CHKERRQ(ierr); } if (aij->garray) { ierr = PetscFree(aij->garray);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)C,-ngcol*sizeof(PetscInt));CHKERRQ(ierr); } ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); } if (aij->B && B && pattern == DIFFERENT_NONZERO_PATTERN) { ierr = MatDestroy(&aij->B);CHKERRQ(ierr); } if (aij->B && B && pattern == SUBSET_NONZERO_PATTERN) { ierr = MatZeroEntries(aij->B);CHKERRQ(ierr); } } Bdisassembled = PETSC_FALSE; if (!aij->B) { ierr = MatCreate(PETSC_COMM_SELF,&aij->B);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)C,(PetscObject)aij->B);CHKERRQ(ierr); ierr = MatSetSizes(aij->B,C->rmap->n,C->cmap->N,C->rmap->n,C->cmap->N);CHKERRQ(ierr); ierr = MatSetBlockSizesFromMats(aij->B,B,B);CHKERRQ(ierr); ierr = MatSetType(aij->B,MATSEQAIJ);CHKERRQ(ierr); Bdisassembled = PETSC_TRUE; } if (B) { Baij = (Mat_SeqAIJ*)B->data; if (pattern == DIFFERENT_NONZERO_PATTERN) { ierr = PetscMalloc1(B->rmap->n,&nz);CHKERRQ(ierr); for (i=0; irmap->n; i++) { nz[i] = Baij->i[i+1] - Baij->i[i]; } ierr = MatSeqAIJSetPreallocation(aij->B,0,nz);CHKERRQ(ierr); ierr = PetscFree(nz);CHKERRQ(ierr); } ierr = PetscLayoutGetRange(C->rmap,&rstart,&rend);CHKERRQ(ierr); shift = rend-rstart; count = 0; rowindices = NULL; colindices = NULL; if (rowemb) { ierr = ISGetIndices(rowemb,&rowindices);CHKERRQ(ierr); } if (ocolemb) { ierr = ISGetIndices(ocolemb,&colindices);CHKERRQ(ierr); } for (i=0; irmap->n; i++) { PetscInt row; row = i; if (rowindices) row = rowindices[i]; for (j=Baij->i[i]; ji[i+1]; j++) { col = Baij->j[count]; if (colindices) col = colindices[col]; if (Bdisassembled && col>=rstart) col += shift; v = Baij->a[count]; ierr = MatSetValues(aij->B,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr); ++count; } } /* No assembly for aij->B is necessary. */ /* FIXME: set aij->B's nonzerostate correctly. */ } else { ierr = MatSetUp(aij->B);CHKERRQ(ierr); } C->preallocated = PETSC_TRUE; C->was_assembled = PETSC_FALSE; C->assembled = PETSC_FALSE; /* C will need to be assembled so that aij->B can be compressed into local form in MatSetUpMultiply_MPIAIJ(). Furthermore, its nonzerostate will need to be based on that of aij->A's and aij->B's. */ PetscFunctionReturn(0); } /* B uses local indices with column indices ranging between 0 and N-n; they must be interpreted using garray. */ PetscErrorCode MatGetSeqMats_MPIAIJ(Mat C,Mat *A,Mat *B) { Mat_MPIAIJ *aij = (Mat_MPIAIJ*)C->data; PetscFunctionBegin; PetscValidPointer(A,2); PetscValidPointer(B,3); /* FIXME: make sure C is assembled */ *A = aij->A; *B = aij->B; /* Note that we don't incref *A and *B, so be careful! */ PetscFunctionReturn(0); } /* Extract MPI submatrices encoded by pairs of IS that may live on subcomms of C. NOT SCALABLE due to the use of ISGetNonlocalIS() (see below). 
*/ PetscErrorCode MatCreateSubMatricesMPI_MPIXAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[], PetscErrorCode(*getsubmats_seq)(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat**), PetscErrorCode(*getlocalmats)(Mat,Mat*,Mat*), PetscErrorCode(*setseqmat)(Mat,IS,IS,MatStructure,Mat), PetscErrorCode(*setseqmats)(Mat,IS,IS,IS,MatStructure,Mat,Mat)) { PetscErrorCode ierr; PetscMPIInt isize,flag; PetscInt i,ii,cismax,ispar; Mat *A,*B; IS *isrow_p,*iscol_p,*cisrow,*ciscol,*ciscol_p; PetscFunctionBegin; if (!ismax) PetscFunctionReturn(0); for (i = 0, cismax = 0; i < ismax; ++i) { PetscMPIInt isize; ierr = MPI_Comm_compare(((PetscObject)isrow[i])->comm,((PetscObject)iscol[i])->comm,&flag);CHKERRQ(ierr); if (flag != MPI_IDENT) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column index sets must have the same communicator"); ierr = MPI_Comm_size(((PetscObject)isrow[i])->comm, &isize);CHKERRQ(ierr); if (isize > 1) ++cismax; } /* If cismax is zero on all C's ranks, then and only then can we use purely sequential matrix extraction. ispar counts the number of parallel ISs across C's comm. */ ierr = MPIU_Allreduce(&cismax,&ispar,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));CHKERRQ(ierr); if (!ispar) { /* Sequential ISs only across C's comm, so can call the sequential matrix extraction subroutine. */ ierr = (*getsubmats_seq)(C,ismax,isrow,iscol,scall,submat);CHKERRQ(ierr); PetscFunctionReturn(0); } /* if (ispar) */ /* Construct the "complements" -- the off-processor indices -- of the iscol ISs for parallel ISs only. These are used to extract the off-diag portion of the resulting parallel matrix. The row IS for the off-diag portion is the same as for the diag portion, so we merely alias (without increfing) the row IS, while skipping those that are sequential. */ ierr = PetscMalloc2(cismax,&cisrow,cismax,&ciscol);CHKERRQ(ierr); ierr = PetscMalloc1(cismax,&ciscol_p);CHKERRQ(ierr); for (i = 0, ii = 0; i < ismax; ++i) { ierr = MPI_Comm_size(((PetscObject)isrow[i])->comm,&isize);CHKERRQ(ierr); if (isize > 1) { /* TODO: This is the part that's ***NOT SCALABLE***. To fix this we need to extract just the indices of C's nonzero columns that lie on the intersection of isrow[i] and ciscol[ii] -- the nonlocal part of iscol[i] -- without actually computing ciscol[ii]. This also has to be done without serializing on the IS list, so, most likely, it is best done by rewriting MatCreateSubMatrices_MPIAIJ() directly. */ ierr = ISGetNonlocalIS(iscol[i],&(ciscol[ii]));CHKERRQ(ierr); /* Now we have to (a) make sure ciscol[ii] is sorted, since, even if the off-proc indices were sorted on each rank, concatenated they might no longer be sorted; (b) Use ISSortPermutation() to construct ciscol_p, the mapping from the indices in the nondecreasing order to the original index positions. If ciscol[ii] is strictly increasing, the permutation IS is NULL. */ ierr = ISSortPermutation(ciscol[ii],PETSC_FALSE,ciscol_p+ii);CHKERRQ(ierr); ierr = ISSort(ciscol[ii]);CHKERRQ(ierr); ++ii; } } ierr = PetscMalloc2(ismax,&isrow_p,ismax,&iscol_p);CHKERRQ(ierr); for (i = 0, ii = 0; i < ismax; ++i) { PetscInt j,issize; const PetscInt *indices; /* Permute the indices into a nondecreasing order. Reject row and col indices with duplicates. 
*/ ierr = ISSortPermutation(isrow[i],PETSC_FALSE,isrow_p+i);CHKERRQ(ierr); ierr = ISSort(isrow[i]);CHKERRQ(ierr); ierr = ISGetLocalSize(isrow[i],&issize);CHKERRQ(ierr); ierr = ISGetIndices(isrow[i],&indices);CHKERRQ(ierr); for (j = 1; j < issize; ++j) { if (indices[j] == indices[j-1]) { SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Repeated indices in row IS %D: indices at %D and %D are both %D",i,j-1,j,indices[j]); } } ierr = ISRestoreIndices(isrow[i],&indices);CHKERRQ(ierr); ierr = ISSortPermutation(iscol[i],PETSC_FALSE,iscol_p+i);CHKERRQ(ierr); ierr = ISSort(iscol[i]);CHKERRQ(ierr); ierr = ISGetLocalSize(iscol[i],&issize);CHKERRQ(ierr); ierr = ISGetIndices(iscol[i],&indices);CHKERRQ(ierr); for (j = 1; j < issize; ++j) { if (indices[j-1] == indices[j]) { SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Repeated indices in col IS %D: indices at %D and %D are both %D",i,j-1,j,indices[j]); } } ierr = ISRestoreIndices(iscol[i],&indices);CHKERRQ(ierr); ierr = MPI_Comm_size(((PetscObject)isrow[i])->comm,&isize);CHKERRQ(ierr); if (isize > 1) { cisrow[ii] = isrow[i]; ++ii; } } /* Allocate the necessary arrays to hold the resulting parallel matrices as well as the intermediate array of sequential matrices underlying the resulting parallel matrices. Which arrays to allocate is based on the value of MatReuse scall and whether ISs are sorted and/or contain duplicates. There are as many diag matrices as there are original index sets. There are only as many parallel and off-diag matrices, as there are parallel (comm size > 1) index sets. ARRAYS that can hold Seq matrices get allocated in any event -- either here or by getsubmats_seq(): - If the array of MPI matrices already exists and is being reused, we need to allocate the array and extract the underlying seq matrices into it to serve as placeholders, into which getsubmats_seq will deposite the extracted diag and off-diag parts. Thus, we allocate the A&B arrays and fill them with A[i] and B[ii] extracted from the corresponding MPI submat. - However, if the rows, A's column indices or B's column indices are not sorted, the extracted A[i] & B[ii] will have a different order from what getsubmats_seq expects. To handle this case -- indicated by a nonzero isrow_p[i], iscol_p[i], or ciscol_p[ii] -- we duplicate A[i] --> AA[i], B[ii] --> BB[ii] (retrieve composed AA[i] or BB[ii]) and reuse them here. AA[i] and BB[ii] are then used to permute its values into A[i] and B[ii] sitting inside the corresponding submat. - If no reuse is taking place then getsubmats_seq will allocate the A&B arrays and create the corresponding A[i], B[ii], AA[i] or BB[ii] matrices. */ /* Parallel matrix array is allocated here only if no reuse is taking place. If reused, it is passed in by the caller. */ if (scall == MAT_INITIAL_MATRIX) { ierr = PetscMalloc1(ismax,submat);CHKERRQ(ierr); } /* Now obtain the sequential A and B submatrices separately. */ /* scall=MAT_REUSE_MATRIX is not handled yet, because getsubmats_seq() requires reuse of A and B */ ierr = (*getsubmats_seq)(C,ismax,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); ierr = (*getsubmats_seq)(C,cismax,cisrow,ciscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); /* If scall == MAT_REUSE_MATRIX AND the permutations are NULL, we are done, since the sequential matrices A & B have been extracted directly into the parallel matrices containing them, or simply into the sequential matrix identical with the corresponding A (if isize == 1). 
Note that in that case colmap doesn't need to be rebuilt, since the matrices are expected to have the same sparsity pattern. Otherwise, A and/or B have to be properly embedded into C's index spaces and the correct colmap must be constructed for C. This is done by setseqmat(s). */ for (i = 0, ii = 0; i < ismax; ++i) { /* TODO: cache ciscol, permutation ISs and maybe cisrow? What about isrow & iscol? That way we can avoid sorting and computing permutations when reusing. To this end: - remove the old cache, if it exists, when extracting submatrices with MAT_INITIAL_MATRIX - if caching arrays to hold the ISs, make and compose a container for them so that it can be destroyed upon destruction of C (use PetscContainerUserDestroy() to clear out the contents). */ MatStructure pattern; pattern = DIFFERENT_NONZERO_PATTERN; ierr = MPI_Comm_size(((PetscObject)isrow[i])->comm,&isize);CHKERRQ(ierr); /* Construct submat[i] from the Seq pieces A (and B, if necessary). */ if (isize > 1) { if (scall == MAT_INITIAL_MATRIX) { ierr = MatCreate(((PetscObject)isrow[i])->comm,(*submat)+i);CHKERRQ(ierr); ierr = MatSetSizes((*submat)[i],A[i]->rmap->n,A[i]->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetType((*submat)[i],MATMPIAIJ);CHKERRQ(ierr); ierr = PetscLayoutSetUp((*submat)[i]->rmap);CHKERRQ(ierr); ierr = PetscLayoutSetUp((*submat)[i]->cmap);CHKERRQ(ierr); } /* For each parallel isrow[i], insert the extracted sequential matrices into the parallel matrix. */ { Mat AA,BB; AA = A[i]; BB = B[ii]; if (AA || BB) { ierr = setseqmats((*submat)[i],isrow_p[i],iscol_p[i],ciscol_p[ii],pattern,AA,BB);CHKERRQ(ierr); ierr = MatAssemblyBegin((*submat)[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd((*submat)[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } ierr = MatDestroy(&AA);CHKERRQ(ierr); } ierr = ISDestroy(ciscol+ii);CHKERRQ(ierr); ierr = ISDestroy(ciscol_p+ii);CHKERRQ(ierr); ++ii; } else { /* if (isize == 1) */ if (scall == MAT_REUSE_MATRIX) { ierr = MatDestroy(&(*submat)[i]);CHKERRQ(ierr); } if (isrow_p[i] || iscol_p[i]) { ierr = MatDuplicate(A[i],MAT_DO_NOT_COPY_VALUES,(*submat)+i);CHKERRQ(ierr); ierr = setseqmat((*submat)[i],isrow_p[i],iscol_p[i],pattern,A[i]);CHKERRQ(ierr); /* Otherwise A is extracted straight into (*submats)[i]. */ /* TODO: Compose A[i] on (*submat([i] for future use, if ((isrow_p[i] || iscol_p[i]) && MAT_INITIAL_MATRIX). */ ierr = MatDestroy(A+i);CHKERRQ(ierr); } else (*submat)[i] = A[i]; } ierr = ISDestroy(&isrow_p[i]);CHKERRQ(ierr); ierr = ISDestroy(&iscol_p[i]);CHKERRQ(ierr); } ierr = PetscFree2(cisrow,ciscol);CHKERRQ(ierr); ierr = PetscFree2(isrow_p,iscol_p);CHKERRQ(ierr); ierr = PetscFree(ciscol_p);CHKERRQ(ierr); ierr = PetscFree(A);CHKERRQ(ierr); ierr = MatDestroySubMatrices(cismax,&B);CHKERRQ(ierr); PetscFunctionReturn(0); } PetscErrorCode MatCreateSubMatricesMPI_MPIAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[]) { PetscErrorCode ierr; PetscFunctionBegin; ierr = MatCreateSubMatricesMPI_MPIXAIJ(C,ismax,isrow,iscol,scall,submat,MatCreateSubMatrices_MPIAIJ,MatGetSeqMats_MPIAIJ,MatSetSeqMat_SeqAIJ,MatSetSeqMats_MPIAIJ);CHKERRQ(ierr); PetscFunctionReturn(0); }
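/* ------------------------------------------------------------------------
   Illustrative usage sketch (not part of the original routines above).
   The functions in this file back the public MatIncreaseOverlap() and
   MatCreateSubMatrices() interfaces for MPIAIJ matrices; the fragment
   below is a minimal, hedged example of how a caller typically drives
   them: start from the locally owned rows, grow the index set by one
   level of overlap, and extract the corresponding block as a sequential
   matrix on each process (the pattern used by ASM-style preconditioners).
   The matrix A is assumed to be an assembled MPIAIJ matrix; the helper
   name ExampleOverlapAndExtract is hypothetical. */
#if 0
static PetscErrorCode ExampleOverlapAndExtract(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  IS             rows;
  Mat            *sub;

  PetscFunctionBegin;
  /* index set of the locally owned rows of the parallel matrix */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&rows);CHKERRQ(ierr);

  /* grow the index set by one level of overlap (dispatches to MatIncreaseOverlap_MPIAIJ) */
  ierr = MatIncreaseOverlap(A,1,&rows,1);CHKERRQ(ierr);

  /* extract the overlapping block as a sequential matrix on each process
     (dispatches to MatCreateSubMatrices_MPIAIJ); pass MAT_REUSE_MATRIX on
     later calls with the same index sets to reuse the communication pattern */
  ierr = MatCreateSubMatrices(A,1,&rows,&rows,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

  /* ... use sub[0], e.g. factor it as a local subdomain block ... */

  ierr = MatDestroySubMatrices(1,&sub);CHKERRQ(ierr);
  ierr = ISDestroy(&rows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif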