/* Routines to compute overlapping regions of a parallel MPI matrix and to find submatrices that were shared across processors. */ #include <../src/mat/impls/baij/mpi/mpibaij.h> #include static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat,PetscInt,char**,PetscInt*,PetscInt**); static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat,PetscInt,PetscInt**,PetscInt**,PetscInt*); extern PetscErrorCode MatGetRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); extern PetscErrorCode MatRestoreRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**); PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov) { PetscErrorCode ierr; PetscInt i,N=C->cmap->N, bs=C->rmap->bs; IS *is_new; PetscFunctionBegin; ierr = PetscMalloc1(imax,&is_new);CHKERRQ(ierr); /* Convert the indices into block format */ ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,imax,is,is_new);CHKERRQ(ierr); if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n"); for (i=0; i is[1] mesg [2] = sizeof(is[1]); ----------- mesg [5] = 5 => is[5] mesg [6] = sizeof(is[5]); ----------- mesg [7] mesg [n] data(is[1]) ----------- mesg[n+1] mesg[m] data(is[5]) ----------- Notes: nrqs - no of requests sent (or to be sent out) nrqr - no of requests recieved (which have to be or which have been processed */ PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Once(Mat C,PetscInt imax,IS is[]) { Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; const PetscInt **idx,*idx_i; PetscInt *n,*w3,*w4,**data,len; PetscErrorCode ierr; PetscMPIInt size,rank,tag1,tag2,*w2,*w1,nrqr; PetscInt Mbs,i,j,k,**rbuf,row,proc=-1,nrqs,msz,**outdat,**ptr; PetscInt *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2,*d_p; PetscMPIInt *onodes1,*olengths1,*onodes2,*olengths2; PetscBT *table; MPI_Comm comm; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; MPI_Status *s_status,*recv_status; char *t_p; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); size = c->size; rank = c->rank; Mbs = c->Mbs; ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscMalloc2(imax+1,&idx,imax,&n);CHKERRQ(ierr); for (i=0; irmap,row*C->rmap->bs,&proc);CHKERRQ(ierr); w4[proc]++; } for (j=0; jrmap,row*C->rmap->bs,&proc);CHKERRQ(ierr); if (proc != rank) { /* copy to the outgoing buffer */ ctr[proc]++; *ptr[proc] = row; ptr[proc]++; } else { /* Update the local table */ if (!PetscBTLookupSet(table_i,row)) data_i[isz_i++] = row; } } /* Update the headers for the current IS */ for (j=0; jdata; Mat A = c->A,B = c->B; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; PetscInt start,end,val,max,rstart,cstart,*ai,*aj; PetscInt *bi,*bj,*garray,i,j,k,row,*data_i,isz_i; PetscBT table_i; PetscFunctionBegin; rstart = c->rstartbs; cstart = c->cstartbs; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for (i=0; idata; Mat A = c->A,B = c->B; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; PetscErrorCode ierr; PetscInt rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k; PetscInt row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end; PetscInt val,max1,max2,Mbs,no_malloc =0,*tmp,new_estimate,ctr; PetscInt *rbuf_i,kmax,rbuf_0; PetscBT xtable; PetscFunctionBegin; Mbs = c->Mbs; rstart = c->rstartbs; cstart = c->cstartbs; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for (i=0,ct=0,total_sz=0; iMbs) max1 = ct*(a->nz +b->nz)/c->Mbs; else max1 = 1; mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1); ierr = PetscMalloc1(mem_estimate,&xdata[0]);CHKERRQ(ierr); ++no_malloc; ierr = PetscBTCreate(Mbs,&xtable);CHKERRQ(ierr); ierr = PetscMemzero(isz1,nrqr*sizeof(PetscInt));CHKERRQ(ierr); ct3 = 0; for (i=0; idata; PetscErrorCode ierr; PetscInt nmax,nstages_local,nstages,i,pos,max_no,N=C->cmap->N,bs=C->rmap->bs; Mat_SeqBAIJ *subc; Mat_SubSppt *smat; PetscFunctionBegin; /* The compression and expansion should be avoided. Doesn't point out errors, might change the indices, hence buggey */ ierr = PetscMalloc2(ismax+1,&isrow_block,ismax+1,&iscol_block);CHKERRQ(ierr); ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,ismax,isrow,isrow_block);CHKERRQ(ierr); ierr = ISCompressIndicesGeneral(N,C->cmap->n,bs,ismax,iscol,iscol_block);CHKERRQ(ierr); /* Determine the number of stages through which submatrices are done */ if (!C->cmap->N) nmax=20*1000000/sizeof(PetscInt); else nmax = 20*1000000 / (c->Nbs * sizeof(PetscInt)); if (!nmax) nmax = 1; if (scall == MAT_INITIAL_MATRIX) { nstages_local = ismax/nmax + ((ismax % nmax) ? 1 : 0); /* local nstages */ /* Make sure every processor loops through the nstages */ ierr = MPIU_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));CHKERRQ(ierr); /* Allocate memory to hold all the submatrices and dummy submatrices */ ierr = PetscCalloc1(ismax+nstages,submat);CHKERRQ(ierr); } else { /* MAT_REUSE_MATRIX */ if (ismax) { subc = (Mat_SeqBAIJ*)((*submat)[0]->data); smat = subc->submatis1; } else { /* (*submat)[0] is a dummy matrix */ smat = (Mat_SubSppt*)(*submat)[0]->data; } if (!smat) { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"MatCreateSubMatrices(...,MAT_REUSE_MATRIX,...) requires submat"); } nstages = smat->nstages; } for (i=0,pos=0; idata; smat->nstages = nstages; } pos += max_no; } if (scall == MAT_INITIAL_MATRIX && ismax) { /* save nstages for reuse */ subc = (Mat_SeqBAIJ*)((*submat)[0]->data); smat = subc->submatis1; smat->nstages = nstages; } for (i=0; i size) fproc = size; while (row < proc_gnode[fproc] || row >= proc_gnode[fproc+1]) { if (row < proc_gnode[fproc]) fproc--; else fproc++; } *rank = fproc; PetscFunctionReturn(0); } #endif /* -------------------------------------------------------------------------*/ /* This code is used for BAIJ and SBAIJ matrices (unfortunate dependency) */ PetscErrorCode MatCreateSubMatrices_MPIBAIJ_local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats) { Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; Mat A = c->A; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*subc; const PetscInt **icol,**irow; PetscInt *nrow,*ncol,start; PetscErrorCode ierr; PetscMPIInt rank,size,tag0,tag2,tag3,tag4,*w1,*w2,*w3,*w4,nrqr; PetscInt **sbuf1,**sbuf2,*sbuf2_i,i,j,k,l,ct1,ct2,**rbuf1,row,proc=-1; PetscInt nrqs=0,msz,**ptr=NULL,*req_size=NULL,*ctr=NULL,*pa,*tmp=NULL,tcol; PetscInt **rbuf3=NULL,*req_source1=NULL,*req_source2,**sbuf_aj,**rbuf2=NULL,max1,max2; PetscInt **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax; #if defined(PETSC_USE_CTABLE) PetscTable *cmap,cmap_i=NULL,*rmap,rmap_i; #else PetscInt **cmap,*cmap_i=NULL,**rmap,*rmap_i; #endif const PetscInt *irow_i,*icol_i; PetscInt ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; MPI_Request *r_waits4,*s_waits3,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; MPI_Status *r_status3,*r_status4,*s_status4; MPI_Comm comm; PetscScalar **rbuf4,*rbuf4_i=NULL,**sbuf_aa,*vals,*mat_a=NULL,*imat_a,*sbuf_aa_i; PetscMPIInt *onodes1,*olengths1,end; PetscInt **row2proc,*row2proc_i,*imat_ilen,*imat_j,*imat_i; Mat_SubSppt *smat_i; PetscBool *issorted,colflag,iscsorted=PETSC_TRUE; PetscInt *sbuf1_i,*rbuf2_i,*rbuf3_i,ilen; PetscInt bs=C->rmap->bs,bs2=c->bs2,rstart = c->rstartbs; PetscBool ijonly=c->ijonly; /* private flag indicates only matrix data structures are requested */ PetscInt nzA,nzB,*a_i=a->i,*b_i=b->i,*a_j = a->j,*b_j = b->j,ctmp,imark,*cworkA,*cworkB; PetscScalar *vworkA=NULL,*vworkB=NULL,*a_a = a->a,*b_a = b->a; PetscInt cstart = c->cstartbs,*bmap = c->garray; PetscBool *allrows,*allcolumns; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); size = c->size; rank = c->rank; ierr = PetscMalloc5(ismax,&row2proc,ismax,&cmap,ismax,&rmap,ismax+1,&allcolumns,ismax,&allrows);CHKERRQ(ierr); ierr = PetscMalloc5(ismax,&irow,ismax,&icol,ismax,&nrow,ismax,&ncol,ismax,&issorted);CHKERRQ(ierr); for (i=0; iNbs) { allcolumns[i] = PETSC_TRUE; icol[i] = NULL; } else { allcolumns[i] = PETSC_FALSE; ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr); } /* Check for special case: allrows */ ierr = ISIdentity(isrow[i],&colflag);CHKERRQ(ierr); ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr); if (colflag && nrow[i] == c->Mbs) { allrows[i] = PETSC_TRUE; irow[i] = NULL; } else { allrows[i] = PETSC_FALSE; ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr); } } if (scall == MAT_REUSE_MATRIX) { /* Assumes new rows are same length as the old rows */ for (i=0; idata); if (subc->mbs != nrow[i] || subc->nbs != ncol[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); /* Initial matrix as if empty */ ierr = PetscMemzero(subc->ilen,subc->mbs*sizeof(PetscInt));CHKERRQ(ierr); /* Initial matrix as if empty */ submats[i]->factortype = C->factortype; smat_i = subc->submatis1; nrqs = smat_i->nrqs; nrqr = smat_i->nrqr; rbuf1 = smat_i->rbuf1; rbuf2 = smat_i->rbuf2; rbuf3 = smat_i->rbuf3; req_source2 = smat_i->req_source2; sbuf1 = smat_i->sbuf1; sbuf2 = smat_i->sbuf2; ptr = smat_i->ptr; tmp = smat_i->tmp; ctr = smat_i->ctr; pa = smat_i->pa; req_size = smat_i->req_size; req_source1 = smat_i->req_source1; allcolumns[i] = smat_i->allcolumns; allrows[i] = smat_i->allrows; row2proc[i] = smat_i->row2proc; rmap[i] = smat_i->rmap; cmap[i] = smat_i->cmap; } if (!ismax){ /* Get dummy submatrices and retrieve struct submatis1 */ if (!submats[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"submats are null, cannot reuse"); smat_i = (Mat_SubSppt*)submats[0]->data; nrqs = smat_i->nrqs; nrqr = smat_i->nrqr; rbuf1 = smat_i->rbuf1; rbuf2 = smat_i->rbuf2; rbuf3 = smat_i->rbuf3; req_source2 = smat_i->req_source2; sbuf1 = smat_i->sbuf1; sbuf2 = smat_i->sbuf2; ptr = smat_i->ptr; tmp = smat_i->tmp; ctr = smat_i->ctr; pa = smat_i->pa; req_size = smat_i->req_size; req_source1 = smat_i->req_source1; allcolumns[0] = PETSC_FALSE; } } else { /* scall == MAT_INITIAL_MATRIX */ /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); /* evaluate communication - mesg to who, length of mesg, and buffer space required. Based on this, buffers are allocated, and data copied into them*/ ierr = PetscCalloc4(size,&w1,size,&w2,size,&w3,size,&w4);CHKERRQ(ierr); /* mesg size, initialize work vectors */ for (i=0; i= c->rangebs[proc+1]) proc++; w4[proc]++; row2proc_i[j] = proc; /* map row index to proc */ } for (j=0; jtag; ierr = PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);CHKERRQ(ierr); ierr = PetscFree(onodes1);CHKERRQ(ierr); ierr = PetscFree(olengths1);CHKERRQ(ierr); /* Allocate Memory for outgoing messages */ ierr = PetscMalloc4(size,&sbuf1,size,&ptr,2*msz,&tmp,size,&ctr);CHKERRQ(ierr); ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr); ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr); { PetscInt *iptr = tmp; k = 0; for (i=0; ij, and send them off */ ierr = PetscMalloc1(nrqr+1,&sbuf_aj);CHKERRQ(ierr); for (i=0,j=0; i local col of submatrices */ #if defined(PETSC_USE_CTABLE) for (i=0; iNbs+1,&cmap[i]);CHKERRQ(ierr); jmax = ncol[i]; icol_i = icol[i]; cmap_i = cmap[i]; for (j=0; jNbs,&cmap[i]);CHKERRQ(ierr); jmax = ncol[i]; icol_i = icol[i]; cmap_i = cmap[i]; for (j=0; j local row of submatrices */ for (i=0; iMbs+1,&rmap[i]);CHKERRQ(ierr); irow_i = irow[i]; jmax = nrow[i]; for (j=0; jMbs,&rmap[i]);CHKERRQ(ierr); rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; jtype_name);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocation(submats[i],bs_tmp,0,lens[i]);CHKERRQ(ierr); ierr = MatSeqSBAIJSetPreallocation(submats[i],bs_tmp,0,lens[i]);CHKERRQ(ierr); /* this subroutine is used by SBAIJ routines */ /* create struct Mat_SubSppt and attached it to submat */ ierr = PetscNew(&smat_i);CHKERRQ(ierr); subc = (Mat_SeqBAIJ*)submats[i]->data; subc->submatis1 = smat_i; smat_i->destroy = submats[i]->ops->destroy; submats[i]->ops->destroy = MatDestroy_SeqBAIJ_Submatrices; submats[i]->factortype = C->factortype; smat_i->id = i; smat_i->nrqs = nrqs; smat_i->nrqr = nrqr; smat_i->rbuf1 = rbuf1; smat_i->rbuf2 = rbuf2; smat_i->rbuf3 = rbuf3; smat_i->sbuf2 = sbuf2; smat_i->req_source2 = req_source2; smat_i->sbuf1 = sbuf1; smat_i->ptr = ptr; smat_i->tmp = tmp; smat_i->ctr = ctr; smat_i->pa = pa; smat_i->req_size = req_size; smat_i->req_source1 = req_source1; smat_i->allcolumns = allcolumns[i]; smat_i->allrows = allrows[i]; smat_i->singleis = PETSC_FALSE; smat_i->row2proc = row2proc[i]; smat_i->rmap = rmap[i]; smat_i->cmap = cmap[i]; } if (!ismax) { /* Create dummy submats[0] for reuse struct subc */ ierr = MatCreate(PETSC_COMM_SELF,&submats[0]);CHKERRQ(ierr); ierr = MatSetSizes(submats[0],0,0,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetType(submats[0],MATDUMMY);CHKERRQ(ierr); /* create struct Mat_SubSppt and attached it to submat */ ierr = PetscNewLog(submats[0],&smat_i);CHKERRQ(ierr); submats[0]->data = (void*)smat_i; smat_i->destroy = submats[0]->ops->destroy; submats[0]->ops->destroy = MatDestroy_Dummy_Submatrices; submats[0]->factortype = C->factortype; smat_i->id = 0; smat_i->nrqs = nrqs; smat_i->nrqr = nrqr; smat_i->rbuf1 = rbuf1; smat_i->rbuf2 = rbuf2; smat_i->rbuf3 = rbuf3; smat_i->sbuf2 = sbuf2; smat_i->req_source2 = req_source2; smat_i->sbuf1 = sbuf1; smat_i->ptr = ptr; smat_i->tmp = tmp; smat_i->ctr = ctr; smat_i->pa = pa; smat_i->req_size = req_size; smat_i->req_source1 = req_source1; smat_i->allcolumns = PETSC_FALSE; smat_i->singleis = PETSC_FALSE; smat_i->row2proc = NULL; smat_i->rmap = NULL; smat_i->cmap = NULL; } if (ismax) {ierr = PetscFree(lens[0]);CHKERRQ(ierr);} ierr = PetscFree(lens);CHKERRQ(ierr); ierr = PetscFree(sbuf_aj[0]);CHKERRQ(ierr); ierr = PetscFree(sbuf_aj);CHKERRQ(ierr); } /* endof scall == MAT_INITIAL_MATRIX */ /* Post recv matrix values */ if (!ijonly) { ierr = PetscObjectGetNewTag((PetscObject)C,&tag4);CHKERRQ(ierr); ierr = PetscMalloc1(nrqs+1,&rbuf4);CHKERRQ(ierr); ierr = PetscMalloc1(nrqs+1,&r_waits4);CHKERRQ(ierr); ierr = PetscMalloc1(nrqs+1,&r_status4);CHKERRQ(ierr); ierr = PetscMalloc1(nrqr+1,&s_status4);CHKERRQ(ierr); for (i=0; ia, and send them off */ ierr = PetscMalloc1(nrqr+1,&sbuf_aa);CHKERRQ(ierr); for (i=0,j=0; idata; imat_ilen = subc->ilen; imat_j = subc->j; imat_i = subc->i; imat_a = subc->a; if (!allcolumns[i]) cmap_i = cmap[i]; rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for (j=0; jdata; imat_ilen = subc->ilen; imat_j = subc->j; imat_i = subc->i; if (!ijonly) imat_a = subc->a; max1 = sbuf1_i[2*j]; for (k=0; kdata; imat_ilen = subc->ilen; imat_j = subc->j; imat_i = subc->i; if (!ijonly) imat_a = subc->a; if (allcolumns[i]) continue; jmax = nrow[i]; for (j=0; jijonly = PETSC_FALSE; /* set back to the default */ PetscFunctionReturn(0); }