/* Routines to compute overlapping regions of a parallel MPI matrix. Used for finding submatrices that were shared across processors. */ #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> #include static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat,PetscInt,IS*); static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Local(Mat,PetscInt*,PetscInt,PetscInt*,PetscBT*); #undef __FUNCT__ #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ" PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat C,PetscInt is_max,IS is[],PetscInt ov) { PetscErrorCode ierr; PetscInt i,N=C->cmap->N, bs=C->rmap->bs,M=C->rmap->N,Mbs=M/bs,*nidx,isz,iov; IS *is_new,*is_row; Mat *submats; Mat_MPISBAIJ *c=(Mat_MPISBAIJ*)C->data; Mat_SeqSBAIJ *asub_i; PetscBT table; PetscInt *ai,brow,nz,nis,l,nmax,nstages_local,nstages,max_no,pos; const PetscInt *idx; PetscBool flg,*allcolumns,*allrows; PetscFunctionBegin; ierr = PetscMalloc1(is_max,&is_new);CHKERRQ(ierr); /* Convert the indices into block format */ ierr = ISCompressIndicesGeneral(N,C->rmap->n,bs,is_max,is,is_new);CHKERRQ(ierr); if (ov < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n"); /* ----- previous non-scalable implementation ----- */ flg = PETSC_FALSE; ierr = PetscOptionsHasName(NULL, "-IncreaseOverlap_old", &flg);CHKERRQ(ierr); if (flg) { /* previous non-scalable implementation */ printf("use previous non-scalable implementation...\n"); for (i=0; iNbs * sizeof(PetscInt)); if (!nmax) nmax = 1; nstages_local = is_max/nmax + ((is_max % nmax) ? 1 : 0); /* Make sure every processor loops through the nstages */ ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)C));CHKERRQ(ierr); for (iov=0; iovijonly = PETSC_TRUE; ierr = MatGetSubMatrices_MPIBAIJ_local(C,max_no,is_row+pos,is_new+pos,MAT_INITIAL_MATRIX,allrows+pos,allcolumns+pos,submats+pos);CHKERRQ(ierr); pos += max_no; } /* 2) Row search */ ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr); /* 3) Column search */ for (i=0; idata; ai = asub_i->i;; /* put is_new obtained from MatIncreaseOverlap_MPIBAIJ() to table */ ierr = PetscBTMemzero(Mbs,table);CHKERRQ(ierr); ierr = ISGetIndices(is_new[i],&idx);CHKERRQ(ierr); ierr = ISGetLocalSize(is_new[i],&nis);CHKERRQ(ierr); for (l=0; ldata; PetscErrorCode ierr; PetscMPIInt size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len,*iwork; const PetscInt *idx_i; PetscInt idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i; PetscInt Mbs,i,j,k,*odata1,*odata2; PetscInt proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est; PetscInt proc_end=0,len_unused,nodata2; PetscInt ois_max; /* max no of is[] in each of processor */ char *t_p; MPI_Comm comm; MPI_Request *s_waits1,*s_waits2,r_req; MPI_Status *s_status,r_status; PetscBT *table; /* mark indices of this processor's is[] */ PetscBT table_i; PetscBT otable; /* mark indices of other processors' is[] */ PetscInt bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners; IS garray_local,garray_gl; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); size = c->size; rank = c->rank; Mbs = c->Mbs; ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); /* create tables used in step 1: table[i] - mark c->garray of proc [i] step 3: table[i] - mark indices of is[i] when whose=MINE table[0] - mark incideces of is[] when whose=OTHER */ len = PetscMax(is_max, size);CHKERRQ(ierr); ierr = PetscMalloc2(len,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,&t_p);CHKERRQ(ierr); for (i=0; igarray from all processors */ ierr = ISCreateGeneral(comm,Bnbs,c->garray,PETSC_COPY_VALUES,&garray_local);CHKERRQ(ierr); ierr = ISAllGather(garray_local, &garray_gl);CHKERRQ(ierr); ierr = ISDestroy(&garray_local);CHKERRQ(ierr); ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); Bowners[0] = 0; for (i=0; irow to proc_id) */ ierr = PetscMalloc1(Mbs,&ctable);CHKERRQ(ierr); for (proc_id=0,j=0; proc_idrmap->range[proc_id+1]/bs; j++) ctable[j] = proc_id; } /* hash tables marking c->garray */ ierr = ISGetIndices(garray_gl,&idx_i);CHKERRQ(ierr); for (i=0; i len) len = len_r1[i]; } ierr = PetscFree(len_r1);CHKERRQ(ierr); ierr = PetscFree(id_r1);CHKERRQ(ierr); for (proc_id=0; proc_id= len_max */ k = 0; while (k < nrqr) { /* Receive messages */ ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);CHKERRQ(ierr); if (flag) { ierr = MPI_Get_count(&r_status,MPIU_INT,&len);CHKERRQ(ierr); proc_id = r_status.MPI_SOURCE; ierr = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr); ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr); /* Process messages */ /* make sure there is enough unused space in odata2 array */ if (len_unused < len_max) { /* allocate more space for odata2 */ ierr = PetscMalloc1(len_est+1,&odata2);CHKERRQ(ierr); odata2_ptr[++nodata2] = odata2; len_unused = len_est; } ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable);CHKERRQ(ierr); len = 1 + odata2[0]; for (i=0; i 1+is_max) { /* Add data2 into data */ data2_i = data2 + 1 + is_max; for (i=0; idata; Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(c->A)->data; Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(c->B)->data; PetscErrorCode ierr; PetscInt row,mbs,Mbs,*nidx_i,col,col_max,isz,isz0,*ai,*aj,*bi,*bj,*garray,rstart,l; PetscInt a_start,a_end,b_start,b_end,i,j,k,is_max,*idx_i,n; PetscBT table0; /* mark the indices of input is[] for look up */ PetscBT table_i; /* poits to i-th table. When whose=OTHER, a single table is used for all is[] */ PetscFunctionBegin; Mbs = c->Mbs; mbs = a->mbs; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; rstart = c->rstartbs; is_max = data[0]; ierr = PetscBTCreate(Mbs,&table0);CHKERRQ(ierr); nidx[0] = is_max; idx_i = data + is_max + 1; /* ptr to input is[0] array */ nidx_i = nidx + is_max + 1; /* ptr to output is[0] array */ for (i=0; i= Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"index col %D >= Mbs %D",col,Mbs); if (!PetscBTLookupSet(table_i,col)) { ierr = PetscBTSet(table0,col);CHKERRQ(ierr); if (whose == MINE) nidx_i[isz0] = col; if (col_max < col) col_max = col; isz0++; } } if (whose == MINE) isz = isz0; k = 0; /* no. of indices from input is[i] that have been examined */ for (row=0; row= isz0) break; /* for (row=0; row col_max) break; if (PetscBTLookup(table0,col)) { if (!PetscBTLookupSet(table_i,row+rstart)) nidx_i[isz++] = row+rstart; break; /* for l = start; l col_max) break; if (PetscBTLookup(table0,col)) { if (!PetscBTLookupSet(table_i,row+rstart)) nidx_i[isz++] = row+rstart; break; /* for l = start; l