/*$Id: baijov.c,v 1.60 2001/03/06 20:15:24 balay Exp balay $*/ /* Routines to compute overlapping regions of a parallel MPI matrix and to find submatrices that were shared across processors. */ #include "src/mat/impls/baij/mpi/mpibaij.h" #include "petscbt.h" static int MatIncreaseOverlap_MPIBAIJ_Once(Mat,int,IS *); static int MatIncreaseOverlap_MPIBAIJ_Local(Mat,int,char **,int*,int**); static int MatIncreaseOverlap_MPIBAIJ_Receive(Mat,int,int **,int**,int*); EXTERN int MatGetRow_MPIBAIJ(Mat,int,int*,int**,Scalar**); EXTERN int MatRestoreRow_MPIBAIJ(Mat,int,int*,int**,Scalar**); #undef __FUNC__ #define __FUNC__ "MatCompressIndicesGeneral_MPIBAIJ" static int MatCompressIndicesGeneral_MPIBAIJ(Mat C,int imax,IS *is_in,IS *is_out) { Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)C->data; int ierr,isz,bs = baij->bs,n,i,j,*idx,ival; #if defined (PETSC_USE_CTABLE) PetscTable gid1_lid1; int tt, gid1, *nidx; PetscTablePosition tpos; #else int Nbs,*nidx; PetscBT table; #endif PetscFunctionBegin; #if defined (PETSC_USE_CTABLE) ierr = PetscTableCreate(baij->mbs,&gid1_lid1);CHKERRQ(ierr); #else Nbs = baij->Nbs; ierr = PetscMalloc((Nbs+1)*sizeof(int),&nidx);CHKERRQ(ierr); ierr = PetscBTCreate(Nbs,table);CHKERRQ(ierr); #endif for (i=0; iNbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim"); if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;} #endif } ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr); #if defined (PETSC_USE_CTABLE) ierr = PetscMalloc((isz+1)*sizeof(int),&nidx);CHKERRQ(ierr); ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); j = 0; while (tpos) { ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid1,&tt);CHKERRQ(ierr); if (tt-- > isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than array-dim"); } nidx[tt] = gid1 - 1; j++; } if (j != isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"table error: jj != isz"); } ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr); ierr = PetscFree(nidx);CHKERRQ(ierr); #else ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr); #endif } #if defined (PETSC_USE_CTABLE) ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); #else ierr = PetscBTDestroy(table);CHKERRQ(ierr); ierr = PetscFree(nidx);CHKERRQ(ierr); #endif PetscFunctionReturn(0); } #undef __FUNC__ #define __FUNC__ "MatCompressIndicesSorted_MPIBAIJ" static int MatCompressIndicesSorted_MPIBAIJ(Mat C,int imax,IS *is_in,IS *is_out) { Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)C->data; int ierr,bs=baij->bs,i,j,k,val,n,*idx,*nidx,*idx_local; PetscTruth flg; #if defined (PETSC_USE_CTABLE) int maxsz; #else int Nbs=baij->Nbs; #endif PetscFunctionBegin; for (i=0; i maxsz) maxsz = n; } ierr = PetscMalloc((maxsz+1)*sizeof(int),&nidx);CHKERRQ(ierr); #else ierr = PetscMalloc((Nbs+1)*sizeof(int),&nidx);CHKERRQ(ierr); #endif /* Now check if the indices are in block order */ for (i=0; idata; int ierr,bs = baij->bs,n,i,j,k,*idx,*nidx; #if defined (PETSC_USE_CTABLE) int maxsz; #else int Nbs = baij->Nbs; #endif PetscFunctionBegin; #if defined (PETSC_USE_CTABLE) /* Now check max size */ for (i=0,maxsz=0; i maxsz) maxsz = n*bs; } ierr = PetscMalloc((maxsz+1)*sizeof(int),&nidx);CHKERRQ(ierr); #else ierr = PetscMalloc((Nbs*bs+1)*sizeof(int),&nidx);CHKERRQ(ierr); #endif for (i=0; i is[1] mesg [2] = sizeof(is[1]); ----------- mesg [5] = 5 => is[5] mesg [6] = sizeof(is[5]); ----------- mesg [7] mesg [n] datas[1] ----------- mesg[n+1] mesg[m] data(is[5]) ----------- Notes: nrqs - no of requests sent (or to be sent out) nrqr - no of requests recieved (which have to be or which have been processed */ #undef __FUNC__ #define __FUNC__ "MatIncreaseOverlap_MPIBAIJ_Once" static int MatIncreaseOverlap_MPIBAIJ_Once(Mat C,int imax,IS *is) { Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; int **idx,*n,*w1,*w2,*w3,*w4,*rtable,**data,len,*idx_i; int size,rank,Mbs,i,j,k,ierr,**rbuf,row,proc,nrqs,msz,**outdat,**ptr; int *ctr,*pa,*tmp,nrqr,*isz,*isz1,**xdata,**rbuf2; int *onodes1,*olengths1,tag1,tag2,*onodes2,*olengths2; PetscBT *table; MPI_Comm comm; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2; MPI_Status *s_status,*recv_status; PetscFunctionBegin; comm = C->comm; size = c->size; rank = c->rank; Mbs = c->Mbs; ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); len = (imax+1)*sizeof(int*)+ (imax + Mbs)*sizeof(int); ierr = PetscMalloc(len,&idx);CHKERRQ(ierr); n = (int*)(idx + imax); rtable = n + imax; for (i=0; i proc*/ for (i=0,j=0; irowners[i+1]; for (; jdata; Mat A = c->A,B = c->B; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; int start,end,val,max,rstart,cstart,*ai,*aj; int *bi,*bj,*garray,i,j,k,row,*data_i,isz_i; PetscBT table_i; PetscFunctionBegin; rstart = c->rstart; cstart = c->cstart; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for (i=0; idata; Mat A = c->A,B = c->B; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data; int rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k; int row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end; int val,max1,max2,rank,Mbs,no_malloc =0,*tmp,new_estimate,ctr; int *rbuf_i,kmax,rbuf_0,ierr; PetscBT xtable; PetscFunctionBegin; rank = c->rank; Mbs = c->Mbs; rstart = c->rstart; cstart = c->cstart; ai = a->i; aj = a->j; bi = b->i; bj = b->j; garray = c->garray; for (i=0,ct=0,total_sz=0; iMbs) max1 = ct*(a->nz +b->nz)/c->Mbs; else max1 = 1; mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1); ierr = PetscMalloc(mem_estimate*sizeof(int),&xdata[0]);CHKERRQ(ierr); ++no_malloc; ierr = PetscBTCreate(Mbs,xtable);CHKERRQ(ierr); ierr = PetscMemzero(isz1,nrqr*sizeof(int));CHKERRQ(ierr); ct3 = 0; for (i=0; idata; int nmax,nstages_local,nstages,i,pos,max_no,ierr; PetscFunctionBegin; /* The compression and expansion should be avoided. Does'nt point out errors might change the indices hence buggey */ ierr = PetscMalloc(2*(ismax+1)*sizeof(IS),&isrow_new);CHKERRQ(ierr); iscol_new = isrow_new + ismax; ierr = MatCompressIndicesSorted_MPIBAIJ(C,ismax,isrow,isrow_new);CHKERRQ(ierr); ierr = MatCompressIndicesSorted_MPIBAIJ(C,ismax,iscol,iscol_new);CHKERRQ(ierr); /* Allocate memory to hold all the submatrices */ if (scall != MAT_REUSE_MATRIX) { ierr = PetscMalloc((ismax+1)*sizeof(Mat),submat);CHKERRQ(ierr); } /* Determine the number of stages through which submatrices are done */ nmax = 20*1000000 / (c->Nbs * sizeof(int)); if (!nmax) nmax = 1; nstages_local = ismax/nmax + ((ismax % nmax)?1:0); /* Make sure every porcessor loops through the nstages */ ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);CHKERRQ(ierr); for (i=0,pos=0; i numprocs) fproc = numprocs; while (gid < proc_gnode[fproc] || gid >= proc_gnode[fproc+1]) { if (gid < proc_gnode[fproc]) fproc--; else fproc++; } /* if(fproc<0 || fproc>=numprocs) { SETERRQ(1,"fproc < 0 || fproc >= numprocs"); }*/ *proc = fproc; PetscFunctionReturn(0); } #endif /* -------------------------------------------------------------------------*/ #undef __FUNC__ #define __FUNC__ "MatGetSubMatrices_MPIBAIJ_local" static int MatGetSubMatrices_MPIBAIJ_local(Mat C,int ismax,IS *isrow,IS *iscol,MatReuse scall,Mat *submats) { Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data; Mat A = c->A; Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*mat; int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,start,end,size; int **sbuf1,**sbuf2,rank,i,j,k,l,ct1,ct2,ierr,**rbuf1,row,proc; int nrqs,msz,**ptr,index,*req_size,*ctr,*pa,*tmp,tcol,nrqr; int **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2; int **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i; int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i; int bs=c->bs,bs2=c->bs2,*a_j=a->j,*b_j=b->j,*cworkA,*cworkB; int cstart = c->cstart,nzA,nzB,*a_i=a->i,*b_i=b->i,imark; int *bmap = c->garray,ctmp,rstart=c->rstart,tag0,tag1,tag2,tag3; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; MPI_Request *r_waits4,*s_waits3,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; MPI_Status *r_status3,*r_status4,*s_status4; MPI_Comm comm; MatScalar **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i,*vworkA,*vworkB; MatScalar *a_a=a->a,*b_a=b->a; PetscTruth flag; int *onodes1,*olengths1; #if defined (PETSC_USE_CTABLE) int tt; PetscTable *rowmaps,*colmaps,lrow1_grow1,lcol1_gcol1; #else int **cmap,*cmap_i,*rtable,*rmap_i,**rmap, Mbs = c->Mbs; #endif PetscFunctionBegin; comm = C->comm; tag0 = C->tag; size = c->size; rank = c->rank; /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr); /* Check if the col indices are sorted */ for (i=0; i proc*/ for (i=0,j=0; irowners[i+1]; for (; jrowners,&proc);CHKERRQ(ierr); #else proc = rtable[row]; #endif w4[proc]++; } for (j=0; jrowners,&proc);CHKERRQ(ierr); #else proc = rtable[row]; #endif if (proc != rank) { /* copy to the outgoing buf*/ ctr[proc]++; *ptr[proc] = row; ptr[proc]++; } } /* Update the headers for the current IS */ for (j=0; jA->data,*sB = (Mat_SeqBAIJ*)c->B->data; int *sAi = sA->i,*sBi = sB->i,id,*sbuf2_i; for (i=0; ij, and send them off */ ierr = PetscMalloc((nrqr+1)*sizeof(int *),&sbuf_aj);CHKERRQ(ierr); for (i=0,j=0; ia, and send them off */ ierr = PetscMalloc((nrqr+1)*sizeof(MatScalar *),&sbuf_aa);CHKERRQ(ierr); for (i=0,j=0; iNbs*sizeof(int); ierr = PetscMalloc(len,&cmap);CHKERRQ(ierr); cmap[0] = (int *)(cmap + ismax); ierr = PetscMemzero(cmap[0],(1+ismax*c->Nbs)*sizeof(int));CHKERRQ(ierr); for (i=1; iNbs; } #endif for (i=0; irowners,&proc);CHKERRQ(ierr); #else proc = rtable[row]; #endif if (proc == rank) { /* Get indices from matA and then from matB */ row = row - rstart; nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; #if defined (PETSC_USE_CTABLE) for (k=0; kdata); if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i] || mat->bs != bs)) { SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size"); } ierr = PetscMemcmp(mat->ilen,lens[i],mat->mbs *sizeof(int),&flag);CHKERRQ(ierr); if (flag == PETSC_FALSE) { SETERRQ(PETSC_ERR_ARG_INCOMP,"Cannot reuse matrix. wrong no of nonzeros"); } /* Initial matrix as if empty */ ierr = PetscMemzero(mat->ilen,mat->mbs*sizeof(int));CHKERRQ(ierr); submats[i]->factor = C->factor; } } else { for (i=0; ibs,nrow[i]*bs,ncol[i]*bs,0,lens[i],submats+i);CHKERRQ(ierr); } } /* Assemble the matrices */ /* First assemble the local rows */ { int ilen_row,*imat_ilen,*imat_j,*imat_i; MatScalar *imat_a; for (i=0; idata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; #if defined (PETSC_USE_CTABLE) lcol1_gcol1 = colmaps[i]; lrow1_grow1 = rowmaps[i]; #else cmap_i = cmap[i]; rmap_i = rmap[i]; #endif irow_i = irow[i]; jmax = nrow[i]; for (j=0; jrowners,&proc);CHKERRQ(ierr); #else proc = rtable[row]; #endif if (proc == rank) { row = row - rstart; nzA = a_i[row+1] - a_i[row]; nzB = b_i[row+1] - b_i[row]; cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row]; vworkA = a_a + a_i[row]*bs2; vworkB = b_a + b_i[row]*bs2; #if defined (PETSC_USE_CTABLE) ierr = PetscTableFind(lrow1_grow1,row+rstart+1,&row);CHKERRQ(ierr); row--; if (row < 0) { SETERRQ(1,"row not found in table"); } #else row = rmap_i[row + rstart]; #endif mat_i = imat_i[row]; mat_a = imat_a + mat_i*bs2; mat_j = imat_j + mat_i; ilen_row = imat_ilen[row]; /* load the column indices for this row into cols*/ for (l=0; ldata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; max1 = sbuf1_i[2*j]; for (k=0; k