#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mmdense.c,v 1.13 1997/10/19 03:25:11 bsmith Exp balay $";
#endif

/* Support for the parallel dense matrix vector multiply */

#include "src/mat/impls/dense/mpi/mpidense.h"
#include "src/vec/vecimpl.h"

#undef __FUNC__
#define __FUNC__ "MatSetUpMultiply_MPIDense"
/*
   MatSetUpMultiply_MPIDense - Builds the scatter context used by the parallel
   (MPI) dense matrix-vector multiply.

   On return two persistent objects hang off the Mat_MPIDense data:
     mdn->lvec  - a sequential work vector of length mdn->N (the full global
                  column dimension) that the multiply scatters into;
     mdn->Mvctx - a VecScatter built with the stride IS 0..N-1 as BOTH the
                  "to" and "from" index set, i.e. every process gathers the
                  entire parallel vector into its local lvec.

   Input parameter:
     mat - the parallel dense matrix; mat->data is assumed to be Mat_MPIDense.

   Returns 0 on success; the CHKERRQ macros propagate any PETSc error code.

   The index set and the global template vector are needed only to create the
   scatter and are destroyed before returning.
*/
int MatSetUpMultiply_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense *) mat->data;
  int          ierr,n;
  IS           tofrom;   /* temporary stride IS 0..N-1, used for both ends of the scatter */
  Vec          gvec;     /* temporary global vector, only a layout template for the scatter */

  PetscFunctionBegin;
  /* Create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,mdn->N,&mdn->lvec); CHKERRQ(ierr);
  /* Create temporary index set for building scatter gather */
  ierr = ISCreateStride(PETSC_COMM_SELF,mdn->N,0,1,&tofrom); CHKERRQ(ierr);
  /* Create temporary global vector to generate scatter context */
  /* n: this process's share of the N global entries, taken from the cowners
     ownership table (presumably the locally owned columns -- TODO confirm) */
  n = mdn->cowners[mdn->rank+1] - mdn->cowners[mdn->rank];
  ierr = VecCreateMPI(mat->comm,n,mdn->N,&gvec); CHKERRQ(ierr);
  /* Generate the scatter context */
  ierr = VecScatterCreate(gvec,tofrom,mdn->lvec,tofrom,&mdn->Mvctx); CHKERRQ(ierr);
  /* Register children with the logging/object hierarchy */
  PLogObjectParent(mat,mdn->Mvctx);
  PLogObjectParent(mat,mdn->lvec);
  PLogObjectParent(mat,tofrom);
  PLogObjectParent(mat,gvec);
  /* The IS and template vector served their purpose; only lvec/Mvctx persist */
  ierr = ISDestroy(tofrom); CHKERRQ(ierr);
  ierr = VecDestroy(gvec); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* NOTE(review): everything from here on is compiled out under __JUNK__ and the
   remainder of the region (which continues beyond this chunk -- its #endif is
   not visible here) appears text-corrupted: loop conditions have lost their
   '<' comparisons, most likely from a lossy HTML/text conversion.  Recover
   the original from revision control before ever enabling this code. */
#if defined (__JUNK__)
int MatGetSubMatrices_MPIDense_Local(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat*);
#undef __FUNC__
#define __FUNC__ "MatGetSubMatrices_MPIDense"
int MatGetSubMatrices_MPIDense(Mat C,int ismax,IS *isrow,IS *iscol, MatGetSubMatrixCall scall,Mat **submat)
{
  Mat_MPIDense *c = (Mat_MPIDense *) C->data;
  int          nmax,nstages_local,nstages,i,pos,max_no,ierr;

  PetscFunctionBegin;
  /* Allocate memory to hold all the submatrices */
  if (scall != MAT_REUSE_MATRIX) {
    *submat = (Mat *)PetscMalloc((ismax+1)*sizeof(Mat));CHKPTRQ(*submat);
  }
  /* Determine the number of stages through which submatrices are done */
  nmax = 20*1000000 / (c->N * sizeof(int));
  if (nmax == 0) nmax = 1;
  nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
  /* Make sure
every processor loops through the nstages */ ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPI_INT,MPI_MAX,C->comm);CHKERRQ(ierr); for ( i=0,pos=0; idata; Mat A = c->A; Mat_SeqDense *a = (Mat_SeqDense*)A->data, *mat; int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable,start,end,size; int **sbuf1,**sbuf2, rank, m,i,j,k,l,ct1,ct2,ierr, **rbuf1,row,proc; int nrqs, msz, **ptr,index,*req_size,*ctr,*pa,*tmp,tcol,bsz,nrqr; int **rbuf3,*req_source,**sbuf_aj, **rbuf2, max1,max2,**rmap; int **cmap,**lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax,*irow_i; int len,ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*cmap_i,*lens_i; int *rmap_i,tag0,tag1,tag2,tag3; MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3; MPI_Request *r_waits4,*s_waits3,*s_waits4; MPI_Status *r_status1,*r_status2,*s_status1,*s_status3,*s_status2; MPI_Status *r_status3,*r_status4,*s_status4; MPI_Comm comm; Scalar **rbuf4, **sbuf_aa, *vals, *mat_a, *sbuf_aa_i; PetscFunctionBegin; comm = C->comm; tag0 = C->tag; size = c->size; rank = c->rank; m = c->M; /* Get some new tags to keep the communication clean */ ierr = PetscObjectGetNewTag((PetscObject)C,&tag1); CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag2); CHKERRQ(ierr); ierr = PetscObjectGetNewTag((PetscObject)C,&tag3); CHKERRQ(ierr); /* Check if the col indices are sorted */ for ( i=0; i proc*/ for ( i=0,j=0; irowners[i+1]; for ( ; jN; sbuf2_i[j] = ncols; req_size[index] += ncols; } req_source[index] = r_status1[i].MPI_SOURCE; /* form the header */ sbuf2_i[0] = req_size[index]; for ( j=1; jj, and send them off */ /** No space required for a->j... 
as the complete row is packed and sent */ /** sbuf_aj = (int **)PetscMalloc((nrqr+1)*sizeof(int *));CHKPTRQ(sbuf_aj); */ /** for ( i=0,j=0; ii, *b_i = b->i, imark; int *cworkA, *cworkB, cstart = c->cstart, rstart = c->rstart, *bmap = c->garray; int *a_j = a->j, *b_j = b->j, ctmp, *t_cols; for ( i=0; ia, and send them off */ sbuf_aa = (Scalar **)PetscMalloc((nrqr+1)*sizeof(Scalar *));CHKPTRQ(sbuf_aa); for ( i=0,j=0; ii, *b_i = b->i, *cworkB, imark; int cstart = c->cstart, rstart = c->rstart, *bmap = c->garray; int *b_j = b->j; Scalar *vworkA, *vworkB, *a_a = a->a, *b_a = b->a,*t_vals; for ( i=0; iN*sizeof(int); cmap = (int **)PetscMalloc(len); CHKPTRQ(cmap); cmap[0] = (int *)(cmap + ismax); PetscMemzero(cmap[0],(1+ismax*c->N)*sizeof(int)); for ( i=1; iN; } for ( i=0; iM*sizeof(int); rmap = (int **)PetscMalloc(len); CHKPTRQ(rmap); rmap[0] = (int *)(rmap + ismax); PetscMemzero(rmap[0],ismax*c->M*sizeof(int)); for ( i=1; iM;} for ( i=0; idata); if ((mat->m != nrow[i]) || (mat->n != ncol[i])) { SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong size"); } if (PetscMemcmp(mat->ilen,lens[i], mat->m *sizeof(int))) { SETERRQ(PETSC_ERR_ARG_SIZ,0,"Cannot reuse matrix. wrong no of nonzeros"); } /* Initial matrix as if empty */ PetscMemzero(mat->ilen,mat->m*sizeof(int)); submats[i]->factor = C->factor; } } else { for ( i=0; idata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; cmap_i = cmap[i]; rmap_i = rmap[i]; irow_i = irow[i]; jmax = nrow[i]; for ( j=0; jdata; imat_ilen = mat->ilen; imat_j = mat->j; imat_i = mat->i; imat_a = mat->a; max1 = sbuf1_i[2*j]; for ( k=0; k