#ifndef lint static char vcid[] = "$Id: mpiov.c,v 1.25 1996/02/15 23:59:09 balay Exp balay $"; #endif #include "mpiaij.h" #include "inline/bitarray.h" static int MatIncreaseOverlap_MPIAIJ_private(Mat, int, IS *); static int FindOverlapLocal(Mat , int , char **,int*, int**); static int FindOverlapRecievedMesg(Mat , int, int **, int**, int* ); int MatIncreaseOverlap_MPIAIJ(Mat C, int imax, IS *is, int ov) { int i, ierr; if (ov < 0){ SETERRQ(1," MatIncreaseOverlap_MPIAIJ: negative overlap specified\n");} for (i =0; i is[1] mesg [2] = sizeof(is[1]); ----------- mesg [5] = 5 => is[5] mesg [6] = sizeof(is[5]); ----------- mesg [7] mesg [n] datas[1] ----------- mesg[n+1] mesg[m] data(is[5]) ----------- Notes: nrqs - no of requests sent (or to be sent out) nrqr - no of requests recieved (which have to be or which have been processed */ static int MatIncreaseOverlap_MPIAIJ_private(Mat C, int imax, IS *is) { Mat_MPIAIJ *c = (Mat_MPIAIJ *) C->data; int **idx, *n, *w1, *w2, *w3, *w4, *rtable,**data; int size, rank, m,i,j,k, ierr, **rbuf, row, proc, nrqs, msz, **outdat, **ptr; int *ctr, *pa, tag, *tmp,bsz, nrqr , *isz, *isz1, **xdata; int bsz1, **rbuf2; char **table; MPI_Comm comm; MPI_Request *send_waits,*recv_waits,*send_waits2,*recv_waits2 ; MPI_Status *send_status ,*recv_status; double space, fr, maxs; comm = C->comm; tag = C->tag; size = c->size; rank = c->rank; m = c->M; TrSpace( &space, &fr, &maxs ); /* MPIU_printf(MPI_COMM_SELF,"[%d] allocated space = %f fragments = %f max ever allocated = %f\n", rank, space, fr, maxs); */ idx = (int **)PetscMalloc((imax+1)*sizeof(int *)); CHKPTRQ(idx); n = (int *)PetscMalloc((imax+1)*sizeof(int )); CHKPTRQ(n); rtable = (int *)PetscMalloc((m+1)*sizeof(int )); CHKPTRQ(rtable); /* Hash table for maping row ->proc */ for ( i=0 ; i proc*/ for( i=0, j=0; i< size; i++) { for (; j rowners[i+1]; j++) { rtable[j] = i; } } /* evaluate communication - mesg to who, length of mesg, and buffer space required. Based on this, buffers are allocated, and data copied into them*/ w1 = (int *)PetscMalloc((size)*4*sizeof(int )); CHKPTRQ(w1); /* mesg size */ w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/ w3 = w2 + size; /* no of IS that needs to be sent to proc i */ w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */ PetscMemzero(w1,(size)*3*sizeof(int)); /* initialise work vector*/ for ( i=0; idata; Mat A = c->A, B = c->B; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; int start, end, val, max, rstart,cstart, ashift, bshift,*ai, *aj; int *bi, *bj, *garray, i, j, k, row; rstart = c->rstart; cstart = c->cstart; ashift = a->indexshift; ai = a->i; aj = a->j +ashift; bshift = b->indexshift; bi = b->i; bj = b->j +bshift; garray = c->garray; for( i=0; idata; Mat A = c->A, B = c->B; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ*)B->data; int rstart,cstart, ashift, bshift,*ai, *aj, *bi, *bj, *garray, i, j, k; int row,total_sz,ct, ct1, ct2, ct3,mem_estimate, oct2, l, start, end; int val, max1, max2, rank, m, no_malloc =0, *tmp, new_estimate, ctr; char *xtable; rank = c->rank; m = c->M; rstart = c->rstart; cstart = c->cstart; ashift = a->indexshift; ai = a->i; aj = a->j +ashift; bshift = b->indexshift; bi = b->i; bj = b->j +bshift; garray = c->garray; for (i =0, ct =0, total_sz =0; i< nrqr; ++i){ ct+= rbuf[i][0]; for ( j = 1; j <= rbuf[i][0] ; j++ ) { total_sz += rbuf[i][2*j]; } } max1 = ct*(a->nz +b->nz)/c->m; mem_estimate = 3*((total_sz > max1?total_sz:max1)+1); xdata[0] = (int *)PetscMalloc(mem_estimate *sizeof(int)); CHKPTRQ(xdata[0]); ++no_malloc; xtable = (char *)PetscMalloc((m/BITSPERBYTE+1)); CHKPTRQ(xtable); PetscMemzero((void *)isz1,(nrqr+1) *sizeof(int)); ct3 = 0; for (i =0; i< nrqr; i++) { /* for easch mesg from proc i */ ct1 = 2*rbuf[i][0]+1; ct2 = ct1; ct3+= ct1; for (j = 1, max1= rbuf[i][0]; j<=max1; j++) { /* for each IS from proc i*/ PetscMemzero((void *)xtable,(m/BITSPERBYTE+1)); oct2 = ct2; for (k =0; k < rbuf[i][2*j]; k++, ct1++) { row = rbuf[i][ct1]; if(!BT_LOOKUP(xtable,row)) { if (!(ct3 < mem_estimate)) { new_estimate = (int)(1.5*mem_estimate)+1; tmp = (int*) PetscMalloc(new_estimate * sizeof(int)); CHKPTRQ(tmp); PetscMemcpy((char *)tmp,(char *)xdata[0],mem_estimate*sizeof(int)); PetscFree(xdata[0]); xdata[0] = tmp; mem_estimate = new_estimate; ++no_malloc; for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];} } xdata[i][ct2++] = row;ct3++; } } for ( k=oct2, max2 =ct2 ; k< max2; k++) { row = xdata[i][k] - rstart; start = ai[row]; end = ai[row+1]; for ( l=start; l < end; l++) { val = aj[l] +ashift + cstart; if(!BT_LOOKUP(xtable,val)) { if (!(ct3 < mem_estimate)) { new_estimate = (int)(1.5*mem_estimate)+1; tmp = (int*) PetscMalloc(new_estimate * sizeof(int)); CHKPTRQ(tmp); PetscMemcpy((char *)tmp,(char *)xdata[0],mem_estimate*sizeof(int)); PetscFree(xdata[0]); xdata[0] = tmp; mem_estimate = new_estimate; ++no_malloc; for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];} } xdata[i][ct2++] = val;ct3++; } } start = bi[row]; end = bi[row+1]; for ( l=start; l < end; l++) { val = garray[bj[l]+bshift] ; if(!BT_LOOKUP(xtable,val)) { if (!(ct3 < mem_estimate)) { new_estimate = (int)(1.5*mem_estimate)+1; tmp = (int*) PetscMalloc(new_estimate * sizeof(int)); CHKPTRQ(tmp); PetscMemcpy((char *)tmp,(char *)xdata[0],mem_estimate*sizeof(int)); PetscFree(xdata[0]); xdata[0] = tmp; mem_estimate = new_estimate; ++no_malloc; for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];} } xdata[i][ct2++] = val;ct3++; } } } /* Update the header*/ xdata[i][2*j] = ct2-oct2; /* Undo the vector isz1 and use only a var*/ xdata[i][2*j-1] = rbuf[i][2*j-1]; } xdata[i][0] = rbuf[i][0]; xdata[i+1] = xdata[i] +ct2; isz1[i] = ct2; /* size of each message */ } PetscFree(xtable); PLogInfo(0,"MatIncreaseOverlap_MPIAIJ:[%d] Allocated %d bytes, required %d bytes, no of mallocs = %d\n",rank,mem_estimate, ct3,no_malloc); return 0; } int MatGetSubMatrices_MPIAIJ (Mat C,int ismax, IS *isrow,IS *iscol,MatGetSubMatrixCall scall, Mat **submat) { Mat_MPIAIJ *c = (Mat_MPIAIJ *) C->data; Mat A = c->A; Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data, *mat; int **irow,**icol,*nrow,*ncol,*w1,*w2,*w3,*w4,*rtable, start, end, size; int **sbuf1,**sbuf2, rank, m,i,j,k,l,ct1,ct2, ierr, **rbuf1, row, proc; int nrqs, msz, **ptr, index, *req_size, *ctr, *pa, tag, *tmp,tcol,bsz, nrqr; int **rbuf3,*req_source,**sbuf_aj, ashift, **rbuf2, max1,max2, **rmap; int **cmap,**lens, is_no, ncols, *cols, mat_i, *mat_j, tmp2; MPI_Request *send_waits, *recv_waits, *send_waits2, *recv_waits2, *recv_waits3 ; MPI_Request *recv_waits4,*send_waits3,*send_waits4; MPI_Status *recv_status ,*recv_status2,*send_status,*send_status3 ,*send_status2; MPI_Status *recv_status3,*recv_status4,*send_status4; MPI_Comm comm; Scalar **rbuf4, **sbuf_aa, *vals, *mat_a; comm = C->comm; tag = C->tag; size = c->size; rank = c->rank; m = c->M; ashift = a->indexshift; /* Check if the col indices are sorted */ for (i=0; iproc */ for ( i=0 ; i proc*/ for( i=0, j=0; i< size; i++) { for (; j rowners[i+1]; j++) { rtable[j] = i; } } /* evaluate communication - mesg to who, length of mesg, and buffer space required. Based on this, buffers are allocated, and data copied into them*/ w1 = (int *)PetscMalloc((size)*4*sizeof(int )); CHKPTRQ(w1); /* mesg size */ w2 = w1 + size; /* if w2[i] marked, then a message to proc i*/ w3 = w2 + size; /* no of IS that needs to be sent to proc i */ w4 = w3 + size; /* temp work space used in determining w1, w2, w3 */ PetscMemzero(w1,(size)*3*sizeof(int)); /* initialise work vector*/ for ( i=0; ij, and send them off */ sbuf_aj = (int **)PetscMalloc((nrqr+1)*sizeof(int *)); CHKPTRQ(sbuf_aj); for ( i=0, j =0; i< nrqr; i++) j += req_size[i]; sbuf_aj[0] = (int*) PetscMalloc((j+1)*sizeof(int)); CHKPTRQ(sbuf_aj[0]); for (i =1; i< nrqr; i++) sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1]; send_waits3 = (MPI_Request *) PetscMalloc((nrqr+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits3); for (i=0; ia, and send them off */ sbuf_aa = (Scalar **)PetscMalloc((nrqr+1)*sizeof(Scalar *)); CHKPTRQ(sbuf_aa); for ( i=0, j =0; i< nrqr; i++) j += req_size[i]; sbuf_aa[0] = (Scalar*) PetscMalloc((j+1)*sizeof(Scalar)); CHKPTRQ(sbuf_aa[0]); for (i =1; i< nrqr; i++) sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1]; send_waits4 = (MPI_Request *) PetscMalloc((nrqr+1)*sizeof(MPI_Request)); CHKPTRQ(send_waits4); for (i=0; iN)*sizeof(int)); CHKPTRQ(cmap[0]); PetscMemzero((char *)cmap[0],(1+ ismax*c->N)*sizeof(int)); for (i =1; iN; } for (i=0; i< ismax; i++) { for ( j=0; j< ncol[i]; j++) { cmap[i][icol[i][j]] = j+1; } } /* Create lens which is required for MatCreate... */ lens = (int **)PetscMalloc((1+ ismax)*sizeof(int *)); CHKPTRQ(lens); for (i =0, j=0; iM)*sizeof(int)); CHKPTRQ(rmap[0]); PetscMemzero((char *)rmap[0],(1+ ismax*c->M)*sizeof(int)); for (i =1; iM ;} for (i=0; i< ismax; i++) { for ( j=0; j< nrow[i]; j++) { rmap[i][irow[i][j]] = j; } } /* Update lens from offproc data */ for ( tmp2 =0; tmp2 < nrqs; tmp2++) { MPI_Waitany(nrqs, recv_waits3, &i, recv_status3+tmp2); index = pa[i]; ct1 = 2*sbuf1[index][0]+1; /* sbuf1, rbuf2*/ ct2 = 0; /* rbuf3, rbuf4 */ for (j =1; j<= sbuf1[index][0]; j++) { is_no = sbuf1[index][2*j-1]; max1 = sbuf1[index][2*j]; for (k =0; k< max1; k++, ct1++) { row = sbuf1[index][ct1]; row = rmap[is_no][row]; /* the val in the new matrix to be */ max2 = rbuf2[i][ct1]; for (l=0; ldata); for (j =0; j< nrow[i]; j++) { row = irow[i][j] ; proc = rtable[row]; if (proc == rank) { ierr = MatGetRow(C,row,&ncols,&cols,&vals); CHKERRQ(ierr); row = rmap[i][row]; mat_i = mat->i[row] + ashift; mat_a = mat->a + mat_i; mat_j = mat->j + mat_i; for (k =0; k< ncols; k++) { if ((tcol = cmap[i][cols[k]])) { *mat_j++ = tcol - (!ashift); *mat_a++ = vals[k]; mat->ilen[row]++; } } ierr = MatRestoreRow(C,row,&ncols,&cols,&vals); CHKERRQ(ierr); } } } /* Now assemble the off proc rows*/ for(tmp2 =0; tmp2 data); max1 = sbuf1[index][2*j]; for (k =0; k< max1; k++, ct1++) { row = sbuf1[index][ct1]; row = rmap[is_no][row]; /* the val in the new matrix to be */ mat_i = mat->i[row] + ashift; mat_a = mat->a + mat_i; mat_j = mat->j + mat_i; max2 = rbuf2[i][ct1]; for (l=0; lilen[row]++; } } } } } MPI_Waitall(nrqr,send_waits4,send_status4); /* Packup*/ for( i=0; i< ismax; i++) { ierr = MatAssemblyBegin((*submat)[i], FINAL_ASSEMBLY); CHKERRQ(ierr); } for( i=0; i< ismax; i++) { ierr = MatAssemblyEnd((*submat)[i], FINAL_ASSEMBLY); CHKERRQ(ierr); } /* Restore the indices */ for (i=0; i