1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: stash.c,v 1.20 1999/02/25 22:48:34 balay Exp balay $"; 3 #endif 4 5 #include "src/vec/vecimpl.h" 6 #include "src/mat/matimpl.h" 7 8 #define DEFAULT_STASH_SIZE 10000 9 /* 10 This stash is currently used for all the parallel matrix implementations. 11 The stash is where elements of a matrix destined to be stored on other 12 processors are kept until matrix assembly is done. 13 14 This is a simple minded stash. Simply add entry to end of stash. 15 */ 16 17 #undef __FUNC__ 18 #define __FUNC__ "StashCreate_Private" 19 int StashCreate_Private(MPI_Comm comm,int bs, Stash *stash) 20 { 21 int ierr,flg,max=DEFAULT_STASH_SIZE; 22 23 PetscFunctionBegin; 24 /* Require 2 tags, get the second using PetscCommGetNewTag() */ 25 ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 26 ierr = PetscCommGetNewTag(comm,&stash->tag2); CHKERRQ(ierr); 27 ierr = OptionsGetInt(PETSC_NULL,"-stash_initial_size",&max,&flg);CHKERRQ(ierr); 28 ierr = StashSetInitialSize_Private(stash,max); CHKERRQ(ierr); 29 30 if (bs <= 0) bs = 1; 31 stash->bs = bs; 32 stash->nmax = 0; 33 stash->n = 0; 34 stash->reallocs = 0; 35 stash->idx = 0; 36 stash->idy = 0; 37 stash->array = 0; 38 39 stash->send_waits = 0; 40 stash->recv_waits = 0; 41 stash->nsends = 0; 42 stash->nrecvs = 0; 43 stash->svalues = 0; 44 stash->rvalues = 0; 45 stash->rmax = 0; 46 stash->rdata = 0; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNC__ 51 #define __FUNC__ "StashDestroy_Private" 52 int StashDestroy_Private(Stash *stash) 53 { 54 int ierr; 55 PetscFunctionBegin; 56 ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 57 if (stash->array) {PetscFree(stash->array); stash->array = 0;} 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNC__ 62 #define __FUNC__ "StashReset_Private" 63 int StashReset_Private(Stash *stash) 64 { 65 PetscFunctionBegin; 66 /* Now update nmaxold to be app 10% more than nmax, this way the 67 wastage of space is reduced the next time this stash is used */ 68 stash->oldnmax = (int)(stash->nmax * 1.1) + 5; 69 stash->nmax = 0; 70 stash->n = 0; 71 stash->reallocs = 0; 72 stash->rmax = 0; 73 74 if (stash->array) { 75 PetscFree(stash->array); 76 stash->array = 0; 77 stash->idx = 0; 78 stash->idy = 0; 79 } 80 if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 81 if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits =0;} 82 if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 83 if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 84 if (stash->rdata) {PetscFree(stash->rdata); stash->rdata = 0;} 85 86 PetscFunctionReturn(0); 87 } 88 89 #undef __FUNC__ 90 #define __FUNC__ "StashInfo_Private" 91 int StashInfo_Private(Stash *stash) 92 { 93 PetscFunctionBegin; 94 PLogInfo(0,"StashInfo_Private:Stash size %d, mallocs incured %d\n",stash->n,stash->reallocs); 95 PetscFunctionReturn(0); 96 } 97 #undef __FUNC__ 98 #define __FUNC__ "StashSetInitialSize_Private" 99 int StashSetInitialSize_Private(Stash *stash,int max) 100 { 101 PetscFunctionBegin; 102 stash->oldnmax = max; 103 stash->nmax = 0; 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNC__ 108 #define __FUNC__ "StashExpand_Private" 109 static int StashExpand_Private(Stash *stash) 110 { 111 int *n_idx,*n_idy,newnmax; 112 Scalar *n_array; 113 114 PetscFunctionBegin; 115 /* allocate a larger stash */ 116 if (stash->nmax == 0) newnmax = stash->oldnmax; 117 else newnmax = stash->nmax *2; 118 119 n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+sizeof(Scalar)));CHKPTRQ(n_array); 120 n_idx = (int *) (n_array + newnmax); 121 n_idy = (int *) (n_idx + newnmax); 122 PetscMemcpy(n_array,stash->array,stash->nmax*sizeof(Scalar)); 123 PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 124 PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 125 if (stash->array) PetscFree(stash->array); 126 stash->array = n_array; 127 stash->idx = n_idx; 128 stash->idy = n_idy; 129 stash->nmax = newnmax; 130 stash->oldnmax = newnmax; 131 stash->reallocs++; 132 PetscFunctionReturn(0); 133 } 134 135 /* 136 Should do this properly. With a sorted array. 137 */ 138 #undef __FUNC__ 139 #define __FUNC__ "StashValues_Private" 140 int StashValues_Private(Stash *stash,int row,int n, int *idxn,Scalar *values,InsertMode addv) 141 { 142 int ierr,i,found; 143 Scalar val; 144 145 PetscFunctionBegin; 146 for ( i=0; i<n; i++ ) { 147 found = 0; 148 val = *values++; 149 if (!found) { /* not found so add to end */ 150 if ( stash->n == stash->nmax ) { 151 ierr = StashExpand_Private(stash); CHKERRQ(ierr); 152 } 153 stash->array[stash->n] = val; 154 stash->idx[stash->n] = row; 155 stash->idy[stash->n++] = idxn[i]; 156 } 157 } 158 PetscFunctionReturn(0); 159 } 160 161 #undef __FUNC__ 162 #define __FUNC__ "StashScatterBegin_Private" 163 int StashScatterBegin_Private(Stash *stash,int *owners) 164 { 165 MPI_Comm comm = stash->comm; 166 MPI_Request *send_waits,*recv_waits; 167 Scalar *rvalues,*svalues; 168 int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2; 169 int rank,size,*nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 170 int count,ierr,*sindices,*rindices,bs=stash->bs; 171 172 PetscFunctionBegin; 173 174 ierr = MPI_Comm_size(comm,&size); CHKERRQ(ierr); 175 ierr = MPI_Comm_rank(comm,&rank); CHKERRQ(ierr); 176 177 /* first count number of contributors to each processor */ 178 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 179 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 180 owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 181 for ( i=0; i<stash->n; i++ ) { 182 idx = stash->idx[i]; 183 for ( j=0; j<size; j++ ) { 184 if (idx >= bs*owners[j] && idx < bs*owners[j+1]) { 185 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 186 } 187 } 188 } 189 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 190 191 /* inform other processors of number of messages and max length*/ 192 work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 193 ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 194 nreceives = work[rank]; 195 ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 196 nmax = work[rank]; 197 PetscFree(work); 198 /* post receives: 199 since we don't know how long each individual message is we 200 allocate the largest needed buffer for each receive. Potentially 201 this is a lot of wasted space. 202 */ 203 rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(sizeof(Scalar)+2*sizeof(int))); 204 CHKPTRQ(rvalues); 205 rindices = (int *) (rvalues + nreceives*nmax); 206 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*2*sizeof(MPI_Request)); 207 CHKPTRQ(recv_waits); 208 for ( i=0,count=0; i<nreceives; i++ ) { 209 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 210 recv_waits+count++); CHKERRQ(ierr); 211 ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 212 recv_waits+count++); CHKERRQ(ierr); 213 } 214 215 /* do sends: 216 1) starts[i] gives the starting index in svalues for stuff going to 217 the ith processor 218 */ 219 svalues = (Scalar *) PetscMalloc((stash->n+1)*(sizeof(Scalar)+2*sizeof(int))); 220 CHKPTRQ(svalues); 221 sindices = (int *) (svalues + stash->n); 222 send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 223 CHKPTRQ(send_waits); 224 startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 225 starti = startv + size; 226 /* use 2 sends the first with all_a, the next with all_i and then all_j */ 227 startv[0] = 0; starti[0] = 0; 228 for ( i=1; i<size; i++ ) { 229 startv[i] = startv[i-1] + nprocs[i-1]; 230 starti[i] = starti[i-1] + nprocs[i-1]*2; 231 } 232 for ( i=0; i<stash->n; i++ ) { 233 j = owner[i]; 234 svalues[startv[j]] = stash->array[i]; 235 sindices[starti[j]] = stash->idx[i]; 236 sindices[starti[j]+nprocs[j]] = stash->idy[i]; 237 startv[j]++; 238 starti[j]++; 239 } 240 startv[0] = 0; 241 for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 242 for ( i=0,count=0; i<size; i++ ) { 243 if (procs[i]) { 244 ierr = MPI_Isend(svalues+startv[i],nprocs[i],MPIU_SCALAR,i,tag1,comm, 245 send_waits+count++);CHKERRQ(ierr); 246 ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 247 send_waits+count++);CHKERRQ(ierr); 248 } 249 } 250 PetscFree(owner); 251 PetscFree(startv); 252 PetscFree(nprocs); 253 stash->svalues = svalues; stash->rvalues = rvalues; 254 stash->nsends = nsends; stash->nrecvs = nreceives; 255 stash->send_waits = send_waits; stash->recv_waits = recv_waits; 256 stash->rmax = nmax; 257 258 PetscFunctionReturn(0); 259 } 260 261 262 #undef __FUNC__ 263 #define __FUNC__ "StashScatterEnd_Private" 264 int StashScatterEnd_Private(Stash *stash) 265 { 266 int i,ierr,size,*nprocs; 267 MPI_Status *send_status,*recv_status; 268 int i1,i2,s1,s2,count,*rindices; 269 270 PetscFunctionBegin; 271 272 /* wait on receives */ 273 ierr = MPI_Comm_size(stash->comm,&size); CHKERRQ(ierr); 274 if (stash->nrecvs) { 275 recv_status = (MPI_Status *) PetscMalloc(2*(stash->nrecvs+1)*sizeof(MPI_Status)); 276 CHKPTRQ(recv_status); 277 ierr = MPI_Waitall(2*stash->nrecvs,stash->recv_waits,recv_status);CHKERRQ(ierr); 278 /* Now pack the received messages into a structure which is useable by others */ 279 stash->rdata = (Stash_rdata *) PetscMalloc(stash->nrecvs*sizeof(Stash_rdata)); 280 CHKPTRQ(stash->rdata); 281 nprocs = (int *) PetscMalloc((2*size+1)*sizeof(int)); CHKPTRQ(nprocs); 282 rindices = (int *) (stash->rvalues + stash->rmax*stash->nrecvs); 283 for (i=0; i<2*size+1; i++ ) nprocs[i] = -1; 284 for ( i=0; i<stash->nrecvs; i++ ){ 285 nprocs[2*recv_status[2*i].MPI_SOURCE] = i; 286 nprocs[2*recv_status[2*i+1].MPI_SOURCE+1] = i; 287 } 288 for (i=0,count=0; i<size; i++) { 289 i1 = nprocs[2*i]; 290 i2 = nprocs[2*i+1]; 291 if (i1 != -1) { 292 if (i2 == -1) SETERRQ(1,0,"Internal Error"); 293 ierr = MPI_Get_count(recv_status+2*i1,MPIU_SCALAR,&s1);CHKERRQ(ierr); 294 ierr = MPI_Get_count(recv_status+2*i2+1,MPI_INT,&s2);CHKERRQ(ierr); 295 if (s1*2 != s2) SETERRQ(1,0,"Internal Error"); 296 stash->rdata[count].a = stash->rvalues + i1*stash->rmax; 297 stash->rdata[count].i = rindices + 2*i2*stash->rmax; 298 stash->rdata[count].j = stash->rdata[count].i + s1; 299 stash->rdata[count].n = s1; 300 count ++; 301 } 302 } 303 PetscFree(recv_status); 304 PetscFree(nprocs); 305 } 306 /* wait on sends */ 307 if (stash->nsends) { 308 send_status = (MPI_Status *)PetscMalloc(2*stash->nsends*sizeof(MPI_Status)); 309 CHKPTRQ(send_status); 310 ierr = MPI_Waitall(2*stash->nsends,stash->send_waits,send_status);CHKERRQ(ierr); 311 PetscFree(send_status); 312 } 313 PetscFunctionReturn(0); 314 } 315