1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: stash.c,v 1.21 1999/03/09 21:33:15 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/matimpl.h" 6 7 #define DEFAULT_STASH_SIZE 10000 8 /* 9 This stash is currently used for all the parallel matrix implementations. 10 The stash is where elements of a matrix destined to be stored on other 11 processors are kept until matrix assembly is done. 12 13 This is a simple minded stash. Simply add entry to end of stash. 14 */ 15 16 #undef __FUNC__ 17 #define __FUNC__ "StashCreate_Private" 18 int StashCreate_Private(MPI_Comm comm,int bs_stash, int bs_mat, Stash *stash) 19 { 20 int ierr,flg,max=DEFAULT_STASH_SIZE; 21 22 PetscFunctionBegin; 23 /* Require 2 tags, get the second using PetscCommGetNewTag() */ 24 ierr = PetscCommDuplicate_Private(comm,&stash->comm,&stash->tag1);CHKERRQ(ierr); 25 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2); CHKERRQ(ierr); 26 ierr = OptionsGetInt(PETSC_NULL,"-stash_initial_size",&max,&flg);CHKERRQ(ierr); 27 ierr = StashSetInitialSize_Private(stash,max); CHKERRQ(ierr); 28 ierr = MPI_Comm_size(stash->comm,&stash->size); CHKERRQ(ierr); 29 ierr = MPI_Comm_rank(stash->comm,&stash->rank); CHKERRQ(ierr); 30 31 if (bs_stash <= 0) bs_stash = 1; 32 if (bs_mat <= 0) bs_mat = 1; 33 34 stash->bs_stash = bs_stash; 35 stash->bs_mat = bs_mat; 36 stash->nmax = 0; 37 stash->n = 0; 38 stash->reallocs = 0; 39 stash->idx = 0; 40 stash->idy = 0; 41 stash->array = 0; 42 43 stash->send_waits = 0; 44 stash->recv_waits = 0; 45 stash->send_status = 0; 46 stash->nsends = 0; 47 stash->nrecvs = 0; 48 stash->svalues = 0; 49 stash->rvalues = 0; 50 stash->rmax = 0; 51 stash->nprocs = 0; 52 stash->nprocessed = 0; 53 PetscFunctionReturn(0); 54 } 55 56 #undef __FUNC__ 57 #define __FUNC__ "StashDestroy_Private" 58 int StashDestroy_Private(Stash *stash) 59 { 60 int ierr; 61 62 PetscFunctionBegin; 63 ierr = PetscCommDestroy_Private(&stash->comm); CHKERRQ(ierr); 64 if (stash->array) {PetscFree(stash->array); stash->array = 0;} 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNC__ 69 #define __FUNC__ "StashScatterEnd_Private" 70 int StashScatterEnd_Private(Stash *stash) 71 { 72 int nsends=stash->nsends,ierr; 73 MPI_Status *send_status; 74 75 PetscFunctionBegin; 76 /* wait on sends */ 77 if (nsends) { 78 send_status = (MPI_Status *)PetscMalloc(2*nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 79 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 80 PetscFree(send_status); 81 } 82 83 /* Now update nmaxold to be app 10% more than nmax, this way the 84 wastage of space is reduced the next time this stash is used */ 85 stash->oldnmax = (int)(stash->nmax * 1.1) + 5; 86 stash->nmax = 0; 87 stash->n = 0; 88 stash->reallocs = 0; 89 stash->rmax = 0; 90 stash->nprocessed = 0; 91 92 if (stash->array) { 93 PetscFree(stash->array); 94 stash->array = 0; 95 stash->idx = 0; 96 stash->idy = 0; 97 } 98 if (stash->send_waits) {PetscFree(stash->send_waits);stash->send_waits = 0;} 99 if (stash->recv_waits) {PetscFree(stash->recv_waits);stash->recv_waits = 0;} 100 if (stash->svalues) {PetscFree(stash->svalues);stash->svalues = 0;} 101 if (stash->rvalues) {PetscFree(stash->rvalues); stash->rvalues = 0;} 102 if (stash->nprocs) {PetscFree(stash->nprocs); stash->nprocs = 0;} 103 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNC__ 108 #define __FUNC__ "StashInfo_Private" 109 int StashInfo_Private(Stash *stash) 110 { 111 PetscFunctionBegin; 112 PLogInfo(0,"StashInfo_Private:Stash size %d, mallocs incured %d\n",stash->n,stash->reallocs); 113 PetscFunctionReturn(0); 114 } 115 #undef __FUNC__ 116 #define __FUNC__ "StashSetInitialSize_Private" 117 int StashSetInitialSize_Private(Stash *stash,int max) 118 { 119 PetscFunctionBegin; 120 stash->oldnmax = max; 121 stash->nmax = 0; 122 PetscFunctionReturn(0); 123 } 124 125 #undef __FUNC__ 126 #define __FUNC__ "StashExpand_Private" 127 static int StashExpand_Private(Stash *stash) 128 { 129 int *n_idx,*n_idy,newnmax,bs2; 130 Scalar *n_array; 131 132 PetscFunctionBegin; 133 /* allocate a larger stash */ 134 if (stash->nmax == 0) newnmax = stash->oldnmax; 135 else newnmax = stash->nmax*2; 136 137 bs2 = stash->bs_stash*stash->bs_stash; 138 n_array = (Scalar *)PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(Scalar)));CHKPTRQ(n_array); 139 n_idx = (int *) (n_array + bs2*newnmax); 140 n_idy = (int *) (n_idx + newnmax); 141 PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(Scalar)); 142 PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int)); 143 PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int)); 144 if (stash->array) PetscFree(stash->array); 145 stash->array = n_array; 146 stash->idx = n_idx; 147 stash->idy = n_idy; 148 stash->nmax = newnmax; 149 stash->oldnmax = newnmax; 150 stash->reallocs++; 151 PetscFunctionReturn(0); 152 } 153 154 /* 155 Should do this properly. With a sorted array. 156 */ 157 #undef __FUNC__ 158 #define __FUNC__ "StashValues_Private" 159 int StashValues_Private(Stash *stash,int row,int n, int *idxn,Scalar *values) 160 { 161 int ierr,i; 162 Scalar val; 163 164 PetscFunctionBegin; 165 for ( i=0; i<n; i++ ) { 166 if ( stash->n == stash->nmax ) { 167 ierr = StashExpand_Private(stash); CHKERRQ(ierr); 168 } 169 stash->idx[stash->n] = row; 170 stash->idy[stash->n] = idxn[i]; 171 stash->array[stash->n] = values[i]; 172 stash->n++; 173 } 174 PetscFunctionReturn(0); 175 } 176 177 #undef __FUNC__ 178 #define __FUNC__ "StashValuesBlocked_Private" 179 int StashValuesBlocked_Private(Stash *stash,int row,int n,int *idxn,Scalar *values, 180 int rmax,int cmax,int roworiented,int idx) 181 { 182 int ierr,i,j,k,found,bs2,bs=stash->bs_stash; 183 Scalar *vals,*array; 184 185 /* stepval gives the offset that one should use to access the next line of 186 a given block of values */ 187 188 PetscFunctionBegin; 189 bs2 = bs*bs; 190 for ( i=0; i<n; i++ ) { 191 if ( stash->n == stash->nmax ) { 192 ierr = StashExpand_Private(stash); CHKERRQ(ierr); 193 } 194 stash->idx[stash->n] = row; 195 stash->idy[stash->n] = idxn[i]; 196 /* Now copy over the block of values. Store the values column oriented. 197 This enables inserting multiple blocks belonging to a row with a single 198 funtion call */ 199 if (roworiented) { 200 array = stash->array + bs2*stash->n; 201 vals = values + idx*bs2*n + bs*i; 202 for ( j=0; j<bs; j++ ) { 203 for ( k=0; k<bs; k++ ) {array[k*bs] = vals[k];} 204 array += 1; 205 vals += cmax*bs; 206 } 207 } else { 208 array = stash->array + bs2*stash->n; 209 vals = values + idx*bs + bs2*rmax*i; 210 for ( j=0; j<bs; j++ ) { 211 for ( k=0; k<bs; k++ ) {array[k] = vals[k];} 212 array += bs; 213 vals += rmax*bs; 214 } 215 } 216 stash->n++; 217 } 218 PetscFunctionReturn(0); 219 } 220 221 #undef __FUNC__ 222 #define __FUNC__ "StashScatterBegin_Private" 223 int StashScatterBegin_Private(Stash *stash,int *owners) 224 { 225 int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 226 int rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives; 227 int nmax,*work,count,ierr,*sindices,*rindices,i,j,idx,mult; 228 Scalar *rvalues,*svalues; 229 MPI_Comm comm = stash->comm; 230 MPI_Request *send_waits,*recv_waits; 231 232 PetscFunctionBegin; 233 234 bs2 = stash->bs_stash*stash->bs_stash; 235 /* first count number of contributors to each processor */ 236 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 237 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 238 owner = (int *) PetscMalloc( (stash->n+1)*sizeof(int) ); CHKPTRQ(owner); 239 240 /* if blockstash, then the the owners and row,col indices 241 correspond to reduced indices */ 242 if (stash->bs_stash == 1) mult = stash->bs_mat; 243 else mult = 1; 244 245 for ( i=0; i<stash->n; i++ ) { 246 idx = stash->idx[i]; 247 for ( j=0; j<size; j++ ) { 248 if (idx >= mult*owners[j] && idx < mult*owners[j+1]) { 249 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 250 } 251 } 252 } 253 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 254 255 /* inform other processors of number of messages and max length*/ 256 work = (int *)PetscMalloc(size*sizeof(int)); CHKPTRQ(work); 257 ierr = MPI_Allreduce(procs,work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 258 nreceives = work[rank]; 259 ierr = MPI_Allreduce(nprocs,work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 260 nmax = work[rank]; 261 PetscFree(work); 262 /* post receives: 263 since we don't know how long each individual message is we 264 allocate the largest needed buffer for each receive. Potentially 265 this is a lot of wasted space. 266 */ 267 rvalues = (Scalar *)PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(rvalues); 268 rindices = (int *) (rvalues + bs2*nreceives*nmax); 269 recv_waits = (MPI_Request *)PetscMalloc((nreceives+1)*2*sizeof(MPI_Request));CHKPTRQ(recv_waits); 270 for ( i=0,count=0; i<nreceives; i++ ) { 271 ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm, 272 recv_waits+count++); CHKERRQ(ierr); 273 ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm, 274 recv_waits+count++); CHKERRQ(ierr); 275 } 276 277 /* do sends: 278 1) starts[i] gives the starting index in svalues for stuff going to 279 the ith processor 280 */ 281 svalues = (Scalar *)PetscMalloc((stash->n+1)*(bs2*sizeof(Scalar)+2*sizeof(int)));CHKPTRQ(svalues); 282 sindices = (int *) (svalues + bs2*stash->n); 283 send_waits = (MPI_Request *) PetscMalloc(2*(nsends+1)*sizeof(MPI_Request)); 284 CHKPTRQ(send_waits); 285 startv = (int *) PetscMalloc(2*size*sizeof(int) ); CHKPTRQ(startv); 286 starti = startv + size; 287 /* use 2 sends the first with all_a, the next with all_i and all_j */ 288 startv[0] = 0; starti[0] = 0; 289 for ( i=1; i<size; i++ ) { 290 startv[i] = startv[i-1] + nprocs[i-1]; 291 starti[i] = starti[i-1] + nprocs[i-1]*2; 292 } 293 for ( i=0; i<stash->n; i++ ) { 294 j = owner[i]; 295 if (bs2 == 1) { 296 svalues[startv[j]] = stash->array[i]; 297 } else { 298 PetscMemcpy(svalues+bs2*startv[j],stash->array+bs2*i,bs2*sizeof(Scalar)); 299 } 300 sindices[starti[j]] = stash->idx[i]; 301 sindices[starti[j]+nprocs[j]] = stash->idy[i]; 302 startv[j]++; 303 starti[j]++; 304 } 305 startv[0] = 0; 306 for ( i=1; i<size; i++ ) { startv[i] = startv[i-1] + nprocs[i-1];} 307 for ( i=0,count=0; i<size; i++ ) { 308 if (procs[i]) { 309 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[i],MPIU_SCALAR,i,tag1,comm, 310 send_waits+count++);CHKERRQ(ierr); 311 ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[i],MPI_INT,i,tag2,comm, 312 send_waits+count++);CHKERRQ(ierr); 313 } 314 } 315 PetscFree(owner); 316 PetscFree(startv); 317 /* This memory is reused in scatter end for a different purpose*/ 318 for (i=0; i<2*size; i++ ) nprocs[i] = -1; 319 stash->nprocs = nprocs; 320 321 stash->svalues = svalues; stash->rvalues = rvalues; 322 stash->nsends = nsends; stash->nrecvs = nreceives; 323 stash->send_waits = send_waits; stash->recv_waits = recv_waits; 324 stash->rmax = nmax; 325 PetscFunctionReturn(0); 326 } 327 328 /* 329 This function waits on the receives posted in the function 330 StashScatterBegin_Private() and returns one message at a time to 331 the calling function. If no messages are left, it indicates this by 332 setting flg = 0, else it sets flg = 1. 333 */ 334 #undef __FUNC__ 335 #define __FUNC__ "StashScatterGetMesg_Private" 336 int StashScatterGetMesg_Private(Stash *stash,int *nvals,int **rows,int** cols,Scalar **vals,int *flg) 337 { 338 int i,ierr,size=stash->size,*flg_v,*flg_i; 339 int i1,i2,*rindices,match_found=0,bs2; 340 MPI_Status recv_status; 341 342 PetscFunctionBegin; 343 344 *flg = 0; /* When a message is discovered this is reset to 1 */ 345 /* Return if no more messages to process */ 346 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 347 348 flg_v = stash->nprocs; 349 flg_i = flg_v + size; 350 bs2 = stash->bs_stash*stash->bs_stash; 351 /* If a matching pair of receieves are found, process them, and return the data to 352 the calling function. Until then keep receiving messages */ 353 while (!match_found) { 354 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 355 /* Now pack the received message into a structure which is useable by others */ 356 if (i % 2) { 357 ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr); 358 flg_i[recv_status.MPI_SOURCE] = i/2; 359 *nvals = *nvals/2; /* This message has both row indices and col indices */ 360 } else { 361 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 362 flg_v[recv_status.MPI_SOURCE] = i/2; 363 *nvals = *nvals/bs2; 364 } 365 366 /* Check if we have both the messages from this proc */ 367 i1 = flg_v[recv_status.MPI_SOURCE]; 368 i2 = flg_i[recv_status.MPI_SOURCE]; 369 if (i1 != -1 && i2 != -1) { 370 rindices = (int *) (stash->rvalues + bs2*stash->rmax*stash->nrecvs); 371 *rows = rindices + 2*i2*stash->rmax; 372 *cols = *rows + *nvals; 373 *vals = stash->rvalues + i1*bs2*stash->rmax; 374 *flg = 1; 375 stash->nprocessed ++; 376 match_found = 1; 377 } 378 } 379 PetscFunctionReturn(0); 380 } 381