1 2 #include <petsc-private/matimpl.h> 3 4 #define DEFAULT_STASH_SIZE 10000 5 6 /* 7 MatStashCreate_Private - Creates a stash,currently used for all the parallel 8 matrix implementations. The stash is where elements of a matrix destined 9 to be stored on other processors are kept until matrix assembly is done. 10 11 This is a simple minded stash. Simply adds entries to end of stash. 12 13 Input Parameters: 14 comm - communicator, required for scatters. 15 bs - stash block size. used when stashing blocks of values 16 17 Output Parameters: 18 stash - the newly created stash 19 */ 20 #undef __FUNCT__ 21 #define __FUNCT__ "MatStashCreate_Private" 22 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 23 { 24 PetscErrorCode ierr; 25 PetscInt max,*opt,nopt,i; 26 PetscBool flg; 27 28 PetscFunctionBegin; 29 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 30 stash->comm = comm; 31 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 33 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 34 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 35 ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr); 36 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 37 38 39 nopt = stash->size; 40 ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 41 ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 42 if (flg) { 43 if (nopt == 1) max = opt[0]; 44 else if (nopt == stash->size) max = opt[stash->rank]; 45 else if (stash->rank < nopt) max = opt[stash->rank]; 46 else max = 0; /* Use default */ 47 stash->umax = max; 48 } else { 49 stash->umax = 0; 50 } 51 ierr = PetscFree(opt);CHKERRQ(ierr); 52 if (bs <= 0) bs = 1; 53 54 stash->bs = bs; 55 stash->nmax = 0; 56 stash->oldnmax = 0; 57 stash->n = 0; 58 stash->reallocs = -1; 59 stash->space_head = 0; 60 stash->space = 0; 61 62 stash->send_waits = 0; 63 stash->recv_waits = 0; 64 stash->send_status = 0; 65 stash->nsends = 0; 66 stash->nrecvs = 0; 67 stash->svalues = 0; 68 stash->rvalues = 0; 69 stash->rindices = 0; 70 stash->nprocessed = 0; 71 72 stash->reproduce = PETSC_FALSE; 73 ierr = PetscOptionsGetBool(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 /* 78 MatStashDestroy_Private - Destroy the stash 79 */ 80 #undef __FUNCT__ 81 #define __FUNCT__ "MatStashDestroy_Private" 82 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 83 { 84 PetscErrorCode ierr; 85 86 PetscFunctionBegin; 87 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 88 stash->space = 0; 89 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 90 PetscFunctionReturn(0); 91 } 92 93 /* 94 MatStashScatterEnd_Private - This is called as the final stage of 95 scatter. The final stages of message passing is done here, and 96 all the memory used for message passing is cleaned up. This 97 routine also resets the stash, and deallocates the memory used 98 for the stash. It also keeps track of the current memory usage 99 so that the same value can be used the next time through. 100 */ 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatStashScatterEnd_Private" 103 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 104 { 105 PetscErrorCode ierr; 106 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 107 MPI_Status *send_status; 108 109 PetscFunctionBegin; 110 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 111 /* wait on sends */ 112 if (nsends) { 113 ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 114 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 115 ierr = PetscFree(send_status);CHKERRQ(ierr); 116 } 117 118 /* Now update nmaxold to be app 10% more than max n used, this way the 119 wastage of space is reduced the next time this stash is used. 120 Also update the oldmax, only if it increases */ 121 if (stash->n) { 122 bs2 = stash->bs*stash->bs; 123 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 124 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 125 } 126 127 stash->nmax = 0; 128 stash->n = 0; 129 stash->reallocs = -1; 130 stash->nprocessed = 0; 131 ierr = PetscMatStashSpaceDestroy(&stash->space_head);CHKERRQ(ierr); 132 stash->space = 0; 133 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 134 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 135 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 136 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 137 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 138 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 139 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 /* 144 MatStashGetInfo_Private - Gets the relavant statistics of the stash 145 146 Input Parameters: 147 stash - the stash 148 nstash - the size of the stash. Indicates the number of values stored. 149 reallocs - the number of additional mallocs incurred. 150 151 */ 152 #undef __FUNCT__ 153 #define __FUNCT__ "MatStashGetInfo_Private" 154 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 155 { 156 PetscInt bs2 = stash->bs*stash->bs; 157 158 PetscFunctionBegin; 159 if (nstash) *nstash = stash->n*bs2; 160 if (reallocs) { 161 if (stash->reallocs < 0) *reallocs = 0; 162 else *reallocs = stash->reallocs; 163 } 164 PetscFunctionReturn(0); 165 } 166 167 /* 168 MatStashSetInitialSize_Private - Sets the initial size of the stash 169 170 Input Parameters: 171 stash - the stash 172 max - the value that is used as the max size of the stash. 173 this value is used while allocating memory. 174 */ 175 #undef __FUNCT__ 176 #define __FUNCT__ "MatStashSetInitialSize_Private" 177 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 178 { 179 PetscFunctionBegin; 180 stash->umax = max; 181 PetscFunctionReturn(0); 182 } 183 184 /* MatStashExpand_Private - Expand the stash. This function is called 185 when the space in the stash is not sufficient to add the new values 186 being inserted into the stash. 187 188 Input Parameters: 189 stash - the stash 190 incr - the minimum increase requested 191 192 Notes: 193 This routine doubles the currently used memory. 194 */ 195 #undef __FUNCT__ 196 #define __FUNCT__ "MatStashExpand_Private" 197 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 198 { 199 PetscErrorCode ierr; 200 PetscInt newnmax,bs2= stash->bs*stash->bs; 201 202 PetscFunctionBegin; 203 /* allocate a larger stash */ 204 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 205 if (stash->umax) newnmax = stash->umax/bs2; 206 else newnmax = DEFAULT_STASH_SIZE/bs2; 207 } else if (!stash->nmax) { /* resuing stash */ 208 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 209 else newnmax = stash->oldnmax/bs2; 210 } else newnmax = stash->nmax*2; 211 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 212 213 /* Get a MatStashSpace and attach it to stash */ 214 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 215 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 216 stash->space_head = stash->space; 217 } 218 219 stash->reallocs++; 220 stash->nmax = newnmax; 221 PetscFunctionReturn(0); 222 } 223 /* 224 MatStashValuesRow_Private - inserts values into the stash. This function 225 expects the values to be roworiented. Multiple columns belong to the same row 226 can be inserted with a single call to this function. 227 228 Input Parameters: 229 stash - the stash 230 row - the global row correspoiding to the values 231 n - the number of elements inserted. All elements belong to the above row. 232 idxn - the global column indices corresponding to each of the values. 233 values - the values inserted 234 */ 235 #undef __FUNCT__ 236 #define __FUNCT__ "MatStashValuesRow_Private" 237 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries) 238 { 239 PetscErrorCode ierr; 240 PetscInt i,k,cnt = 0; 241 PetscMatStashSpace space=stash->space; 242 243 PetscFunctionBegin; 244 /* Check and see if we have sufficient memory */ 245 if (!space || space->local_remaining < n) { 246 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 247 } 248 space = stash->space; 249 k = space->local_used; 250 for (i=0; i<n; i++) { 251 if (ignorezeroentries && (values[i] == 0.0)) continue; 252 space->idx[k] = row; 253 space->idy[k] = idxn[i]; 254 space->val[k] = values[i]; 255 k++; 256 cnt++; 257 } 258 stash->n += cnt; 259 space->local_used += cnt; 260 space->local_remaining -= cnt; 261 PetscFunctionReturn(0); 262 } 263 264 /* 265 MatStashValuesCol_Private - inserts values into the stash. This function 266 expects the values to be columnoriented. Multiple columns belong to the same row 267 can be inserted with a single call to this function. 268 269 Input Parameters: 270 stash - the stash 271 row - the global row correspoiding to the values 272 n - the number of elements inserted. All elements belong to the above row. 273 idxn - the global column indices corresponding to each of the values. 274 values - the values inserted 275 stepval - the consecutive values are sepated by a distance of stepval. 276 this happens because the input is columnoriented. 277 */ 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatStashValuesCol_Private" 280 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries) 281 { 282 PetscErrorCode ierr; 283 PetscInt i,k,cnt = 0; 284 PetscMatStashSpace space=stash->space; 285 286 PetscFunctionBegin; 287 /* Check and see if we have sufficient memory */ 288 if (!space || space->local_remaining < n) { 289 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 290 } 291 space = stash->space; 292 k = space->local_used; 293 for (i=0; i<n; i++) { 294 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 295 space->idx[k] = row; 296 space->idy[k] = idxn[i]; 297 space->val[k] = values[i*stepval]; 298 k++; 299 cnt++; 300 } 301 stash->n += cnt; 302 space->local_used += cnt; 303 space->local_remaining -= cnt; 304 PetscFunctionReturn(0); 305 } 306 307 /* 308 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 309 This function expects the values to be roworiented. Multiple columns belong 310 to the same block-row can be inserted with a single call to this function. 311 This function extracts the sub-block of values based on the dimensions of 312 the original input block, and the row,col values corresponding to the blocks. 313 314 Input Parameters: 315 stash - the stash 316 row - the global block-row correspoiding to the values 317 n - the number of elements inserted. All elements belong to the above row. 318 idxn - the global block-column indices corresponding to each of the blocks of 319 values. Each block is of size bs*bs. 320 values - the values inserted 321 rmax - the number of block-rows in the original block. 322 cmax - the number of block-columsn on the original block. 323 idx - the index of the current block-row in the original block. 324 */ 325 #undef __FUNCT__ 326 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 327 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 328 { 329 PetscErrorCode ierr; 330 PetscInt i,j,k,bs2,bs=stash->bs,l; 331 const PetscScalar *vals; 332 PetscScalar *array; 333 PetscMatStashSpace space=stash->space; 334 335 PetscFunctionBegin; 336 if (!space || space->local_remaining < n) { 337 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 338 } 339 space = stash->space; 340 l = space->local_used; 341 bs2 = bs*bs; 342 for (i=0; i<n; i++) { 343 space->idx[l] = row; 344 space->idy[l] = idxn[i]; 345 /* Now copy over the block of values. Store the values column oriented. 346 This enables inserting multiple blocks belonging to a row with a single 347 funtion call */ 348 array = space->val + bs2*l; 349 vals = values + idx*bs2*n + bs*i; 350 for (j=0; j<bs; j++) { 351 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 352 array++; 353 vals += cmax*bs; 354 } 355 l++; 356 } 357 stash->n += n; 358 space->local_used += n; 359 space->local_remaining -= n; 360 PetscFunctionReturn(0); 361 } 362 363 /* 364 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 365 This function expects the values to be roworiented. Multiple columns belong 366 to the same block-row can be inserted with a single call to this function. 367 This function extracts the sub-block of values based on the dimensions of 368 the original input block, and the row,col values corresponding to the blocks. 369 370 Input Parameters: 371 stash - the stash 372 row - the global block-row correspoiding to the values 373 n - the number of elements inserted. All elements belong to the above row. 374 idxn - the global block-column indices corresponding to each of the blocks of 375 values. Each block is of size bs*bs. 376 values - the values inserted 377 rmax - the number of block-rows in the original block. 378 cmax - the number of block-columsn on the original block. 379 idx - the index of the current block-row in the original block. 380 */ 381 #undef __FUNCT__ 382 #define __FUNCT__ "MatStashValuesColBlocked_Private" 383 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 384 { 385 PetscErrorCode ierr; 386 PetscInt i,j,k,bs2,bs=stash->bs,l; 387 const PetscScalar *vals; 388 PetscScalar *array; 389 PetscMatStashSpace space=stash->space; 390 391 PetscFunctionBegin; 392 if (!space || space->local_remaining < n) { 393 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 394 } 395 space = stash->space; 396 l = space->local_used; 397 bs2 = bs*bs; 398 for (i=0; i<n; i++) { 399 space->idx[l] = row; 400 space->idy[l] = idxn[i]; 401 /* Now copy over the block of values. Store the values column oriented. 402 This enables inserting multiple blocks belonging to a row with a single 403 funtion call */ 404 array = space->val + bs2*l; 405 vals = values + idx*bs2*n + bs*i; 406 for (j=0; j<bs; j++) { 407 for (k=0; k<bs; k++) {array[k] = vals[k];} 408 array += bs; 409 vals += rmax*bs; 410 } 411 l++; 412 } 413 stash->n += n; 414 space->local_used += n; 415 space->local_remaining -= n; 416 PetscFunctionReturn(0); 417 } 418 /* 419 MatStashScatterBegin_Private - Initiates the transfer of values to the 420 correct owners. This function goes through the stash, and check the 421 owners of each stashed value, and sends the values off to the owner 422 processors. 423 424 Input Parameters: 425 stash - the stash 426 owners - an array of size 'no-of-procs' which gives the ownership range 427 for each node. 428 429 Notes: The 'owners' array in the cased of the blocked-stash has the 430 ranges specified blocked global indices, and for the regular stash in 431 the proper global indices. 432 */ 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatStashScatterBegin_Private" 435 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 436 { 437 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 438 PetscInt size=stash->size,nsends; 439 PetscErrorCode ierr; 440 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 441 PetscScalar **rvalues,*svalues; 442 MPI_Comm comm = stash->comm; 443 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 444 PetscMPIInt *nprocs,*nlengths,nreceives; 445 PetscInt *sp_idx,*sp_idy; 446 PetscScalar *sp_val; 447 PetscMatStashSpace space,space_next; 448 449 PetscFunctionBegin; 450 bs2 = stash->bs*stash->bs; 451 452 /* first count number of contributors to each processor */ 453 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 454 ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 455 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr); 456 ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 457 ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 458 459 i = j = 0; 460 lastidx = -1; 461 space = stash->space_head; 462 while (space != PETSC_NULL) { 463 space_next = space->next; 464 sp_idx = space->idx; 465 for (l=0; l<space->local_used; l++) { 466 /* if indices are NOT locally sorted, need to start search at the beginning */ 467 if (lastidx > (idx = sp_idx[l])) j = 0; 468 lastidx = idx; 469 for (; j<size; j++) { 470 if (idx >= owners[j] && idx < owners[j+1]) { 471 nlengths[j]++; owner[i] = j; break; 472 } 473 } 474 i++; 475 } 476 space = space_next; 477 } 478 /* Now check what procs get messages - and compute nsends. */ 479 for (i=0, nsends=0 ; i<size; i++) { 480 if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 481 } 482 483 {PetscMPIInt *onodes,*olengths; 484 /* Determine the number of messages to expect, their lengths, from from-ids */ 485 ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 486 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 487 /* since clubbing row,col - lengths are multiplied by 2 */ 488 for (i=0; i<nreceives; i++) olengths[i] *=2; 489 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 490 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 491 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 492 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 493 ierr = PetscFree(onodes);CHKERRQ(ierr); 494 ierr = PetscFree(olengths);CHKERRQ(ierr); 495 } 496 497 /* do sends: 498 1) starts[i] gives the starting index in svalues for stuff going to 499 the ith processor 500 */ 501 ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr); 502 ierr = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 503 ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr); 504 /* use 2 sends the first with all_a, the next with all_i and all_j */ 505 startv[0] = 0; starti[0] = 0; 506 for (i=1; i<size; i++) { 507 startv[i] = startv[i-1] + nlengths[i-1]; 508 starti[i] = starti[i-1] + 2*nlengths[i-1]; 509 } 510 511 i = 0; 512 space = stash->space_head; 513 while (space != PETSC_NULL) { 514 space_next = space->next; 515 sp_idx = space->idx; 516 sp_idy = space->idy; 517 sp_val = space->val; 518 for (l=0; l<space->local_used; l++) { 519 j = owner[i]; 520 if (bs2 == 1) { 521 svalues[startv[j]] = sp_val[l]; 522 } else { 523 PetscInt k; 524 PetscScalar *buf1,*buf2; 525 buf1 = svalues+bs2*startv[j]; 526 buf2 = space->val + bs2*l; 527 for (k=0; k<bs2; k++) { buf1[k] = buf2[k]; } 528 } 529 sindices[starti[j]] = sp_idx[l]; 530 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 531 startv[j]++; 532 starti[j]++; 533 i++; 534 } 535 space = space_next; 536 } 537 startv[0] = 0; 538 for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 539 540 for (i=0,count=0; i<size; i++) { 541 if (nprocs[i]) { 542 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 543 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 544 } 545 } 546 #if defined(PETSC_USE_INFO) 547 ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 548 for (i=0; i<size; i++) { 549 if (nprocs[i]) { 550 ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 551 } 552 } 553 #endif 554 ierr = PetscFree(nlengths);CHKERRQ(ierr); 555 ierr = PetscFree(owner);CHKERRQ(ierr); 556 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 557 ierr = PetscFree(nprocs);CHKERRQ(ierr); 558 559 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 560 ierr = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 561 562 for (i=0; i<nreceives; i++) { 563 recv_waits[2*i] = recv_waits1[i]; 564 recv_waits[2*i+1] = recv_waits2[i]; 565 } 566 stash->recv_waits = recv_waits; 567 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 568 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 569 570 stash->svalues = svalues; 571 stash->sindices = sindices; 572 stash->rvalues = rvalues; 573 stash->rindices = rindices; 574 stash->send_waits = send_waits; 575 stash->nsends = nsends; 576 stash->nrecvs = nreceives; 577 stash->reproduce_count = 0; 578 PetscFunctionReturn(0); 579 } 580 581 /* 582 MatStashScatterGetMesg_Private - This function waits on the receives posted 583 in the function MatStashScatterBegin_Private() and returns one message at 584 a time to the calling function. If no messages are left, it indicates this 585 by setting flg = 0, else it sets flg = 1. 586 587 Input Parameters: 588 stash - the stash 589 590 Output Parameters: 591 nvals - the number of entries in the current message. 592 rows - an array of row indices (or blocked indices) corresponding to the values 593 cols - an array of columnindices (or blocked indices) corresponding to the values 594 vals - the values 595 flg - 0 indicates no more message left, and the current call has no values associated. 596 1 indicates that the current call successfully received a message, and the 597 other output parameters nvals,rows,cols,vals are set appropriately. 598 */ 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatStashScatterGetMesg_Private" 601 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 602 { 603 PetscErrorCode ierr; 604 PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 605 PetscInt bs2; 606 MPI_Status recv_status; 607 PetscBool match_found = PETSC_FALSE; 608 609 PetscFunctionBegin; 610 611 *flg = 0; /* When a message is discovered this is reset to 1 */ 612 /* Return if no more messages to process */ 613 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 614 615 bs2 = stash->bs*stash->bs; 616 /* If a matching pair of receives are found, process them, and return the data to 617 the calling function. Until then keep receiving messages */ 618 while (!match_found) { 619 CHKMEMQ; 620 if (stash->reproduce) { 621 i = stash->reproduce_count++; 622 ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 623 } else { 624 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 625 } 626 CHKMEMQ; 627 if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 628 629 /* Now pack the received message into a structure which is usable by others */ 630 if (i % 2) { 631 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 632 flg_v[2*recv_status.MPI_SOURCE] = i/2; 633 *nvals = *nvals/bs2; 634 } else { 635 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 636 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 637 *nvals = *nvals/2; /* This message has both row indices and col indices */ 638 } 639 640 /* Check if we have both messages from this proc */ 641 i1 = flg_v[2*recv_status.MPI_SOURCE]; 642 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 643 if (i1 != -1 && i2 != -1) { 644 *rows = stash->rindices[i2]; 645 *cols = *rows + *nvals; 646 *vals = stash->rvalues[i1]; 647 *flg = 1; 648 stash->nprocessed ++; 649 match_found = PETSC_TRUE; 650 } 651 } 652 PetscFunctionReturn(0); 653 } 654