1 #define PETSCMAT_DLL 2 3 #include "private/matimpl.h" 4 5 #define DEFAULT_STASH_SIZE 10000 6 7 /* 8 MatStashCreate_Private - Creates a stash,currently used for all the parallel 9 matrix implementations. The stash is where elements of a matrix destined 10 to be stored on other processors are kept until matrix assembly is done. 11 12 This is a simple minded stash. Simply adds entries to end of stash. 13 14 Input Parameters: 15 comm - communicator, required for scatters. 16 bs - stash block size. used when stashing blocks of values 17 18 Output Parameters: 19 stash - the newly created stash 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "MatStashCreate_Private" 23 PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 24 { 25 PetscErrorCode ierr; 26 PetscInt max,*opt,nopt,i; 27 PetscTruth flg; 28 29 PetscFunctionBegin; 30 /* Require 2 tags,get the second using PetscCommGetNewTag() */ 31 stash->comm = comm; 32 ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 33 ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 34 ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 35 ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 36 ierr = PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);CHKERRQ(ierr); 37 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 38 39 40 nopt = stash->size; 41 ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 42 ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 43 if (flg) { 44 if (nopt == 1) max = opt[0]; 45 else if (nopt == stash->size) max = opt[stash->rank]; 46 else if (stash->rank < nopt) max = opt[stash->rank]; 47 else max = 0; /* Use default */ 48 stash->umax = max; 49 } else { 50 stash->umax = 0; 51 } 52 ierr = PetscFree(opt);CHKERRQ(ierr); 53 if (bs <= 0) bs = 1; 54 55 stash->bs = bs; 56 stash->nmax = 0; 57 stash->oldnmax = 0; 58 stash->n = 0; 59 stash->reallocs = -1; 60 stash->space_head = 0; 61 stash->space = 0; 62 63 stash->send_waits = 0; 64 stash->recv_waits = 0; 65 stash->send_status = 0; 66 stash->nsends = 0; 67 stash->nrecvs = 0; 68 stash->svalues = 0; 69 stash->rvalues = 0; 70 stash->rindices = 0; 71 stash->nprocessed = 0; 72 73 stash->reproduce = PETSC_FALSE; 74 ierr = PetscOptionsGetTruth(PETSC_NULL,"-matstash_reproduce",&stash->reproduce,PETSC_NULL);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 /* 79 MatStashDestroy_Private - Destroy the stash 80 */ 81 #undef __FUNCT__ 82 #define __FUNCT__ "MatStashDestroy_Private" 83 PetscErrorCode MatStashDestroy_Private(MatStash *stash) 84 { 85 PetscErrorCode ierr; 86 87 PetscFunctionBegin; 88 if (stash->space_head){ 89 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 90 stash->space_head = 0; 91 stash->space = 0; 92 } 93 ierr = PetscFree(stash->flg_v);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 /* 98 MatStashScatterEnd_Private - This is called as the final stage of 99 scatter. The final stages of message passing is done here, and 100 all the memory used for message passing is cleaned up. This 101 routine also resets the stash, and deallocates the memory used 102 for the stash. It also keeps track of the current memory usage 103 so that the same value can be used the next time through. 104 */ 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatStashScatterEnd_Private" 107 PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 108 { 109 PetscErrorCode ierr; 110 PetscInt nsends=stash->nsends,bs2,oldnmax,i; 111 MPI_Status *send_status; 112 113 PetscFunctionBegin; 114 for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1; 115 /* wait on sends */ 116 if (nsends) { 117 ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 118 ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 119 ierr = PetscFree(send_status);CHKERRQ(ierr); 120 } 121 122 /* Now update nmaxold to be app 10% more than max n used, this way the 123 wastage of space is reduced the next time this stash is used. 124 Also update the oldmax, only if it increases */ 125 if (stash->n) { 126 bs2 = stash->bs*stash->bs; 127 oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 128 if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 129 } 130 131 stash->nmax = 0; 132 stash->n = 0; 133 stash->reallocs = -1; 134 stash->nprocessed = 0; 135 if (stash->space_head){ 136 ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 137 stash->space_head = 0; 138 stash->space = 0; 139 } 140 ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 141 stash->send_waits = 0; 142 ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 143 stash->recv_waits = 0; 144 ierr = PetscFree2(stash->svalues,stash->sindices);CHKERRQ(ierr); 145 stash->svalues = 0; 146 ierr = PetscFree(stash->rvalues[0]);CHKERRQ(ierr); 147 ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 148 stash->rvalues = 0; 149 ierr = PetscFree(stash->rindices[0]);CHKERRQ(ierr); 150 ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 151 stash->rindices = 0; 152 PetscFunctionReturn(0); 153 } 154 155 /* 156 MatStashGetInfo_Private - Gets the relavant statistics of the stash 157 158 Input Parameters: 159 stash - the stash 160 nstash - the size of the stash. Indicates the number of values stored. 161 reallocs - the number of additional mallocs incurred. 162 163 */ 164 #undef __FUNCT__ 165 #define __FUNCT__ "MatStashGetInfo_Private" 166 PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 167 { 168 PetscInt bs2 = stash->bs*stash->bs; 169 170 PetscFunctionBegin; 171 if (nstash) *nstash = stash->n*bs2; 172 if (reallocs) { 173 if (stash->reallocs < 0) *reallocs = 0; 174 else *reallocs = stash->reallocs; 175 } 176 PetscFunctionReturn(0); 177 } 178 179 /* 180 MatStashSetInitialSize_Private - Sets the initial size of the stash 181 182 Input Parameters: 183 stash - the stash 184 max - the value that is used as the max size of the stash. 185 this value is used while allocating memory. 186 */ 187 #undef __FUNCT__ 188 #define __FUNCT__ "MatStashSetInitialSize_Private" 189 PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 190 { 191 PetscFunctionBegin; 192 stash->umax = max; 193 PetscFunctionReturn(0); 194 } 195 196 /* MatStashExpand_Private - Expand the stash. This function is called 197 when the space in the stash is not sufficient to add the new values 198 being inserted into the stash. 199 200 Input Parameters: 201 stash - the stash 202 incr - the minimum increase requested 203 204 Notes: 205 This routine doubles the currently used memory. 206 */ 207 #undef __FUNCT__ 208 #define __FUNCT__ "MatStashExpand_Private" 209 static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 210 { 211 PetscErrorCode ierr; 212 PetscInt newnmax,bs2= stash->bs*stash->bs; 213 214 PetscFunctionBegin; 215 /* allocate a larger stash */ 216 if (!stash->oldnmax && !stash->nmax) { /* new stash */ 217 if (stash->umax) newnmax = stash->umax/bs2; 218 else newnmax = DEFAULT_STASH_SIZE/bs2; 219 } else if (!stash->nmax) { /* resuing stash */ 220 if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 221 else newnmax = stash->oldnmax/bs2; 222 } else newnmax = stash->nmax*2; 223 if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 224 225 /* Get a MatStashSpace and attach it to stash */ 226 ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 227 if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 228 stash->space_head = stash->space; 229 } 230 231 stash->reallocs++; 232 stash->nmax = newnmax; 233 PetscFunctionReturn(0); 234 } 235 /* 236 MatStashValuesRow_Private - inserts values into the stash. This function 237 expects the values to be roworiented. Multiple columns belong to the same row 238 can be inserted with a single call to this function. 239 240 Input Parameters: 241 stash - the stash 242 row - the global row correspoiding to the values 243 n - the number of elements inserted. All elements belong to the above row. 244 idxn - the global column indices corresponding to each of the values. 245 values - the values inserted 246 */ 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatStashValuesRow_Private" 249 PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries) 250 { 251 PetscErrorCode ierr; 252 PetscInt i,k,cnt = 0; 253 PetscMatStashSpace space=stash->space; 254 255 PetscFunctionBegin; 256 /* Check and see if we have sufficient memory */ 257 if (!space || space->local_remaining < n){ 258 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 259 } 260 space = stash->space; 261 k = space->local_used; 262 for (i=0; i<n; i++) { 263 if (ignorezeroentries && (values[i] == 0.0)) continue; 264 space->idx[k] = row; 265 space->idy[k] = idxn[i]; 266 space->val[k] = values[i]; 267 k++; 268 cnt++; 269 } 270 stash->n += cnt; 271 space->local_used += cnt; 272 space->local_remaining -= cnt; 273 PetscFunctionReturn(0); 274 } 275 276 /* 277 MatStashValuesCol_Private - inserts values into the stash. This function 278 expects the values to be columnoriented. Multiple columns belong to the same row 279 can be inserted with a single call to this function. 280 281 Input Parameters: 282 stash - the stash 283 row - the global row correspoiding to the values 284 n - the number of elements inserted. All elements belong to the above row. 285 idxn - the global column indices corresponding to each of the values. 286 values - the values inserted 287 stepval - the consecutive values are sepated by a distance of stepval. 288 this happens because the input is columnoriented. 289 */ 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatStashValuesCol_Private" 292 PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries) 293 { 294 PetscErrorCode ierr; 295 PetscInt i,k,cnt = 0; 296 PetscMatStashSpace space=stash->space; 297 298 PetscFunctionBegin; 299 /* Check and see if we have sufficient memory */ 300 if (!space || space->local_remaining < n){ 301 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 302 } 303 space = stash->space; 304 k = space->local_used; 305 for (i=0; i<n; i++) { 306 if (ignorezeroentries && (values[i*stepval] == 0.0)) continue; 307 space->idx[k] = row; 308 space->idy[k] = idxn[i]; 309 space->val[k] = values[i*stepval]; 310 k++; 311 cnt++; 312 } 313 stash->n += cnt; 314 space->local_used += cnt; 315 space->local_remaining -= cnt; 316 PetscFunctionReturn(0); 317 } 318 319 /* 320 MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 321 This function expects the values to be roworiented. Multiple columns belong 322 to the same block-row can be inserted with a single call to this function. 323 This function extracts the sub-block of values based on the dimensions of 324 the original input block, and the row,col values corresponding to the blocks. 325 326 Input Parameters: 327 stash - the stash 328 row - the global block-row correspoiding to the values 329 n - the number of elements inserted. All elements belong to the above row. 330 idxn - the global block-column indices corresponding to each of the blocks of 331 values. Each block is of size bs*bs. 332 values - the values inserted 333 rmax - the number of block-rows in the original block. 334 cmax - the number of block-columsn on the original block. 335 idx - the index of the current block-row in the original block. 336 */ 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatStashValuesRowBlocked_Private" 339 PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 340 { 341 PetscErrorCode ierr; 342 PetscInt i,j,k,bs2,bs=stash->bs,l; 343 const PetscScalar *vals; 344 PetscScalar *array; 345 PetscMatStashSpace space=stash->space; 346 347 PetscFunctionBegin; 348 if (!space || space->local_remaining < n){ 349 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 350 } 351 space = stash->space; 352 l = space->local_used; 353 bs2 = bs*bs; 354 for (i=0; i<n; i++) { 355 space->idx[l] = row; 356 space->idy[l] = idxn[i]; 357 /* Now copy over the block of values. Store the values column oriented. 358 This enables inserting multiple blocks belonging to a row with a single 359 funtion call */ 360 array = space->val + bs2*l; 361 vals = values + idx*bs2*n + bs*i; 362 for (j=0; j<bs; j++) { 363 for (k=0; k<bs; k++) array[k*bs] = vals[k]; 364 array++; 365 vals += cmax*bs; 366 } 367 l++; 368 } 369 stash->n += n; 370 space->local_used += n; 371 space->local_remaining -= n; 372 PetscFunctionReturn(0); 373 } 374 375 /* 376 MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 377 This function expects the values to be roworiented. Multiple columns belong 378 to the same block-row can be inserted with a single call to this function. 379 This function extracts the sub-block of values based on the dimensions of 380 the original input block, and the row,col values corresponding to the blocks. 381 382 Input Parameters: 383 stash - the stash 384 row - the global block-row correspoiding to the values 385 n - the number of elements inserted. All elements belong to the above row. 386 idxn - the global block-column indices corresponding to each of the blocks of 387 values. Each block is of size bs*bs. 388 values - the values inserted 389 rmax - the number of block-rows in the original block. 390 cmax - the number of block-columsn on the original block. 391 idx - the index of the current block-row in the original block. 392 */ 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatStashValuesColBlocked_Private" 395 PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 396 { 397 PetscErrorCode ierr; 398 PetscInt i,j,k,bs2,bs=stash->bs,l; 399 const PetscScalar *vals; 400 PetscScalar *array; 401 PetscMatStashSpace space=stash->space; 402 403 PetscFunctionBegin; 404 if (!space || space->local_remaining < n){ 405 ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 406 } 407 space = stash->space; 408 l = space->local_used; 409 bs2 = bs*bs; 410 for (i=0; i<n; i++) { 411 space->idx[l] = row; 412 space->idy[l] = idxn[i]; 413 /* Now copy over the block of values. Store the values column oriented. 414 This enables inserting multiple blocks belonging to a row with a single 415 funtion call */ 416 array = space->val + bs2*l; 417 vals = values + idx*bs2*n + bs*i; 418 for (j=0; j<bs; j++) { 419 for (k=0; k<bs; k++) {array[k] = vals[k];} 420 array += bs; 421 vals += rmax*bs; 422 } 423 l++; 424 } 425 stash->n += n; 426 space->local_used += n; 427 space->local_remaining -= n; 428 PetscFunctionReturn(0); 429 } 430 /* 431 MatStashScatterBegin_Private - Initiates the transfer of values to the 432 correct owners. This function goes through the stash, and check the 433 owners of each stashed value, and sends the values off to the owner 434 processors. 435 436 Input Parameters: 437 stash - the stash 438 owners - an array of size 'no-of-procs' which gives the ownership range 439 for each node. 440 441 Notes: The 'owners' array in the cased of the blocked-stash has the 442 ranges specified blocked global indices, and for the regular stash in 443 the proper global indices. 444 */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatStashScatterBegin_Private" 447 PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners) 448 { 449 PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 450 PetscInt size=stash->size,nsends; 451 PetscErrorCode ierr; 452 PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 453 PetscScalar **rvalues,*svalues; 454 MPI_Comm comm = stash->comm; 455 MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 456 PetscMPIInt *nprocs,*nlengths,nreceives; 457 PetscInt *sp_idx,*sp_idy; 458 PetscScalar *sp_val; 459 PetscMatStashSpace space,space_next; 460 461 PetscFunctionBegin; 462 bs2 = stash->bs*stash->bs; 463 464 /* first count number of contributors to each processor */ 465 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 466 ierr = PetscMemzero(nprocs,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 467 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);CHKERRQ(ierr); 468 ierr = PetscMemzero(nlengths,size*sizeof(PetscMPIInt));CHKERRQ(ierr); 469 ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 470 471 i = j = 0; 472 lastidx = -1; 473 space = stash->space_head; 474 while (space != PETSC_NULL){ 475 space_next = space->next; 476 sp_idx = space->idx; 477 for (l=0; l<space->local_used; l++){ 478 /* if indices are NOT locally sorted, need to start search at the beginning */ 479 if (lastidx > (idx = sp_idx[l])) j = 0; 480 lastidx = idx; 481 for (; j<size; j++) { 482 if (idx >= owners[j] && idx < owners[j+1]) { 483 nlengths[j]++; owner[i] = j; break; 484 } 485 } 486 i++; 487 } 488 space = space_next; 489 } 490 /* Now check what procs get messages - and compute nsends. */ 491 for (i=0, nsends=0 ; i<size; i++) { 492 if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 493 } 494 495 {PetscMPIInt *onodes,*olengths; 496 /* Determine the number of messages to expect, their lengths, from from-ids */ 497 ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 498 ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 499 /* since clubbing row,col - lengths are multiplied by 2 */ 500 for (i=0; i<nreceives; i++) olengths[i] *=2; 501 ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 502 /* values are size 'bs2' lengths (and remove earlier factor 2 */ 503 for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 504 ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 505 ierr = PetscFree(onodes);CHKERRQ(ierr); 506 ierr = PetscFree(olengths);CHKERRQ(ierr); 507 } 508 509 /* do sends: 510 1) starts[i] gives the starting index in svalues for stuff going to 511 the ith processor 512 */ 513 ierr = PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);CHKERRQ(ierr); 514 ierr = PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 515 ierr = PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);CHKERRQ(ierr); 516 /* use 2 sends the first with all_a, the next with all_i and all_j */ 517 startv[0] = 0; starti[0] = 0; 518 for (i=1; i<size; i++) { 519 startv[i] = startv[i-1] + nlengths[i-1]; 520 starti[i] = starti[i-1] + 2*nlengths[i-1]; 521 } 522 523 i = 0; 524 space = stash->space_head; 525 while (space != PETSC_NULL){ 526 space_next = space->next; 527 sp_idx = space->idx; 528 sp_idy = space->idy; 529 sp_val = space->val; 530 for (l=0; l<space->local_used; l++){ 531 j = owner[i]; 532 if (bs2 == 1) { 533 svalues[startv[j]] = sp_val[l]; 534 } else { 535 PetscInt k; 536 PetscScalar *buf1,*buf2; 537 buf1 = svalues+bs2*startv[j]; 538 buf2 = space->val + bs2*l; 539 for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 540 } 541 sindices[starti[j]] = sp_idx[l]; 542 sindices[starti[j]+nlengths[j]] = sp_idy[l]; 543 startv[j]++; 544 starti[j]++; 545 i++; 546 } 547 space = space_next; 548 } 549 startv[0] = 0; 550 for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 551 552 for (i=0,count=0; i<size; i++) { 553 if (nprocs[i]) { 554 ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 555 ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 556 } 557 } 558 #if defined(PETSC_USE_INFO) 559 ierr = PetscInfo1(mat,"No of messages: %d \n",nsends);CHKERRQ(ierr); 560 for (i=0; i<size; i++) { 561 if (nprocs[i]) { 562 ierr = PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 563 } 564 } 565 #endif 566 ierr = PetscFree(nlengths);CHKERRQ(ierr); 567 ierr = PetscFree(owner);CHKERRQ(ierr); 568 ierr = PetscFree2(startv,starti);CHKERRQ(ierr); 569 ierr = PetscFree(nprocs);CHKERRQ(ierr); 570 571 /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 572 ierr = PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 573 574 for (i=0; i<nreceives; i++) { 575 recv_waits[2*i] = recv_waits1[i]; 576 recv_waits[2*i+1] = recv_waits2[i]; 577 } 578 stash->recv_waits = recv_waits; 579 ierr = PetscFree(recv_waits1);CHKERRQ(ierr); 580 ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 581 582 stash->svalues = svalues; 583 stash->sindices = sindices; 584 stash->rvalues = rvalues; 585 stash->rindices = rindices; 586 stash->send_waits = send_waits; 587 stash->nsends = nsends; 588 stash->nrecvs = nreceives; 589 stash->reproduce_count = 0; 590 PetscFunctionReturn(0); 591 } 592 593 /* 594 MatStashScatterGetMesg_Private - This function waits on the receives posted 595 in the function MatStashScatterBegin_Private() and returns one message at 596 a time to the calling function. If no messages are left, it indicates this 597 by setting flg = 0, else it sets flg = 1. 598 599 Input Parameters: 600 stash - the stash 601 602 Output Parameters: 603 nvals - the number of entries in the current message. 604 rows - an array of row indices (or blocked indices) corresponding to the values 605 cols - an array of columnindices (or blocked indices) corresponding to the values 606 vals - the values 607 flg - 0 indicates no more message left, and the current call has no values associated. 608 1 indicates that the current call successfully received a message, and the 609 other output parameters nvals,rows,cols,vals are set appropriately. 610 */ 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatStashScatterGetMesg_Private" 613 PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,PetscScalar **vals,PetscInt *flg) 614 { 615 PetscErrorCode ierr; 616 PetscMPIInt i,*flg_v = stash->flg_v,i1,i2; 617 PetscInt bs2; 618 MPI_Status recv_status; 619 PetscTruth match_found = PETSC_FALSE; 620 621 PetscFunctionBegin; 622 623 *flg = 0; /* When a message is discovered this is reset to 1 */ 624 /* Return if no more messages to process */ 625 if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 626 627 bs2 = stash->bs*stash->bs; 628 /* If a matching pair of receives are found, process them, and return the data to 629 the calling function. Until then keep receiving messages */ 630 while (!match_found) { 631 CHKMEMQ; 632 if (stash->reproduce) { 633 i = stash->reproduce_count++; 634 ierr = MPI_Wait(stash->recv_waits+i,&recv_status);CHKERRQ(ierr); 635 } else { 636 ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 637 } 638 CHKMEMQ; 639 if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!"); 640 641 /* Now pack the received message into a structure which is usable by others */ 642 if (i % 2) { 643 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);CHKERRQ(ierr); 644 flg_v[2*recv_status.MPI_SOURCE] = i/2; 645 *nvals = *nvals/bs2; 646 } else { 647 ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 648 flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 649 *nvals = *nvals/2; /* This message has both row indices and col indices */ 650 } 651 652 /* Check if we have both messages from this proc */ 653 i1 = flg_v[2*recv_status.MPI_SOURCE]; 654 i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 655 if (i1 != -1 && i2 != -1) { 656 *rows = stash->rindices[i2]; 657 *cols = *rows + *nvals; 658 *vals = stash->rvalues[i1]; 659 *flg = 1; 660 stash->nprocessed ++; 661 match_found = PETSC_TRUE; 662 } 663 } 664 PetscFunctionReturn(0); 665 } 666