#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE 10000

static PetscErrorCode MatStashScatterBegin_Ref(Mat, MatStash *, PetscInt *);
PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
#if !defined(PETSC_HAVE_MPIUNI)
static PetscErrorCode MatStashScatterBegin_BTS(Mat, MatStash *, PetscInt *);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash *);
#endif

/*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash that simply adds entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size. Used when stashing blocks of values.

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm, PetscInt bs, MatStash *stash)
{
  PetscInt  max, *opt, nopt, i;
  PetscBool flg;

  PetscFunctionBegin;
  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag1));
  PetscCall(PetscCommGetNewTag(stash->comm, &stash->tag2));
  PetscCallMPI(MPI_Comm_size(stash->comm, &stash->size));
  PetscCallMPI(MPI_Comm_rank(stash->comm, &stash->rank));
  PetscCall(PetscMalloc1(2 * stash->size, &stash->flg_v));
  for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  PetscCall(PetscMalloc1(nopt, &opt));
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-matstash_initial_size", opt, &nopt, &flg));
  if (flg) {
    if (nopt == 1) max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt) max = opt[stash->rank];
    else max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscCall(PetscFree(opt));
  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = NULL;
  stash->space      = NULL;

  stash->send_waits  = NULL;
  stash->recv_waits  = NULL;
  stash->send_status = NULL;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = NULL;
  stash->rvalues     = NULL;
  stash->rindices    = NULL;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_reproduce", &stash->reproduce, NULL));
#if !defined(PETSC_HAVE_MPIUNI)
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-matstash_legacy", &flg, NULL));
  if (!flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
#endif
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
#if !defined(PETSC_HAVE_MPIUNI)
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
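
/*
  Typical lifecycle of a stash over the assembly cycle (illustrative sketch only; the exact
  call sites vary by matrix type, and the member names mat->stash and mat->bstash are
  assumed to follow the Mat definition in petsc/private/matimpl.h):

    MatStashCreate_Private(comm, 1, &mat->stash);    // scalar stash, at matrix creation
    MatStashCreate_Private(comm, bs, &mat->bstash);  // blocked stash with block size bs
    ... MatSetValues() deposits off-process rows into the stash ...
    ... MatAssemblyBegin()/MatAssemblyEnd() scatter, receive, and insert them ...
    MatStashDestroy_Private(&mat->stash);            // at matrix destruction
    MatStashDestroy_Private(&mat->bstash);
*/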

/*
  MatStashDestroy_Private - Destroys the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));
  if (stash->ScatterDestroy) PetscCall((*stash->ScatterDestroy)(stash));
  stash->space = NULL;
  PetscCall(PetscFree(stash->flg_v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashScatterEnd_Private - This is called as the final stage of the
  scatter. The final stages of message passing are done here, and
  all the memory used for message passing is cleaned up. This
  routine also resets the stash, and deallocates the memory used
  for the stash. It also keeps track of the current memory usage
  so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCall((*stash->ScatterEnd)(stash));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscInt    nsends = stash->nsends, bs2, oldnmax, i;
  MPI_Status *send_status;

  PetscFunctionBegin;
  for (i = 0; i < 2 * stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    PetscCall(PetscMalloc1(2 * nsends, &send_status));
    PetscCallMPI(MPI_Waitall(2 * nsends, stash->send_waits, send_status));
    PetscCall(PetscFree(send_status));
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used; this way
     the wasted space is reduced the next time this stash is used.
     Only update oldnmax if it increases */
  if (stash->n) {
    bs2     = stash->bs * stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));

  stash->space = NULL;

  PetscCall(PetscFree(stash->send_waits));
  PetscCall(PetscFree(stash->recv_waits));
  PetscCall(PetscFree2(stash->svalues, stash->sindices));
  PetscCall(PetscFree(stash->rvalues[0]));
  PetscCall(PetscFree(stash->rvalues));
  PetscCall(PetscFree(stash->rindices[0]));
  PetscCall(PetscFree(stash->rindices));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashGetInfo_Private - Gets the relevant statistics of the stash

  Input Parameter:
  stash    - the stash

  Output Parameters:
  nstash   - the size of the stash. Indicates the number of values stored.
  reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash, PetscInt *nstash, PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs * stash->bs;

  PetscFunctionBegin;
  if (nstash) *nstash = stash->n * bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else *reallocs = stash->reallocs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashSetInitialSize_Private - Sets the initial size of the stash

  Input Parameters:
  stash - the stash
  max   - the value that is used as the max size of the stash.
          This value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash, PetscInt max)
{
  PetscFunctionBegin;
  stash->umax = max;
  PetscFunctionReturn(PETSC_SUCCESS);
}
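
/*
  How -matstash_initial_size is interpreted (sketch derived from the option handling in
  MatStashCreate_Private() above; umax set here or there feeds MatStashExpand_Private()):
  a single value applies to every rank, while a comma-separated list gives per-rank values,
  with ranks beyond the end of the list falling back to the default. For example:

    -matstash_initial_size 100000          # every rank reserves space for ~100000 stashed values
    -matstash_initial_size 0,200000,0,0    # only rank 1 pre-sizes its stash; the rest use the default
*/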

/*
  MatStashExpand_Private - Expands the stash. This function is called
  when the space in the stash is not sufficient to add the new values
  being inserted into the stash.

  Input Parameters:
  stash - the stash
  incr  - the minimum increase requested

  Note:
  This routine doubles the currently used memory.
*/
static PetscErrorCode MatStashExpand_Private(MatStash *stash, PetscInt incr)
{
  PetscInt newnmax, bs2 = stash->bs * stash->bs;

  PetscFunctionBegin;
  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax / bs2;
    else newnmax = DEFAULT_STASH_SIZE / bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax / bs2;
    else newnmax = stash->oldnmax / bs2;
  } else newnmax = stash->nmax * 2;
  if (newnmax < (stash->nmax + incr)) newnmax += 2 * incr;

  /* Get a MatStashSpace and attach it to stash */
  PetscCall(PetscMatStashSpaceGet(bs2, newnmax, &stash->space));
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global column indices corresponding to each of the values.
  values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscBool ignorezeroentries)
{
  PetscInt           i, k, cnt = 0;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  k     = space->local_used;
  for (i = 0; i < n; i++) {
    if (ignorezeroentries && values && values[i] == 0.0) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values ? values[i] : 0.0;
    k++;
    cnt++;
  }
  stash->n += cnt;
  space->local_used += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
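
/*
  Caller-side sketch (illustrative; assumes this rank owns global rows [rstart, rend) and
  that the stash member name follows petsc/private/matimpl.h — real MatSetValues()
  implementations differ in detail): values destined for rows this process does not own are
  deposited into the stash instead of local storage.

    if (row < rstart || row >= rend) {
      PetscCall(MatStashValuesRow_Private(&mat->stash, row, ncols, cols, rowvals, PETSC_FALSE));
    } else {
      // insert into the local (diagonal/off-diagonal) storage as usual
    }
*/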

/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - consecutive values are separated by a distance of stepval;
            this happens because the input is column-oriented.
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt stepval, PetscBool ignorezeroentries)
{
  PetscInt           i, k, cnt = 0;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  k     = space->local_used;
  for (i = 0; i < n; i++) {
    if (ignorezeroentries && values && values[i * stepval] == 0.0) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values ? values[i * stepval] : 0.0;
    k++;
    cnt++;
  }
  stash->n += cnt;
  space->local_used += cnt;
  space->local_remaining -= cnt;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
{
  PetscInt           i, j, k, bs2, bs = stash->bs, l;
  const PetscScalar *vals;
  PetscScalar       *array;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  l     = space->local_used;
  bs2   = bs * bs;
  for (i = 0; i < n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2 * l;
    vals  = values + idx * bs2 * n + bs * i;
    for (j = 0; j < bs; j++) {
      for (k = 0; k < bs; k++) array[k * bs] = values ? vals[k] : 0.0;
      array++;
      vals += cmax * bs;
    }
    l++;
  }
  stash->n += n;
  space->local_used += n;
  space->local_remaining -= n;
  PetscFunctionReturn(PETSC_SUCCESS);
}
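
/*
  Addressing in the row-oriented blocked case (worked sketch, assuming the caller passes a
  full block-row so that n == cmax): the input is an (rmax*bs) x (cmax*bs) row-major array,
  so scalar (j,k) of block (idx,i) lives at values[idx*bs*bs*cmax + j*cmax*bs + i*bs + k].
  For example, with bs = 2, cmax = n = 3, idx = 1, i = 2 the block starts at offset
  1*4*3 + 2*2 = 16, and the loop above walks it with a stride of cmax*bs = 6 scalars per
  block row while storing the copy column-oriented in the stash.
*/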

/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash, PetscInt row, PetscInt n, const PetscInt idxn[], const PetscScalar values[], PetscInt rmax, PetscInt cmax, PetscInt idx)
{
  PetscInt           i, j, k, bs2, bs = stash->bs, l;
  const PetscScalar *vals;
  PetscScalar       *array;
  PetscMatStashSpace space = stash->space;

  PetscFunctionBegin;
  if (!space || space->local_remaining < n) PetscCall(MatStashExpand_Private(stash, n));
  space = stash->space;
  l     = space->local_used;
  bs2   = bs * bs;
  for (i = 0; i < n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2 * l;
    vals  = values + idx * bs2 * n + bs * i;
    for (j = 0; j < bs; j++) {
      for (k = 0; k < bs; k++) array[k] = values ? vals[k] : 0.0;
      array += bs;
      vals += rmax * bs;
    }
    l++;
  }
  stash->n += n;
  space->local_used += n;
  space->local_remaining -= n;
  PetscFunctionReturn(PETSC_SUCCESS);
}
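
/*
  Two scatter implementations back the function-pointer interface set up in
  MatStashCreate_Private() (sketch of the difference; the details are in the functions
  below):

  - The _Ref path (selected with -matstash_legacy) sends two messages per destination rank,
    one on tag1 carrying the row/column indices and one on tag2 carrying the values, and
    MatStashScatterGetMesg_Ref() waits until both halves from a source rank have arrived.

  - The _BTS path (the default when real MPI is available) sorts and compresses the stash
    into (row, col, bs2 values) blocks, builds a resized MPI datatype for them, and uses
    PetscCommBuildTwoSidedFReq() to discover the communication pattern and post a single
    send per destination rank.
*/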

/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Note:
  In the case of the blocked stash the 'owners' ranges are specified in
  blocked global indices, while for the regular stash they are in regular
  global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscFunctionBegin;
  PetscCall((*stash->ScatterBegin)(mat, stash, owners));
  PetscFunctionReturn(PETSC_SUCCESS);
}
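
/*
  Shape of the 'owners' argument (sketch; the specific arrays passed are chosen by the
  callers): it is an array of size+1 monotone offsets, so rank p owns global rows
  owners[p] <= row < owners[p+1]. For the scalar stash this is typically the row layout
  range (mat->rmap->range); for the blocked stash the same offsets are assumed to be
  expressed in block rows, i.e. divided by the block size.
*/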

static PetscErrorCode MatStashScatterBegin_Ref(Mat mat, MatStash *stash, PetscInt *owners)
{
  PetscInt           *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
  PetscInt            size = stash->size, nsends;
  PetscInt            count, *sindices, **rindices, i, j, idx, lastidx, l;
  PetscScalar       **rvalues, *svalues;
  MPI_Comm            comm = stash->comm;
  MPI_Request        *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
  PetscMPIInt        *sizes, *nlengths, nreceives;
  PetscInt           *sp_idx, *sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace  space, space_next;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs * stash->bs;

  /* first count number of contributors to each processor */
  PetscCall(PetscCalloc1(size, &nlengths));
  PetscCall(PetscMalloc1(stash->n + 1, &owner));

  i       = j = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l = 0; l < space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j < size; j++) {
        if (idx >= owners[j] && idx < owners[j + 1]) {
          nlengths[j]++;
          owner[i] = j;
          break;
        }
      }
      i++;
    }
    space = space_next;
  }

  /* Now check which procs get messages - and compute nsends. */
  PetscCall(PetscCalloc1(size, &sizes));
  for (i = 0, nsends = 0; i < size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1;
      nsends++;
    }
  }

  {
    PetscMPIInt *onodes, *olengths;
    /* Determine the number of messages to expect, their lengths, and the from-ids */
    PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
    PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
    /* since row and col indices are sent together, the lengths are multiplied by 2 */
    for (i = 0; i < nreceives; i++) olengths[i] *= 2;
    PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
    /* values are of size 'bs2'; scale the lengths by bs2 (and remove the earlier factor of 2) */
    for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
    PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
    PetscCall(PetscFree(onodes));
    PetscCall(PetscFree(olengths));
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
  PetscCall(PetscMalloc1(2 * nsends, &send_waits));
  PetscCall(PetscMalloc2(size, &startv, size, &starti));
  /* use 2 sends: the first with all_a, the next with all_i and all_j */
  startv[0] = 0;
  starti[0] = 0;
  for (i = 1; i < size; i++) {
    startv[i] = startv[i - 1] + nlengths[i - 1];
    starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l = 0; l < space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt     k;
        PetscScalar *buf1, *buf2;
        buf1 = svalues + bs2 * startv[j];
        buf2 = space->val + bs2 * l;
        for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]               = sp_idx[l];
      sindices[starti[j] + nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];

  for (i = 0, count = 0; i < size; i++) {
    if (sizes[i]) {
      PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
      PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
    }
  }
#if defined(PETSC_USE_INFO)
  PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT " \n", nsends));
  for (i = 0; i < size; i++) {
    if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
  }
#endif
  PetscCall(PetscFree(nlengths));
  PetscCall(PetscFree(owner));
  PetscCall(PetscFree2(startv, starti));
  PetscCall(PetscFree(sizes));

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));

  for (i = 0; i < nreceives; i++) {
    recv_waits[2 * i]     = recv_waits1[i];
    recv_waits[2 * i + 1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscCall(PetscFree(recv_waits1));
  PetscCall(PetscFree(recv_waits2));

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatStashScatterGetMesg_Private - This function waits on the receives posted
  in the function MatStashScatterBegin_Private() and returns one message at
  a time to the calling function. If no messages are left, it indicates this
  by setting flg = 0, else it sets flg = 1.

  Input Parameter:
  stash - the stash

  Output Parameters:
  nvals - the number of entries in the current message.
  rows  - an array of row indices (or blocked indices) corresponding to the values
  cols  - an array of column indices (or blocked indices) corresponding to the values
  vals  - the values
  flg   - 0 indicates that no more messages are left, and the current call has no values associated.
          1 indicates that the current call successfully received a message, and the
            other output parameters nvals, rows, cols, vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
{
  PetscFunctionBegin;
  PetscCall((*stash->ScatterGetMesg)(stash, nvals, rows, cols, vals, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}
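
/*
  A typical assembly-end driver loop (sketch only; it mirrors how the parallel Mat types
  consume the stash, and mat->rmap->range is assumed to be the row ownership array):

    PetscMPIInt  n;
    PetscInt    *rows, *cols, flg;
    PetscScalar *vals;

    PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
    for (;;) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &rows, &cols, &vals, &flg));
      if (!flg) break;
      // insert the n received (rows[i], cols[i], vals[i]) triples into the local matrix
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
*/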

PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash, PetscMPIInt *nvals, PetscInt **rows, PetscInt **cols, PetscScalar **vals, PetscInt *flg)
{
  PetscMPIInt i, *flg_v = stash->flg_v, i1, i2;
  PetscInt    bs2;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  PetscFunctionBegin;
  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) PetscFunctionReturn(PETSC_SUCCESS);

  bs2 = stash->bs * stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      i = stash->reproduce_count++;
      PetscCallMPI(MPI_Wait(stash->recv_waits + i, &recv_status));
    } else {
      PetscCallMPI(MPI_Waitany(2 * stash->nrecvs, stash->recv_waits, &i, &recv_status));
    }
    PetscCheck(recv_status.MPI_SOURCE >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_SCALAR, nvals));
      flg_v[2 * recv_status.MPI_SOURCE] = i / 2;
      *nvals = *nvals / bs2;
    } else {
      PetscCallMPI(MPI_Get_count(&recv_status, MPIU_INT, nvals));
      flg_v[2 * recv_status.MPI_SOURCE + 1] = i / 2;
      *nvals = *nvals / 2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2 * recv_status.MPI_SOURCE];
    i2 = flg_v[2 * recv_status.MPI_SOURCE + 1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !defined(PETSC_HAVE_MPIUNI)
typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1]; /* Actually an array of length bs2 */
} MatStashBlock;

static PetscErrorCode MatStashSortCompress_Private(MatStash *stash, InsertMode insertmode)
{
  PetscMatStashSpace space;
  PetscInt           n = stash->n, bs = stash->bs, bs2 = bs * bs, cnt, *row, *col, *perm, rowstart, i;
  PetscScalar      **valptr;

  PetscFunctionBegin;
  PetscCall(PetscMalloc4(n, &row, n, &col, n, &valptr, n, &perm));
  for (space = stash->space_head, cnt = 0; space; space = space->next) {
    for (i = 0; i < space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i * bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  PetscCheck(cnt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatStash n %" PetscInt_FMT ", but counted %" PetscInt_FMT " entries", n, cnt);
  PetscCall(PetscSortIntWithArrayPair(n, row, col, perm));
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart = 0, cnt = 0, i = 1; i <= n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      PetscCall(PetscSortIntWithArray(i - rowstart, &col[rowstart], &perm[rowstart]));
      for (colstart = rowstart; colstart < i;) { /* Compress multiple insertions to the same location */
        PetscInt       j, l;
        MatStashBlock *block;
        PetscCall(PetscSegBufferGet(stash->segsendblocks, 1, &block));
        block->row = row[rowstart];
        block->col = col[colstart];
        PetscCall(PetscArraycpy(block->vals, valptr[perm[colstart]], bs2));
        for (j = colstart + 1; j < i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l = 0; l < bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            PetscCall(PetscArraycpy(block->vals, valptr[perm[j]], bs2));
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  PetscCall(PetscFree4(row, col, valptr, perm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  PetscFunctionBegin;
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2], stype;
    /*
      DummyBlock is a type having standard layout, even when PetscScalar is C++ std::complex.
      std::complex itself has standard layout, so does DummyBlock, recursively.
      To be compatible with C++ std::complex, complex implementations on GPUs must also have standard layout,
      though they can have different alignment, e.g., 16 bytes for double complex, instead of 8 bytes as in GCC stdlibc++.
      offsetof(type, member) only requires type to have standard layout. Ref. https://en.cppreference.com/w/cpp/types/offsetof.

      We can test whether std::complex has standard layout with the following code:
        #include <iostream>
        #include <type_traits>
        #include <complex>
        int main() {
          std::cout << std::boolalpha;
          std::cout << std::is_standard_layout<std::complex<double> >::value << '\n';
        }
      Output: true
    */
    struct DummyBlock {
      PetscInt    row, col;
      PetscScalar vals;
    };

    stash->blocktype_size = offsetof(struct DummyBlock, vals) + bs2 * sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segsendblocks));
    PetscCall(PetscSegBufferCreate(stash->blocktype_size, 1, &stash->segrecvblocks));
    PetscCall(PetscSegBufferCreate(sizeof(MatStashFrame), 1, &stash->segrecvframe));
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0]    = offsetof(struct DummyBlock, row);
    displs[1]    = offsetof(struct DummyBlock, vals);
    types[0]     = MPIU_INT;
    types[1]     = MPIU_SCALAR;
    PetscCallMPI(MPI_Type_create_struct(2, blocklens, displs, types, &stype));
    PetscCallMPI(MPI_Type_commit(&stype));
    PetscCallMPI(MPI_Type_create_resized(stype, 0, stash->blocktype_size, &stash->blocktype));
    PetscCallMPI(MPI_Type_commit(&stash->blocktype));
    PetscCallMPI(MPI_Type_free(&stype));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
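
/*
  Worked example of blocktype_size (sketch; the numbers assume a common configuration with
  8-byte PetscScalar): with 4-byte PetscInt and bs = 1, offsetof(vals) is 8, so a block
  occupies 8 + 1*8 = 16 bytes and needs no padding; with 8-byte PetscInt (64-bit indices)
  and bs = 2, offsetof(vals) is 16 and a block occupies 16 + 4*8 = 48 bytes, already a
  multiple of sizeof(PetscInt). The MPI_Type_create_resized() call above sets the
  datatype's extent to this padded size, so consecutive blocks packed back-to-back in the
  PetscSegBuffer can be sent as a contiguous array of stash->blocktype.
*/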

/*
  Callback invoked after the target rank has initiated the receive of the rendezvous message.
  Here we post the main sends.
*/
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rankid, PetscMPIInt rank, void *sdata, MPI_Request req[], void *ctx)
{
  MatStash       *stash = (MatStash *)ctx;
  MatStashHeader *hdr   = (MatStashHeader *)sdata;

  PetscFunctionBegin;
  PetscCheck(rank == stash->sendranks[rankid], comm, PETSC_ERR_PLIB, "BTS Send rank %d does not match sendranks[%d] %d", rank, rankid, stash->sendranks[rankid]);
  PetscCallMPI(MPI_Isend(stash->sendframes[rankid].buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
  stash->sendframes[rankid].count   = hdr->count;
  stash->sendframes[rankid].pending = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Callback invoked by the target after receiving the rendezvous message.
  Here we post the main recvs.
*/
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm, const PetscMPIInt tag[], PetscMPIInt rank, void *rdata, MPI_Request req[], void *ctx)
{
  MatStash       *stash = (MatStash *)ctx;
  MatStashHeader *hdr   = (MatStashHeader *)rdata;
  MatStashFrame  *frame;

  PetscFunctionBegin;
  PetscCall(PetscSegBufferGet(stash->segrecvframe, 1, &frame));
  PetscCall(PetscSegBufferGet(stash->segrecvblocks, hdr->count, &frame->buffer));
  PetscCallMPI(MPI_Irecv(frame->buffer, hdr->count, stash->blocktype, rank, tag[0], comm, &req[0]));
  frame->count   = hdr->count;
  frame->pending = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}
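
/*
  How the two callbacks above are reached (sketch of the rendezvous, driven by
  MatStashScatterBegin_BTS() below): PetscCommBuildTwoSidedFReq() delivers each small
  MatStashHeader (just the block count) to its target rank; on the receiver it invokes
  MatStashBTSRecv_Private() to post an MPI_Irecv of 'count' blocks of stash->blocktype,
  and on the sender it invokes MatStashBTSSend_Private() to post the matching MPI_Isend.
  When the communication pattern is being reused (first_assembly_done), the callbacks are
  instead called directly with a fresh tag and the rendezvous is skipped.
*/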

/*
  owners[] contains the ownership ranges; it may be indexed by either blocks or scalars
*/
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat, MatStash *stash, PetscInt owners[])
{
  size_t nblocks;
  char  *sendblocks;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
    PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted others added");
  }

  PetscCall(MatStashBlockTypeSetUp(stash));
  PetscCall(MatStashSortCompress_Private(stash, mat->insertmode));
  PetscCall(PetscSegBufferGetSize(stash->segsendblocks, &nblocks));
  PetscCall(PetscSegBufferExtractInPlace(stash->segsendblocks, &sendblocks));
  if (stash->first_assembly_done) { /* Set up sendhdrs and sendframes for each rank that we sent to before */
    PetscInt i;
    size_t   b;
    for (i = 0, b = 0; i < stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b * stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for (; b < nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock *)&sendblocks[b * stash->blocktype_size];
        PetscCheck(sendblock_b->row >= owners[stash->sendranks[i]], stash->comm, PETSC_ERR_ARG_WRONG, "MAT_SUBSET_OFF_PROC_ENTRIES set, but row %" PetscInt_FMT " owned by %d not communicated in initial assembly", sendblock_b->row, stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i] + 1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else { /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t   i, rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart = 0; rowstart < nblocks;) {
      PetscInt       owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
      PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
      if (owner < 0) owner = -(owner + 2);
      for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
        if (sendblock_i->row >= owners[owner + 1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    PetscCall(PetscMalloc3(stash->nsendranks, &stash->sendranks, stash->nsendranks, &stash->sendhdr, stash->nsendranks, &stash->sendframes));

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart = 0; rowstart < nblocks;) {
      PetscInt       owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock *)&sendblocks[rowstart * stash->blocktype_size];
      PetscCall(PetscFindInt(sendblock_rowstart->row, stash->size + 1, owners, &owner));
      if (owner < 0) owner = -(owner + 2);
      stash->sendranks[sendno] = owner;
      for (i = rowstart + 1; i < nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
        if (sendblock_i->row >= owners[owner + 1]) break;
      }
      stash->sendframes[sendno].buffer  = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count      = i - rowstart;
      sendno++;
      rowstart = i;
    }
    PetscCheck(sendno == stash->nsendranks, stash->comm, PETSC_ERR_PLIB, "BTS counted %d sendranks, but %" PetscInt_FMT " sends", stash->nsendranks, sendno);
  }

  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
     message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i = 0; i < nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock *)&sendblocks[i * stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row + 1);
    }
  }

  if (stash->first_assembly_done) {
    PetscMPIInt i, tag;
    PetscCall(PetscCommGetNewTag(stash->comm, &tag));
    for (i = 0; i < stash->nrecvranks; i++) PetscCall(MatStashBTSRecv_Private(stash->comm, &tag, stash->recvranks[i], &stash->recvhdr[i], &stash->recvreqs[i], stash));
    for (i = 0; i < stash->nsendranks; i++) PetscCall(MatStashBTSSend_Private(stash->comm, &tag, i, stash->sendranks[i], &stash->sendhdr[i], &stash->sendreqs[i], stash));
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    PetscCall(PetscCommBuildTwoSidedFReq(stash->comm, 1, MPIU_INT, stash->nsendranks, stash->sendranks, (PetscInt *)stash->sendhdr, &stash->nrecvranks, &stash->recvranks, (PetscInt *)&stash->recvhdr, 1, &stash->sendreqs, &stash->recvreqs, MatStashBTSSend_Private, MatStashBTSRecv_Private, stash));
    PetscCall(PetscMalloc2(stash->nrecvranks, &stash->some_indices, stash->nrecvranks, &stash->some_statuses));
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  PetscCall(PetscSegBufferExtractInPlace(stash->segrecvframe, &stash->recvframes));
  stash->recvframe_active    = NULL;
  stash->recvframe_i         = 0;
  stash->some_i              = 0;
  stash->some_count          = 0;
  stash->recvcount           = 0;
  stash->first_assembly_done = mat->assembly_subset; /* See the same logic in VecAssemblyBegin_MPI_BTS */
  stash->insertmode          = &mat->insertmode;
  PetscFunctionReturn(PETSC_SUCCESS);
}
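
/*
  Reusing the communication pattern (sketch; relies on the Mat option that sets
  mat->assembly_subset): if later assemblies will only communicate the same rows (or a
  subset of them) that were communicated in the first assembly, an application can ask for
  the send/receive ranks and headers computed here to be kept, e.g.

    PetscCall(MatSetOption(A, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));

  With that option, first_assembly_done remains true after the first assembly, so the next
  MatStashScatterBegin_BTS() skips the rendezvous and reposts the sends and receives directly.
*/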

static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash, PetscMPIInt *n, PetscInt **row, PetscInt **col, PetscScalar **val, PetscInt *flg)
{
  MatStashBlock *block;

  PetscFunctionBegin;
  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) PetscFunctionReturn(PETSC_SUCCESS); /* Done */
      PetscCallMPI(MPI_Waitsome(stash->nrecvranks, stash->recvreqs, &stash->some_count, stash->some_indices, stash->use_status ? stash->some_statuses : MPI_STATUSES_IGNORE));
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      PetscCallMPI(MPI_Get_count(&stash->some_statuses[stash->some_i], stash->blocktype, &stash->recvframe_count));
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      PetscCheck(*stash->insertmode != INSERT_VALUES || block->row < 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling INSERT_VALUES, but rank %d requested ADD_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
      PetscCheck(*stash->insertmode != ADD_VALUES || block->row >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Assembling ADD_VALUES, but rank %d requested INSERT_VALUES", stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n    = 1;
  block = (MatStashBlock *)&((char *)stash->recvframe_active->buffer)[stash->recvframe_i * stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row = &block->row;
  *col = &block->col;
  *val = block->vals;
  stash->recvframe_i++;
  *flg = 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCallMPI(MPI_Waitall(stash->nsendranks, stash->sendreqs, MPI_STATUSES_IGNORE));
  if (stash->first_assembly_done) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
    PetscCall(PetscSegBufferExtractInPlace(stash->segrecvblocks, NULL));
  } else { /* No reuse, so collect everything. */
    PetscCall(MatStashScatterDestroy_BTS(stash));
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used; this way
     the wasted space is reduced the next time this stash is used.
     Only update oldnmax if it increases */
  if (stash->n) {
    PetscInt bs2     = stash->bs * stash->bs;
    PetscInt oldnmax = ((int)(stash->n * 1.1) + 5) * bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  PetscCall(PetscMatStashSpaceDestroy(&stash->space_head));

  stash->space = NULL;

  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscFunctionBegin;
  PetscCall(PetscSegBufferDestroy(&stash->segsendblocks));
  PetscCall(PetscSegBufferDestroy(&stash->segrecvframe));
  stash->recvframes = NULL;
  PetscCall(PetscSegBufferDestroy(&stash->segrecvblocks));
  if (stash->blocktype != MPI_DATATYPE_NULL) PetscCallMPI(MPI_Type_free(&stash->blocktype));
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  PetscCall(PetscFree3(stash->sendranks, stash->sendhdr, stash->sendframes));
  PetscCall(PetscFree(stash->sendreqs));
  PetscCall(PetscFree(stash->recvreqs));
  PetscCall(PetscFree(stash->recvranks));
  PetscCall(PetscFree(stash->recvhdr));
  PetscCall(PetscFree2(stash->some_indices, stash->some_statuses));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif