1 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> 4 #include <petscdm.h> 5 6 /* linked list methods 7 * 8 * PetscCDCreate 9 */ 10 PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out) 11 { 12 PetscCoarsenData *ail; 13 14 PetscFunctionBegin; 15 /* allocate pool, partially */ 16 PetscCall(PetscNew(&ail)); 17 *a_out = ail; 18 ail->pool_list.next = NULL; 19 ail->pool_list.array = NULL; 20 ail->chk_sz = 0; 21 /* allocate array */ 22 ail->size = a_size; 23 PetscCall(PetscCalloc1(a_size, &ail->array)); 24 ail->extra_nodes = NULL; 25 ail->mat = NULL; 26 PetscFunctionReturn(PETSC_SUCCESS); 27 } 28 29 /* PetscCDDestroy 30 */ 31 PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail) 32 { 33 PetscCDArrNd *n = &ail->pool_list; 34 35 PetscFunctionBegin; 36 n = n->next; 37 while (n) { 38 PetscCDArrNd *lstn = n; 39 n = n->next; 40 PetscCall(PetscFree(lstn)); 41 } 42 if (ail->pool_list.array) PetscCall(PetscFree(ail->pool_list.array)); 43 PetscCall(PetscFree(ail->array)); 44 if (ail->mat) PetscCall(MatDestroy(&ail->mat)); 45 /* delete this (+agg+pool array) */ 46 PetscCall(PetscFree(ail)); 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 /* PetscCDSetChunkSize 51 */ 52 PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *ail, PetscInt a_sz) 53 { 54 PetscFunctionBegin; 55 ail->chk_sz = a_sz; 56 PetscFunctionReturn(PETSC_SUCCESS); 57 } 58 59 /* PetscCDGetNewNode 60 */ 61 static PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id) 62 { 63 PetscFunctionBegin; 64 *a_out = NULL; /* squelch -Wmaybe-uninitialized */ 65 if (ail->extra_nodes) { 66 PetscCDIntNd *node = ail->extra_nodes; 67 ail->extra_nodes = node->next; 68 node->gid = a_id; 69 node->next = NULL; 70 *a_out = node; 71 } else { 72 if (!ail->pool_list.array) { 73 if (!ail->chk_sz) ail->chk_sz = 10; /* use a chuck size of ail->size? */ 74 PetscCall(PetscMalloc1(ail->chk_sz, &ail->pool_list.array)); 75 ail->new_node = ail->pool_list.array; 76 ail->new_left = ail->chk_sz; 77 ail->new_node->next = NULL; 78 } else if (!ail->new_left) { 79 PetscCDArrNd *node; 80 PetscCall(PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node)); 81 node->array = (PetscCDIntNd *)(node + 1); 82 node->next = ail->pool_list.next; 83 ail->pool_list.next = node; 84 ail->new_left = ail->chk_sz; 85 ail->new_node = node->array; 86 } 87 ail->new_node->gid = a_id; 88 ail->new_node->next = NULL; 89 *a_out = ail->new_node++; 90 ail->new_left--; 91 } 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 /* PetscCDIntNdSetID 96 */ 97 PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id) 98 { 99 PetscFunctionBegin; 100 a_this->gid = a_id; 101 PetscFunctionReturn(PETSC_SUCCESS); 102 } 103 104 /* PetscCDIntNdGetID 105 */ 106 PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid) 107 { 108 PetscFunctionBegin; 109 *a_gid = a_this->gid; 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 113 /* PetscCDGetHeadPos 114 */ 115 PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos) 116 { 117 PetscFunctionBegin; 118 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_idx >= ail->size: a_idx=%" PetscInt_FMT ".", a_idx); 119 *pos = ail->array[a_idx]; 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 /* PetscCDGetNextPos 124 */ 125 PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos) 126 { 127 PetscFunctionBegin; 128 PetscCheck(*pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "NULL input position."); 129 *pos = (*pos)->next; 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 /* PetscCDAppendID 134 */ 135 PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id) 136 { 137 PetscCDIntNd *n, *n2; 138 139 PetscFunctionBegin; 140 PetscCall(PetscCDGetNewNode(ail, &n, a_id)); 141 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 142 if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n; 143 else { 144 do { 145 if (!n2->next) { 146 n2->next = n; 147 PetscCheck(!n->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n should not have a next"); 148 break; 149 } 150 n2 = n2->next; 151 } while (n2); 152 PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null"); 153 } 154 PetscFunctionReturn(PETSC_SUCCESS); 155 } 156 157 /* PetscCDAppendNode 158 */ 159 PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n) 160 { 161 PetscCDIntNd *n2; 162 163 PetscFunctionBegin; 164 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 165 if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n; 166 else { 167 do { 168 if (!n2->next) { 169 n2->next = a_n; 170 a_n->next = NULL; 171 break; 172 } 173 n2 = n2->next; 174 } while (n2); 175 PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null"); 176 } 177 PetscFunctionReturn(PETSC_SUCCESS); 178 } 179 180 /* PetscCDRemoveNextNode: a_last->next, this exposes single linked list structure to API (not used) 181 */ 182 PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last) 183 { 184 PetscCDIntNd *del; 185 186 PetscFunctionBegin; 187 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 188 PetscCheck(a_last->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_last should have a next"); 189 del = a_last->next; 190 a_last->next = del->next; 191 /* del->next = NULL; -- this still used in a iterator so keep it intact -- need to fix this with a double linked list */ 192 /* could reuse n2 but PetscCDAppendNode sometimes uses it */ 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 /* PetscCDPrint 197 */ 198 PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, PetscInt my0, MPI_Comm comm) 199 { 200 PetscCDIntNd *n, *n2; 201 PetscInt ii; 202 203 PetscFunctionBegin; 204 for (ii = 0; ii < ail->size; ii++) { 205 n2 = n = ail->array[ii]; 206 if (n) PetscCall(PetscSynchronizedPrintf(comm, "list %" PetscInt_FMT ":", ii + my0)); 207 while (n) { 208 PetscCall(PetscSynchronizedPrintf(comm, " %" PetscInt_FMT, n->gid)); 209 n = n->next; 210 } 211 if (n2) PetscCall(PetscSynchronizedPrintf(comm, "\n")); 212 } 213 PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT)); 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 /* PetscCDMoveAppend - take list in a_srcidx and appends to destidx 218 */ 219 PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx) 220 { 221 PetscCDIntNd *n; 222 223 PetscFunctionBegin; 224 PetscCheck(a_srcidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_srcidx); 225 PetscCheck(a_destidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_destidx); 226 PetscCheck(a_destidx != a_srcidx, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_destidx==a_srcidx %" PetscInt_FMT ".", a_destidx); 227 n = ail->array[a_destidx]; 228 if (!n) ail->array[a_destidx] = ail->array[a_srcidx]; 229 else { 230 do { 231 if (!n->next) { 232 n->next = ail->array[a_srcidx]; // append 233 break; 234 } 235 n = n->next; 236 } while (1); 237 } 238 ail->array[a_srcidx] = NULL; // empty 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 /* PetscCDRemoveAllAt - empty one list and move data to cache 243 */ 244 PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *ail, PetscInt a_idx) 245 { 246 PetscCDIntNd *rem, *n1; 247 248 PetscFunctionBegin; 249 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 250 rem = ail->array[a_idx]; 251 ail->array[a_idx] = NULL; 252 if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem; 253 else { 254 while (n1->next) n1 = n1->next; 255 n1->next = rem; 256 } 257 PetscFunctionReturn(PETSC_SUCCESS); 258 } 259 260 /* PetscCDCountAt 261 */ 262 PetscErrorCode PetscCDCountAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz) 263 { 264 PetscCDIntNd *n1; 265 PetscInt sz = 0; 266 267 PetscFunctionBegin; 268 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 269 n1 = ail->array[a_idx]; 270 while (n1) { 271 n1 = n1->next; 272 sz++; 273 } 274 *a_sz = sz; 275 PetscFunctionReturn(PETSC_SUCCESS); 276 } 277 278 /* PetscCDSize 279 */ 280 PetscErrorCode PetscCDCount(const PetscCoarsenData *ail, PetscInt *a_sz) 281 { 282 PetscInt sz = 0; 283 284 PetscFunctionBegin; 285 for (int ii = 0; ii < ail->size; ii++) { 286 PetscCDIntNd *n1 = ail->array[ii]; 287 while (n1) { 288 n1 = n1->next; 289 sz++; 290 } 291 } 292 *a_sz = sz; 293 PetscFunctionReturn(PETSC_SUCCESS); 294 } 295 296 /* PetscCDIsEmptyAt - Is the list empty? (not used) 297 */ 298 PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e) 299 { 300 PetscFunctionBegin; 301 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 302 *a_e = (PetscBool)(ail->array[a_idx] == NULL); 303 PetscFunctionReturn(PETSC_SUCCESS); 304 } 305 306 /* PetscCDGetNonemptyIS - used for C-F methods 307 */ 308 PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *ail, IS *a_mis) 309 { 310 PetscCDIntNd *n; 311 PetscInt ii, kk; 312 PetscInt *permute; 313 314 PetscFunctionBegin; 315 for (ii = kk = 0; ii < ail->size; ii++) { 316 n = ail->array[ii]; 317 if (n) kk++; 318 } 319 PetscCall(PetscMalloc1(kk, &permute)); 320 for (ii = kk = 0; ii < ail->size; ii++) { 321 n = ail->array[ii]; 322 if (n) permute[kk++] = ii; 323 } 324 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis)); 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 /* PetscCDGetMat 329 */ 330 PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat) 331 { 332 PetscFunctionBegin; 333 *a_mat = ail->mat; 334 PetscFunctionReturn(PETSC_SUCCESS); 335 } 336 337 /* PetscCDSetMat 338 */ 339 PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat) 340 { 341 PetscFunctionBegin; 342 if (ail->mat) { 343 PetscCall(MatDestroy(&ail->mat)); //should not happen 344 } 345 ail->mat = a_mat; 346 PetscFunctionReturn(PETSC_SUCCESS); 347 } 348 349 /* PetscCDClearMat 350 */ 351 PetscErrorCode PetscCDClearMat(PetscCoarsenData *ail) 352 { 353 PetscFunctionBegin; 354 ail->mat = NULL; 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 /* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers 359 */ 360 PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, PetscInt *a_sz, IS **a_local_is) 361 { 362 PetscCDIntNd *n; 363 PetscInt lsz, ii, kk, *idxs, jj, gid; 364 IS *is_loc = NULL; 365 366 PetscFunctionBegin; 367 for (ii = kk = 0; ii < ail->size; ii++) { 368 if (ail->array[ii]) kk++; 369 } 370 *a_sz = kk; 371 PetscCall(PetscMalloc1(kk, &is_loc)); 372 for (ii = kk = 0; ii < ail->size; ii++) { 373 for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */ 374 ; 375 if (lsz) { 376 PetscCall(PetscMalloc1(a_bs * lsz, &idxs)); 377 for (lsz = 0, n = ail->array[ii]; n; n = n->next) { 378 PetscCall(PetscCDIntNdGetID(n, &gid)); 379 for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj; 380 } 381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++])); 382 } 383 } 384 PetscCheck(*a_sz == kk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "*a_sz %" PetscInt_FMT " != kk %" PetscInt_FMT, *a_sz, kk); 385 *a_local_is = is_loc; /* out */ 386 PetscFunctionReturn(PETSC_SUCCESS); 387 } 388 389 /* edge for priority queue */ 390 typedef struct edge_tag { 391 PetscReal weight; 392 PetscInt lid0, gid1, ghost1_idx; 393 } Edge; 394 395 #define MY_MEPS (PETSC_MACHINE_EPSILON * 100) 396 static int gamg_hem_compare(const void *a, const void *b) 397 { 398 PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight; 399 return (va <= vb - MY_MEPS) ? 1 : (va > vb + MY_MEPS) ? -1 : 0; /* 0 for equal */ 400 } 401 402 /* 403 MatCoarsenApply_HEM_private - parallel heavy edge matching 404 405 Input Parameter: 406 . a_Gmat - global matrix of the graph 407 . n_iter - number of matching iterations 408 . threshold - threshold for filtering graphs 409 410 Output Parameter: 411 . a_locals_llist - array of list of local nodes rooted at local node 412 */ 413 static PetscErrorCode MatCoarsenApply_HEM_private(Mat a_Gmat, const PetscInt n_iter, const PetscReal threshold, PetscCoarsenData **a_locals_llist) 414 { 415 #define REQ_BF_SIZE 100 416 PetscBool isMPI; 417 MPI_Comm comm; 418 PetscInt ix, *ii, *aj, Iend, my0, ncomm_procs, bc_agg = -1, *rbuff = NULL, rbuff_sz = 0; 419 PetscMPIInt rank, size, comm_procs[REQ_BF_SIZE], *lid_max_pe; 420 const PetscInt nloc = a_Gmat->rmap->n, request_size = PetscCeilReal((PetscReal)sizeof(MPI_Request) / (PetscReal)sizeof(PetscInt)); 421 PetscInt *lid_cprowID; 422 PetscBool *lid_matched; 423 Mat_SeqAIJ *matA, *matB = NULL; 424 Mat_MPIAIJ *mpimat = NULL; 425 PetscScalar one = 1.; 426 PetscCoarsenData *agg_llists = NULL, *ghost_deleted_list = NULL, *bc_list = NULL; 427 Mat cMat, tMat, P; 428 MatScalar *ap; 429 IS info_is; 430 431 PetscFunctionBegin; 432 PetscCall(PetscObjectGetComm((PetscObject)a_Gmat, &comm)); 433 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 434 PetscCallMPI(MPI_Comm_size(comm, &size)); 435 PetscCall(MatGetOwnershipRange(a_Gmat, &my0, &Iend)); 436 PetscCall(ISCreate(comm, &info_is)); 437 PetscCall(PetscInfo(info_is, "Start %" PetscInt_FMT " iterations of HEM.\n", n_iter)); 438 439 PetscCall(PetscMalloc1(nloc, &lid_matched)); 440 PetscCall(PetscMalloc1(nloc, &lid_cprowID)); 441 PetscCall(PetscMalloc1(nloc, &lid_max_pe)); 442 443 PetscCall(PetscCDCreate(nloc, &agg_llists)); 444 PetscCall(PetscCDSetChunkSize(agg_llists, nloc + 1)); 445 *a_locals_llist = agg_llists; 446 /* add self to all lists */ 447 for (int kk = 0; kk < nloc; kk++) PetscCall(PetscCDAppendID(agg_llists, kk, my0 + kk)); 448 /* make a copy of the graph, this gets destroyed in iterates */ 449 PetscCall(MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat)); 450 PetscCall(MatConvert(cMat, MATAIJ, MAT_INPLACE_MATRIX, &cMat)); 451 isMPI = (PetscBool)(size > 1); 452 if (isMPI) { 453 /* list of deleted ghosts, should compress this */ 454 PetscCall(PetscCDCreate(size, &ghost_deleted_list)); 455 PetscCall(PetscCDSetChunkSize(ghost_deleted_list, 100)); 456 } 457 for (int iter = 0; iter < n_iter; iter++) { 458 const PetscScalar *lghost_max_ew, *lid_max_ew; 459 PetscBool *lghost_matched; 460 PetscMPIInt *lghost_pe, *lghost_max_pe; 461 Vec locMaxEdge, ghostMaxEdge, ghostMaxPE, locMaxPE; 462 PetscInt *lghost_gid, nEdges, nEdges0, num_ghosts = 0; 463 Edge *Edges; 464 const int n_sub_its = 1000; // in case of a bug, stop at some point 465 /* get submatrices of cMat */ 466 for (int kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1; 467 if (isMPI) { 468 mpimat = (Mat_MPIAIJ *)cMat->data; 469 matA = (Mat_SeqAIJ *)mpimat->A->data; 470 matB = (Mat_SeqAIJ *)mpimat->B->data; 471 if (!matB->compressedrow.use) { 472 /* force construction of compressed row data structure since code below requires it */ 473 PetscCall(MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0)); 474 } 475 /* set index into compressed row 'lid_cprowID' */ 476 for (ix = 0; ix < matB->compressedrow.nrows; ix++) { 477 PetscInt *ridx = matB->compressedrow.rindex, lid = ridx[ix]; 478 if (ridx[ix] >= 0) lid_cprowID[lid] = ix; 479 } 480 } else { 481 matA = (Mat_SeqAIJ *)cMat->data; 482 } 483 /* set matched flags: true for empty list */ 484 for (int kk = 0; kk < nloc; kk++) { 485 PetscCall(PetscCDCountAt(agg_llists, kk, &ix)); 486 if (ix > 0) lid_matched[kk] = PETSC_FALSE; 487 else lid_matched[kk] = PETSC_TRUE; // call deleted gids as matched 488 } 489 /* max edge and pe vecs */ 490 PetscCall(MatCreateVecs(cMat, &locMaxEdge, NULL)); 491 PetscCall(MatCreateVecs(cMat, &locMaxPE, NULL)); 492 /* get 'lghost_pe' & 'lghost_gid' & init. 'lghost_matched' using 'mpimat->lvec' */ 493 if (isMPI) { 494 Vec vec; 495 PetscScalar vval; 496 const PetscScalar *buf; 497 PetscCall(MatCreateVecs(cMat, &vec, NULL)); 498 PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts)); 499 /* lghost_matched */ 500 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 501 PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0; 502 PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); 503 } 504 PetscCall(VecAssemblyBegin(vec)); 505 PetscCall(VecAssemblyEnd(vec)); 506 PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 507 PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 508 PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */ 509 PetscCall(PetscMalloc1(num_ghosts, &lghost_matched)); 510 for (int kk = 0; kk < num_ghosts; kk++) { 511 lghost_matched[kk] = (PetscBool)(PetscRealPart(buf[kk]) != 0); // the proc of the ghost for now 512 } 513 PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf)); 514 /* lghost_pe */ 515 vval = (PetscScalar)(rank); 516 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 517 PetscCall(VecAssemblyBegin(vec)); 518 PetscCall(VecAssemblyEnd(vec)); 519 PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 520 PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 521 PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */ 522 PetscCall(PetscMalloc1(num_ghosts, &lghost_pe)); 523 for (int kk = 0; kk < num_ghosts; kk++) lghost_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the proc of the ghost for now 524 PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf)); 525 /* lghost_gid */ 526 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 527 vval = (PetscScalar)(gid); 528 PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 529 } 530 PetscCall(VecAssemblyBegin(vec)); 531 PetscCall(VecAssemblyEnd(vec)); 532 PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 533 PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 534 PetscCall(VecDestroy(&vec)); 535 PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'lghost_gid' */ 536 PetscCall(PetscMalloc1(num_ghosts, &lghost_gid)); 537 for (int kk = 0; kk < num_ghosts; kk++) lghost_gid[kk] = (PetscInt)PetscRealPart(buf[kk]); 538 PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf)); 539 } 540 // get 'comm_procs' (could hoist) 541 for (int kk = 0; kk < REQ_BF_SIZE; kk++) comm_procs[kk] = -1; 542 for (ix = 0, ncomm_procs = 0; ix < num_ghosts; ix++) { 543 PetscMPIInt proc = lghost_pe[ix], idx = -1; 544 for (int k = 0; k < ncomm_procs && idx == -1; k++) 545 if (comm_procs[k] == proc) idx = k; 546 if (idx == -1) { comm_procs[ncomm_procs++] = proc; } 547 PetscCheck(ncomm_procs != REQ_BF_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Receive request array too small: %d", (int)ncomm_procs); 548 } 549 /* count edges, compute initial 'locMaxEdge', 'locMaxPE' */ 550 nEdges0 = 0; 551 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 552 PetscReal max_e = 0., tt; 553 PetscScalar vval; 554 PetscInt lid = kk, max_pe = rank, pe, n; 555 ii = matA->i; 556 n = ii[lid + 1] - ii[lid]; 557 aj = PetscSafePointerPlusOffset(matA->j, ii[lid]); 558 ap = PetscSafePointerPlusOffset(matA->a, ii[lid]); 559 for (int jj = 0; jj < n; jj++) { 560 PetscInt lidj = aj[jj]; 561 if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) { 562 if (tt > max_e) max_e = tt; 563 if (lidj > lid) nEdges0++; 564 } 565 } 566 if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ 567 ii = matB->compressedrow.i; 568 n = ii[ix + 1] - ii[ix]; 569 ap = matB->a + ii[ix]; 570 aj = matB->j + ii[ix]; 571 for (int jj = 0; jj < n; jj++) { 572 if ((tt = PetscRealPart(ap[jj])) > threshold) { 573 if (tt > max_e) max_e = tt; 574 nEdges0++; 575 if ((pe = lghost_pe[aj[jj]]) > max_pe) max_pe = pe; 576 } 577 } 578 } 579 vval = max_e; 580 PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); 581 vval = (PetscScalar)max_pe; 582 PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES)); 583 if (iter == 0 && max_e <= MY_MEPS) { // add BCs to fake aggregate 584 lid_matched[lid] = PETSC_TRUE; 585 if (bc_agg == -1) { 586 bc_agg = lid; 587 PetscCall(PetscCDCreate(1, &bc_list)); 588 } 589 PetscCall(PetscCDRemoveAllAt(agg_llists, lid)); 590 PetscCall(PetscCDAppendID(bc_list, 0, my0 + lid)); 591 } 592 } 593 PetscCall(VecAssemblyBegin(locMaxEdge)); 594 PetscCall(VecAssemblyEnd(locMaxEdge)); 595 PetscCall(VecAssemblyBegin(locMaxPE)); 596 PetscCall(VecAssemblyEnd(locMaxPE)); 597 /* make 'ghostMaxEdge_max_ew', 'lghost_max_pe' */ 598 if (mpimat) { 599 const PetscScalar *buf; 600 PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxEdge)); 601 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 602 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 603 604 PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxPE)); 605 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 606 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 607 PetscCall(VecGetArrayRead(ghostMaxPE, &buf)); 608 PetscCall(PetscMalloc1(num_ghosts, &lghost_max_pe)); 609 for (int kk = 0; kk < num_ghosts; kk++) lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now 610 PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf)); 611 } 612 { // make lid_max_pe 613 const PetscScalar *buf; 614 PetscCall(VecGetArrayRead(locMaxPE, &buf)); 615 for (int kk = 0; kk < nloc; kk++) lid_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now 616 PetscCall(VecRestoreArrayRead(locMaxPE, &buf)); 617 } 618 /* setup sorted list of edges, and make 'Edges' */ 619 PetscCall(PetscMalloc1(nEdges0, &Edges)); 620 nEdges = 0; 621 for (int kk = 0, n; kk < nloc; kk++) { 622 const PetscInt lid = kk; 623 PetscReal tt; 624 ii = matA->i; 625 n = ii[lid + 1] - ii[lid]; 626 aj = PetscSafePointerPlusOffset(matA->j, ii[lid]); 627 ap = PetscSafePointerPlusOffset(matA->a, ii[lid]); 628 for (int jj = 0; jj < n; jj++) { 629 PetscInt lidj = aj[jj]; 630 if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) { 631 if (lidj > lid) { 632 Edges[nEdges].lid0 = lid; 633 Edges[nEdges].gid1 = lidj + my0; 634 Edges[nEdges].ghost1_idx = -1; 635 Edges[nEdges].weight = tt; 636 nEdges++; 637 } 638 } 639 } 640 if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbor */ 641 ii = matB->compressedrow.i; 642 n = ii[ix + 1] - ii[ix]; 643 ap = matB->a + ii[ix]; 644 aj = matB->j + ii[ix]; 645 for (int jj = 0; jj < n; jj++) { 646 if ((tt = PetscRealPart(ap[jj])) > threshold) { 647 Edges[nEdges].lid0 = lid; 648 Edges[nEdges].gid1 = lghost_gid[aj[jj]]; 649 Edges[nEdges].ghost1_idx = aj[jj]; 650 Edges[nEdges].weight = tt; 651 nEdges++; 652 } 653 } 654 } 655 } 656 PetscCheck(nEdges == nEdges0, PETSC_COMM_SELF, PETSC_ERR_SUP, "nEdges != nEdges0: %d %d", (int)nEdges0, (int)nEdges); 657 if (Edges) qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare); 658 659 PetscCall(PetscInfo(info_is, "[%d] HEM iteration %d with %d edges\n", rank, iter, (int)nEdges)); 660 661 /* projection matrix */ 662 PetscCall(MatCreate(comm, &P)); 663 PetscCall(MatSetType(P, MATAIJ)); 664 PetscCall(MatSetSizes(P, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 665 PetscCall(MatMPIAIJSetPreallocation(P, 1, NULL, 1, NULL)); 666 PetscCall(MatSeqAIJSetPreallocation(P, 1, NULL)); 667 PetscCall(MatSetUp(P)); 668 /* process - communicate - process */ 669 for (int sub_it = 0, old_num_edge = 0; /* sub_it < n_sub_its */; /* sub_it++ */) { 670 PetscInt nactive_edges = 0, n_act_n[3], gn_act_n[3]; 671 PetscMPIInt tag1, tag2; 672 PetscCall(VecGetArrayRead(locMaxEdge, &lid_max_ew)); 673 if (isMPI) { 674 PetscCall(VecGetArrayRead(ghostMaxEdge, &lghost_max_ew)); 675 PetscCall(PetscCommGetNewTag(comm, &tag1)); 676 PetscCall(PetscCommGetNewTag(comm, &tag2)); 677 } 678 for (int kk = 0; kk < nEdges; kk++) { 679 /* HEM */ 680 const Edge *e = &Edges[kk]; 681 const PetscInt lid0 = e->lid0, gid1 = e->gid1, ghost1_idx = e->ghost1_idx, gid0 = lid0 + my0, lid1 = gid1 - my0; 682 PetscBool isOK = PETSC_TRUE, print = PETSC_FALSE; 683 if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] edge (%d %d), %d %d %d\n", rank, (int)gid0, (int)gid1, lid_matched[lid0], (ghost1_idx != -1 && lghost_matched[ghost1_idx]), (ghost1_idx == -1 && lid_matched[lid1]))); 684 /* skip if either vertex is matched already */ 685 if (lid_matched[lid0] || (ghost1_idx != -1 && lghost_matched[ghost1_idx]) || (ghost1_idx == -1 && lid_matched[lid1])) continue; 686 687 nactive_edges++; 688 PetscCheck(PetscRealPart(lid_max_ew[lid0]) >= e->weight - MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)e->weight, (double)PetscRealPart(lid_max_ew[lid0])); 689 if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] active edge (%d %d), diff0 = %10.4e\n", rank, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight))); 690 // smaller edge, lid_max_ew get updated - e0 691 if (PetscRealPart(lid_max_ew[lid0]) > e->weight + MY_MEPS) { 692 if (print) 693 PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] 1) e0 SKIPPING small edge %20.14e edge (%d %d), diff = %10.4e to proc %d. max = %20.14e, w = %20.14e\n", rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight), ghost1_idx != -1 ? (int)lghost_pe[ghost1_idx] : rank, (double)PetscRealPart(lid_max_ew[lid0]), 694 (double)e->weight)); 695 continue; // we are basically filter edges here 696 } 697 // e1 - local 698 if (ghost1_idx == -1) { 699 if (PetscRealPart(lid_max_ew[lid1]) > e->weight + MY_MEPS) { 700 if (print) 701 PetscCall(PetscSynchronizedPrintf(comm, "\t\t%c[%d] 2) e1 SKIPPING small local edge %20.14e edge (%d %d), diff = %10.4e\n", ghost1_idx != -1 ? '\t' : ' ', rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid1]) - e->weight))); 702 continue; // we are basically filter edges here 703 } 704 } else { // e1 - ghost 705 /* see if edge might get matched on other proc */ 706 PetscReal g_max_e1 = PetscRealPart(lghost_max_ew[ghost1_idx]); 707 if (print) 708 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] CHECK GHOST e1, edge (%d %d), E0 MAX EDGE WEIGHT = %10.4e, EDGE WEIGHT = %10.4e, diff1 = %10.4e, ghost proc %d with max pe %d on e0 and %d on e1\n", rank, (int)gid0, (int)gid1, (double)PetscRealPart(lid_max_ew[lid0]), 709 (double)e->weight, (double)(PetscRealPart(lghost_max_ew[ghost1_idx]) - e->weight), (int)lghost_pe[ghost1_idx], lid_max_pe[lid0], lghost_max_pe[ghost1_idx])); 710 if (g_max_e1 > e->weight + MY_MEPS) { 711 /* PetscCall(PetscSynchronizedPrintf(comm,"\t\t\t\t[%d] 3) ghost e1 SKIPPING small edge (%d %d), diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */ 712 continue; 713 } else if (g_max_e1 >= e->weight - MY_MEPS && lghost_pe[ghost1_idx] > rank) { // is 'lghost_max_pe[ghost1_idx] > rank' needed? 714 /* check for max_ea == to this edge and larger processor that will deal with this */ 715 if (print) 716 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] ghost e1 SKIPPING EQUAL (%d %d), diff = %10.4e from larger proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, 717 (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight), (int)lghost_pe[ghost1_idx], (int)lghost_max_pe[ghost1_idx], (double)g_max_e1, (double)e->weight)); 718 isOK = PETSC_FALSE; // this guy could delete me 719 continue; 720 } else { 721 /* PetscCall(PetscSynchronizedPrintf(comm,"\t[%d] Edge (%d %d) passes gid0 tests, diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */ 722 } 723 } 724 /* check ghost for v0 */ 725 if (isOK) { 726 PetscReal max_e, ew; 727 if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */ 728 int n; 729 ii = matB->compressedrow.i; 730 n = ii[ix + 1] - ii[ix]; 731 ap = matB->a + ii[ix]; 732 aj = matB->j + ii[ix]; 733 for (int jj = 0; jj < n && isOK; jj++) { 734 PetscInt lidj = aj[jj]; 735 if (lghost_matched[lidj]) continue; 736 ew = PetscRealPart(ap[jj]); 737 if (ew <= threshold) continue; 738 max_e = PetscRealPart(lghost_max_ew[lidj]); 739 /* check for max_e == to this edge and larger processor that will deal with this */ 740 if (ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE; 741 PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e. ncols = %d, gid0 = %d, gid1 = %d", (double)PetscRealPart(ew), (double)PetscRealPart(max_e), (int)n, (int)(lid0 + my0), (int)lghost_gid[lidj]); 742 if (print) 743 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t[%d] e0: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d). isOK = %d, %d %d %d; ew = %e, lid0 max ew = %e, diff = %e, eps = %e\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj], isOK, (double)(ew) >= (double)(max_e - MY_MEPS), ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS, lghost_pe[lidj] > rank, (double)ew, (double)PetscRealPart(lid_max_ew[lid0]), (double)(ew - PetscRealPart(lid_max_ew[lid0])), (double)MY_MEPS)); 744 } 745 if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1)); 746 } 747 /* check local v1 */ 748 if (ghost1_idx == -1) { 749 if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */ 750 int n; 751 ii = matB->compressedrow.i; 752 n = ii[ix + 1] - ii[ix]; 753 ap = matB->a + ii[ix]; 754 aj = matB->j + ii[ix]; 755 for (int jj = 0; jj < n && isOK; jj++) { 756 PetscInt lidj = aj[jj]; 757 if (lghost_matched[lidj]) continue; 758 ew = PetscRealPart(ap[jj]); 759 if (ew <= threshold) continue; 760 max_e = PetscRealPart(lghost_max_ew[lidj]); 761 /* check for max_e == to this edge and larger processor that will deal with this */ 762 if (ew >= PetscRealPart(lid_max_ew[lid1]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE; 763 PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e)); 764 if (print) 765 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t\t[%d] e1: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d)\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj])); 766 } 767 } 768 if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1)); 769 } 770 } 771 PetscReal e1_max_w = (ghost1_idx == -1 ? PetscRealPart(lid_max_ew[lid0]) : PetscRealPart(lghost_max_ew[ghost1_idx])); 772 if (print) 773 PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] MATCHING (%d %d) e1 max weight = %e, e1 weight diff %e, %s. isOK = %d\n", rank, (int)gid0, (int)gid1, (double)e1_max_w, (double)(e1_max_w - e->weight), ghost1_idx == -1 ? "local" : "ghost", isOK)); 774 /* do it */ 775 if (isOK) { 776 if (ghost1_idx == -1) { 777 PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_SUP, "local %d is matched", (int)gid1); 778 lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */ 779 PetscCall(PetscCDMoveAppend(agg_llists, lid0, lid1)); // takes lid1's list and appends to lid0's 780 } else { 781 /* add gid1 to list of ghost deleted by me -- I need their children */ 782 PetscMPIInt proc = lghost_pe[ghost1_idx]; 783 PetscCheck(!lghost_matched[ghost1_idx], PETSC_COMM_SELF, PETSC_ERR_SUP, "ghost %d is matched", (int)lghost_gid[ghost1_idx]); 784 lghost_matched[ghost1_idx] = PETSC_TRUE; 785 PetscCall(PetscCDAppendID(ghost_deleted_list, proc, ghost1_idx)); /* cache to send messages */ 786 PetscCall(PetscCDAppendID(ghost_deleted_list, proc, lid0)); 787 } 788 lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */ 789 /* set projection */ 790 PetscCall(MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES)); 791 PetscCall(MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES)); 792 //PetscCall(PetscPrintf(comm,"\t %d.%d) match active EDGE %d : (%d %d)\n",iter,sub_it, (int)nactive_edges, (int)gid0, (int)gid1)); 793 } /* matched */ 794 } /* edge loop */ 795 PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT)); 796 if (isMPI) PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew)); 797 PetscCall(VecRestoreArrayRead(locMaxEdge, &lid_max_ew)); 798 // count active for test, latter, update deleted ghosts 799 n_act_n[0] = nactive_edges; 800 if (ghost_deleted_list) PetscCall(PetscCDCount(ghost_deleted_list, &n_act_n[2])); 801 else n_act_n[2] = 0; 802 PetscCall(PetscCDCount(agg_llists, &n_act_n[1])); 803 PetscCall(MPIU_Allreduce(n_act_n, gn_act_n, 3, MPIU_INT, MPI_SUM, comm)); 804 PetscCall(PetscInfo(info_is, "[%d] %d.%d) nactive edges=%" PetscInt_FMT ", ncomm_procs=%d, nEdges=%d, %" PetscInt_FMT " deleted ghosts, N=%" PetscInt_FMT "\n", rank, iter, sub_it, gn_act_n[0], (int)ncomm_procs, (int)nEdges, gn_act_n[2], gn_act_n[1])); 805 /* deal with deleted ghost */ 806 if (isMPI) { 807 PetscCDIntNd *pos; 808 PetscInt *sbuffs1[REQ_BF_SIZE], ndel; 809 PetscInt *sbuffs2[REQ_BF_SIZE]; 810 MPI_Status status; 811 812 /* send deleted ghosts */ 813 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 814 const PetscMPIInt proc = comm_procs[proc_idx]; 815 PetscInt *sbuff, *pt, scount; 816 MPI_Request *request; 817 /* count ghosts */ 818 PetscCall(PetscCDCountAt(ghost_deleted_list, proc, &ndel)); 819 ndel /= 2; // two entries for each proc 820 scount = 2 + 2 * ndel; 821 PetscCall(PetscMalloc1(scount + request_size, &sbuff)); 822 /* save requests */ 823 sbuffs1[proc_idx] = sbuff; 824 request = (MPI_Request *)sbuff; 825 sbuff = pt = (PetscInt *)(sbuff + request_size); 826 /* write [ndel, proc, n*[gid1,gid0] */ 827 *pt++ = ndel; // number of deleted to send 828 *pt++ = rank; // proc (not used) 829 PetscCall(PetscCDGetHeadPos(ghost_deleted_list, proc, &pos)); 830 while (pos) { 831 PetscInt lid0, ghost_idx, gid1; 832 PetscCall(PetscCDIntNdGetID(pos, &ghost_idx)); 833 gid1 = lghost_gid[ghost_idx]; 834 PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos)); 835 PetscCall(PetscCDIntNdGetID(pos, &lid0)); 836 PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos)); 837 *pt++ = gid1; 838 *pt++ = lid0 + my0; // gid0 839 } 840 PetscCheck(pt - sbuff == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "sbuff-pt != scount: %d", (int)(pt - sbuff)); 841 /* MPI_Isend: tag1 [ndel, proc, n*[gid1,gid0] ] */ 842 PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag1, comm, request)); 843 PetscCall(PetscCDRemoveAllAt(ghost_deleted_list, proc)); // done with this list 844 } 845 /* receive deleted, send back partial aggregates, clear lists */ 846 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 847 PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag1, comm, &status)); 848 { 849 PetscInt *pt, *pt2, *pt3, *sbuff, tmp; 850 MPI_Request *request; 851 int rcount, scount, ndel; 852 const PetscMPIInt proc = status.MPI_SOURCE; 853 PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount)); 854 if (rcount > rbuff_sz) { 855 if (rbuff) PetscCall(PetscFree(rbuff)); 856 PetscCall(PetscMalloc1(rcount, &rbuff)); 857 rbuff_sz = rcount; 858 } 859 /* MPI_Recv: tag1 [ndel, proc, ndel*[gid1,gid0] ] */ 860 PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag1, comm, &status)); 861 /* read and count sends *[lid0, n, n*[gid] ] */ 862 pt = rbuff; 863 scount = 0; 864 ndel = *pt++; // number of deleted to recv 865 tmp = *pt++; // proc (not used) 866 while (ndel--) { 867 PetscInt gid1 = *pt++, lid1 = gid1 - my0; 868 int gh_gid0 = *pt++; // gid on other proc (not used here to count) 869 PetscCheck(lid1 >= 0 && lid1 < nloc, PETSC_COMM_SELF, PETSC_ERR_SUP, "received ghost deleted %d", (int)gid1); 870 PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "%d) received matched local gid %" PetscInt_FMT ",%d, with ghost (lid) %" PetscInt_FMT " from proc %d", sub_it, gid1, gh_gid0, tmp, proc); 871 lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */ 872 PetscCall(PetscCDCountAt(agg_llists, lid1, &tmp)); // n 873 /* PetscCheck(tmp == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "sending %d (!= 1) size aggregate. gid-0 %d, from %d (gid-1 %d)", (int)tmp, (int) gid, proc, gh_gid0); */ 874 scount += tmp + 2; // lid0, n, n*[gid] 875 } 876 PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "receive buffer size != num read: %d; rcount: %d", (int)(pt - rbuff), rcount); 877 /* send tag2: *[gid0, n, n*[gid] ] */ 878 PetscCall(PetscMalloc1(scount + request_size, &sbuff)); 879 sbuffs2[proc_idx] = sbuff; /* cache request */ 880 request = (MPI_Request *)sbuff; 881 pt2 = sbuff = sbuff + request_size; 882 // read again: n, proc, n*[gid1,gid0] 883 pt = rbuff; 884 ndel = *pt++; 885 tmp = *pt++; // proc (not used) 886 while (ndel--) { 887 PetscInt gid1 = *pt++, lid1 = gid1 - my0, gh_gid0 = *pt++; 888 /* write [gid0, aggSz, aggSz[gid] ] */ 889 *pt2++ = gh_gid0; 890 pt3 = pt2++; /* save pointer for later */ 891 PetscCall(PetscCDGetHeadPos(agg_llists, lid1, &pos)); 892 while (pos) { 893 PetscInt gid; 894 PetscCall(PetscCDIntNdGetID(pos, &gid)); 895 PetscCall(PetscCDGetNextPos(agg_llists, lid1, &pos)); 896 *pt2++ = gid; 897 } 898 *pt3 = (pt2 - pt3) - 1; 899 /* clear list */ 900 PetscCall(PetscCDRemoveAllAt(agg_llists, lid1)); 901 } 902 PetscCheck((pt2 - sbuff) == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer size != num write: %d %d", (int)(pt2 - sbuff), (int)scount); 903 /* MPI_Isend: requested data tag2 *[lid0, n, n*[gid1] ] */ 904 PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag2, comm, request)); 905 } 906 } // proc_idx 907 /* receive tag2 *[gid0, n, n*[gid] ] */ 908 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 909 PetscMPIInt proc; 910 PetscInt *pt; 911 int rcount; 912 PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag2, comm, &status)); 913 PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount)); 914 if (rcount > rbuff_sz) { 915 if (rbuff) PetscCall(PetscFree(rbuff)); 916 PetscCall(PetscMalloc1(rcount, &rbuff)); 917 rbuff_sz = rcount; 918 } 919 proc = status.MPI_SOURCE; 920 /* MPI_Recv: tag1 [n, proc, n*[gid1,lid0] ] */ 921 PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag2, comm, &status)); 922 pt = rbuff; 923 while (pt - rbuff < rcount) { 924 PetscInt gid0 = *pt++, n = *pt++; 925 while (n--) { 926 PetscInt gid1 = *pt++; 927 PetscCall(PetscCDAppendID(agg_llists, gid0 - my0, gid1)); 928 } 929 } 930 PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "recv buffer size != num read: %d %d", (int)(pt - rbuff), (int)rcount); 931 } 932 /* wait for tag1 isends */ 933 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 934 MPI_Request *request; 935 request = (MPI_Request *)sbuffs1[proc_idx]; 936 PetscCallMPI(MPI_Wait(request, &status)); 937 PetscCall(PetscFree(sbuffs1[proc_idx])); 938 } 939 /* wait for tag2 isends */ 940 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 941 MPI_Request *request = (MPI_Request *)sbuffs2[proc_idx]; 942 PetscCallMPI(MPI_Wait(request, &status)); 943 PetscCall(PetscFree(sbuffs2[proc_idx])); 944 } 945 } /* MPI */ 946 /* set 'lghost_matched' - use locMaxEdge, ghostMaxEdge (recomputed next) */ 947 if (isMPI) { 948 const PetscScalar *sbuff; 949 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 950 PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0; 951 PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 952 } 953 PetscCall(VecAssemblyBegin(locMaxEdge)); 954 PetscCall(VecAssemblyEnd(locMaxEdge)); 955 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 956 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 957 PetscCall(VecGetArrayRead(ghostMaxEdge, &sbuff)); 958 for (int kk = 0; kk < num_ghosts; kk++) { lghost_matched[kk] = (PetscBool)(PetscRealPart(sbuff[kk]) != 0.0); } 959 PetscCall(VecRestoreArrayRead(ghostMaxEdge, &sbuff)); 960 } 961 /* compute 'locMaxEdge' inside sub iteration b/c max weight can drop as neighbors are matched */ 962 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 963 PetscReal max_e = 0., tt; 964 PetscScalar vval; 965 const PetscInt lid = kk; 966 int max_pe = rank, pe, n; 967 ii = matA->i; 968 n = ii[lid + 1] - ii[lid]; 969 aj = PetscSafePointerPlusOffset(matA->j, ii[lid]); 970 ap = PetscSafePointerPlusOffset(matA->a, ii[lid]); 971 for (int jj = 0; jj < n; jj++) { 972 PetscInt lidj = aj[jj]; 973 if (lid_matched[lidj]) continue; /* this is new - can change local max */ 974 if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]); 975 } 976 if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ 977 ii = matB->compressedrow.i; 978 n = ii[ix + 1] - ii[ix]; 979 ap = matB->a + ii[ix]; 980 aj = matB->j + ii[ix]; 981 for (int jj = 0; jj < n; jj++) { 982 PetscInt lidj = aj[jj]; 983 if (lghost_matched[lidj]) continue; 984 if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt; 985 } 986 } 987 vval = (PetscScalar)max_e; 988 PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 989 // max PE with max edge 990 if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ 991 ii = matB->compressedrow.i; 992 n = ii[ix + 1] - ii[ix]; 993 ap = matB->a + ii[ix]; 994 aj = matB->j + ii[ix]; 995 for (int jj = 0; jj < n; jj++) { 996 PetscInt lidj = aj[jj]; 997 if (lghost_matched[lidj]) continue; 998 if ((pe = lghost_pe[aj[jj]]) > max_pe && PetscRealPart(ap[jj]) >= max_e - MY_MEPS) { max_pe = pe; } 999 } 1000 } 1001 vval = (PetscScalar)max_pe; 1002 PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES)); 1003 } 1004 PetscCall(VecAssemblyBegin(locMaxEdge)); 1005 PetscCall(VecAssemblyEnd(locMaxEdge)); 1006 PetscCall(VecAssemblyBegin(locMaxPE)); 1007 PetscCall(VecAssemblyEnd(locMaxPE)); 1008 /* compute 'lghost_max_ew' and 'lghost_max_pe' to get ready for next iteration*/ 1009 if (isMPI) { 1010 const PetscScalar *buf; 1011 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 1012 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 1013 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 1014 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 1015 PetscCall(VecGetArrayRead(ghostMaxPE, &buf)); 1016 for (int kk = 0; kk < num_ghosts; kk++) { 1017 lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now 1018 } 1019 PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf)); 1020 } 1021 // if no active edges, stop 1022 if (gn_act_n[0] < 1) break; 1023 // inc and check (self stopping iteration 1024 PetscCheck(old_num_edge != gn_act_n[0], PETSC_COMM_SELF, PETSC_ERR_SUP, "HEM stalled step %d/%d", sub_it + 1, n_sub_its); 1025 sub_it++; 1026 PetscCheck(sub_it < n_sub_its, PETSC_COMM_SELF, PETSC_ERR_SUP, "failed to finish HEM step %d/%d", sub_it + 1, n_sub_its); 1027 old_num_edge = gn_act_n[0]; 1028 } /* sub_it loop */ 1029 /* clean up iteration */ 1030 PetscCall(PetscFree(Edges)); 1031 if (mpimat) { // can be hoisted 1032 PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew)); 1033 PetscCall(VecDestroy(&ghostMaxEdge)); 1034 PetscCall(VecDestroy(&ghostMaxPE)); 1035 PetscCall(PetscFree(lghost_pe)); 1036 PetscCall(PetscFree(lghost_gid)); 1037 PetscCall(PetscFree(lghost_matched)); 1038 PetscCall(PetscFree(lghost_max_pe)); 1039 } 1040 PetscCall(VecDestroy(&locMaxEdge)); 1041 PetscCall(VecDestroy(&locMaxPE)); 1042 /* create next graph */ 1043 { 1044 Vec diag; 1045 /* add identity for unmatched vertices so they stay alive */ 1046 for (PetscInt kk = 0, gid1, gid = my0; kk < nloc; kk++, gid++) { 1047 if (!lid_matched[kk]) { 1048 const PetscInt lid = kk; 1049 PetscCDIntNd *pos; 1050 PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos)); 1051 PetscCheck(pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "empty list in singleton: %d", (int)gid); 1052 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1053 PetscCheck(gid1 == gid, PETSC_COMM_SELF, PETSC_ERR_PLIB, "first in list (%d) in singleton not %d", (int)gid1, (int)gid); 1054 PetscCall(MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES)); 1055 } 1056 } 1057 PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY)); 1058 PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY)); 1059 1060 /* project to make new graph with collapsed edges */ 1061 PetscCall(MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat)); 1062 PetscCall(MatDestroy(&P)); 1063 PetscCall(MatDestroy(&cMat)); 1064 cMat = tMat; 1065 PetscCall(MatCreateVecs(cMat, &diag, NULL)); 1066 PetscCall(MatGetDiagonal(cMat, diag)); 1067 PetscCall(VecReciprocal(diag)); 1068 PetscCall(VecSqrtAbs(diag)); 1069 PetscCall(MatDiagonalScale(cMat, diag, diag)); 1070 PetscCall(VecDestroy(&diag)); 1071 } 1072 } /* coarsen iterator */ 1073 1074 /* make fake matrix with Mat->B only for smoothed agg QR. Need this if we make an aux graph (ie, PtAP) with k > 1 */ 1075 if (size > 1) { 1076 Mat mat; 1077 PetscCDIntNd *pos; 1078 PetscInt NN, MM, jj = 0, mxsz = 0; 1079 1080 for (int kk = 0; kk < nloc; kk++) { 1081 PetscCall(PetscCDCountAt(agg_llists, kk, &jj)); 1082 if (jj > mxsz) mxsz = jj; 1083 } 1084 PetscCall(MatGetSize(a_Gmat, &MM, &NN)); 1085 if (mxsz > MM - nloc) mxsz = MM - nloc; 1086 /* matrix of ghost adj for square graph */ 1087 PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat)); 1088 for (PetscInt lid = 0, gid = my0; lid < nloc; lid++, gid++) { 1089 PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos)); 1090 while (pos) { 1091 PetscInt gid1; 1092 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1093 PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos)); 1094 if (gid1 < my0 || gid1 >= my0 + nloc) PetscCall(MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES)); 1095 } 1096 } 1097 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1098 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1099 PetscCall(PetscCDSetMat(agg_llists, mat)); 1100 PetscCall(PetscCDDestroy(ghost_deleted_list)); 1101 if (rbuff_sz) PetscCall(PetscFree(rbuff)); // always true 1102 } 1103 // move BCs into some node 1104 if (bc_list) { 1105 PetscCDIntNd *pos; 1106 PetscCall(PetscCDGetHeadPos(bc_list, 0, &pos)); 1107 while (pos) { 1108 PetscInt gid1; 1109 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1110 PetscCall(PetscCDGetNextPos(bc_list, 0, &pos)); 1111 PetscCall(PetscCDAppendID(agg_llists, bc_agg, gid1)); 1112 } 1113 PetscCall(PetscCDRemoveAllAt(bc_list, 0)); 1114 PetscCall(PetscCDDestroy(bc_list)); 1115 } 1116 { 1117 // check sizes -- all vertices must get in graph 1118 PetscInt sz, globalsz, MM; 1119 PetscCall(MatGetSize(a_Gmat, &MM, NULL)); 1120 PetscCall(PetscCDCount(agg_llists, &sz)); 1121 PetscCall(MPIU_Allreduce(&sz, &globalsz, 1, MPIU_INT, MPI_SUM, comm)); 1122 PetscCheck(MM == globalsz, comm, PETSC_ERR_SUP, "lost %d equations ?", (int)(MM - globalsz)); 1123 } 1124 // cleanup 1125 PetscCall(MatDestroy(&cMat)); 1126 PetscCall(PetscFree(lid_cprowID)); 1127 PetscCall(PetscFree(lid_max_pe)); 1128 PetscCall(PetscFree(lid_matched)); 1129 PetscCall(ISDestroy(&info_is)); 1130 PetscFunctionReturn(PETSC_SUCCESS); 1131 } 1132 1133 /* 1134 HEM coarsen, simple greedy. 1135 */ 1136 static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse) 1137 { 1138 Mat mat = coarse->graph; 1139 1140 PetscFunctionBegin; 1141 PetscCall(MatCoarsenApply_HEM_private(mat, coarse->max_it, coarse->threshold, &coarse->agg_lists)); 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer) 1146 { 1147 PetscMPIInt rank; 1148 PetscBool iascii; 1149 1150 PetscFunctionBegin; 1151 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank)); 1152 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1153 if (iascii) { 1154 PetscCDIntNd *pos, *pos2; 1155 PetscViewerFormat format; 1156 1157 PetscCall(PetscViewerASCIIPrintf(viewer, "%d matching steps with threshold = %g\n", (int)coarse->max_it, (double)coarse->threshold)); 1158 PetscCall(PetscViewerGetFormat(viewer, &format)); 1159 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1160 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1161 for (PetscInt kk = 0; kk < coarse->agg_lists->size; kk++) { 1162 PetscCall(PetscCDGetHeadPos(coarse->agg_lists, kk, &pos)); 1163 if ((pos2 = pos)) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "selected local %d: ", (int)kk)); 1164 while (pos) { 1165 PetscInt gid1; 1166 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1167 PetscCall(PetscCDGetNextPos(coarse->agg_lists, kk, &pos)); 1168 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " %d ", (int)gid1)); 1169 } 1170 if (pos2) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1171 } 1172 PetscCall(PetscViewerFlush(viewer)); 1173 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1174 } 1175 } 1176 PetscFunctionReturn(PETSC_SUCCESS); 1177 } 1178 1179 /* 1180 MatCoarsenCreate_HEM - A coarsener that uses HEM a simple greedy coarsener 1181 */ 1182 PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse) 1183 { 1184 PetscFunctionBegin; 1185 coarse->ops->apply = MatCoarsenApply_HEM; 1186 coarse->ops->view = MatCoarsenView_HEM; 1187 coarse->max_it = 4; 1188 PetscFunctionReturn(PETSC_SUCCESS); 1189 } 1190