1 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> 4 5 /* linked list methods 6 * 7 * PetscCDCreate 8 */ 9 PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out) 10 { 11 PetscCoarsenData *ail; 12 13 PetscFunctionBegin; 14 /* allocate pool, partially */ 15 PetscCall(PetscNew(&ail)); 16 *a_out = ail; 17 ail->pool_list.next = NULL; 18 ail->pool_list.array = NULL; 19 ail->chk_sz = 0; 20 /* allocate array */ 21 ail->size = a_size; 22 PetscCall(PetscCalloc1(a_size, &ail->array)); 23 ail->extra_nodes = NULL; 24 ail->mat = NULL; 25 PetscFunctionReturn(PETSC_SUCCESS); 26 } 27 28 /* PetscCDDestroy 29 */ 30 PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail) 31 { 32 PetscCDArrNd *n = &ail->pool_list; 33 34 PetscFunctionBegin; 35 n = n->next; 36 while (n) { 37 PetscCDArrNd *lstn = n; 38 n = n->next; 39 PetscCall(PetscFree(lstn)); 40 } 41 if (ail->pool_list.array) PetscCall(PetscFree(ail->pool_list.array)); 42 PetscCall(PetscFree(ail->array)); 43 if (ail->mat) PetscCall(MatDestroy(&ail->mat)); 44 /* delete this (+agg+pool array) */ 45 PetscCall(PetscFree(ail)); 46 PetscFunctionReturn(PETSC_SUCCESS); 47 } 48 49 /* PetscCDSetChunkSize 50 */ 51 PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *ail, PetscInt a_sz) 52 { 53 PetscFunctionBegin; 54 ail->chk_sz = a_sz; 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 /* PetscCDGetNewNode 59 */ 60 static PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id) 61 { 62 PetscFunctionBegin; 63 *a_out = NULL; /* squelch -Wmaybe-uninitialized */ 64 if (ail->extra_nodes) { 65 PetscCDIntNd *node = ail->extra_nodes; 66 ail->extra_nodes = node->next; 67 node->gid = a_id; 68 node->next = NULL; 69 *a_out = node; 70 } else { 71 if (!ail->pool_list.array) { 72 if (!ail->chk_sz) ail->chk_sz = 10; /* use a chuck size of ail->size? */ 73 PetscCall(PetscMalloc1(ail->chk_sz, &ail->pool_list.array)); 74 ail->new_node = ail->pool_list.array; 75 ail->new_left = ail->chk_sz; 76 ail->new_node->next = NULL; 77 } else if (!ail->new_left) { 78 PetscCDArrNd *node; 79 PetscCall(PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node)); 80 node->array = (PetscCDIntNd *)(node + 1); 81 node->next = ail->pool_list.next; 82 ail->pool_list.next = node; 83 ail->new_left = ail->chk_sz; 84 ail->new_node = node->array; 85 } 86 ail->new_node->gid = a_id; 87 ail->new_node->next = NULL; 88 *a_out = ail->new_node++; 89 ail->new_left--; 90 } 91 PetscFunctionReturn(PETSC_SUCCESS); 92 } 93 94 /* PetscCDIntNdSetID 95 */ 96 PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id) 97 { 98 PetscFunctionBegin; 99 a_this->gid = a_id; 100 PetscFunctionReturn(PETSC_SUCCESS); 101 } 102 103 /* PetscCDIntNdGetID 104 */ 105 PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid) 106 { 107 PetscFunctionBegin; 108 *a_gid = a_this->gid; 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /* PetscCDGetHeadPos 113 */ 114 PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos) 115 { 116 PetscFunctionBegin; 117 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_idx >= ail->size: a_idx=%" PetscInt_FMT ".", a_idx); 118 *pos = ail->array[a_idx]; 119 PetscFunctionReturn(PETSC_SUCCESS); 120 } 121 122 /* PetscCDGetNextPos 123 */ 124 PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos) 125 { 126 PetscFunctionBegin; 127 PetscCheck((*pos), PETSC_COMM_SELF, PETSC_ERR_PLIB, "NULL input position."); 128 *pos = (*pos)->next; 129 PetscFunctionReturn(PETSC_SUCCESS); 130 } 131 132 /* PetscCDAppendID 133 */ 134 PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id) 135 { 136 PetscCDIntNd *n, *n2; 137 138 PetscFunctionBegin; 139 PetscCall(PetscCDGetNewNode(ail, &n, a_id)); 140 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 141 if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n; 142 else { 143 do { 144 if (!n2->next) { 145 n2->next = n; 146 PetscCheck(!n->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n should not have a next"); 147 break; 148 } 149 n2 = n2->next; 150 } while (n2); 151 PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null"); 152 } 153 PetscFunctionReturn(PETSC_SUCCESS); 154 } 155 156 /* PetscCDAppendNode 157 */ 158 PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n) 159 { 160 PetscCDIntNd *n2; 161 162 PetscFunctionBegin; 163 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 164 if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n; 165 else { 166 do { 167 if (!n2->next) { 168 n2->next = a_n; 169 a_n->next = NULL; 170 break; 171 } 172 n2 = n2->next; 173 } while (n2); 174 PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null"); 175 } 176 PetscFunctionReturn(PETSC_SUCCESS); 177 } 178 179 /* PetscCDRemoveNextNode: a_last->next, this exposes single linked list structure to API (not used) 180 */ 181 PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last) 182 { 183 PetscCDIntNd *del; 184 185 PetscFunctionBegin; 186 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 187 PetscCheck(a_last->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_last should have a next"); 188 del = a_last->next; 189 a_last->next = del->next; 190 /* del->next = NULL; -- this still used in a iterator so keep it intact -- need to fix this with a double linked list */ 191 /* could reuse n2 but PetscCDAppendNode sometimes uses it */ 192 PetscFunctionReturn(PETSC_SUCCESS); 193 } 194 195 /* PetscCDPrint 196 */ 197 PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, PetscInt my0, MPI_Comm comm) 198 { 199 PetscCDIntNd *n, *n2; 200 PetscInt ii; 201 202 PetscFunctionBegin; 203 for (ii = 0; ii < ail->size; ii++) { 204 n2 = n = ail->array[ii]; 205 if (n) PetscCall(PetscSynchronizedPrintf(comm, "list %" PetscInt_FMT ":", ii + my0)); 206 while (n) { 207 PetscCall(PetscSynchronizedPrintf(comm, " %" PetscInt_FMT, n->gid)); 208 n = n->next; 209 } 210 if (n2) PetscCall(PetscSynchronizedPrintf(comm, "\n")); 211 } 212 PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT)); 213 PetscFunctionReturn(PETSC_SUCCESS); 214 } 215 216 /* PetscCDMoveAppend - take list in a_srcidx and appends to destidx 217 */ 218 PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx) 219 { 220 PetscCDIntNd *n; 221 222 PetscFunctionBegin; 223 PetscCheck(a_srcidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_srcidx); 224 PetscCheck(a_destidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_destidx); 225 PetscCheck(a_destidx != a_srcidx, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_destidx==a_srcidx %" PetscInt_FMT ".", a_destidx); 226 n = ail->array[a_destidx]; 227 if (!n) ail->array[a_destidx] = ail->array[a_srcidx]; 228 else { 229 do { 230 if (!n->next) { 231 n->next = ail->array[a_srcidx]; // append 232 break; 233 } 234 n = n->next; 235 } while (1); 236 } 237 ail->array[a_srcidx] = NULL; // empty 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 /* PetscCDRemoveAllAt - empty one list and move data to cache 242 */ 243 PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *ail, PetscInt a_idx) 244 { 245 PetscCDIntNd *rem, *n1; 246 247 PetscFunctionBegin; 248 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 249 rem = ail->array[a_idx]; 250 ail->array[a_idx] = NULL; 251 if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem; 252 else { 253 while (n1->next) n1 = n1->next; 254 n1->next = rem; 255 } 256 PetscFunctionReturn(PETSC_SUCCESS); 257 } 258 259 /* PetscCDCountAt 260 */ 261 PetscErrorCode PetscCDCountAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz) 262 { 263 PetscCDIntNd *n1; 264 PetscInt sz = 0; 265 266 PetscFunctionBegin; 267 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 268 n1 = ail->array[a_idx]; 269 while (n1) { 270 n1 = n1->next; 271 sz++; 272 } 273 *a_sz = sz; 274 PetscFunctionReturn(PETSC_SUCCESS); 275 } 276 277 /* PetscCDSize 278 */ 279 PetscErrorCode PetscCDCount(const PetscCoarsenData *ail, PetscInt *a_sz) 280 { 281 PetscInt sz = 0; 282 283 PetscFunctionBegin; 284 for (int ii = 0; ii < ail->size; ii++) { 285 PetscCDIntNd *n1 = ail->array[ii]; 286 while (n1) { 287 n1 = n1->next; 288 sz++; 289 } 290 } 291 *a_sz = sz; 292 PetscFunctionReturn(PETSC_SUCCESS); 293 } 294 295 /* PetscCDIsEmptyAt - Is the list empty? (not used) 296 */ 297 PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e) 298 { 299 PetscFunctionBegin; 300 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 301 *a_e = (PetscBool)(ail->array[a_idx] == NULL); 302 PetscFunctionReturn(PETSC_SUCCESS); 303 } 304 305 /* PetscCDGetNonemptyIS - used for C-F methods 306 */ 307 PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *ail, IS *a_mis) 308 { 309 PetscCDIntNd *n; 310 PetscInt ii, kk; 311 PetscInt *permute; 312 313 PetscFunctionBegin; 314 for (ii = kk = 0; ii < ail->size; ii++) { 315 n = ail->array[ii]; 316 if (n) kk++; 317 } 318 PetscCall(PetscMalloc1(kk, &permute)); 319 for (ii = kk = 0; ii < ail->size; ii++) { 320 n = ail->array[ii]; 321 if (n) permute[kk++] = ii; 322 } 323 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 /* PetscCDGetMat 328 */ 329 PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat) 330 { 331 PetscFunctionBegin; 332 *a_mat = ail->mat; 333 ail->mat = NULL; // give it up 334 PetscFunctionReturn(PETSC_SUCCESS); 335 } 336 337 /* PetscCDSetMat 338 */ 339 PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat) 340 { 341 PetscFunctionBegin; 342 if (ail->mat) { 343 PetscCall(MatDestroy(&ail->mat)); //should not happen 344 } 345 ail->mat = a_mat; 346 PetscFunctionReturn(PETSC_SUCCESS); 347 } 348 349 /* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers 350 */ 351 PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, PetscInt *a_sz, IS **a_local_is) 352 { 353 PetscCDIntNd *n; 354 PetscInt lsz, ii, kk, *idxs, jj, gid; 355 IS *is_loc = NULL; 356 357 PetscFunctionBegin; 358 for (ii = kk = 0; ii < ail->size; ii++) { 359 if (ail->array[ii]) kk++; 360 } 361 *a_sz = kk; 362 PetscCall(PetscMalloc1(kk, &is_loc)); 363 for (ii = kk = 0; ii < ail->size; ii++) { 364 for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */ 365 ; 366 if (lsz) { 367 PetscCall(PetscMalloc1(a_bs * lsz, &idxs)); 368 for (lsz = 0, n = ail->array[ii]; n; n = n->next) { 369 PetscCall(PetscCDIntNdGetID(n, &gid)); 370 for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj; 371 } 372 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++])); 373 } 374 } 375 PetscCheck(*a_sz == kk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "*a_sz %" PetscInt_FMT " != kk %" PetscInt_FMT, *a_sz, kk); 376 *a_local_is = is_loc; /* out */ 377 378 PetscFunctionReturn(PETSC_SUCCESS); 379 } 380 381 /* edge for priority queue */ 382 typedef struct edge_tag { 383 PetscReal weight; 384 PetscInt lid0, gid1, ghost1_idx; 385 } Edge; 386 387 #define MY_MEPS (PETSC_MACHINE_EPSILON * 100) 388 static int gamg_hem_compare(const void *a, const void *b) 389 { 390 PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight; 391 return (va <= vb - MY_MEPS) ? 1 : (va > vb + MY_MEPS) ? -1 : 0; /* 0 for equal */ 392 } 393 /* static int gamg_hem_compare3(const void *a, const void *b, void *ctx) */ 394 /* { */ 395 /* return gamg_hem_compare(a, b); */ 396 /* } */ 397 398 /* 399 MatCoarsenApply_HEM_private - parallel heavy edge matching 400 401 Input Parameter: 402 . a_Gmat - global matrix of the graph 403 . n_iter - number of matching iterations 404 . threshold - threshold for filtering graphs 405 406 Output Parameter: 407 . a_locals_llist - array of list of local nodes rooted at local node 408 */ 409 static PetscErrorCode MatCoarsenApply_HEM_private(Mat a_Gmat, const PetscInt n_iter, const PetscReal threshold, PetscCoarsenData **a_locals_llist) 410 { 411 #define REQ_BF_SIZE 100 412 PetscBool isMPI; 413 MPI_Comm comm; 414 PetscInt ix, *ii, *aj, Iend, my0, ncomm_procs, bc_agg = -1, *rbuff = NULL, rbuff_sz = 0; 415 PetscMPIInt rank, size, comm_procs[REQ_BF_SIZE], *lid_max_pe; 416 const PetscInt nloc = a_Gmat->rmap->n, request_size = PetscCeilReal((PetscReal)sizeof(MPI_Request) / (PetscReal)sizeof(PetscInt)); 417 PetscInt *lid_cprowID; 418 PetscBool *lid_matched; 419 Mat_SeqAIJ *matA, *matB = NULL; 420 Mat_MPIAIJ *mpimat = NULL; 421 PetscScalar one = 1.; 422 PetscCoarsenData *agg_llists = NULL, *ghost_deleted_list = NULL, *bc_list = NULL; 423 Mat cMat, tMat, P; 424 MatScalar *ap; 425 IS info_is; 426 427 PetscFunctionBegin; 428 PetscCall(PetscObjectGetComm((PetscObject)a_Gmat, &comm)); 429 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 430 PetscCallMPI(MPI_Comm_size(comm, &size)); 431 PetscCall(MatGetOwnershipRange(a_Gmat, &my0, &Iend)); 432 PetscCall(ISCreate(comm, &info_is)); 433 PetscCall(PetscInfo(info_is, "%" PetscInt_FMT " iterations of HEM.\n", n_iter)); 434 435 PetscCall(PetscMalloc1(nloc, &lid_matched)); 436 PetscCall(PetscMalloc1(nloc, &lid_cprowID)); 437 PetscCall(PetscMalloc1(nloc, &lid_max_pe)); 438 439 PetscCall(PetscCDCreate(nloc, &agg_llists)); 440 PetscCall(PetscCDSetChunkSize(agg_llists, nloc + 1)); 441 *a_locals_llist = agg_llists; 442 /* add self to all lists */ 443 for (int kk = 0; kk < nloc; kk++) PetscCall(PetscCDAppendID(agg_llists, kk, my0 + kk)); 444 /* make a copy of the graph, this gets destroyed in iterates */ 445 PetscCall(MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat)); 446 PetscCall(MatConvert(cMat, MATAIJ, MAT_INPLACE_MATRIX, &cMat)); 447 isMPI = (PetscBool)(size > 1); 448 if (isMPI) { 449 /* list of deleted ghosts, should compress this */ 450 PetscCall(PetscCDCreate(size, &ghost_deleted_list)); 451 PetscCall(PetscCDSetChunkSize(ghost_deleted_list, 100)); 452 } 453 for (int iter = 0; iter < n_iter; iter++) { 454 const PetscScalar *lghost_max_ew, *lid_max_ew; 455 PetscBool *lghost_matched; 456 PetscMPIInt *lghost_pe, *lghost_max_pe; 457 Vec locMaxEdge, ghostMaxEdge, ghostMaxPE, locMaxPE; 458 PetscInt *lghost_gid, nEdges, nEdges0, num_ghosts = 0; 459 Edge *Edges; 460 const int n_sub_its = 2500; // in case of a bug, stop at some point 461 /* get submatrices of cMat */ 462 for (int kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1; 463 if (isMPI) { 464 mpimat = (Mat_MPIAIJ *)cMat->data; 465 matA = (Mat_SeqAIJ *)mpimat->A->data; 466 matB = (Mat_SeqAIJ *)mpimat->B->data; 467 if (!matB->compressedrow.use) { 468 /* force construction of compressed row data structure since code below requires it */ 469 PetscCall(MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0)); 470 } 471 /* set index into compressed row 'lid_cprowID' */ 472 for (ix = 0; ix < matB->compressedrow.nrows; ix++) { 473 PetscInt *ridx = matB->compressedrow.rindex, lid = ridx[ix]; 474 if (ridx[ix] >= 0) lid_cprowID[lid] = ix; 475 else PetscCall(PetscPrintf(PETSC_COMM_SELF, "Missing slot in cprow? %d/%d ", (int)ix, (int)matB->compressedrow.nrows)); 476 } 477 } else { 478 matA = (Mat_SeqAIJ *)cMat->data; 479 } 480 /* set matched flags: true for empty list */ 481 for (int kk = 0; kk < nloc; kk++) { 482 PetscCall(PetscCDCountAt(agg_llists, kk, &ix)); 483 if (ix > 0) lid_matched[kk] = PETSC_FALSE; 484 else lid_matched[kk] = PETSC_TRUE; // call deleted gids as matched 485 } 486 /* max edge and pe vecs */ 487 PetscCall(MatCreateVecs(cMat, &locMaxEdge, NULL)); 488 PetscCall(MatCreateVecs(cMat, &locMaxPE, NULL)); 489 /* get 'lghost_pe' & 'lghost_gid' & init. 'lghost_matched' using 'mpimat->lvec' */ 490 if (isMPI) { 491 Vec vec; 492 PetscScalar vval; 493 const PetscScalar *buf; 494 PetscCall(MatCreateVecs(cMat, &vec, NULL)); 495 PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts)); 496 /* lghost_matched */ 497 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 498 PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0; 499 PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); 500 } 501 PetscCall(VecAssemblyBegin(vec)); 502 PetscCall(VecAssemblyEnd(vec)); 503 PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 504 PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 505 PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */ 506 PetscCall(PetscMalloc1(num_ghosts, &lghost_matched)); 507 for (int kk = 0; kk < num_ghosts; kk++) { 508 lghost_matched[kk] = (PetscBool)(PetscRealPart(buf[kk]) != 0); // the proc of the ghost for now 509 } 510 PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf)); 511 /* lghost_pe */ 512 vval = (PetscScalar)(rank); 513 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 514 PetscCall(VecAssemblyBegin(vec)); 515 PetscCall(VecAssemblyEnd(vec)); 516 PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 517 PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 518 PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */ 519 PetscCall(PetscMalloc1(num_ghosts, &lghost_pe)); 520 for (int kk = 0; kk < num_ghosts; kk++) lghost_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the proc of the ghost for now 521 PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf)); 522 /* lghost_gid */ 523 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 524 vval = (PetscScalar)(gid); 525 PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 526 } 527 PetscCall(VecAssemblyBegin(vec)); 528 PetscCall(VecAssemblyEnd(vec)); 529 PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 530 PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 531 PetscCall(VecDestroy(&vec)); 532 PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'lghost_gid' */ 533 PetscCall(PetscMalloc1(num_ghosts, &lghost_gid)); 534 for (int kk = 0; kk < num_ghosts; kk++) lghost_gid[kk] = (PetscInt)PetscRealPart(buf[kk]); 535 PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf)); 536 } 537 // get 'comm_procs' (could hoist) 538 for (int kk = 0; kk < REQ_BF_SIZE; kk++) comm_procs[kk] = -1; 539 for (ix = 0, ncomm_procs = 0; ix < num_ghosts; ix++) { 540 PetscMPIInt proc = lghost_pe[ix], idx = -1; 541 for (int k = 0; k < ncomm_procs && idx == -1; k++) 542 if (comm_procs[k] == proc) idx = k; 543 if (idx == -1) { comm_procs[ncomm_procs++] = proc; } 544 PetscCheck(ncomm_procs != REQ_BF_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Receive request array too small: %d", (int)ncomm_procs); 545 } 546 /* count edges, compute initial 'locMaxEdge', 'locMaxPE' */ 547 nEdges0 = 0; 548 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 549 PetscReal max_e = 0., tt; 550 PetscScalar vval; 551 PetscInt lid = kk, max_pe = rank, pe, n; 552 ii = matA->i; 553 n = ii[lid + 1] - ii[lid]; 554 aj = matA->j + ii[lid]; 555 ap = matA->a + ii[lid]; 556 for (int jj = 0; jj < n; jj++) { 557 PetscInt lidj = aj[jj]; 558 if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) { 559 if (tt > max_e) max_e = tt; 560 if (lidj > lid) nEdges0++; 561 } 562 } 563 if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ 564 ii = matB->compressedrow.i; 565 n = ii[ix + 1] - ii[ix]; 566 ap = matB->a + ii[ix]; 567 aj = matB->j + ii[ix]; 568 for (int jj = 0; jj < n; jj++) { 569 if ((tt = PetscRealPart(ap[jj])) > threshold) { 570 if (tt > max_e) max_e = tt; 571 nEdges0++; 572 if ((pe = lghost_pe[aj[jj]]) > max_pe) max_pe = pe; 573 } 574 } 575 } 576 vval = max_e; 577 PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); 578 vval = (PetscScalar)max_pe; 579 PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES)); 580 if (iter == 0 && max_e <= MY_MEPS) { // add BCs to fake aggregate 581 lid_matched[lid] = PETSC_TRUE; 582 if (bc_agg == -1) { 583 bc_agg = lid; 584 PetscCall(PetscCDCreate(1, &bc_list)); 585 } 586 PetscCall(PetscCDRemoveAllAt(agg_llists, lid)); 587 PetscCall(PetscCDAppendID(bc_list, 0, my0 + lid)); 588 } 589 } 590 PetscCall(VecAssemblyBegin(locMaxEdge)); 591 PetscCall(VecAssemblyEnd(locMaxEdge)); 592 PetscCall(VecAssemblyBegin(locMaxPE)); 593 PetscCall(VecAssemblyEnd(locMaxPE)); 594 /* make 'ghostMaxEdge_max_ew', 'lghost_max_pe' */ 595 if (mpimat) { 596 const PetscScalar *buf; 597 PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxEdge)); 598 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 599 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 600 601 PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxPE)); 602 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 603 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 604 PetscCall(VecGetArrayRead(ghostMaxPE, &buf)); 605 PetscCall(PetscMalloc1(num_ghosts, &lghost_max_pe)); 606 for (int kk = 0; kk < num_ghosts; kk++) lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now 607 PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf)); 608 } 609 { // make lid_max_pe 610 const PetscScalar *buf; 611 PetscCall(VecGetArrayRead(locMaxPE, &buf)); 612 for (int kk = 0; kk < nloc; kk++) lid_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now 613 PetscCall(VecRestoreArrayRead(locMaxPE, &buf)); 614 } 615 /* setup sorted list of edges, and make 'Edges' */ 616 PetscCall(PetscMalloc1(nEdges0, &Edges)); 617 nEdges = 0; 618 for (int kk = 0, n; kk < nloc; kk++) { 619 const PetscInt lid = kk; 620 PetscReal tt; 621 ii = matA->i; 622 n = ii[lid + 1] - ii[lid]; 623 aj = matA->j + ii[lid]; 624 ap = matA->a + ii[lid]; 625 for (int jj = 0; jj < n; jj++) { 626 PetscInt lidj = aj[jj]; 627 if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) { 628 if (lidj > lid) { 629 Edges[nEdges].lid0 = lid; 630 Edges[nEdges].gid1 = lidj + my0; 631 Edges[nEdges].ghost1_idx = -1; 632 Edges[nEdges].weight = tt; 633 nEdges++; 634 } 635 } 636 } 637 if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbor */ 638 ii = matB->compressedrow.i; 639 n = ii[ix + 1] - ii[ix]; 640 ap = matB->a + ii[ix]; 641 aj = matB->j + ii[ix]; 642 for (int jj = 0; jj < n; jj++) { 643 if ((tt = PetscRealPart(ap[jj])) > threshold) { 644 Edges[nEdges].lid0 = lid; 645 Edges[nEdges].gid1 = lghost_gid[aj[jj]]; 646 Edges[nEdges].ghost1_idx = aj[jj]; 647 Edges[nEdges].weight = tt; 648 nEdges++; 649 } 650 } 651 } 652 } 653 PetscCheck(nEdges == nEdges0, PETSC_COMM_SELF, PETSC_ERR_SUP, "nEdges != nEdges0: %d %d", (int)nEdges0, (int)nEdges); 654 qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare); 655 656 PetscCall(PetscInfo(info_is, "[%d] start HEM iteration %d with number edges=%d\n", rank, iter, (int)nEdges)); 657 658 /* projection matrix */ 659 PetscCall(MatCreate(comm, &P)); 660 PetscCall(MatSetType(P, MATAIJ)); 661 PetscCall(MatSetSizes(P, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 662 PetscCall(MatMPIAIJSetPreallocation(P, 1, NULL, 1, NULL)); 663 PetscCall(MatSeqAIJSetPreallocation(P, 1, NULL)); 664 PetscCall(MatSetUp(P)); 665 /* process - communicate - process */ 666 for (int sub_it = 0; /* sub_it < n_sub_its */; /* sub_it++ */) { 667 PetscInt nactive_edges = 0, n_act_n[3], gn_act_n[3]; 668 PetscMPIInt tag1, tag2; 669 PetscCall(VecGetArrayRead(locMaxEdge, &lid_max_ew)); 670 if (isMPI) { 671 PetscCall(VecGetArrayRead(ghostMaxEdge, &lghost_max_ew)); 672 PetscCall(PetscCommGetNewTag(comm, &tag1)); 673 PetscCall(PetscCommGetNewTag(comm, &tag2)); 674 } 675 for (int kk = 0; kk < nEdges; kk++) { 676 /* HEM */ 677 const Edge *e = &Edges[kk]; 678 const PetscInt lid0 = e->lid0, gid1 = e->gid1, ghost1_idx = e->ghost1_idx, gid0 = lid0 + my0, lid1 = gid1 - my0; 679 PetscBool isOK = PETSC_TRUE, print = PETSC_FALSE; 680 if (print) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t[%d] edge (%d %d), %d %d %d\n", rank, (int)gid0, (int)gid1, lid_matched[lid0], (ghost1_idx != -1 && lghost_matched[ghost1_idx]), (ghost1_idx == -1 && lid_matched[lid1]))); 681 /* skip if either vertex is matched already */ 682 if (lid_matched[lid0] || (ghost1_idx != -1 && lghost_matched[ghost1_idx]) || (ghost1_idx == -1 && lid_matched[lid1])) continue; 683 684 nactive_edges++; 685 PetscCheck(PetscRealPart(lid_max_ew[lid0]) >= e->weight - MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)e->weight, (double)PetscRealPart(lid_max_ew[lid0])); 686 if (print) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t[%d] active edge (%d %d), diff0 = %10.4e\n", rank, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight))); 687 // smaller edge, lid_max_ew get updated - e0 688 if (PetscRealPart(lid_max_ew[lid0]) > e->weight + MY_MEPS) { 689 if (print) 690 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t[%d] 1) e0 SKIPPING small edge %20.14e edge (%d %d), diff = %10.4e to proc %d. max = %20.14e, w = %20.14e\n", rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight), ghost1_idx != -1 ? (int)lghost_pe[ghost1_idx] : rank, (double)PetscRealPart(lid_max_ew[lid0]), 691 (double)e->weight)); 692 continue; // we are basically filter edges here 693 } 694 // e1 - local 695 if (ghost1_idx == -1) { 696 if (PetscRealPart(lid_max_ew[lid1]) > e->weight + MY_MEPS) { 697 if (print) 698 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t%c[%d] 2) e1 SKIPPING small local edge %20.14e edge (%d %d), diff = %10.4e\n", ghost1_idx != -1 ? '\t' : ' ', rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid1]) - e->weight))); 699 continue; // we are basically filter edges here 700 } 701 } else { // e1 - ghost 702 /* see if edge might get matched on other proc */ 703 PetscReal g_max_e1 = PetscRealPart(lghost_max_ew[ghost1_idx]); 704 if (print) 705 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t\t[%d] CHECK GHOST e1, edge (%d %d), E0 MAX EDGE WEIGHT = %10.4e, EDGE WEIGHT = %10.4e, diff1 = %10.4e, ghost proc %d with max pe %d on e0 and %d on e1\n", rank, (int)gid0, (int)gid1, (double)PetscRealPart(lid_max_ew[lid0]), 706 (double)e->weight, (double)(PetscRealPart(lghost_max_ew[ghost1_idx]) - e->weight), (int)lghost_pe[ghost1_idx], lid_max_pe[lid0], lghost_max_pe[ghost1_idx])); 707 if (g_max_e1 > e->weight + MY_MEPS) { 708 /* PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"\t\t\t\t[%d] 3) ghost e1 SKIPPING small edge (%d %d), diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */ 709 continue; 710 } else if (g_max_e1 >= e->weight - MY_MEPS && lghost_max_pe[ghost1_idx] > rank) { 711 /* check for max_ea == to this edge and larger processor that will deal with this */ 712 if (print) 713 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t\t[%d] ghost e1 SKIPPING EQUAL (%d %d), diff = %10.4e from larger proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, 714 (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight), (int)lghost_pe[ghost1_idx], (int)lghost_max_pe[ghost1_idx], (double)g_max_e1, (double)e->weight)); 715 isOK = PETSC_FALSE; // this guy could delete me 716 continue; 717 } else { 718 /* PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"\t[%d] Edge (%d %d) passes gid0 tests, diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */ 719 } 720 } 721 /* check ghost for v0 */ 722 if (isOK) { 723 PetscReal max_e, ew; 724 if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */ 725 int n; 726 ii = matB->compressedrow.i; 727 n = ii[ix + 1] - ii[ix]; 728 ap = matB->a + ii[ix]; 729 aj = matB->j + ii[ix]; 730 for (int jj = 0; jj < n && isOK; jj++) { 731 PetscInt lidj = aj[jj]; 732 if (lghost_matched[lidj]) continue; 733 ew = PetscRealPart(ap[jj]); 734 if (ew <= threshold) continue; 735 max_e = PetscRealPart(lghost_max_ew[lidj]); 736 /* check for max_e == to this edge and larger processor that will deal with this */ 737 if (ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS && lghost_pe[lidj] > rank) isOK = PETSC_FALSE; 738 PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e)); 739 if (print) 740 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t\t\t[%d] e0: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d). isOK = %d, %d %d %d; ew = %e, lid0 max ew = %e, diff = %e, eps = %e\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj], isOK, (double)(ew) >= (double)(max_e - MY_MEPS), ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS, lghost_pe[lidj] > rank, (double)ew, (double)PetscRealPart(lid_max_ew[lid0]), (double)(ew - PetscRealPart(lid_max_ew[lid0])), (double)MY_MEPS)); 741 } 742 if (!isOK && print) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1)); 743 } 744 /* check local v1 */ 745 if (ghost1_idx == -1) { 746 if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */ 747 int n; 748 ii = matB->compressedrow.i; 749 n = ii[ix + 1] - ii[ix]; 750 ap = matB->a + ii[ix]; 751 aj = matB->j + ii[ix]; 752 for (int jj = 0; jj < n && isOK; jj++) { 753 PetscInt lidj = aj[jj]; 754 if (lghost_matched[lidj]) continue; 755 ew = PetscRealPart(ap[jj]); 756 if (ew <= threshold) continue; 757 max_e = PetscRealPart(lghost_max_ew[lidj]); 758 /* check for max_e == to this edge and larger processor that will deal with this */ 759 if (ew >= PetscRealPart(lid_max_ew[lid1]) - MY_MEPS && lghost_pe[lidj] > rank) isOK = PETSC_FALSE; 760 PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e)); 761 if (print) 762 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t\t\t\t[%d] e1: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d)\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj])); 763 } 764 } 765 if (!isOK && print) PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1)); 766 } 767 } 768 PetscReal e1_max_w = (ghost1_idx == -1 ? PetscRealPart(lid_max_ew[lid0]) : PetscRealPart(lghost_max_ew[ghost1_idx])); 769 if (print) 770 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "\t[%d] MATCHING (%d %d) e1 max weight = %e, e1 wight diff %e, %s. isOK = %d\n", rank, (int)gid0, (int)gid1, (double)e1_max_w, (double)(e1_max_w - e->weight), ghost1_idx == -1 ? "local" : "ghost", isOK)); 771 /* do it */ 772 if (isOK) { 773 if (ghost1_idx == -1) { 774 PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_SUP, "local %d is matched", (int)gid1); 775 lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */ 776 PetscCall(PetscCDMoveAppend(agg_llists, lid0, lid1)); // takes lid1's list and appends to lid0's 777 } else { 778 /* add gid1 to list of ghost deleted by me -- I need their children */ 779 PetscMPIInt proc = lghost_pe[ghost1_idx]; 780 PetscCheck(!lghost_matched[ghost1_idx], PETSC_COMM_SELF, PETSC_ERR_SUP, "ghost %d is matched", (int)lghost_gid[ghost1_idx]); 781 lghost_matched[ghost1_idx] = PETSC_TRUE; 782 PetscCall(PetscCDAppendID(ghost_deleted_list, proc, ghost1_idx)); /* cache to send messages */ 783 PetscCall(PetscCDAppendID(ghost_deleted_list, proc, lid0)); 784 } 785 lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */ 786 /* set projection */ 787 PetscCall(MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES)); 788 PetscCall(MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES)); 789 //PetscCall(PetscPrintf(PETSC_COMM_WORLD,"\t %d.%d) match active EDGE %d : (%d %d)\n",iter,sub_it, (int)nactive_edges, (int)gid0, (int)gid1)); 790 } /* matched */ 791 } /* edge loop */ 792 PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT)); 793 if (isMPI) PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew)); 794 PetscCall(VecRestoreArrayRead(locMaxEdge, &lid_max_ew)); 795 // count active for test, latter, update deleted ghosts 796 n_act_n[0] = nactive_edges; 797 if (ghost_deleted_list) PetscCall(PetscCDCount(ghost_deleted_list, &n_act_n[2])); 798 else n_act_n[2] = 0; 799 PetscCall(PetscCDCount(agg_llists, &n_act_n[1])); 800 PetscCall(MPIU_Allreduce(n_act_n, gn_act_n, 3, MPIU_INT, MPI_SUM, comm)); 801 PetscCall(PetscInfo(info_is, "[%d] %d.%d) nactive edges=%" PetscInt_FMT ", ncomm_procs=%d, nEdges=%d, %" PetscInt_FMT " deleted ghosts, N=%" PetscInt_FMT "\n", rank, iter, sub_it, gn_act_n[0], (int)ncomm_procs, (int)nEdges, gn_act_n[2], gn_act_n[1])); 802 /* deal with deleted ghost */ 803 if (isMPI) { 804 PetscCDIntNd *pos; 805 PetscInt *sbuffs1[REQ_BF_SIZE], ndel; 806 PetscInt *sbuffs2[REQ_BF_SIZE]; 807 MPI_Status status; 808 809 /* send deleted ghosts */ 810 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 811 const PetscMPIInt proc = comm_procs[proc_idx]; 812 PetscInt *sbuff, *pt, scount; 813 MPI_Request *request; 814 /* count ghosts */ 815 PetscCall(PetscCDCountAt(ghost_deleted_list, proc, &ndel)); 816 ndel /= 2; // two entries for each proc 817 scount = 2 + 2 * ndel; 818 PetscCall(PetscMalloc1(scount + request_size, &sbuff)); 819 /* save requests */ 820 sbuffs1[proc_idx] = sbuff; 821 request = (MPI_Request *)sbuff; 822 sbuff = pt = (PetscInt *)(sbuff + request_size); 823 /* write [ndel, proc, n*[gid1,gid0] */ 824 *pt++ = ndel; // number of deleted to send 825 *pt++ = rank; // proc (not used) 826 PetscCall(PetscCDGetHeadPos(ghost_deleted_list, proc, &pos)); 827 while (pos) { 828 PetscInt lid0, ghost_idx, gid1; 829 PetscCall(PetscCDIntNdGetID(pos, &ghost_idx)); 830 gid1 = lghost_gid[ghost_idx]; 831 PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos)); 832 PetscCall(PetscCDIntNdGetID(pos, &lid0)); 833 PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos)); 834 *pt++ = gid1; 835 *pt++ = lid0 + my0; // gid0 836 } 837 PetscCheck(pt - sbuff == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "sbuff-pt != scount: %d", (int)(pt - sbuff)); 838 /* MPI_Isend: tag1 [ndel, proc, n*[gid1,gid0] ] */ 839 PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag1, comm, request)); 840 PetscCall(PetscCDRemoveAllAt(ghost_deleted_list, proc)); // done with this list 841 } 842 /* receive deleted, send back partial aggregates, clear lists */ 843 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 844 PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag1, comm, &status)); 845 { 846 PetscInt *pt, *pt2, *pt3, *sbuff, tmp; 847 MPI_Request *request; 848 int rcount, scount, ndel; 849 const PetscMPIInt proc = status.MPI_SOURCE; 850 PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount)); 851 if (rcount > rbuff_sz) { 852 if (rbuff) PetscCall(PetscFree(rbuff)); 853 PetscCall(PetscMalloc1(rcount, &rbuff)); 854 rbuff_sz = rcount; 855 } 856 /* MPI_Recv: tag1 [ndel, proc, ndel*[gid1,gid0] ] */ 857 PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag1, comm, &status)); 858 /* read and count sends *[lid0, n, n*[gid] ] */ 859 pt = rbuff; 860 scount = 0; 861 ndel = *pt++; // number of deleted to recv 862 tmp = *pt++; // proc (not used) 863 while (ndel--) { 864 PetscInt gid1 = *pt++, lid1 = gid1 - my0; 865 int gh_gid0 = *pt++; // gid on other proc (not used here to count) 866 PetscCheck(lid1 >= 0 && lid1 < nloc, PETSC_COMM_SELF, PETSC_ERR_SUP, "received ghost deleted %d", (int)gid1); 867 PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "%d) received matched local gid %" PetscInt_FMT ",%d, with ghost (lid) %" PetscInt_FMT " from proc %d", sub_it, gid1, gh_gid0, tmp, proc); 868 lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */ 869 PetscCall(PetscCDCountAt(agg_llists, lid1, &tmp)); // n 870 /* PetscCheck(tmp == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "sending %d (!= 1) size aggregate. gid-0 %d, from %d (gid-1 %d)", (int)tmp, (int) gid, proc, gh_gid0); */ 871 scount += tmp + 2; // lid0, n, n*[gid] 872 } 873 PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "receive buffer size != num read: %d; rcount: %d", (int)(pt - rbuff), rcount); 874 /* send tag2: *[gid0, n, n*[gid] ] */ 875 PetscCall(PetscMalloc1(scount + request_size, &sbuff)); 876 sbuffs2[proc_idx] = sbuff; /* cache request */ 877 request = (MPI_Request *)sbuff; 878 pt2 = sbuff = sbuff + request_size; 879 // read again: n, proc, n*[gid1,gid0] 880 pt = rbuff; 881 ndel = *pt++; 882 tmp = *pt++; // proc (not used) 883 while (ndel--) { 884 PetscInt gid1 = *pt++, lid1 = gid1 - my0, gh_gid0 = *pt++; 885 /* write [gid0, aggSz, aggSz[gid] ] */ 886 *pt2++ = gh_gid0; 887 pt3 = pt2++; /* save pointer for later */ 888 PetscCall(PetscCDGetHeadPos(agg_llists, lid1, &pos)); 889 while (pos) { 890 PetscInt gid; 891 PetscCall(PetscCDIntNdGetID(pos, &gid)); 892 PetscCall(PetscCDGetNextPos(agg_llists, lid1, &pos)); 893 *pt2++ = gid; 894 } 895 *pt3 = (pt2 - pt3) - 1; 896 /* clear list */ 897 PetscCall(PetscCDRemoveAllAt(agg_llists, lid1)); 898 } 899 PetscCheck((pt2 - sbuff) == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer size != num write: %d %d", (int)(pt2 - sbuff), (int)scount); 900 /* MPI_Isend: requested data tag2 *[lid0, n, n*[gid1] ] */ 901 PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag2, comm, request)); 902 } 903 } // proc_idx 904 /* receive tag2 *[gid0, n, n*[gid] ] */ 905 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 906 PetscMPIInt proc; 907 PetscInt *pt; 908 int rcount; 909 PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag2, comm, &status)); 910 PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount)); 911 if (rcount > rbuff_sz) { 912 if (rbuff) PetscCall(PetscFree(rbuff)); 913 PetscCall(PetscMalloc1(rcount, &rbuff)); 914 rbuff_sz = rcount; 915 } 916 proc = status.MPI_SOURCE; 917 /* MPI_Recv: tag1 [n, proc, n*[gid1,lid0] ] */ 918 PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag2, comm, &status)); 919 pt = rbuff; 920 while (pt - rbuff < rcount) { 921 PetscInt gid0 = *pt++, n = *pt++; 922 while (n--) { 923 PetscInt gid1 = *pt++; 924 PetscCall(PetscCDAppendID(agg_llists, gid0 - my0, gid1)); 925 } 926 } 927 PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "recv buffer size != num read: %d %d", (int)(pt - rbuff), (int)rcount); 928 } 929 /* wait for tag1 isends */ 930 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 931 MPI_Request *request; 932 request = (MPI_Request *)sbuffs1[proc_idx]; 933 PetscCallMPI(MPI_Wait(request, &status)); 934 PetscCall(PetscFree(sbuffs1[proc_idx])); 935 } 936 /* wait for tag2 isends */ 937 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 938 MPI_Request *request = (MPI_Request *)sbuffs2[proc_idx]; 939 PetscCallMPI(MPI_Wait(request, &status)); 940 PetscCall(PetscFree(sbuffs2[proc_idx])); 941 } 942 } /* MPI */ 943 /* set 'lghost_matched' - use locMaxEdge, ghostMaxEdge (recomputed next) */ 944 if (isMPI) { 945 const PetscScalar *sbuff; 946 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 947 PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0; 948 PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 949 } 950 PetscCall(VecAssemblyBegin(locMaxEdge)); 951 PetscCall(VecAssemblyEnd(locMaxEdge)); 952 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 953 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 954 PetscCall(VecGetArrayRead(ghostMaxEdge, &sbuff)); 955 for (int kk = 0; kk < num_ghosts; kk++) { lghost_matched[kk] = (PetscBool)(PetscRealPart(sbuff[kk]) != 0.0); } 956 PetscCall(VecRestoreArrayRead(ghostMaxEdge, &sbuff)); 957 } 958 /* compute 'locMaxEdge' inside sub iteration b/c max weight can drop as neighbors are matched */ 959 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 960 PetscReal max_e = 0., tt; 961 PetscScalar vval; 962 const PetscInt lid = kk; 963 int max_pe = rank, pe, n; 964 ii = matA->i; 965 n = ii[lid + 1] - ii[lid]; 966 aj = matA->j + ii[lid]; 967 ap = matA->a + ii[lid]; 968 for (int jj = 0; jj < n; jj++) { 969 PetscInt lidj = aj[jj]; 970 if (lid_matched[lidj]) continue; /* this is new - can change local max */ 971 if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]); 972 } 973 if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ 974 ii = matB->compressedrow.i; 975 n = ii[ix + 1] - ii[ix]; 976 ap = matB->a + ii[ix]; 977 aj = matB->j + ii[ix]; 978 for (int jj = 0; jj < n; jj++) { 979 PetscInt lidj = aj[jj]; 980 if (lghost_matched[lidj]) continue; 981 if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt; 982 } 983 } 984 vval = (PetscScalar)max_e; 985 PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 986 // max PE with max edge 987 if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ 988 ii = matB->compressedrow.i; 989 n = ii[ix + 1] - ii[ix]; 990 ap = matB->a + ii[ix]; 991 aj = matB->j + ii[ix]; 992 for (int jj = 0; jj < n; jj++) { 993 PetscInt lidj = aj[jj]; 994 if (lghost_matched[lidj]) continue; 995 if ((pe = lghost_pe[aj[jj]]) > max_pe && PetscRealPart(ap[jj]) >= max_e - MY_MEPS) { max_pe = pe; } 996 } 997 } 998 vval = (PetscScalar)max_pe; 999 PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES)); 1000 } 1001 PetscCall(VecAssemblyBegin(locMaxEdge)); 1002 PetscCall(VecAssemblyEnd(locMaxEdge)); 1003 PetscCall(VecAssemblyBegin(locMaxPE)); 1004 PetscCall(VecAssemblyEnd(locMaxPE)); 1005 /* compute 'lghost_max_ew' and 'lghost_max_pe' to get ready for next iteration*/ 1006 if (isMPI) { 1007 const PetscScalar *buf; 1008 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 1009 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 1010 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 1011 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 1012 PetscCall(VecGetArrayRead(ghostMaxPE, &buf)); 1013 for (int kk = 0; kk < num_ghosts; kk++) { 1014 lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now 1015 } 1016 PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf)); 1017 } 1018 // if no active edges, stop 1019 if (gn_act_n[0] < 1) break; 1020 // inc and check (self stopping iteration 1021 sub_it++; 1022 PetscCheck(sub_it < n_sub_its, PETSC_COMM_SELF, PETSC_ERR_SUP, "failed to finish HEM step %d/%d", sub_it + 1, n_sub_its); 1023 } /* sub_it loop */ 1024 /* clean up iteration */ 1025 PetscCall(PetscFree(Edges)); 1026 if (mpimat) { // can be hoisted 1027 PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew)); 1028 PetscCall(VecDestroy(&ghostMaxEdge)); 1029 PetscCall(VecDestroy(&ghostMaxPE)); 1030 PetscCall(PetscFree(lghost_pe)); 1031 PetscCall(PetscFree(lghost_gid)); 1032 PetscCall(PetscFree(lghost_matched)); 1033 PetscCall(PetscFree(lghost_max_pe)); 1034 } 1035 PetscCall(VecDestroy(&locMaxEdge)); 1036 PetscCall(VecDestroy(&locMaxPE)); 1037 /* create next graph */ 1038 { 1039 Vec diag; 1040 /* add identity for unmatched vertices so they stay alive */ 1041 for (PetscInt kk = 0, gid1, gid = my0; kk < nloc; kk++, gid++) { 1042 if (!lid_matched[kk]) { 1043 const PetscInt lid = kk; 1044 PetscCDIntNd *pos; 1045 PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos)); 1046 PetscCheck(pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "empty list in singleton: %d", (int)gid); 1047 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1048 PetscCheck(gid1 == gid, PETSC_COMM_SELF, PETSC_ERR_PLIB, "first in list (%d) in singleton not %d", (int)gid1, (int)gid); 1049 PetscCall(MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES)); 1050 } 1051 } 1052 PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY)); 1053 PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY)); 1054 1055 /* project to make new graph with collapsed edges */ 1056 PetscCall(MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat)); 1057 PetscCall(MatDestroy(&P)); 1058 PetscCall(MatDestroy(&cMat)); 1059 cMat = tMat; 1060 PetscCall(MatCreateVecs(cMat, &diag, NULL)); 1061 PetscCall(MatGetDiagonal(cMat, diag)); 1062 PetscCall(VecReciprocal(diag)); 1063 PetscCall(VecSqrtAbs(diag)); 1064 PetscCall(MatDiagonalScale(cMat, diag, diag)); 1065 PetscCall(VecDestroy(&diag)); 1066 } 1067 } /* coarsen iterator */ 1068 1069 /* make fake matrix with Mat->B only for smoothed agg QR. Need this if we make an aux graph (ie, PtAP) with k > 1 */ 1070 if (size > 1) { 1071 Mat mat; 1072 PetscCDIntNd *pos; 1073 PetscInt NN, MM, jj = 0, mxsz = 0; 1074 1075 for (int kk = 0; kk < nloc; kk++) { 1076 PetscCall(PetscCDCountAt(agg_llists, kk, &jj)); 1077 if (jj > mxsz) mxsz = jj; 1078 } 1079 PetscCall(MatGetSize(a_Gmat, &MM, &NN)); 1080 if (mxsz > MM - nloc) mxsz = MM - nloc; 1081 /* matrix of ghost adj for square graph */ 1082 PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat)); 1083 for (PetscInt lid = 0, gid = my0; lid < nloc; lid++, gid++) { 1084 PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos)); 1085 while (pos) { 1086 PetscInt gid1; 1087 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1088 PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos)); 1089 if (gid1 < my0 || gid1 >= my0 + nloc) PetscCall(MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES)); 1090 } 1091 } 1092 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1093 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1094 PetscCall(PetscCDSetMat(agg_llists, mat)); 1095 PetscCall(PetscCDDestroy(ghost_deleted_list)); 1096 if (rbuff_sz) PetscCall(PetscFree(rbuff)); // always true 1097 } 1098 // move BCs into some node 1099 if (bc_list) { 1100 PetscCDIntNd *pos; 1101 PetscCall(PetscCDGetHeadPos(bc_list, 0, &pos)); 1102 while (pos) { 1103 PetscInt gid1; 1104 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1105 PetscCall(PetscCDGetNextPos(bc_list, 0, &pos)); 1106 PetscCall(PetscCDAppendID(agg_llists, bc_agg, gid1)); 1107 } 1108 PetscCall(PetscCDRemoveAllAt(bc_list, 0)); 1109 PetscCall(PetscCDDestroy(bc_list)); 1110 } 1111 { 1112 // check sizes -- all vertices must get in graph 1113 PetscInt sz, globalsz, MM; 1114 PetscCall(MatGetSize(a_Gmat, &MM, NULL)); 1115 PetscCall(PetscCDCount(agg_llists, &sz)); 1116 PetscCall(MPIU_Allreduce(&sz, &globalsz, 1, MPIU_INT, MPI_SUM, comm)); 1117 PetscCheck(MM == globalsz, comm, PETSC_ERR_SUP, "lost %d equations ?", (int)(MM - globalsz)); 1118 } 1119 // cleanup 1120 PetscCall(MatDestroy(&cMat)); 1121 PetscCall(PetscFree(lid_cprowID)); 1122 PetscCall(PetscFree(lid_max_pe)); 1123 PetscCall(PetscFree(lid_matched)); 1124 PetscCall(ISDestroy(&info_is)); 1125 1126 PetscFunctionReturn(PETSC_SUCCESS); 1127 } 1128 1129 /* 1130 HEM coarsen, simple greedy. 1131 */ 1132 static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse) 1133 { 1134 Mat mat = coarse->graph; 1135 1136 PetscFunctionBegin; 1137 PetscCall(MatCoarsenApply_HEM_private(mat, coarse->max_it, coarse->threshold, &coarse->agg_lists)); 1138 1139 PetscFunctionReturn(PETSC_SUCCESS); 1140 } 1141 1142 static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer) 1143 { 1144 PetscMPIInt rank; 1145 PetscBool iascii; 1146 1147 PetscFunctionBegin; 1148 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank)); 1149 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1150 if (iascii) { 1151 PetscCDIntNd *pos, *pos2; 1152 PetscCall(PetscViewerASCIIPrintf(viewer, "%d matching steps with threshold = %g\n", (int)coarse->max_it, (double)coarse->threshold)); 1153 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1154 for (PetscInt kk = 0; kk < coarse->agg_lists->size; kk++) { 1155 PetscCall(PetscCDGetHeadPos(coarse->agg_lists, kk, &pos)); 1156 if ((pos2 = pos)) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "selected local %d: ", (int)kk)); 1157 while (pos) { 1158 PetscInt gid1; 1159 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1160 PetscCall(PetscCDGetNextPos(coarse->agg_lists, kk, &pos)); 1161 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " %d ", (int)gid1)); 1162 } 1163 if (pos2) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1164 } 1165 PetscCall(PetscViewerFlush(viewer)); 1166 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1167 } 1168 PetscFunctionReturn(PETSC_SUCCESS); 1169 } 1170 1171 /* 1172 MatCoarsenCreate_HEM - A coarsener that uses HEM a simple greedy coarsener 1173 */ 1174 PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse) 1175 { 1176 PetscFunctionBegin; 1177 coarse->ops->apply = MatCoarsenApply_HEM; 1178 coarse->ops->view = MatCoarsenView_HEM; 1179 coarse->max_it = 4; 1180 PetscFunctionReturn(PETSC_SUCCESS); 1181 } 1182