1 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> 4 #include <petscdm.h> 5 6 /* linked list methods 7 * 8 * PetscCDCreate 9 */ 10 PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out) 11 { 12 PetscCoarsenData *ail; 13 14 PetscFunctionBegin; 15 /* allocate pool, partially */ 16 PetscCall(PetscNew(&ail)); 17 *a_out = ail; 18 ail->pool_list.next = NULL; 19 ail->pool_list.array = NULL; 20 ail->chk_sz = 0; 21 /* allocate array */ 22 ail->size = a_size; 23 PetscCall(PetscCalloc1(a_size, &ail->array)); 24 ail->extra_nodes = NULL; 25 ail->mat = NULL; 26 PetscFunctionReturn(PETSC_SUCCESS); 27 } 28 29 /* PetscCDDestroy 30 */ 31 PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail) 32 { 33 PetscCDArrNd *n = &ail->pool_list; 34 35 PetscFunctionBegin; 36 n = n->next; 37 while (n) { 38 PetscCDArrNd *lstn = n; 39 n = n->next; 40 PetscCall(PetscFree(lstn)); 41 } 42 if (ail->pool_list.array) PetscCall(PetscFree(ail->pool_list.array)); 43 PetscCall(PetscFree(ail->array)); 44 if (ail->mat) PetscCall(MatDestroy(&ail->mat)); 45 /* delete this (+agg+pool array) */ 46 PetscCall(PetscFree(ail)); 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 /* PetscCDSetChunkSize 51 */ 52 PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *ail, PetscInt a_sz) 53 { 54 PetscFunctionBegin; 55 ail->chk_sz = a_sz; 56 PetscFunctionReturn(PETSC_SUCCESS); 57 } 58 59 /* PetscCDGetNewNode 60 */ 61 static PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id) 62 { 63 PetscFunctionBegin; 64 *a_out = NULL; /* squelch -Wmaybe-uninitialized */ 65 if (ail->extra_nodes) { 66 PetscCDIntNd *node = ail->extra_nodes; 67 ail->extra_nodes = node->next; 68 node->gid = a_id; 69 node->next = NULL; 70 *a_out = node; 71 } else { 72 if (!ail->pool_list.array) { 73 if (!ail->chk_sz) ail->chk_sz = 10; /* use a chuck size of ail->size? */ 74 PetscCall(PetscMalloc1(ail->chk_sz, &ail->pool_list.array)); 75 ail->new_node = ail->pool_list.array; 76 ail->new_left = ail->chk_sz; 77 ail->new_node->next = NULL; 78 } else if (!ail->new_left) { 79 PetscCDArrNd *node; 80 PetscCall(PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node)); 81 node->array = (PetscCDIntNd *)(node + 1); 82 node->next = ail->pool_list.next; 83 ail->pool_list.next = node; 84 ail->new_left = ail->chk_sz; 85 ail->new_node = node->array; 86 } 87 ail->new_node->gid = a_id; 88 ail->new_node->next = NULL; 89 *a_out = ail->new_node++; 90 ail->new_left--; 91 } 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 /* PetscCDIntNdSetID 96 */ 97 PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id) 98 { 99 PetscFunctionBegin; 100 a_this->gid = a_id; 101 PetscFunctionReturn(PETSC_SUCCESS); 102 } 103 104 /* PetscCDIntNdGetID 105 */ 106 PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid) 107 { 108 PetscFunctionBegin; 109 *a_gid = a_this->gid; 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 113 /* PetscCDGetHeadPos 114 */ 115 PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos) 116 { 117 PetscFunctionBegin; 118 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_idx >= ail->size: a_idx=%" PetscInt_FMT ".", a_idx); 119 *pos = ail->array[a_idx]; 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 /* PetscCDGetNextPos 124 */ 125 PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos) 126 { 127 PetscFunctionBegin; 128 PetscCheck((*pos), PETSC_COMM_SELF, PETSC_ERR_PLIB, "NULL input position."); 129 *pos = (*pos)->next; 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 /* PetscCDAppendID 134 */ 135 PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id) 136 { 137 PetscCDIntNd *n, *n2; 138 139 PetscFunctionBegin; 140 PetscCall(PetscCDGetNewNode(ail, &n, a_id)); 141 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 142 if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n; 143 else { 144 do { 145 if (!n2->next) { 146 n2->next = n; 147 PetscCheck(!n->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n should not have a next"); 148 break; 149 } 150 n2 = n2->next; 151 } while (n2); 152 PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null"); 153 } 154 PetscFunctionReturn(PETSC_SUCCESS); 155 } 156 157 /* PetscCDAppendNode 158 */ 159 PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n) 160 { 161 PetscCDIntNd *n2; 162 163 PetscFunctionBegin; 164 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 165 if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n; 166 else { 167 do { 168 if (!n2->next) { 169 n2->next = a_n; 170 a_n->next = NULL; 171 break; 172 } 173 n2 = n2->next; 174 } while (n2); 175 PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null"); 176 } 177 PetscFunctionReturn(PETSC_SUCCESS); 178 } 179 180 /* PetscCDRemoveNextNode: a_last->next, this exposes single linked list structure to API (not used) 181 */ 182 PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last) 183 { 184 PetscCDIntNd *del; 185 186 PetscFunctionBegin; 187 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 188 PetscCheck(a_last->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_last should have a next"); 189 del = a_last->next; 190 a_last->next = del->next; 191 /* del->next = NULL; -- this still used in a iterator so keep it intact -- need to fix this with a double linked list */ 192 /* could reuse n2 but PetscCDAppendNode sometimes uses it */ 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 /* PetscCDPrint 197 */ 198 PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, PetscInt my0, MPI_Comm comm) 199 { 200 PetscCDIntNd *n, *n2; 201 PetscInt ii; 202 203 PetscFunctionBegin; 204 for (ii = 0; ii < ail->size; ii++) { 205 n2 = n = ail->array[ii]; 206 if (n) PetscCall(PetscSynchronizedPrintf(comm, "list %" PetscInt_FMT ":", ii + my0)); 207 while (n) { 208 PetscCall(PetscSynchronizedPrintf(comm, " %" PetscInt_FMT, n->gid)); 209 n = n->next; 210 } 211 if (n2) PetscCall(PetscSynchronizedPrintf(comm, "\n")); 212 } 213 PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT)); 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 /* PetscCDMoveAppend - take list in a_srcidx and appends to destidx 218 */ 219 PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx) 220 { 221 PetscCDIntNd *n; 222 223 PetscFunctionBegin; 224 PetscCheck(a_srcidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_srcidx); 225 PetscCheck(a_destidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_destidx); 226 PetscCheck(a_destidx != a_srcidx, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_destidx==a_srcidx %" PetscInt_FMT ".", a_destidx); 227 n = ail->array[a_destidx]; 228 if (!n) ail->array[a_destidx] = ail->array[a_srcidx]; 229 else { 230 do { 231 if (!n->next) { 232 n->next = ail->array[a_srcidx]; // append 233 break; 234 } 235 n = n->next; 236 } while (1); 237 } 238 ail->array[a_srcidx] = NULL; // empty 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 /* PetscCDRemoveAllAt - empty one list and move data to cache 243 */ 244 PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *ail, PetscInt a_idx) 245 { 246 PetscCDIntNd *rem, *n1; 247 248 PetscFunctionBegin; 249 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 250 rem = ail->array[a_idx]; 251 ail->array[a_idx] = NULL; 252 if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem; 253 else { 254 while (n1->next) n1 = n1->next; 255 n1->next = rem; 256 } 257 PetscFunctionReturn(PETSC_SUCCESS); 258 } 259 260 /* PetscCDCountAt 261 */ 262 PetscErrorCode PetscCDCountAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz) 263 { 264 PetscCDIntNd *n1; 265 PetscInt sz = 0; 266 267 PetscFunctionBegin; 268 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 269 n1 = ail->array[a_idx]; 270 while (n1) { 271 n1 = n1->next; 272 sz++; 273 } 274 *a_sz = sz; 275 PetscFunctionReturn(PETSC_SUCCESS); 276 } 277 278 /* PetscCDSize 279 */ 280 PetscErrorCode PetscCDCount(const PetscCoarsenData *ail, PetscInt *a_sz) 281 { 282 PetscInt sz = 0; 283 284 PetscFunctionBegin; 285 for (int ii = 0; ii < ail->size; ii++) { 286 PetscCDIntNd *n1 = ail->array[ii]; 287 while (n1) { 288 n1 = n1->next; 289 sz++; 290 } 291 } 292 *a_sz = sz; 293 PetscFunctionReturn(PETSC_SUCCESS); 294 } 295 296 /* PetscCDIsEmptyAt - Is the list empty? (not used) 297 */ 298 PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e) 299 { 300 PetscFunctionBegin; 301 PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx); 302 *a_e = (PetscBool)(ail->array[a_idx] == NULL); 303 PetscFunctionReturn(PETSC_SUCCESS); 304 } 305 306 /* PetscCDGetNonemptyIS - used for C-F methods 307 */ 308 PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *ail, IS *a_mis) 309 { 310 PetscCDIntNd *n; 311 PetscInt ii, kk; 312 PetscInt *permute; 313 314 PetscFunctionBegin; 315 for (ii = kk = 0; ii < ail->size; ii++) { 316 n = ail->array[ii]; 317 if (n) kk++; 318 } 319 PetscCall(PetscMalloc1(kk, &permute)); 320 for (ii = kk = 0; ii < ail->size; ii++) { 321 n = ail->array[ii]; 322 if (n) permute[kk++] = ii; 323 } 324 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis)); 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 /* PetscCDGetMat 329 */ 330 PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat) 331 { 332 PetscFunctionBegin; 333 *a_mat = ail->mat; 334 ail->mat = NULL; // give it up 335 PetscFunctionReturn(PETSC_SUCCESS); 336 } 337 338 /* PetscCDSetMat 339 */ 340 PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat) 341 { 342 PetscFunctionBegin; 343 if (ail->mat) { 344 PetscCall(MatDestroy(&ail->mat)); //should not happen 345 } 346 ail->mat = a_mat; 347 PetscFunctionReturn(PETSC_SUCCESS); 348 } 349 350 /* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers 351 */ 352 PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, PetscInt *a_sz, IS **a_local_is) 353 { 354 PetscCDIntNd *n; 355 PetscInt lsz, ii, kk, *idxs, jj, gid; 356 IS *is_loc = NULL; 357 358 PetscFunctionBegin; 359 for (ii = kk = 0; ii < ail->size; ii++) { 360 if (ail->array[ii]) kk++; 361 } 362 *a_sz = kk; 363 PetscCall(PetscMalloc1(kk, &is_loc)); 364 for (ii = kk = 0; ii < ail->size; ii++) { 365 for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */ 366 ; 367 if (lsz) { 368 PetscCall(PetscMalloc1(a_bs * lsz, &idxs)); 369 for (lsz = 0, n = ail->array[ii]; n; n = n->next) { 370 PetscCall(PetscCDIntNdGetID(n, &gid)); 371 for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj; 372 } 373 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++])); 374 } 375 } 376 PetscCheck(*a_sz == kk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "*a_sz %" PetscInt_FMT " != kk %" PetscInt_FMT, *a_sz, kk); 377 *a_local_is = is_loc; /* out */ 378 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 /* edge for priority queue */ 383 typedef struct edge_tag { 384 PetscReal weight; 385 PetscInt lid0, gid1, ghost1_idx; 386 } Edge; 387 388 #define MY_MEPS (PETSC_MACHINE_EPSILON * 100) 389 static int gamg_hem_compare(const void *a, const void *b) 390 { 391 PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight; 392 return (va <= vb - MY_MEPS) ? 1 : (va > vb + MY_MEPS) ? -1 : 0; /* 0 for equal */ 393 } 394 /* static int gamg_hem_compare3(const void *a, const void *b, void *ctx) */ 395 /* { */ 396 /* return gamg_hem_compare(a, b); */ 397 /* } */ 398 399 /* 400 MatCoarsenApply_HEM_private - parallel heavy edge matching 401 402 Input Parameter: 403 . a_Gmat - global matrix of the graph 404 . n_iter - number of matching iterations 405 . threshold - threshold for filtering graphs 406 407 Output Parameter: 408 . a_locals_llist - array of list of local nodes rooted at local node 409 */ 410 static PetscErrorCode MatCoarsenApply_HEM_private(Mat a_Gmat, const PetscInt n_iter, const PetscReal threshold, PetscCoarsenData **a_locals_llist) 411 { 412 #define REQ_BF_SIZE 100 413 PetscBool isMPI; 414 MPI_Comm comm; 415 PetscInt ix, *ii, *aj, Iend, my0, ncomm_procs, bc_agg = -1, *rbuff = NULL, rbuff_sz = 0; 416 PetscMPIInt rank, size, comm_procs[REQ_BF_SIZE], *lid_max_pe; 417 const PetscInt nloc = a_Gmat->rmap->n, request_size = PetscCeilReal((PetscReal)sizeof(MPI_Request) / (PetscReal)sizeof(PetscInt)); 418 PetscInt *lid_cprowID; 419 PetscBool *lid_matched; 420 Mat_SeqAIJ *matA, *matB = NULL; 421 Mat_MPIAIJ *mpimat = NULL; 422 PetscScalar one = 1.; 423 PetscCoarsenData *agg_llists = NULL, *ghost_deleted_list = NULL, *bc_list = NULL; 424 Mat cMat, tMat, P; 425 MatScalar *ap; 426 IS info_is; 427 428 PetscFunctionBegin; 429 PetscCall(PetscObjectGetComm((PetscObject)a_Gmat, &comm)); 430 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 431 PetscCallMPI(MPI_Comm_size(comm, &size)); 432 PetscCall(MatGetOwnershipRange(a_Gmat, &my0, &Iend)); 433 PetscCall(ISCreate(comm, &info_is)); 434 PetscCall(PetscInfo(info_is, "%" PetscInt_FMT " iterations of HEM.\n", n_iter)); 435 436 PetscCall(PetscMalloc1(nloc, &lid_matched)); 437 PetscCall(PetscMalloc1(nloc, &lid_cprowID)); 438 PetscCall(PetscMalloc1(nloc, &lid_max_pe)); 439 440 PetscCall(PetscCDCreate(nloc, &agg_llists)); 441 PetscCall(PetscCDSetChunkSize(agg_llists, nloc + 1)); 442 *a_locals_llist = agg_llists; 443 /* add self to all lists */ 444 for (int kk = 0; kk < nloc; kk++) PetscCall(PetscCDAppendID(agg_llists, kk, my0 + kk)); 445 /* make a copy of the graph, this gets destroyed in iterates */ 446 PetscCall(MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat)); 447 PetscCall(MatConvert(cMat, MATAIJ, MAT_INPLACE_MATRIX, &cMat)); 448 isMPI = (PetscBool)(size > 1); 449 if (isMPI) { 450 /* list of deleted ghosts, should compress this */ 451 PetscCall(PetscCDCreate(size, &ghost_deleted_list)); 452 PetscCall(PetscCDSetChunkSize(ghost_deleted_list, 100)); 453 } 454 for (int iter = 0; iter < n_iter; iter++) { 455 const PetscScalar *lghost_max_ew, *lid_max_ew; 456 PetscBool *lghost_matched; 457 PetscMPIInt *lghost_pe, *lghost_max_pe; 458 Vec locMaxEdge, ghostMaxEdge, ghostMaxPE, locMaxPE; 459 PetscInt *lghost_gid, nEdges, nEdges0, num_ghosts = 0; 460 Edge *Edges; 461 const int n_sub_its = 1000; // in case of a bug, stop at some point 462 /* get submatrices of cMat */ 463 for (int kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1; 464 if (isMPI) { 465 mpimat = (Mat_MPIAIJ *)cMat->data; 466 matA = (Mat_SeqAIJ *)mpimat->A->data; 467 matB = (Mat_SeqAIJ *)mpimat->B->data; 468 if (!matB->compressedrow.use) { 469 /* force construction of compressed row data structure since code below requires it */ 470 PetscCall(MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0)); 471 } 472 /* set index into compressed row 'lid_cprowID' */ 473 for (ix = 0; ix < matB->compressedrow.nrows; ix++) { 474 PetscInt *ridx = matB->compressedrow.rindex, lid = ridx[ix]; 475 if (ridx[ix] >= 0) lid_cprowID[lid] = ix; 476 else PetscCall(PetscPrintf(PETSC_COMM_SELF, "Missing slot in cprow? %d/%d ", (int)ix, (int)matB->compressedrow.nrows)); 477 } 478 } else { 479 matA = (Mat_SeqAIJ *)cMat->data; 480 } 481 /* set matched flags: true for empty list */ 482 for (int kk = 0; kk < nloc; kk++) { 483 PetscCall(PetscCDCountAt(agg_llists, kk, &ix)); 484 if (ix > 0) lid_matched[kk] = PETSC_FALSE; 485 else lid_matched[kk] = PETSC_TRUE; // call deleted gids as matched 486 } 487 /* max edge and pe vecs */ 488 PetscCall(MatCreateVecs(cMat, &locMaxEdge, NULL)); 489 PetscCall(MatCreateVecs(cMat, &locMaxPE, NULL)); 490 /* get 'lghost_pe' & 'lghost_gid' & init. 'lghost_matched' using 'mpimat->lvec' */ 491 if (isMPI) { 492 Vec vec; 493 PetscScalar vval; 494 const PetscScalar *buf; 495 PetscCall(MatCreateVecs(cMat, &vec, NULL)); 496 PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts)); 497 /* lghost_matched */ 498 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 499 PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0; 500 PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); 501 } 502 PetscCall(VecAssemblyBegin(vec)); 503 PetscCall(VecAssemblyEnd(vec)); 504 PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 505 PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 506 PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */ 507 PetscCall(PetscMalloc1(num_ghosts, &lghost_matched)); 508 for (int kk = 0; kk < num_ghosts; kk++) { 509 lghost_matched[kk] = (PetscBool)(PetscRealPart(buf[kk]) != 0); // the proc of the ghost for now 510 } 511 PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf)); 512 /* lghost_pe */ 513 vval = (PetscScalar)(rank); 514 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 515 PetscCall(VecAssemblyBegin(vec)); 516 PetscCall(VecAssemblyEnd(vec)); 517 PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 518 PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 519 PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */ 520 PetscCall(PetscMalloc1(num_ghosts, &lghost_pe)); 521 for (int kk = 0; kk < num_ghosts; kk++) lghost_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the proc of the ghost for now 522 PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf)); 523 /* lghost_gid */ 524 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 525 vval = (PetscScalar)(gid); 526 PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 527 } 528 PetscCall(VecAssemblyBegin(vec)); 529 PetscCall(VecAssemblyEnd(vec)); 530 PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 531 PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 532 PetscCall(VecDestroy(&vec)); 533 PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'lghost_gid' */ 534 PetscCall(PetscMalloc1(num_ghosts, &lghost_gid)); 535 for (int kk = 0; kk < num_ghosts; kk++) lghost_gid[kk] = (PetscInt)PetscRealPart(buf[kk]); 536 PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf)); 537 } 538 // get 'comm_procs' (could hoist) 539 for (int kk = 0; kk < REQ_BF_SIZE; kk++) comm_procs[kk] = -1; 540 for (ix = 0, ncomm_procs = 0; ix < num_ghosts; ix++) { 541 PetscMPIInt proc = lghost_pe[ix], idx = -1; 542 for (int k = 0; k < ncomm_procs && idx == -1; k++) 543 if (comm_procs[k] == proc) idx = k; 544 if (idx == -1) { comm_procs[ncomm_procs++] = proc; } 545 PetscCheck(ncomm_procs != REQ_BF_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Receive request array too small: %d", (int)ncomm_procs); 546 } 547 /* count edges, compute initial 'locMaxEdge', 'locMaxPE' */ 548 nEdges0 = 0; 549 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 550 PetscReal max_e = 0., tt; 551 PetscScalar vval; 552 PetscInt lid = kk, max_pe = rank, pe, n; 553 ii = matA->i; 554 n = ii[lid + 1] - ii[lid]; 555 aj = matA->j + ii[lid]; 556 ap = matA->a + ii[lid]; 557 for (int jj = 0; jj < n; jj++) { 558 PetscInt lidj = aj[jj]; 559 if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) { 560 if (tt > max_e) max_e = tt; 561 if (lidj > lid) nEdges0++; 562 } 563 } 564 if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ 565 ii = matB->compressedrow.i; 566 n = ii[ix + 1] - ii[ix]; 567 ap = matB->a + ii[ix]; 568 aj = matB->j + ii[ix]; 569 for (int jj = 0; jj < n; jj++) { 570 if ((tt = PetscRealPart(ap[jj])) > threshold) { 571 if (tt > max_e) max_e = tt; 572 nEdges0++; 573 if ((pe = lghost_pe[aj[jj]]) > max_pe) max_pe = pe; 574 } 575 } 576 } 577 vval = max_e; 578 PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); 579 vval = (PetscScalar)max_pe; 580 PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES)); 581 if (iter == 0 && max_e <= MY_MEPS) { // add BCs to fake aggregate 582 lid_matched[lid] = PETSC_TRUE; 583 if (bc_agg == -1) { 584 bc_agg = lid; 585 PetscCall(PetscCDCreate(1, &bc_list)); 586 } 587 PetscCall(PetscCDRemoveAllAt(agg_llists, lid)); 588 PetscCall(PetscCDAppendID(bc_list, 0, my0 + lid)); 589 } 590 } 591 PetscCall(VecAssemblyBegin(locMaxEdge)); 592 PetscCall(VecAssemblyEnd(locMaxEdge)); 593 PetscCall(VecAssemblyBegin(locMaxPE)); 594 PetscCall(VecAssemblyEnd(locMaxPE)); 595 /* make 'ghostMaxEdge_max_ew', 'lghost_max_pe' */ 596 if (mpimat) { 597 const PetscScalar *buf; 598 PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxEdge)); 599 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 600 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 601 602 PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxPE)); 603 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 604 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 605 PetscCall(VecGetArrayRead(ghostMaxPE, &buf)); 606 PetscCall(PetscMalloc1(num_ghosts, &lghost_max_pe)); 607 for (int kk = 0; kk < num_ghosts; kk++) lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now 608 PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf)); 609 } 610 { // make lid_max_pe 611 const PetscScalar *buf; 612 PetscCall(VecGetArrayRead(locMaxPE, &buf)); 613 for (int kk = 0; kk < nloc; kk++) lid_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now 614 PetscCall(VecRestoreArrayRead(locMaxPE, &buf)); 615 } 616 /* setup sorted list of edges, and make 'Edges' */ 617 PetscCall(PetscMalloc1(nEdges0, &Edges)); 618 nEdges = 0; 619 for (int kk = 0, n; kk < nloc; kk++) { 620 const PetscInt lid = kk; 621 PetscReal tt; 622 ii = matA->i; 623 n = ii[lid + 1] - ii[lid]; 624 aj = matA->j + ii[lid]; 625 ap = matA->a + ii[lid]; 626 for (int jj = 0; jj < n; jj++) { 627 PetscInt lidj = aj[jj]; 628 if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) { 629 if (lidj > lid) { 630 Edges[nEdges].lid0 = lid; 631 Edges[nEdges].gid1 = lidj + my0; 632 Edges[nEdges].ghost1_idx = -1; 633 Edges[nEdges].weight = tt; 634 nEdges++; 635 } 636 } 637 } 638 if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbor */ 639 ii = matB->compressedrow.i; 640 n = ii[ix + 1] - ii[ix]; 641 ap = matB->a + ii[ix]; 642 aj = matB->j + ii[ix]; 643 for (int jj = 0; jj < n; jj++) { 644 if ((tt = PetscRealPart(ap[jj])) > threshold) { 645 Edges[nEdges].lid0 = lid; 646 Edges[nEdges].gid1 = lghost_gid[aj[jj]]; 647 Edges[nEdges].ghost1_idx = aj[jj]; 648 Edges[nEdges].weight = tt; 649 nEdges++; 650 } 651 } 652 } 653 } 654 PetscCheck(nEdges == nEdges0, PETSC_COMM_SELF, PETSC_ERR_SUP, "nEdges != nEdges0: %d %d", (int)nEdges0, (int)nEdges); 655 qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare); 656 657 PetscCall(PetscInfo(info_is, "[%d] start HEM iteration %d with %d edges\n", rank, iter, (int)nEdges)); 658 659 /* projection matrix */ 660 PetscCall(MatCreate(comm, &P)); 661 PetscCall(MatSetType(P, MATAIJ)); 662 PetscCall(MatSetSizes(P, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 663 PetscCall(MatMPIAIJSetPreallocation(P, 1, NULL, 1, NULL)); 664 PetscCall(MatSeqAIJSetPreallocation(P, 1, NULL)); 665 PetscCall(MatSetUp(P)); 666 /* process - communicate - process */ 667 for (int sub_it = 0, old_num_edge = 0; /* sub_it < n_sub_its */; /* sub_it++ */) { 668 PetscInt nactive_edges = 0, n_act_n[3], gn_act_n[3]; 669 PetscMPIInt tag1, tag2; 670 PetscCall(VecGetArrayRead(locMaxEdge, &lid_max_ew)); 671 if (isMPI) { 672 PetscCall(VecGetArrayRead(ghostMaxEdge, &lghost_max_ew)); 673 PetscCall(PetscCommGetNewTag(comm, &tag1)); 674 PetscCall(PetscCommGetNewTag(comm, &tag2)); 675 } 676 for (int kk = 0; kk < nEdges; kk++) { 677 /* HEM */ 678 const Edge *e = &Edges[kk]; 679 const PetscInt lid0 = e->lid0, gid1 = e->gid1, ghost1_idx = e->ghost1_idx, gid0 = lid0 + my0, lid1 = gid1 - my0; 680 PetscBool isOK = PETSC_TRUE, print = PETSC_FALSE; 681 if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] edge (%d %d), %d %d %d\n", rank, (int)gid0, (int)gid1, lid_matched[lid0], (ghost1_idx != -1 && lghost_matched[ghost1_idx]), (ghost1_idx == -1 && lid_matched[lid1]))); 682 /* skip if either vertex is matched already */ 683 if (lid_matched[lid0] || (ghost1_idx != -1 && lghost_matched[ghost1_idx]) || (ghost1_idx == -1 && lid_matched[lid1])) continue; 684 685 nactive_edges++; 686 PetscCheck(PetscRealPart(lid_max_ew[lid0]) >= e->weight - MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)e->weight, (double)PetscRealPart(lid_max_ew[lid0])); 687 if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] active edge (%d %d), diff0 = %10.4e\n", rank, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight))); 688 // smaller edge, lid_max_ew get updated - e0 689 if (PetscRealPart(lid_max_ew[lid0]) > e->weight + MY_MEPS) { 690 if (print) 691 PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] 1) e0 SKIPPING small edge %20.14e edge (%d %d), diff = %10.4e to proc %d. max = %20.14e, w = %20.14e\n", rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight), ghost1_idx != -1 ? (int)lghost_pe[ghost1_idx] : rank, (double)PetscRealPart(lid_max_ew[lid0]), 692 (double)e->weight)); 693 continue; // we are basically filter edges here 694 } 695 // e1 - local 696 if (ghost1_idx == -1) { 697 if (PetscRealPart(lid_max_ew[lid1]) > e->weight + MY_MEPS) { 698 if (print) 699 PetscCall(PetscSynchronizedPrintf(comm, "\t\t%c[%d] 2) e1 SKIPPING small local edge %20.14e edge (%d %d), diff = %10.4e\n", ghost1_idx != -1 ? '\t' : ' ', rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid1]) - e->weight))); 700 continue; // we are basically filter edges here 701 } 702 } else { // e1 - ghost 703 /* see if edge might get matched on other proc */ 704 PetscReal g_max_e1 = PetscRealPart(lghost_max_ew[ghost1_idx]); 705 if (print) 706 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] CHECK GHOST e1, edge (%d %d), E0 MAX EDGE WEIGHT = %10.4e, EDGE WEIGHT = %10.4e, diff1 = %10.4e, ghost proc %d with max pe %d on e0 and %d on e1\n", rank, (int)gid0, (int)gid1, (double)PetscRealPart(lid_max_ew[lid0]), 707 (double)e->weight, (double)(PetscRealPart(lghost_max_ew[ghost1_idx]) - e->weight), (int)lghost_pe[ghost1_idx], lid_max_pe[lid0], lghost_max_pe[ghost1_idx])); 708 if (g_max_e1 > e->weight + MY_MEPS) { 709 /* PetscCall(PetscSynchronizedPrintf(comm,"\t\t\t\t[%d] 3) ghost e1 SKIPPING small edge (%d %d), diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */ 710 continue; 711 } else if (g_max_e1 >= e->weight - MY_MEPS && lghost_pe[ghost1_idx] > rank) { // is 'lghost_max_pe[ghost1_idx] > rank' needed? 712 /* check for max_ea == to this edge and larger processor that will deal with this */ 713 if (print) 714 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] ghost e1 SKIPPING EQUAL (%d %d), diff = %10.4e from larger proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, 715 (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight), (int)lghost_pe[ghost1_idx], (int)lghost_max_pe[ghost1_idx], (double)g_max_e1, (double)e->weight)); 716 isOK = PETSC_FALSE; // this guy could delete me 717 continue; 718 } else { 719 /* PetscCall(PetscSynchronizedPrintf(comm,"\t[%d] Edge (%d %d) passes gid0 tests, diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */ 720 } 721 } 722 /* check ghost for v0 */ 723 if (isOK) { 724 PetscReal max_e, ew; 725 if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */ 726 int n; 727 ii = matB->compressedrow.i; 728 n = ii[ix + 1] - ii[ix]; 729 ap = matB->a + ii[ix]; 730 aj = matB->j + ii[ix]; 731 for (int jj = 0; jj < n && isOK; jj++) { 732 PetscInt lidj = aj[jj]; 733 if (lghost_matched[lidj]) continue; 734 ew = PetscRealPart(ap[jj]); 735 if (ew <= threshold) continue; 736 max_e = PetscRealPart(lghost_max_ew[lidj]); 737 /* check for max_e == to this edge and larger processor that will deal with this */ 738 if (ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE; 739 PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e)); 740 if (print) 741 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t[%d] e0: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d). isOK = %d, %d %d %d; ew = %e, lid0 max ew = %e, diff = %e, eps = %e\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj], isOK, (double)(ew) >= (double)(max_e - MY_MEPS), ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS, lghost_pe[lidj] > rank, (double)ew, (double)PetscRealPart(lid_max_ew[lid0]), (double)(ew - PetscRealPart(lid_max_ew[lid0])), (double)MY_MEPS)); 742 } 743 if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1)); 744 } 745 /* check local v1 */ 746 if (ghost1_idx == -1) { 747 if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */ 748 int n; 749 ii = matB->compressedrow.i; 750 n = ii[ix + 1] - ii[ix]; 751 ap = matB->a + ii[ix]; 752 aj = matB->j + ii[ix]; 753 for (int jj = 0; jj < n && isOK; jj++) { 754 PetscInt lidj = aj[jj]; 755 if (lghost_matched[lidj]) continue; 756 ew = PetscRealPart(ap[jj]); 757 if (ew <= threshold) continue; 758 max_e = PetscRealPart(lghost_max_ew[lidj]); 759 /* check for max_e == to this edge and larger processor that will deal with this */ 760 if (ew >= PetscRealPart(lid_max_ew[lid1]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE; 761 PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e)); 762 if (print) 763 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t\t[%d] e1: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d)\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj])); 764 } 765 } 766 if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1)); 767 } 768 } 769 PetscReal e1_max_w = (ghost1_idx == -1 ? PetscRealPart(lid_max_ew[lid0]) : PetscRealPart(lghost_max_ew[ghost1_idx])); 770 if (print) 771 PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] MATCHING (%d %d) e1 max weight = %e, e1 wight diff %e, %s. isOK = %d\n", rank, (int)gid0, (int)gid1, (double)e1_max_w, (double)(e1_max_w - e->weight), ghost1_idx == -1 ? "local" : "ghost", isOK)); 772 /* do it */ 773 if (isOK) { 774 if (ghost1_idx == -1) { 775 PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_SUP, "local %d is matched", (int)gid1); 776 lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */ 777 PetscCall(PetscCDMoveAppend(agg_llists, lid0, lid1)); // takes lid1's list and appends to lid0's 778 } else { 779 /* add gid1 to list of ghost deleted by me -- I need their children */ 780 PetscMPIInt proc = lghost_pe[ghost1_idx]; 781 PetscCheck(!lghost_matched[ghost1_idx], PETSC_COMM_SELF, PETSC_ERR_SUP, "ghost %d is matched", (int)lghost_gid[ghost1_idx]); 782 lghost_matched[ghost1_idx] = PETSC_TRUE; 783 PetscCall(PetscCDAppendID(ghost_deleted_list, proc, ghost1_idx)); /* cache to send messages */ 784 PetscCall(PetscCDAppendID(ghost_deleted_list, proc, lid0)); 785 } 786 lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */ 787 /* set projection */ 788 PetscCall(MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES)); 789 PetscCall(MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES)); 790 //PetscCall(PetscPrintf(comm,"\t %d.%d) match active EDGE %d : (%d %d)\n",iter,sub_it, (int)nactive_edges, (int)gid0, (int)gid1)); 791 } /* matched */ 792 } /* edge loop */ 793 PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT)); 794 if (isMPI) PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew)); 795 PetscCall(VecRestoreArrayRead(locMaxEdge, &lid_max_ew)); 796 // count active for test, latter, update deleted ghosts 797 n_act_n[0] = nactive_edges; 798 if (ghost_deleted_list) PetscCall(PetscCDCount(ghost_deleted_list, &n_act_n[2])); 799 else n_act_n[2] = 0; 800 PetscCall(PetscCDCount(agg_llists, &n_act_n[1])); 801 PetscCall(MPIU_Allreduce(n_act_n, gn_act_n, 3, MPIU_INT, MPI_SUM, comm)); 802 PetscCall(PetscInfo(info_is, "[%d] %d.%d) nactive edges=%" PetscInt_FMT ", ncomm_procs=%d, nEdges=%d, %" PetscInt_FMT " deleted ghosts, N=%" PetscInt_FMT "\n", rank, iter, sub_it, gn_act_n[0], (int)ncomm_procs, (int)nEdges, gn_act_n[2], gn_act_n[1])); 803 /* deal with deleted ghost */ 804 if (isMPI) { 805 PetscCDIntNd *pos; 806 PetscInt *sbuffs1[REQ_BF_SIZE], ndel; 807 PetscInt *sbuffs2[REQ_BF_SIZE]; 808 MPI_Status status; 809 810 /* send deleted ghosts */ 811 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 812 const PetscMPIInt proc = comm_procs[proc_idx]; 813 PetscInt *sbuff, *pt, scount; 814 MPI_Request *request; 815 /* count ghosts */ 816 PetscCall(PetscCDCountAt(ghost_deleted_list, proc, &ndel)); 817 ndel /= 2; // two entries for each proc 818 scount = 2 + 2 * ndel; 819 PetscCall(PetscMalloc1(scount + request_size, &sbuff)); 820 /* save requests */ 821 sbuffs1[proc_idx] = sbuff; 822 request = (MPI_Request *)sbuff; 823 sbuff = pt = (PetscInt *)(sbuff + request_size); 824 /* write [ndel, proc, n*[gid1,gid0] */ 825 *pt++ = ndel; // number of deleted to send 826 *pt++ = rank; // proc (not used) 827 PetscCall(PetscCDGetHeadPos(ghost_deleted_list, proc, &pos)); 828 while (pos) { 829 PetscInt lid0, ghost_idx, gid1; 830 PetscCall(PetscCDIntNdGetID(pos, &ghost_idx)); 831 gid1 = lghost_gid[ghost_idx]; 832 PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos)); 833 PetscCall(PetscCDIntNdGetID(pos, &lid0)); 834 PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos)); 835 *pt++ = gid1; 836 *pt++ = lid0 + my0; // gid0 837 } 838 PetscCheck(pt - sbuff == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "sbuff-pt != scount: %d", (int)(pt - sbuff)); 839 /* MPI_Isend: tag1 [ndel, proc, n*[gid1,gid0] ] */ 840 PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag1, comm, request)); 841 PetscCall(PetscCDRemoveAllAt(ghost_deleted_list, proc)); // done with this list 842 } 843 /* receive deleted, send back partial aggregates, clear lists */ 844 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 845 PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag1, comm, &status)); 846 { 847 PetscInt *pt, *pt2, *pt3, *sbuff, tmp; 848 MPI_Request *request; 849 int rcount, scount, ndel; 850 const PetscMPIInt proc = status.MPI_SOURCE; 851 PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount)); 852 if (rcount > rbuff_sz) { 853 if (rbuff) PetscCall(PetscFree(rbuff)); 854 PetscCall(PetscMalloc1(rcount, &rbuff)); 855 rbuff_sz = rcount; 856 } 857 /* MPI_Recv: tag1 [ndel, proc, ndel*[gid1,gid0] ] */ 858 PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag1, comm, &status)); 859 /* read and count sends *[lid0, n, n*[gid] ] */ 860 pt = rbuff; 861 scount = 0; 862 ndel = *pt++; // number of deleted to recv 863 tmp = *pt++; // proc (not used) 864 while (ndel--) { 865 PetscInt gid1 = *pt++, lid1 = gid1 - my0; 866 int gh_gid0 = *pt++; // gid on other proc (not used here to count) 867 PetscCheck(lid1 >= 0 && lid1 < nloc, PETSC_COMM_SELF, PETSC_ERR_SUP, "received ghost deleted %d", (int)gid1); 868 PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "%d) received matched local gid %" PetscInt_FMT ",%d, with ghost (lid) %" PetscInt_FMT " from proc %d", sub_it, gid1, gh_gid0, tmp, proc); 869 lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */ 870 PetscCall(PetscCDCountAt(agg_llists, lid1, &tmp)); // n 871 /* PetscCheck(tmp == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "sending %d (!= 1) size aggregate. gid-0 %d, from %d (gid-1 %d)", (int)tmp, (int) gid, proc, gh_gid0); */ 872 scount += tmp + 2; // lid0, n, n*[gid] 873 } 874 PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "receive buffer size != num read: %d; rcount: %d", (int)(pt - rbuff), rcount); 875 /* send tag2: *[gid0, n, n*[gid] ] */ 876 PetscCall(PetscMalloc1(scount + request_size, &sbuff)); 877 sbuffs2[proc_idx] = sbuff; /* cache request */ 878 request = (MPI_Request *)sbuff; 879 pt2 = sbuff = sbuff + request_size; 880 // read again: n, proc, n*[gid1,gid0] 881 pt = rbuff; 882 ndel = *pt++; 883 tmp = *pt++; // proc (not used) 884 while (ndel--) { 885 PetscInt gid1 = *pt++, lid1 = gid1 - my0, gh_gid0 = *pt++; 886 /* write [gid0, aggSz, aggSz[gid] ] */ 887 *pt2++ = gh_gid0; 888 pt3 = pt2++; /* save pointer for later */ 889 PetscCall(PetscCDGetHeadPos(agg_llists, lid1, &pos)); 890 while (pos) { 891 PetscInt gid; 892 PetscCall(PetscCDIntNdGetID(pos, &gid)); 893 PetscCall(PetscCDGetNextPos(agg_llists, lid1, &pos)); 894 *pt2++ = gid; 895 } 896 *pt3 = (pt2 - pt3) - 1; 897 /* clear list */ 898 PetscCall(PetscCDRemoveAllAt(agg_llists, lid1)); 899 } 900 PetscCheck((pt2 - sbuff) == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer size != num write: %d %d", (int)(pt2 - sbuff), (int)scount); 901 /* MPI_Isend: requested data tag2 *[lid0, n, n*[gid1] ] */ 902 PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag2, comm, request)); 903 } 904 } // proc_idx 905 /* receive tag2 *[gid0, n, n*[gid] ] */ 906 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 907 PetscMPIInt proc; 908 PetscInt *pt; 909 int rcount; 910 PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag2, comm, &status)); 911 PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount)); 912 if (rcount > rbuff_sz) { 913 if (rbuff) PetscCall(PetscFree(rbuff)); 914 PetscCall(PetscMalloc1(rcount, &rbuff)); 915 rbuff_sz = rcount; 916 } 917 proc = status.MPI_SOURCE; 918 /* MPI_Recv: tag1 [n, proc, n*[gid1,lid0] ] */ 919 PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag2, comm, &status)); 920 pt = rbuff; 921 while (pt - rbuff < rcount) { 922 PetscInt gid0 = *pt++, n = *pt++; 923 while (n--) { 924 PetscInt gid1 = *pt++; 925 PetscCall(PetscCDAppendID(agg_llists, gid0 - my0, gid1)); 926 } 927 } 928 PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "recv buffer size != num read: %d %d", (int)(pt - rbuff), (int)rcount); 929 } 930 /* wait for tag1 isends */ 931 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 932 MPI_Request *request; 933 request = (MPI_Request *)sbuffs1[proc_idx]; 934 PetscCallMPI(MPI_Wait(request, &status)); 935 PetscCall(PetscFree(sbuffs1[proc_idx])); 936 } 937 /* wait for tag2 isends */ 938 for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) { 939 MPI_Request *request = (MPI_Request *)sbuffs2[proc_idx]; 940 PetscCallMPI(MPI_Wait(request, &status)); 941 PetscCall(PetscFree(sbuffs2[proc_idx])); 942 } 943 } /* MPI */ 944 /* set 'lghost_matched' - use locMaxEdge, ghostMaxEdge (recomputed next) */ 945 if (isMPI) { 946 const PetscScalar *sbuff; 947 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 948 PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0; 949 PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 950 } 951 PetscCall(VecAssemblyBegin(locMaxEdge)); 952 PetscCall(VecAssemblyEnd(locMaxEdge)); 953 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 954 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 955 PetscCall(VecGetArrayRead(ghostMaxEdge, &sbuff)); 956 for (int kk = 0; kk < num_ghosts; kk++) { lghost_matched[kk] = (PetscBool)(PetscRealPart(sbuff[kk]) != 0.0); } 957 PetscCall(VecRestoreArrayRead(ghostMaxEdge, &sbuff)); 958 } 959 /* compute 'locMaxEdge' inside sub iteration b/c max weight can drop as neighbors are matched */ 960 for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) { 961 PetscReal max_e = 0., tt; 962 PetscScalar vval; 963 const PetscInt lid = kk; 964 int max_pe = rank, pe, n; 965 ii = matA->i; 966 n = ii[lid + 1] - ii[lid]; 967 aj = matA->j + ii[lid]; 968 ap = matA->a + ii[lid]; 969 for (int jj = 0; jj < n; jj++) { 970 PetscInt lidj = aj[jj]; 971 if (lid_matched[lidj]) continue; /* this is new - can change local max */ 972 if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]); 973 } 974 if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ 975 ii = matB->compressedrow.i; 976 n = ii[ix + 1] - ii[ix]; 977 ap = matB->a + ii[ix]; 978 aj = matB->j + ii[ix]; 979 for (int jj = 0; jj < n; jj++) { 980 PetscInt lidj = aj[jj]; 981 if (lghost_matched[lidj]) continue; 982 if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt; 983 } 984 } 985 vval = (PetscScalar)max_e; 986 PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ 987 // max PE with max edge 988 if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */ 989 ii = matB->compressedrow.i; 990 n = ii[ix + 1] - ii[ix]; 991 ap = matB->a + ii[ix]; 992 aj = matB->j + ii[ix]; 993 for (int jj = 0; jj < n; jj++) { 994 PetscInt lidj = aj[jj]; 995 if (lghost_matched[lidj]) continue; 996 if ((pe = lghost_pe[aj[jj]]) > max_pe && PetscRealPart(ap[jj]) >= max_e - MY_MEPS) { max_pe = pe; } 997 } 998 } 999 vval = (PetscScalar)max_pe; 1000 PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES)); 1001 } 1002 PetscCall(VecAssemblyBegin(locMaxEdge)); 1003 PetscCall(VecAssemblyEnd(locMaxEdge)); 1004 PetscCall(VecAssemblyBegin(locMaxPE)); 1005 PetscCall(VecAssemblyEnd(locMaxPE)); 1006 /* compute 'lghost_max_ew' and 'lghost_max_pe' to get ready for next iteration*/ 1007 if (isMPI) { 1008 const PetscScalar *buf; 1009 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 1010 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD)); 1011 PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 1012 PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD)); 1013 PetscCall(VecGetArrayRead(ghostMaxPE, &buf)); 1014 for (int kk = 0; kk < num_ghosts; kk++) { 1015 lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now 1016 } 1017 PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf)); 1018 } 1019 // if no active edges, stop 1020 if (gn_act_n[0] < 1) break; 1021 // inc and check (self stopping iteration 1022 PetscCheck(old_num_edge != gn_act_n[0], PETSC_COMM_SELF, PETSC_ERR_SUP, "HEM stalled step %d/%d", sub_it + 1, n_sub_its); 1023 sub_it++; 1024 PetscCheck(sub_it < n_sub_its, PETSC_COMM_SELF, PETSC_ERR_SUP, "failed to finish HEM step %d/%d", sub_it + 1, n_sub_its); 1025 old_num_edge = gn_act_n[0]; 1026 } /* sub_it loop */ 1027 /* clean up iteration */ 1028 PetscCall(PetscFree(Edges)); 1029 if (mpimat) { // can be hoisted 1030 PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew)); 1031 PetscCall(VecDestroy(&ghostMaxEdge)); 1032 PetscCall(VecDestroy(&ghostMaxPE)); 1033 PetscCall(PetscFree(lghost_pe)); 1034 PetscCall(PetscFree(lghost_gid)); 1035 PetscCall(PetscFree(lghost_matched)); 1036 PetscCall(PetscFree(lghost_max_pe)); 1037 } 1038 PetscCall(VecDestroy(&locMaxEdge)); 1039 PetscCall(VecDestroy(&locMaxPE)); 1040 /* create next graph */ 1041 { 1042 Vec diag; 1043 /* add identity for unmatched vertices so they stay alive */ 1044 for (PetscInt kk = 0, gid1, gid = my0; kk < nloc; kk++, gid++) { 1045 if (!lid_matched[kk]) { 1046 const PetscInt lid = kk; 1047 PetscCDIntNd *pos; 1048 PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos)); 1049 PetscCheck(pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "empty list in singleton: %d", (int)gid); 1050 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1051 PetscCheck(gid1 == gid, PETSC_COMM_SELF, PETSC_ERR_PLIB, "first in list (%d) in singleton not %d", (int)gid1, (int)gid); 1052 PetscCall(MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES)); 1053 } 1054 } 1055 PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY)); 1056 PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY)); 1057 1058 /* project to make new graph with collapsed edges */ 1059 PetscCall(MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat)); 1060 PetscCall(MatDestroy(&P)); 1061 PetscCall(MatDestroy(&cMat)); 1062 cMat = tMat; 1063 PetscCall(MatCreateVecs(cMat, &diag, NULL)); 1064 PetscCall(MatGetDiagonal(cMat, diag)); 1065 PetscCall(VecReciprocal(diag)); 1066 PetscCall(VecSqrtAbs(diag)); 1067 PetscCall(MatDiagonalScale(cMat, diag, diag)); 1068 PetscCall(VecDestroy(&diag)); 1069 } 1070 } /* coarsen iterator */ 1071 1072 /* make fake matrix with Mat->B only for smoothed agg QR. Need this if we make an aux graph (ie, PtAP) with k > 1 */ 1073 if (size > 1) { 1074 Mat mat; 1075 PetscCDIntNd *pos; 1076 PetscInt NN, MM, jj = 0, mxsz = 0; 1077 1078 for (int kk = 0; kk < nloc; kk++) { 1079 PetscCall(PetscCDCountAt(agg_llists, kk, &jj)); 1080 if (jj > mxsz) mxsz = jj; 1081 } 1082 PetscCall(MatGetSize(a_Gmat, &MM, &NN)); 1083 if (mxsz > MM - nloc) mxsz = MM - nloc; 1084 /* matrix of ghost adj for square graph */ 1085 PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat)); 1086 for (PetscInt lid = 0, gid = my0; lid < nloc; lid++, gid++) { 1087 PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos)); 1088 while (pos) { 1089 PetscInt gid1; 1090 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1091 PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos)); 1092 if (gid1 < my0 || gid1 >= my0 + nloc) PetscCall(MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES)); 1093 } 1094 } 1095 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 1096 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 1097 PetscCall(PetscCDSetMat(agg_llists, mat)); 1098 PetscCall(PetscCDDestroy(ghost_deleted_list)); 1099 if (rbuff_sz) PetscCall(PetscFree(rbuff)); // always true 1100 } 1101 // move BCs into some node 1102 if (bc_list) { 1103 PetscCDIntNd *pos; 1104 PetscCall(PetscCDGetHeadPos(bc_list, 0, &pos)); 1105 while (pos) { 1106 PetscInt gid1; 1107 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1108 PetscCall(PetscCDGetNextPos(bc_list, 0, &pos)); 1109 PetscCall(PetscCDAppendID(agg_llists, bc_agg, gid1)); 1110 } 1111 PetscCall(PetscCDRemoveAllAt(bc_list, 0)); 1112 PetscCall(PetscCDDestroy(bc_list)); 1113 } 1114 { 1115 // check sizes -- all vertices must get in graph 1116 PetscInt sz, globalsz, MM; 1117 PetscCall(MatGetSize(a_Gmat, &MM, NULL)); 1118 PetscCall(PetscCDCount(agg_llists, &sz)); 1119 PetscCall(MPIU_Allreduce(&sz, &globalsz, 1, MPIU_INT, MPI_SUM, comm)); 1120 PetscCheck(MM == globalsz, comm, PETSC_ERR_SUP, "lost %d equations ?", (int)(MM - globalsz)); 1121 } 1122 // cleanup 1123 PetscCall(MatDestroy(&cMat)); 1124 PetscCall(PetscFree(lid_cprowID)); 1125 PetscCall(PetscFree(lid_max_pe)); 1126 PetscCall(PetscFree(lid_matched)); 1127 PetscCall(ISDestroy(&info_is)); 1128 PetscFunctionReturn(PETSC_SUCCESS); 1129 } 1130 1131 /* 1132 HEM coarsen, simple greedy. 1133 */ 1134 static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse) 1135 { 1136 Mat mat = coarse->graph; 1137 1138 PetscFunctionBegin; 1139 PetscCall(MatCoarsenApply_HEM_private(mat, coarse->max_it, coarse->threshold, &coarse->agg_lists)); 1140 1141 PetscFunctionReturn(PETSC_SUCCESS); 1142 } 1143 1144 static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer) 1145 { 1146 PetscMPIInt rank; 1147 PetscBool iascii; 1148 1149 PetscFunctionBegin; 1150 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank)); 1151 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1152 if (iascii) { 1153 PetscCDIntNd *pos, *pos2; 1154 PetscCall(PetscViewerASCIIPrintf(viewer, "%d matching steps with threshold = %g\n", (int)coarse->max_it, (double)coarse->threshold)); 1155 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1156 for (PetscInt kk = 0; kk < coarse->agg_lists->size; kk++) { 1157 PetscCall(PetscCDGetHeadPos(coarse->agg_lists, kk, &pos)); 1158 if ((pos2 = pos)) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "selected local %d: ", (int)kk)); 1159 while (pos) { 1160 PetscInt gid1; 1161 PetscCall(PetscCDIntNdGetID(pos, &gid1)); 1162 PetscCall(PetscCDGetNextPos(coarse->agg_lists, kk, &pos)); 1163 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " %d ", (int)gid1)); 1164 } 1165 if (pos2) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n")); 1166 } 1167 PetscCall(PetscViewerFlush(viewer)); 1168 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1169 } 1170 PetscFunctionReturn(PETSC_SUCCESS); 1171 } 1172 1173 /* 1174 MatCoarsenCreate_HEM - A coarsener that uses HEM a simple greedy coarsener 1175 */ 1176 PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse) 1177 { 1178 PetscFunctionBegin; 1179 coarse->ops->apply = MatCoarsenApply_HEM; 1180 coarse->ops->view = MatCoarsenView_HEM; 1181 coarse->max_it = 4; 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184