1 #include <petsc/private/pcimpl.h> 2 #include <petsc/private/matimpl.h> 3 #include <petscdm.h> 4 #include <h2opusconf.h> 5 6 /* Use GPU only if H2OPUS is configured for GPU */ 7 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 8 #define PETSC_H2OPUS_USE_GPU 9 #endif 10 11 typedef struct { 12 Mat A; 13 Mat M; 14 PetscScalar s0; 15 16 /* sampler for Newton-Schultz */ 17 Mat S; 18 PetscInt hyperorder; 19 Vec wns[4]; 20 Mat wnsmat[4]; 21 22 /* convergence testing */ 23 Mat T; 24 Vec w; 25 26 /* Support for PCSetCoordinates */ 27 PetscInt sdim; 28 PetscInt nlocc; 29 PetscReal *coords; 30 31 /* Newton-Schultz customization */ 32 PetscInt maxits; 33 PetscReal rtol, atol; 34 PetscBool monitor; 35 PetscBool useapproximatenorms; 36 NormType normtype; 37 38 /* Used when pmat != MATH2OPUS */ 39 PetscReal eta; 40 PetscInt leafsize; 41 PetscInt max_rank; 42 PetscInt bs; 43 PetscReal mrtol; 44 45 /* CPU/GPU */ 46 PetscBool forcecpu; 47 PetscBool boundtocpu; 48 } PC_H2OPUS; 49 50 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat, NormType, PetscReal *); 51 52 static PetscErrorCode PCH2OpusInferCoordinates_Private(PC pc) 53 { 54 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 55 DM dm; 56 PetscBool isdmda; 57 58 PetscFunctionBegin; 59 if (pch2opus->sdim) PetscFunctionReturn(PETSC_SUCCESS); 60 PetscCall(PCGetDM(pc, &dm)); 61 if (!dm) PetscCall(MatGetDM(pc->useAmat ? pc->mat : pc->pmat, &dm)); 62 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isdmda)); 63 if (isdmda) { 64 Vec c; 65 const PetscScalar *coords; 66 PetscInt n, sdim; 67 68 PetscCall(DMGetCoordinates(dm, &c)); 69 PetscCall(DMGetDimension(dm, &sdim)); 70 PetscCall(VecGetLocalSize(c, &n)); 71 PetscCall(VecGetArrayRead(c, &coords)); 72 PetscCall(PCSetCoordinates(pc, sdim, n / sdim, (PetscScalar *)coords)); 73 PetscCall(VecRestoreArrayRead(c, &coords)); 74 } 75 PetscFunctionReturn(PETSC_SUCCESS); 76 } 77 78 static PetscErrorCode PCReset_H2OPUS(PC pc) 79 { 80 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 81 82 PetscFunctionBegin; 83 pch2opus->sdim = 0; 84 pch2opus->nlocc = 0; 85 PetscCall(PetscFree(pch2opus->coords)); 86 PetscFunctionReturn(PETSC_SUCCESS); 87 } 88 89 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords) 90 { 91 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 92 PetscBool reset = PETSC_TRUE; 93 94 PetscFunctionBegin; 95 if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) { 96 PetscCall(PetscArraycmp(pch2opus->coords, coords, sdim * nlocc, &reset)); 97 reset = (PetscBool)!reset; 98 } 99 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc))); 100 if (reset) { 101 PetscCall(PCReset_H2OPUS(pc)); 102 PetscCall(PetscMalloc1(sdim * nlocc, &pch2opus->coords)); 103 PetscCall(PetscArraycpy(pch2opus->coords, coords, sdim * nlocc)); 104 pch2opus->sdim = sdim; 105 pch2opus->nlocc = nlocc; 106 } 107 PetscFunctionReturn(PETSC_SUCCESS); 108 } 109 110 static PetscErrorCode PCDestroy_H2OPUS(PC pc) 111 { 112 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 113 114 PetscFunctionBegin; 115 PetscCall(PCReset_H2OPUS(pc)); 116 PetscCall(MatDestroy(&pch2opus->A)); 117 PetscCall(MatDestroy(&pch2opus->M)); 118 PetscCall(MatDestroy(&pch2opus->T)); 119 PetscCall(VecDestroy(&pch2opus->w)); 120 PetscCall(MatDestroy(&pch2opus->S)); 121 PetscCall(VecDestroy(&pch2opus->wns[0])); 122 PetscCall(VecDestroy(&pch2opus->wns[1])); 123 PetscCall(VecDestroy(&pch2opus->wns[2])); 124 PetscCall(VecDestroy(&pch2opus->wns[3])); 125 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 126 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 127 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 128 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 129 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL)); 130 PetscCall(PetscFree(pc->data)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 static PetscErrorCode PCSetFromOptions_H2OPUS(PC pc, PetscOptionItems PetscOptionsObject) 135 { 136 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 137 138 PetscFunctionBegin; 139 PetscOptionsHeadBegin(PetscOptionsObject, "H2OPUS options"); 140 PetscCall(PetscOptionsInt("-pc_h2opus_maxits", "Maximum number of iterations for Newton-Schultz", NULL, pch2opus->maxits, &pch2opus->maxits, NULL)); 141 PetscCall(PetscOptionsBool("-pc_h2opus_monitor", "Monitor Newton-Schultz convergence", NULL, pch2opus->monitor, &pch2opus->monitor, NULL)); 142 PetscCall(PetscOptionsReal("-pc_h2opus_atol", "Absolute tolerance", NULL, pch2opus->atol, &pch2opus->atol, NULL)); 143 PetscCall(PetscOptionsReal("-pc_h2opus_rtol", "Relative tolerance", NULL, pch2opus->rtol, &pch2opus->rtol, NULL)); 144 PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type", "Norm type for convergence monitoring", NULL, NormTypes, (PetscEnum)pch2opus->normtype, (PetscEnum *)&pch2opus->normtype, NULL)); 145 PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder", "Hyper power order of sampling", NULL, pch2opus->hyperorder, &pch2opus->hyperorder, NULL)); 146 PetscCall(PetscOptionsInt("-pc_h2opus_leafsize", "Leaf size when constructed from kernel", NULL, pch2opus->leafsize, &pch2opus->leafsize, NULL)); 147 PetscCall(PetscOptionsReal("-pc_h2opus_eta", "Admissibility condition tolerance", NULL, pch2opus->eta, &pch2opus->eta, NULL)); 148 PetscCall(PetscOptionsInt("-pc_h2opus_maxrank", "Maximum rank when constructed from matvecs", NULL, pch2opus->max_rank, &pch2opus->max_rank, NULL)); 149 PetscCall(PetscOptionsInt("-pc_h2opus_samples", "Number of samples to be taken concurrently when constructing from matvecs", NULL, pch2opus->bs, &pch2opus->bs, NULL)); 150 PetscCall(PetscOptionsReal("-pc_h2opus_mrtol", "Relative tolerance for construction from sampling", NULL, pch2opus->mrtol, &pch2opus->mrtol, NULL)); 151 PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu", "Force construction on CPU", NULL, pch2opus->forcecpu, &pch2opus->forcecpu, NULL)); 152 PetscOptionsHeadEnd(); 153 PetscFunctionReturn(PETSC_SUCCESS); 154 } 155 156 typedef struct { 157 Mat A; 158 Mat M; 159 Vec w; 160 } AAtCtx; 161 162 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y) 163 { 164 AAtCtx *aat; 165 166 PetscFunctionBegin; 167 PetscCall(MatShellGetContext(A, &aat)); 168 /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */ 169 PetscCall(MatMultTranspose(aat->A, x, aat->w)); 170 PetscCall(MatMult(aat->A, aat->w, y)); 171 PetscFunctionReturn(PETSC_SUCCESS); 172 } 173 174 static PetscErrorCode PCH2OpusSetUpInit(PC pc) 175 { 176 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 177 Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt; 178 PetscInt M, m; 179 VecType vtype; 180 PetscReal n; 181 AAtCtx aat; 182 183 PetscFunctionBegin; 184 aat.A = A; 185 aat.M = pch2opus->M; /* unused so far */ 186 PetscCall(MatCreateVecs(A, NULL, &aat.w)); 187 PetscCall(MatGetSize(A, &M, NULL)); 188 PetscCall(MatGetLocalSize(A, &m, NULL)); 189 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, m, M, M, &aat, &AAt)); 190 PetscCall(MatBindToCPU(AAt, pch2opus->boundtocpu)); 191 PetscCall(MatShellSetOperation(AAt, MATOP_MULT, (PetscErrorCodeFn *)MatMult_AAt)); 192 PetscCall(MatShellSetOperation(AAt, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMult_AAt)); 193 PetscCall(MatShellSetOperation(AAt, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS)); 194 PetscCall(MatGetVecType(A, &vtype)); 195 PetscCall(MatShellSetVecType(AAt, vtype)); 196 PetscCall(MatNorm(AAt, NORM_1, &n)); 197 pch2opus->s0 = 1. / n; 198 PetscCall(VecDestroy(&aat.w)); 199 PetscCall(MatDestroy(&AAt)); 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t) 204 { 205 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 206 207 PetscFunctionBegin; 208 if (t) PetscCall(MatMultTranspose(pch2opus->M, x, y)); 209 else PetscCall(MatMult(pch2opus->M, x, y)); 210 PetscFunctionReturn(PETSC_SUCCESS); 211 } 212 213 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t) 214 { 215 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 216 217 PetscFunctionBegin; 218 if (t) PetscCall(MatTransposeMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_DETERMINE, &Y)); 219 else PetscCall(MatMatMult(pch2opus->M, X, MAT_REUSE_MATRIX, PETSC_CURRENT, &Y)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 223 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y) 224 { 225 PetscFunctionBegin; 226 PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_FALSE)); 227 PetscFunctionReturn(PETSC_SUCCESS); 228 } 229 230 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y) 231 { 232 PetscFunctionBegin; 233 PetscCall(PCApplyMatKernel_H2OPUS(pc, X, Y, PETSC_TRUE)); 234 PetscFunctionReturn(PETSC_SUCCESS); 235 } 236 237 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y) 238 { 239 PetscFunctionBegin; 240 PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_FALSE)); 241 PetscFunctionReturn(PETSC_SUCCESS); 242 } 243 244 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y) 245 { 246 PetscFunctionBegin; 247 PetscCall(PCApplyKernel_H2OPUS(pc, x, y, PETSC_TRUE)); 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 /* used to test the norm of (M^-1 A - I) */ 252 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t) 253 { 254 PC pc; 255 Mat A; 256 PC_H2OPUS *pch2opus; 257 PetscBool sideleft = PETSC_TRUE; 258 259 PetscFunctionBegin; 260 PetscCall(MatShellGetContext(M, &pc)); 261 pch2opus = (PC_H2OPUS *)pc->data; 262 if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M, &pch2opus->w, NULL)); 263 A = pch2opus->A; 264 PetscCall(VecBindToCPU(pch2opus->w, pch2opus->boundtocpu)); 265 if (t) { 266 if (sideleft) { 267 PetscCall(PCApplyTranspose_H2OPUS(pc, x, pch2opus->w)); 268 PetscCall(MatMultTranspose(A, pch2opus->w, y)); 269 } else { 270 PetscCall(MatMultTranspose(A, x, pch2opus->w)); 271 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->w, y)); 272 } 273 } else { 274 if (sideleft) { 275 PetscCall(MatMult(A, x, pch2opus->w)); 276 PetscCall(PCApply_H2OPUS(pc, pch2opus->w, y)); 277 } else { 278 PetscCall(PCApply_H2OPUS(pc, x, pch2opus->w)); 279 PetscCall(MatMult(A, pch2opus->w, y)); 280 } 281 } 282 PetscCall(VecAXPY(y, -1.0, x)); 283 PetscFunctionReturn(PETSC_SUCCESS); 284 } 285 286 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y) 287 { 288 PetscFunctionBegin; 289 PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_FALSE)); 290 PetscFunctionReturn(PETSC_SUCCESS); 291 } 292 293 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y) 294 { 295 PetscFunctionBegin; 296 PetscCall(MatMultKernel_MAmI(A, x, y, PETSC_TRUE)); 297 PetscFunctionReturn(PETSC_SUCCESS); 298 } 299 300 /* HyperPower kernel: 301 Y = R = x 302 for i = 1 . . . l - 1 do 303 R = (I - A * Xk) * R 304 Y = Y + R 305 Y = Xk * Y 306 */ 307 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t) 308 { 309 PC pc; 310 Mat A; 311 PC_H2OPUS *pch2opus; 312 313 PetscFunctionBegin; 314 PetscCall(MatShellGetContext(M, &pc)); 315 pch2opus = (PC_H2OPUS *)pc->data; 316 A = pch2opus->A; 317 PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 318 PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[2] ? NULL : &pch2opus->wns[2], pch2opus->wns[3] ? NULL : &pch2opus->wns[3])); 319 PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu)); 320 PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu)); 321 PetscCall(VecBindToCPU(pch2opus->wns[2], pch2opus->boundtocpu)); 322 PetscCall(VecBindToCPU(pch2opus->wns[3], pch2opus->boundtocpu)); 323 PetscCall(VecCopy(x, pch2opus->wns[0])); 324 PetscCall(VecCopy(x, pch2opus->wns[3])); 325 if (t) { 326 for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) { 327 PetscCall(MatMultTranspose(A, pch2opus->wns[0], pch2opus->wns[1])); 328 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[2])); 329 PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2])); 330 PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0])); 331 } 332 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[3], y)); 333 } else { 334 for (PetscInt i = 0; i < pch2opus->hyperorder - 1; i++) { 335 PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1])); 336 PetscCall(MatMult(A, pch2opus->wns[1], pch2opus->wns[2])); 337 PetscCall(VecAXPY(pch2opus->wns[0], -1., pch2opus->wns[2])); 338 PetscCall(VecAXPY(pch2opus->wns[3], 1., pch2opus->wns[0])); 339 } 340 PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[3], y)); 341 } 342 PetscFunctionReturn(PETSC_SUCCESS); 343 } 344 345 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y) 346 { 347 PetscFunctionBegin; 348 PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_FALSE)); 349 PetscFunctionReturn(PETSC_SUCCESS); 350 } 351 352 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y) 353 { 354 PetscFunctionBegin; 355 PetscCall(MatMultKernel_Hyper(M, x, y, PETSC_TRUE)); 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 /* Hyper power kernel, MatMat version */ 360 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t) 361 { 362 PC pc; 363 Mat A; 364 PC_H2OPUS *pch2opus; 365 PetscInt i; 366 367 PetscFunctionBegin; 368 PetscCall(MatShellGetContext(M, &pc)); 369 pch2opus = (PC_H2OPUS *)pc->data; 370 A = pch2opus->A; 371 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 372 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 373 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 374 } 375 if (!pch2opus->wnsmat[0]) { 376 PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0])); 377 PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1])); 378 } 379 if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) { 380 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 381 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 382 } 383 if (!pch2opus->wnsmat[2]) { 384 PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[2])); 385 PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[3])); 386 } 387 PetscCall(MatCopy(X, pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 388 PetscCall(MatCopy(X, pch2opus->wnsmat[3], SAME_NONZERO_PATTERN)); 389 if (t) { 390 for (i = 0; i < pch2opus->hyperorder - 1; i++) { 391 PetscCall(MatTransposeMatMult(A, pch2opus->wnsmat[0], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1])); 392 PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[2])); 393 PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN)); 394 PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 395 } 396 PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[3], Y)); 397 } else { 398 for (i = 0; i < pch2opus->hyperorder - 1; i++) { 399 PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1])); 400 PetscCall(MatMatMult(A, pch2opus->wnsmat[1], MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[2])); 401 PetscCall(MatAXPY(pch2opus->wnsmat[0], -1., pch2opus->wnsmat[2], SAME_NONZERO_PATTERN)); 402 PetscCall(MatAXPY(pch2opus->wnsmat[3], 1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 403 } 404 PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[3], Y)); 405 } 406 PetscFunctionReturn(PETSC_SUCCESS); 407 } 408 409 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y, PetscCtx ctx) 410 { 411 PetscFunctionBegin; 412 PetscCall(MatMatMultKernel_Hyper(M, X, Y, PETSC_FALSE)); 413 PetscFunctionReturn(PETSC_SUCCESS); 414 } 415 416 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */ 417 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t) 418 { 419 PC pc; 420 Mat A; 421 PC_H2OPUS *pch2opus; 422 423 PetscFunctionBegin; 424 PetscCall(MatShellGetContext(M, &pc)); 425 pch2opus = (PC_H2OPUS *)pc->data; 426 A = pch2opus->A; 427 PetscCall(MatCreateVecs(pch2opus->M, pch2opus->wns[0] ? NULL : &pch2opus->wns[0], pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 428 PetscCall(VecBindToCPU(pch2opus->wns[0], pch2opus->boundtocpu)); 429 PetscCall(VecBindToCPU(pch2opus->wns[1], pch2opus->boundtocpu)); 430 if (t) { 431 PetscCall(PCApplyTranspose_H2OPUS(pc, x, y)); 432 PetscCall(MatMultTranspose(A, y, pch2opus->wns[1])); 433 PetscCall(PCApplyTranspose_H2OPUS(pc, pch2opus->wns[1], pch2opus->wns[0])); 434 PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[0])); 435 } else { 436 PetscCall(PCApply_H2OPUS(pc, x, y)); 437 PetscCall(MatMult(A, y, pch2opus->wns[0])); 438 PetscCall(PCApply_H2OPUS(pc, pch2opus->wns[0], pch2opus->wns[1])); 439 PetscCall(VecAXPBY(y, -1., 2., pch2opus->wns[1])); 440 } 441 PetscFunctionReturn(PETSC_SUCCESS); 442 } 443 444 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y) 445 { 446 PetscFunctionBegin; 447 PetscCall(MatMultKernel_NS(M, x, y, PETSC_FALSE)); 448 PetscFunctionReturn(PETSC_SUCCESS); 449 } 450 451 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y) 452 { 453 PetscFunctionBegin; 454 PetscCall(MatMultKernel_NS(M, x, y, PETSC_TRUE)); 455 PetscFunctionReturn(PETSC_SUCCESS); 456 } 457 458 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */ 459 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t) 460 { 461 PC pc; 462 Mat A; 463 PC_H2OPUS *pch2opus; 464 465 PetscFunctionBegin; 466 PetscCall(MatShellGetContext(M, &pc)); 467 pch2opus = (PC_H2OPUS *)pc->data; 468 A = pch2opus->A; 469 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 470 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 471 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 472 } 473 if (!pch2opus->wnsmat[0]) { 474 PetscCall(MatDuplicate(X, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[0])); 475 PetscCall(MatDuplicate(Y, MAT_SHARE_NONZERO_PATTERN, &pch2opus->wnsmat[1])); 476 } 477 if (t) { 478 PetscCall(PCApplyTransposeMat_H2OPUS(pc, X, Y)); 479 PetscCall(MatTransposeMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[1])); 480 PetscCall(PCApplyTransposeMat_H2OPUS(pc, pch2opus->wnsmat[1], pch2opus->wnsmat[0])); 481 PetscCall(MatScale(Y, 2.)); 482 PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[0], SAME_NONZERO_PATTERN)); 483 } else { 484 PetscCall(PCApplyMat_H2OPUS(pc, X, Y)); 485 PetscCall(MatMatMult(A, Y, MAT_REUSE_MATRIX, PETSC_CURRENT, &pch2opus->wnsmat[0])); 486 PetscCall(PCApplyMat_H2OPUS(pc, pch2opus->wnsmat[0], pch2opus->wnsmat[1])); 487 PetscCall(MatScale(Y, 2.)); 488 PetscCall(MatAXPY(Y, -1., pch2opus->wnsmat[1], SAME_NONZERO_PATTERN)); 489 } 490 PetscFunctionReturn(PETSC_SUCCESS); 491 } 492 493 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, PetscCtx ctx) 494 { 495 PetscFunctionBegin; 496 PetscCall(MatMatMultKernel_NS(M, X, Y, PETSC_FALSE)); 497 PetscFunctionReturn(PETSC_SUCCESS); 498 } 499 500 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc) 501 { 502 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 503 Mat A = pc->useAmat ? pc->mat : pc->pmat; 504 505 PetscFunctionBegin; 506 if (!pch2opus->S) { 507 PetscInt M, N, m, n; 508 509 PetscCall(MatGetSize(A, &M, &N)); 510 PetscCall(MatGetLocalSize(A, &m, &n)); 511 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A), m, n, M, N, pc, &pch2opus->S)); 512 PetscCall(MatSetBlockSizesFromMats(pch2opus->S, A, A)); 513 #if defined(PETSC_H2OPUS_USE_GPU) 514 PetscCall(MatShellSetVecType(pch2opus->S, VECCUDA)); 515 #endif 516 } 517 if (pch2opus->hyperorder >= 2) { 518 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Hyper)); 519 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Hyper)); 520 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSE, MATDENSE)); 521 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_Hyper, NULL, MATDENSECUDA, MATDENSECUDA)); 522 } else { 523 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_NS)); 524 PetscCall(MatShellSetOperation(pch2opus->S, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_NS)); 525 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSE, MATDENSE)); 526 PetscCall(MatShellSetMatProductOperation(pch2opus->S, MATPRODUCT_AB, NULL, MatMatMultNumeric_NS, NULL, MATDENSECUDA, MATDENSECUDA)); 527 } 528 PetscCall(MatPropagateSymmetryOptions(A, pch2opus->S)); 529 PetscCall(MatBindToCPU(pch2opus->S, pch2opus->boundtocpu)); 530 /* XXX */ 531 PetscCall(MatSetOption(pch2opus->S, MAT_SYMMETRIC, PETSC_TRUE)); 532 PetscFunctionReturn(PETSC_SUCCESS); 533 } 534 535 static PetscErrorCode PCSetUp_H2OPUS(PC pc) 536 { 537 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 538 Mat A = pc->useAmat ? pc->mat : pc->pmat; 539 NormType norm = pch2opus->normtype; 540 PetscReal initerr = 0.0, err; 541 PetscBool ish2opus; 542 543 PetscFunctionBegin; 544 if (!pch2opus->T) { 545 PetscInt M, N, m, n; 546 const char *prefix; 547 548 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 549 PetscCall(MatGetSize(A, &M, &N)); 550 PetscCall(MatGetLocalSize(A, &m, &n)); 551 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat), m, n, M, N, pc, &pch2opus->T)); 552 PetscCall(MatSetBlockSizesFromMats(pch2opus->T, A, A)); 553 PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT, (PetscErrorCodeFn *)MatMult_MAmI)); 554 PetscCall(MatShellSetOperation(pch2opus->T, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_MAmI)); 555 PetscCall(MatShellSetOperation(pch2opus->T, MATOP_NORM, (PetscErrorCodeFn *)MatNorm_H2OPUS)); 556 #if defined(PETSC_H2OPUS_USE_GPU) 557 PetscCall(MatShellSetVecType(pch2opus->T, VECCUDA)); 558 #endif 559 PetscCall(MatSetOptionsPrefix(pch2opus->T, prefix)); 560 PetscCall(MatAppendOptionsPrefix(pch2opus->T, "pc_h2opus_est_")); 561 } 562 PetscCall(MatDestroy(&pch2opus->A)); 563 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATH2OPUS, &ish2opus)); 564 if (ish2opus) { 565 PetscCall(PetscObjectReference((PetscObject)A)); 566 pch2opus->A = A; 567 } else { 568 const char *prefix; 569 570 /* See if we can infer coordinates from the DM */ 571 if (!pch2opus->sdim) PetscCall(PCH2OpusInferCoordinates_Private(pc)); 572 PetscCall(MatCreateH2OpusFromMat(A, pch2opus->sdim, pch2opus->coords, PETSC_FALSE, pch2opus->eta, pch2opus->leafsize, pch2opus->max_rank, pch2opus->bs, pch2opus->mrtol, &pch2opus->A)); 573 /* XXX */ 574 PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE)); 575 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 576 PetscCall(MatSetOptionsPrefix(pch2opus->A, prefix)); 577 PetscCall(MatAppendOptionsPrefix(pch2opus->A, "pc_h2opus_init_")); 578 PetscCall(MatSetFromOptions(pch2opus->A)); 579 PetscCall(MatAssemblyBegin(pch2opus->A, MAT_FINAL_ASSEMBLY)); 580 PetscCall(MatAssemblyEnd(pch2opus->A, MAT_FINAL_ASSEMBLY)); 581 /* XXX */ 582 PetscCall(MatSetOption(pch2opus->A, MAT_SYMMETRIC, PETSC_TRUE)); 583 584 /* always perform construction on the GPU unless forcecpu is true */ 585 PetscCall(MatBindToCPU(pch2opus->A, pch2opus->forcecpu)); 586 } 587 #if defined(PETSC_H2OPUS_USE_GPU) 588 pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu; 589 #endif 590 PetscCall(MatBindToCPU(pch2opus->T, pch2opus->boundtocpu)); 591 if (pch2opus->M) { /* see if we can reuse M as initial guess */ 592 PetscReal reuse; 593 594 PetscCall(MatBindToCPU(pch2opus->M, pch2opus->boundtocpu)); 595 PetscCall(MatNorm(pch2opus->T, norm, &reuse)); 596 if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M)); 597 } 598 if (!pch2opus->M) { 599 const char *prefix; 600 PetscCall(MatDuplicate(pch2opus->A, MAT_COPY_VALUES, &pch2opus->M)); 601 PetscCall(PCGetOptionsPrefix(pc, &prefix)); 602 PetscCall(MatSetOptionsPrefix(pch2opus->M, prefix)); 603 PetscCall(MatAppendOptionsPrefix(pch2opus->M, "pc_h2opus_inv_")); 604 PetscCall(MatSetFromOptions(pch2opus->M)); 605 PetscCall(PCH2OpusSetUpInit(pc)); 606 PetscCall(MatScale(pch2opus->M, pch2opus->s0)); 607 } 608 /* A and M have the same h2 matrix nonzero structure, save on reordering routines */ 609 PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_TRUE)); 610 PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_TRUE)); 611 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &initerr)); 612 if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR; 613 err = initerr; 614 if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", 0, NormTypes[norm], (double)err, (double)(err / initerr))); 615 if (initerr > pch2opus->atol && !pc->failedreason) { 616 PetscInt i; 617 618 PetscCall(PCH2OpusSetUpSampler_Private(pc)); 619 for (i = 0; i < pch2opus->maxits; i++) { 620 Mat M; 621 const char *prefix; 622 623 PetscCall(MatDuplicate(pch2opus->M, MAT_SHARE_NONZERO_PATTERN, &M)); 624 PetscCall(MatGetOptionsPrefix(pch2opus->M, &prefix)); 625 PetscCall(MatSetOptionsPrefix(M, prefix)); 626 PetscCall(MatH2OpusSetSamplingMat(M, pch2opus->S, PETSC_DECIDE, PETSC_DECIDE)); 627 PetscCall(MatSetFromOptions(M)); 628 PetscCall(MatH2OpusSetNativeMult(M, PETSC_TRUE)); 629 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 630 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 631 PetscCall(MatDestroy(&pch2opus->M)); 632 pch2opus->M = M; 633 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T, norm, &err)); 634 if (pch2opus->monitor) PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc), "%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n", i + 1, NormTypes[norm], (double)err, (double)(err / initerr))); 635 if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR; 636 if (err < pch2opus->atol || err < pch2opus->rtol * initerr || pc->failedreason) break; 637 } 638 } 639 /* cleanup setup workspace */ 640 PetscCall(MatH2OpusSetNativeMult(pch2opus->A, PETSC_FALSE)); 641 PetscCall(MatH2OpusSetNativeMult(pch2opus->M, PETSC_FALSE)); 642 PetscCall(VecDestroy(&pch2opus->wns[0])); 643 PetscCall(VecDestroy(&pch2opus->wns[1])); 644 PetscCall(VecDestroy(&pch2opus->wns[2])); 645 PetscCall(VecDestroy(&pch2opus->wns[3])); 646 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 647 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 648 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 649 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 650 PetscFunctionReturn(PETSC_SUCCESS); 651 } 652 653 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer) 654 { 655 PC_H2OPUS *pch2opus = (PC_H2OPUS *)pc->data; 656 PetscBool isascii; 657 658 PetscFunctionBegin; 659 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 660 if (isascii) { 661 if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) { 662 PetscCall(PetscViewerASCIIPrintf(viewer, "Initial approximation matrix\n")); 663 PetscCall(PetscViewerASCIIPushTab(viewer)); 664 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL)); 665 PetscCall(MatView(pch2opus->A, viewer)); 666 PetscCall(PetscViewerPopFormat(viewer)); 667 PetscCall(PetscViewerASCIIPopTab(viewer)); 668 } 669 if (pch2opus->M) { 670 PetscCall(PetscViewerASCIIPrintf(viewer, "Inner matrix constructed\n")); 671 PetscCall(PetscViewerASCIIPushTab(viewer)); 672 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO_DETAIL)); 673 PetscCall(MatView(pch2opus->M, viewer)); 674 PetscCall(PetscViewerPopFormat(viewer)); 675 PetscCall(PetscViewerASCIIPopTab(viewer)); 676 } 677 PetscCall(PetscViewerASCIIPrintf(viewer, "Initial scaling: %g\n", pch2opus->s0)); 678 } 679 PetscFunctionReturn(PETSC_SUCCESS); 680 } 681 682 /*MC 683 PCH2OPUS = "h2opus" - A preconditioner type for, `MATH2OPUS`, hierarchical matrices using the H2Opus package. 684 685 Options Database Keys: 686 + -pc_type h2opus - pc type to "h2opus" during a call to `PCSetFromOptions()` 687 . -pc_h2opus_maxits - maximum number of iterations for Newton-Schultz 688 . -pc_h2opus_monitor - monitor Newton-Schultz convergence 689 . -pc_h2opus_atol - absolute tolerance 690 . -pc_h2opus_rtol - relative tolerance 691 . -pc_h2opus_norm_type - normtype 692 . -pc_h2opus_hyperorder - Hyper power order of sampling 693 . -pc_h2opus_leafsize - leaf size when constructed from kernel 694 . -pc_h2opus_eta - admissibility condition tolerance 695 . -pc_h2opus_maxrank - maximum rank when constructed from matvecs 696 . -pc_h2opus_samples - number of samples to be taken concurrently when constructing from matvecs 697 . -pc_h2opus_mrtol - relative tolerance for construction from sampling 698 - -pc_h2opus_forcecpu - force construction of preconditioner on CPU 699 700 Level: intermediate 701 702 .seealso: [](ch_ksp), `MATH2OPUS`, `MATHTOOL`, `MATDENSE`, `MatCreateH2OpusFromKernel()`, `MatCreateH2OpusFromMat()` 703 M*/ 704 705 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc) 706 { 707 PC_H2OPUS *pch2opus; 708 709 PetscFunctionBegin; 710 PetscCall(PetscNew(&pch2opus)); 711 pc->data = (void *)pch2opus; 712 713 pch2opus->atol = 1.e-2; 714 pch2opus->rtol = 1.e-6; 715 pch2opus->maxits = 50; 716 pch2opus->hyperorder = 1; /* defaults to basic Newton-Schultz */ 717 pch2opus->normtype = NORM_2; 718 719 /* these are needed when we are sampling the pmat */ 720 pch2opus->eta = PETSC_DECIDE; 721 pch2opus->leafsize = PETSC_DECIDE; 722 pch2opus->max_rank = PETSC_DECIDE; 723 pch2opus->bs = PETSC_DECIDE; 724 pch2opus->mrtol = PETSC_DECIDE; 725 pch2opus->boundtocpu = PetscDefined(H2OPUS_USE_GPU) ? PETSC_FALSE : PETSC_TRUE; 726 pc->ops->destroy = PCDestroy_H2OPUS; 727 pc->ops->setup = PCSetUp_H2OPUS; 728 pc->ops->apply = PCApply_H2OPUS; 729 pc->ops->matapply = PCApplyMat_H2OPUS; 730 pc->ops->applytranspose = PCApplyTranspose_H2OPUS; 731 pc->ops->reset = PCReset_H2OPUS; 732 pc->ops->setfromoptions = PCSetFromOptions_H2OPUS; 733 pc->ops->view = PCView_H2OPUS; 734 735 PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_H2OPUS)); 736 PetscFunctionReturn(PETSC_SUCCESS); 737 } 738