1 #include <petsc/private/pcimpl.h> 2 #include <petsc/private/matimpl.h> 3 #include <h2opusconf.h> 4 5 /* Use GPU only if H2OPUS is configured for GPU */ 6 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 7 #define PETSC_H2OPUS_USE_GPU 8 #endif 9 10 typedef struct { 11 Mat A; 12 Mat M; 13 PetscScalar s0; 14 15 /* sampler for Newton-Schultz */ 16 Mat S; 17 PetscInt hyperorder; 18 Vec wns[4]; 19 Mat wnsmat[4]; 20 21 /* convergence testing */ 22 Mat T; 23 Vec w; 24 25 /* Support for PCSetCoordinates */ 26 PetscInt sdim; 27 PetscInt nlocc; 28 PetscReal *coords; 29 30 /* Newton-Schultz customization */ 31 PetscInt maxits; 32 PetscReal rtol,atol; 33 PetscBool monitor; 34 PetscBool useapproximatenorms; 35 NormType normtype; 36 37 /* Used when pmat != MATH2OPUS */ 38 PetscReal eta; 39 PetscInt leafsize; 40 PetscInt max_rank; 41 PetscInt bs; 42 PetscReal mrtol; 43 44 /* CPU/GPU */ 45 PetscBool forcecpu; 46 PetscBool boundtocpu; 47 } PC_H2OPUS; 48 49 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*); 50 51 static PetscErrorCode PCReset_H2OPUS(PC pc) 52 { 53 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 54 55 PetscFunctionBegin; 56 pch2opus->sdim = 0; 57 pch2opus->nlocc = 0; 58 PetscCall(PetscFree(pch2opus->coords)); 59 PetscCall(MatDestroy(&pch2opus->A)); 60 PetscCall(MatDestroy(&pch2opus->M)); 61 PetscCall(MatDestroy(&pch2opus->T)); 62 PetscCall(VecDestroy(&pch2opus->w)); 63 PetscCall(MatDestroy(&pch2opus->S)); 64 PetscCall(VecDestroy(&pch2opus->wns[0])); 65 PetscCall(VecDestroy(&pch2opus->wns[1])); 66 PetscCall(VecDestroy(&pch2opus->wns[2])); 67 PetscCall(VecDestroy(&pch2opus->wns[3])); 68 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 69 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 70 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 71 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 72 PetscFunctionReturn(0); 73 } 74 75 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords) 76 { 77 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 78 PetscBool reset = PETSC_TRUE; 79 80 PetscFunctionBegin; 81 if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) { 82 PetscCall(PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset)); 83 reset = (PetscBool)!reset; 84 } 85 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 86 if (reset) { 87 PetscCall(PCReset_H2OPUS(pc)); 88 PetscCall(PetscMalloc1(sdim*nlocc,&pch2opus->coords)); 89 PetscCall(PetscArraycpy(pch2opus->coords,coords,sdim*nlocc)); 90 pch2opus->sdim = sdim; 91 pch2opus->nlocc = nlocc; 92 } 93 PetscFunctionReturn(0); 94 } 95 96 static PetscErrorCode PCDestroy_H2OPUS(PC pc) 97 { 98 PetscFunctionBegin; 99 PetscCall(PCReset_H2OPUS(pc)); 100 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL)); 101 PetscCall(PetscFree(pc->data)); 102 PetscFunctionReturn(0); 103 } 104 105 static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc) 106 { 107 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 108 109 PetscFunctionBegin; 110 PetscOptionsHeadBegin(PetscOptionsObject,"H2OPUS options"); 111 PetscCall(PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL)); 112 PetscCall(PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL)); 113 PetscCall(PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL)); 114 PetscCall(PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL)); 115 PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL)); 116 PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL)); 117 PetscCall(PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL)); 118 PetscCall(PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL)); 119 PetscCall(PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL)); 120 PetscCall(PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL)); 121 PetscCall(PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL)); 122 PetscCall(PetscOptionsBool("-pc_h2opus_forcecpu","Force construction on CPU",NULL,pch2opus->forcecpu,&pch2opus->forcecpu,NULL)); 123 PetscOptionsHeadEnd(); 124 PetscFunctionReturn(0); 125 } 126 127 typedef struct { 128 Mat A; 129 Mat M; 130 Vec w; 131 } AAtCtx; 132 133 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y) 134 { 135 AAtCtx *aat; 136 137 PetscFunctionBegin; 138 PetscCall(MatShellGetContext(A,(void*)&aat)); 139 /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */ 140 PetscCall(MatMultTranspose(aat->A,x,aat->w)); 141 PetscCall(MatMult(aat->A,aat->w,y)); 142 PetscFunctionReturn(0); 143 } 144 145 static PetscErrorCode PCH2OpusSetUpInit(PC pc) 146 { 147 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 148 Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt; 149 PetscInt M,m; 150 VecType vtype; 151 PetscReal n; 152 AAtCtx aat; 153 154 PetscFunctionBegin; 155 aat.A = A; 156 aat.M = pch2opus->M; /* unused so far */ 157 PetscCall(MatCreateVecs(A,NULL,&aat.w)); 158 PetscCall(MatGetSize(A,&M,NULL)); 159 PetscCall(MatGetLocalSize(A,&m,NULL)); 160 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt)); 161 PetscCall(MatBindToCPU(AAt,pch2opus->boundtocpu)); 162 PetscCall(MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt)); 163 PetscCall(MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt)); 164 PetscCall(MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS)); 165 PetscCall(MatGetVecType(A,&vtype)); 166 PetscCall(MatShellSetVecType(AAt,vtype)); 167 PetscCall(MatNorm(AAt,NORM_1,&n)); 168 pch2opus->s0 = 1./n; 169 PetscCall(VecDestroy(&aat.w)); 170 PetscCall(MatDestroy(&AAt)); 171 PetscFunctionReturn(0); 172 } 173 174 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t) 175 { 176 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 177 178 PetscFunctionBegin; 179 if (t) PetscCall(MatMultTranspose(pch2opus->M,x,y)); 180 else PetscCall(MatMult(pch2opus->M,x,y)); 181 PetscFunctionReturn(0); 182 } 183 184 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t) 185 { 186 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 187 188 PetscFunctionBegin; 189 if (t) PetscCall(MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y)); 190 else PetscCall(MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y)); 191 PetscFunctionReturn(0); 192 } 193 194 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y) 195 { 196 PetscFunctionBegin; 197 PetscCall(PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE)); 198 PetscFunctionReturn(0); 199 } 200 201 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y) 202 { 203 PetscFunctionBegin; 204 PetscCall(PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE)); 205 PetscFunctionReturn(0); 206 } 207 208 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y) 209 { 210 PetscFunctionBegin; 211 PetscCall(PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE)); 212 PetscFunctionReturn(0); 213 } 214 215 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y) 216 { 217 PetscFunctionBegin; 218 PetscCall(PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE)); 219 PetscFunctionReturn(0); 220 } 221 222 /* used to test the norm of (M^-1 A - I) */ 223 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t) 224 { 225 PC pc; 226 Mat A; 227 PC_H2OPUS *pch2opus; 228 PetscBool sideleft = PETSC_TRUE; 229 230 PetscFunctionBegin; 231 PetscCall(MatShellGetContext(M,(void*)&pc)); 232 pch2opus = (PC_H2OPUS*)pc->data; 233 if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M,&pch2opus->w,NULL)); 234 A = pch2opus->A; 235 PetscCall(VecBindToCPU(pch2opus->w,pch2opus->boundtocpu)); 236 if (t) { 237 if (sideleft) { 238 PetscCall(PCApplyTranspose_H2OPUS(pc,x,pch2opus->w)); 239 PetscCall(MatMultTranspose(A,pch2opus->w,y)); 240 } else { 241 PetscCall(MatMultTranspose(A,x,pch2opus->w)); 242 PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->w,y)); 243 } 244 } else { 245 if (sideleft) { 246 PetscCall(MatMult(A,x,pch2opus->w)); 247 PetscCall(PCApply_H2OPUS(pc,pch2opus->w,y)); 248 } else { 249 PetscCall(PCApply_H2OPUS(pc,x,pch2opus->w)); 250 PetscCall(MatMult(A,pch2opus->w,y)); 251 } 252 } 253 PetscCall(VecAXPY(y,-1.0,x)); 254 PetscFunctionReturn(0); 255 } 256 257 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y) 258 { 259 PetscFunctionBegin; 260 PetscCall(MatMultKernel_MAmI(A,x,y,PETSC_FALSE)); 261 PetscFunctionReturn(0); 262 } 263 264 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y) 265 { 266 PetscFunctionBegin; 267 PetscCall(MatMultKernel_MAmI(A,x,y,PETSC_TRUE)); 268 PetscFunctionReturn(0); 269 } 270 271 /* HyperPower kernel: 272 Y = R = x 273 for i = 1 . . . l - 1 do 274 R = (I - A * Xk) * R 275 Y = Y + R 276 Y = Xk * Y 277 */ 278 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t) 279 { 280 PC pc; 281 Mat A; 282 PC_H2OPUS *pch2opus; 283 284 PetscFunctionBegin; 285 PetscCall(MatShellGetContext(M,(void*)&pc)); 286 pch2opus = (PC_H2OPUS*)pc->data; 287 A = pch2opus->A; 288 PetscCall(MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 289 PetscCall(MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3])); 290 PetscCall(VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu)); 291 PetscCall(VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu)); 292 PetscCall(VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu)); 293 PetscCall(VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu)); 294 PetscCall(VecCopy(x,pch2opus->wns[0])); 295 PetscCall(VecCopy(x,pch2opus->wns[3])); 296 if (t) { 297 for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) { 298 PetscCall(MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1])); 299 PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2])); 300 PetscCall(VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2])); 301 PetscCall(VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0])); 302 } 303 PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y)); 304 } else { 305 for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) { 306 PetscCall(PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1])); 307 PetscCall(MatMult(A,pch2opus->wns[1],pch2opus->wns[2])); 308 PetscCall(VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2])); 309 PetscCall(VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0])); 310 } 311 PetscCall(PCApply_H2OPUS(pc,pch2opus->wns[3],y)); 312 } 313 PetscFunctionReturn(0); 314 } 315 316 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y) 317 { 318 PetscFunctionBegin; 319 PetscCall(MatMultKernel_Hyper(M,x,y,PETSC_FALSE)); 320 PetscFunctionReturn(0); 321 } 322 323 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y) 324 { 325 PetscFunctionBegin; 326 PetscCall(MatMultKernel_Hyper(M,x,y,PETSC_TRUE)); 327 PetscFunctionReturn(0); 328 } 329 330 /* Hyper power kernel, MatMat version */ 331 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t) 332 { 333 PC pc; 334 Mat A; 335 PC_H2OPUS *pch2opus; 336 PetscInt i; 337 338 PetscFunctionBegin; 339 PetscCall(MatShellGetContext(M,(void*)&pc)); 340 pch2opus = (PC_H2OPUS*)pc->data; 341 A = pch2opus->A; 342 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 343 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 344 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 345 } 346 if (!pch2opus->wnsmat[0]) { 347 PetscCall(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0])); 348 PetscCall(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1])); 349 } 350 if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) { 351 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 352 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 353 } 354 if (!pch2opus->wnsmat[2]) { 355 PetscCall(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2])); 356 PetscCall(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3])); 357 } 358 PetscCall(MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN)); 359 PetscCall(MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN)); 360 if (t) { 361 for (i=0;i<pch2opus->hyperorder-1;i++) { 362 PetscCall(MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1])); 363 PetscCall(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2])); 364 PetscCall(MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN)); 365 PetscCall(MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN)); 366 } 367 PetscCall(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y)); 368 } else { 369 for (i=0;i<pch2opus->hyperorder-1;i++) { 370 PetscCall(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1])); 371 PetscCall(MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2])); 372 PetscCall(MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN)); 373 PetscCall(MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN)); 374 } 375 PetscCall(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y)); 376 } 377 PetscFunctionReturn(0); 378 } 379 380 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx) 381 { 382 PetscFunctionBegin; 383 PetscCall(MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE)); 384 PetscFunctionReturn(0); 385 } 386 387 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */ 388 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t) 389 { 390 PC pc; 391 Mat A; 392 PC_H2OPUS *pch2opus; 393 394 PetscFunctionBegin; 395 PetscCall(MatShellGetContext(M,(void*)&pc)); 396 pch2opus = (PC_H2OPUS*)pc->data; 397 A = pch2opus->A; 398 PetscCall(MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 399 PetscCall(VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu)); 400 PetscCall(VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu)); 401 if (t) { 402 PetscCall(PCApplyTranspose_H2OPUS(pc,x,y)); 403 PetscCall(MatMultTranspose(A,y,pch2opus->wns[1])); 404 PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0])); 405 PetscCall(VecAXPBY(y,-1.,2.,pch2opus->wns[0])); 406 } else { 407 PetscCall(PCApply_H2OPUS(pc,x,y)); 408 PetscCall(MatMult(A,y,pch2opus->wns[0])); 409 PetscCall(PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1])); 410 PetscCall(VecAXPBY(y,-1.,2.,pch2opus->wns[1])); 411 } 412 PetscFunctionReturn(0); 413 } 414 415 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y) 416 { 417 PetscFunctionBegin; 418 PetscCall(MatMultKernel_NS(M,x,y,PETSC_FALSE)); 419 PetscFunctionReturn(0); 420 } 421 422 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y) 423 { 424 PetscFunctionBegin; 425 PetscCall(MatMultKernel_NS(M,x,y,PETSC_TRUE)); 426 PetscFunctionReturn(0); 427 } 428 429 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */ 430 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t) 431 { 432 PC pc; 433 Mat A; 434 PC_H2OPUS *pch2opus; 435 436 PetscFunctionBegin; 437 PetscCall(MatShellGetContext(M,(void*)&pc)); 438 pch2opus = (PC_H2OPUS*)pc->data; 439 A = pch2opus->A; 440 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 441 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 442 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 443 } 444 if (!pch2opus->wnsmat[0]) { 445 PetscCall(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0])); 446 PetscCall(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1])); 447 } 448 if (t) { 449 PetscCall(PCApplyTransposeMat_H2OPUS(pc,X,Y)); 450 PetscCall(MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1])); 451 PetscCall(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0])); 452 PetscCall(MatScale(Y,2.)); 453 PetscCall(MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN)); 454 } else { 455 PetscCall(PCApplyMat_H2OPUS(pc,X,Y)); 456 PetscCall(MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0])); 457 PetscCall(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1])); 458 PetscCall(MatScale(Y,2.)); 459 PetscCall(MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN)); 460 } 461 PetscFunctionReturn(0); 462 } 463 464 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx) 465 { 466 PetscFunctionBegin; 467 PetscCall(MatMatMultKernel_NS(M,X,Y,PETSC_FALSE)); 468 PetscFunctionReturn(0); 469 } 470 471 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc) 472 { 473 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 474 Mat A = pc->useAmat ? pc->mat : pc->pmat; 475 476 PetscFunctionBegin; 477 if (!pch2opus->S) { 478 PetscInt M,N,m,n; 479 480 PetscCall(MatGetSize(A,&M,&N)); 481 PetscCall(MatGetLocalSize(A,&m,&n)); 482 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S)); 483 PetscCall(MatSetBlockSizesFromMats(pch2opus->S,A,A)); 484 #if defined(PETSC_H2OPUS_USE_GPU) 485 PetscCall(MatShellSetVecType(pch2opus->S,VECCUDA)); 486 #endif 487 } 488 if (pch2opus->hyperorder >= 2) { 489 PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper)); 490 PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper)); 491 PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE)); 492 PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA)); 493 } else { 494 PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS)); 495 PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS)); 496 PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE)); 497 PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA)); 498 } 499 PetscCall(MatPropagateSymmetryOptions(A,pch2opus->S)); 500 PetscCall(MatBindToCPU(pch2opus->S,pch2opus->boundtocpu)); 501 /* XXX */ 502 PetscCall(MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE)); 503 PetscFunctionReturn(0); 504 } 505 506 static PetscErrorCode PCSetUp_H2OPUS(PC pc) 507 { 508 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 509 Mat A = pc->useAmat ? pc->mat : pc->pmat; 510 NormType norm = pch2opus->normtype; 511 PetscReal initerr = 0.0,err; 512 PetscBool ish2opus; 513 514 PetscFunctionBegin; 515 if (!pch2opus->T) { 516 PetscInt M,N,m,n; 517 const char *prefix; 518 519 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 520 PetscCall(MatGetSize(A,&M,&N)); 521 PetscCall(MatGetLocalSize(A,&m,&n)); 522 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T)); 523 PetscCall(MatSetBlockSizesFromMats(pch2opus->T,A,A)); 524 PetscCall(MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI)); 525 PetscCall(MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI)); 526 PetscCall(MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS)); 527 #if defined(PETSC_H2OPUS_USE_GPU) 528 PetscCall(MatShellSetVecType(pch2opus->T,VECCUDA)); 529 #endif 530 PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T)); 531 PetscCall(MatSetOptionsPrefix(pch2opus->T,prefix)); 532 PetscCall(MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_")); 533 } 534 PetscCall(MatDestroy(&pch2opus->A)); 535 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus)); 536 if (ish2opus) { 537 PetscCall(PetscObjectReference((PetscObject)A)); 538 pch2opus->A = A; 539 } else { 540 const char *prefix; 541 PetscCall(MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A)); 542 /* XXX */ 543 PetscCall(MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE)); 544 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 545 PetscCall(MatSetOptionsPrefix(pch2opus->A,prefix)); 546 PetscCall(MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_")); 547 PetscCall(MatSetFromOptions(pch2opus->A)); 548 PetscCall(MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY)); 549 PetscCall(MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY)); 550 /* XXX */ 551 PetscCall(MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE)); 552 553 /* always perform construction on the GPU unless forcecpu is true */ 554 PetscCall(MatBindToCPU(pch2opus->A,pch2opus->forcecpu)); 555 } 556 #if defined(PETSC_H2OPUS_USE_GPU) 557 pch2opus->boundtocpu = pch2opus->forcecpu ? PETSC_TRUE : pch2opus->A->boundtocpu; 558 #endif 559 PetscCall(MatBindToCPU(pch2opus->T,pch2opus->boundtocpu)); 560 if (pch2opus->M) { /* see if we can reuse M as initial guess */ 561 PetscReal reuse; 562 563 PetscCall(MatBindToCPU(pch2opus->M,pch2opus->boundtocpu)); 564 PetscCall(MatNorm(pch2opus->T,norm,&reuse)); 565 if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M)); 566 } 567 if (!pch2opus->M) { 568 const char *prefix; 569 PetscCall(MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M)); 570 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 571 PetscCall(MatSetOptionsPrefix(pch2opus->M,prefix)); 572 PetscCall(MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_")); 573 PetscCall(MatSetFromOptions(pch2opus->M)); 574 PetscCall(PCH2OpusSetUpInit(pc)); 575 PetscCall(MatScale(pch2opus->M,pch2opus->s0)); 576 } 577 /* A and M have the same h2 matrix structure, save on reordering routines */ 578 PetscCall(MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE)); 579 PetscCall(MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE)); 580 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T,norm,&initerr)); 581 if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR; 582 err = initerr; 583 if (pch2opus->monitor) { 584 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr))); 585 } 586 if (initerr > pch2opus->atol && !pc->failedreason) { 587 PetscInt i; 588 589 PetscCall(PCH2OpusSetUpSampler_Private(pc)); 590 for (i = 0; i < pch2opus->maxits; i++) { 591 Mat M; 592 const char* prefix; 593 594 PetscCall(MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M)); 595 PetscCall(MatGetOptionsPrefix(pch2opus->M,&prefix)); 596 PetscCall(MatSetOptionsPrefix(M,prefix)); 597 PetscCall(MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE)); 598 PetscCall(MatSetFromOptions(M)); 599 PetscCall(MatH2OpusSetNativeMult(M,PETSC_TRUE)); 600 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 601 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 602 PetscCall(MatDestroy(&pch2opus->M)); 603 pch2opus->M = M; 604 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) PetscCall(MatNorm(pch2opus->T,norm,&err)); 605 if (pch2opus->monitor) { 606 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%" PetscInt_FMT ": ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr))); 607 } 608 if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR; 609 if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break; 610 } 611 } 612 /* cleanup setup workspace */ 613 PetscCall(MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE)); 614 PetscCall(MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE)); 615 PetscCall(VecDestroy(&pch2opus->wns[0])); 616 PetscCall(VecDestroy(&pch2opus->wns[1])); 617 PetscCall(VecDestroy(&pch2opus->wns[2])); 618 PetscCall(VecDestroy(&pch2opus->wns[3])); 619 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 620 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 621 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 622 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 623 PetscFunctionReturn(0); 624 } 625 626 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer) 627 { 628 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 629 PetscBool isascii; 630 631 PetscFunctionBegin; 632 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 633 if (isascii) { 634 if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) { 635 PetscCall(PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n")); 636 PetscCall(PetscViewerASCIIPushTab(viewer)); 637 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL)); 638 PetscCall(MatView(pch2opus->A,viewer)); 639 PetscCall(PetscViewerPopFormat(viewer)); 640 PetscCall(PetscViewerASCIIPopTab(viewer)); 641 } 642 if (pch2opus->M) { 643 PetscCall(PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n")); 644 PetscCall(PetscViewerASCIIPushTab(viewer)); 645 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL)); 646 PetscCall(MatView(pch2opus->M,viewer)); 647 PetscCall(PetscViewerPopFormat(viewer)); 648 PetscCall(PetscViewerASCIIPopTab(viewer)); 649 } 650 PetscCall(PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0)); 651 } 652 PetscFunctionReturn(0); 653 } 654 655 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc) 656 { 657 PC_H2OPUS *pch2opus; 658 659 PetscFunctionBegin; 660 PetscCall(PetscNewLog(pc,&pch2opus)); 661 pc->data = (void*)pch2opus; 662 663 pch2opus->atol = 1.e-2; 664 pch2opus->rtol = 1.e-6; 665 pch2opus->maxits = 50; 666 pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */ 667 pch2opus->normtype = NORM_2; 668 669 /* these are needed when we are sampling the pmat */ 670 pch2opus->eta = PETSC_DECIDE; 671 pch2opus->leafsize = PETSC_DECIDE; 672 pch2opus->max_rank = PETSC_DECIDE; 673 pch2opus->bs = PETSC_DECIDE; 674 pch2opus->mrtol = PETSC_DECIDE; 675 #if defined(PETSC_H2OPUS_USE_GPU) 676 pch2opus->boundtocpu = PETSC_FALSE; 677 #else 678 pch2opus->boundtocpu = PETSC_TRUE; 679 #endif 680 pc->ops->destroy = PCDestroy_H2OPUS; 681 pc->ops->setup = PCSetUp_H2OPUS; 682 pc->ops->apply = PCApply_H2OPUS; 683 pc->ops->matapply = PCApplyMat_H2OPUS; 684 pc->ops->applytranspose = PCApplyTranspose_H2OPUS; 685 pc->ops->reset = PCReset_H2OPUS; 686 pc->ops->setfromoptions = PCSetFromOptions_H2OPUS; 687 pc->ops->view = PCView_H2OPUS; 688 689 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS)); 690 PetscFunctionReturn(0); 691 } 692