1 #include <petsc/private/pcimpl.h> 2 #include <petsc/private/matimpl.h> 3 #include <h2opusconf.h> 4 5 /* Use GPU only if H2OPUS is configured for GPU */ 6 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 7 #define PETSC_H2OPUS_USE_GPU 8 #endif 9 10 typedef struct { 11 Mat A; 12 Mat M; 13 PetscScalar s0; 14 15 /* sampler for Newton-Schultz */ 16 Mat S; 17 PetscInt hyperorder; 18 Vec wns[4]; 19 Mat wnsmat[4]; 20 21 /* convergence testing */ 22 Mat T; 23 Vec w; 24 PetscBool testMA; 25 26 /* Support for PCSetCoordinates */ 27 PetscInt sdim; 28 PetscInt nlocc; 29 PetscReal *coords; 30 31 /* Newton-Schultz customization */ 32 PetscInt maxits; 33 PetscReal rtol,atol; 34 PetscBool monitor; 35 PetscBool useapproximatenorms; 36 NormType normtype; 37 38 /* Used when pmat != MATH2OPUS */ 39 PetscReal eta; 40 PetscInt leafsize; 41 PetscInt max_rank; 42 PetscInt bs; 43 PetscReal mrtol; 44 45 PetscBool boundtocpu; 46 } PC_H2OPUS; 47 48 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*); 49 50 static PetscErrorCode PCReset_H2OPUS(PC pc) 51 { 52 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 53 54 PetscFunctionBegin; 55 pch2opus->sdim = 0; 56 pch2opus->nlocc = 0; 57 PetscCall(PetscFree(pch2opus->coords)); 58 PetscCall(MatDestroy(&pch2opus->A)); 59 PetscCall(MatDestroy(&pch2opus->M)); 60 PetscCall(MatDestroy(&pch2opus->T)); 61 PetscCall(VecDestroy(&pch2opus->w)); 62 PetscCall(MatDestroy(&pch2opus->S)); 63 PetscCall(VecDestroy(&pch2opus->wns[0])); 64 PetscCall(VecDestroy(&pch2opus->wns[1])); 65 PetscCall(VecDestroy(&pch2opus->wns[2])); 66 PetscCall(VecDestroy(&pch2opus->wns[3])); 67 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 68 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 69 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 70 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 71 PetscFunctionReturn(0); 72 } 73 74 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords) 75 { 76 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 77 PetscBool reset = PETSC_TRUE; 78 79 PetscFunctionBegin; 80 if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) { 81 PetscCall(PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset)); 82 reset = (PetscBool)!reset; 83 } 84 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc))); 85 if (reset) { 86 PetscCall(PCReset_H2OPUS(pc)); 87 PetscCall(PetscMalloc1(sdim*nlocc,&pch2opus->coords)); 88 PetscCall(PetscArraycpy(pch2opus->coords,coords,sdim*nlocc)); 89 pch2opus->sdim = sdim; 90 pch2opus->nlocc = nlocc; 91 } 92 PetscFunctionReturn(0); 93 } 94 95 static PetscErrorCode PCDestroy_H2OPUS(PC pc) 96 { 97 PetscFunctionBegin; 98 PetscCall(PCReset_H2OPUS(pc)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL)); 100 PetscCall(PetscFree(pc->data)); 101 PetscFunctionReturn(0); 102 } 103 104 static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc) 105 { 106 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 107 108 PetscFunctionBegin; 109 PetscCall(PetscOptionsHead(PetscOptionsObject,"H2OPUS options")); 110 PetscCall(PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL)); 111 PetscCall(PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL)); 112 PetscCall(PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL)); 113 PetscCall(PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL)); 114 PetscCall(PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL)); 115 PetscCall(PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL)); 116 PetscCall(PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL)); 117 PetscCall(PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL)); 118 PetscCall(PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL)); 119 PetscCall(PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL)); 120 PetscCall(PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL)); 121 PetscCall(PetscOptionsTail()); 122 PetscFunctionReturn(0); 123 } 124 125 typedef struct { 126 Mat A; 127 Mat M; 128 Vec w; 129 } AAtCtx; 130 131 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y) 132 { 133 AAtCtx *aat; 134 135 PetscFunctionBegin; 136 PetscCall(MatShellGetContext(A,(void*)&aat)); 137 /* PetscCall(MatMultTranspose(aat->M,x,aat->w)); */ 138 PetscCall(MatMultTranspose(aat->A,x,aat->w)); 139 PetscCall(MatMult(aat->A,aat->w,y)); 140 PetscFunctionReturn(0); 141 } 142 143 static PetscErrorCode PCH2OpusSetUpInit(PC pc) 144 { 145 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 146 Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt; 147 PetscInt M,m; 148 VecType vtype; 149 PetscReal n; 150 AAtCtx aat; 151 152 PetscFunctionBegin; 153 aat.A = A; 154 aat.M = pch2opus->M; /* unused so far */ 155 PetscCall(MatCreateVecs(A,NULL,&aat.w)); 156 PetscCall(MatGetSize(A,&M,NULL)); 157 PetscCall(MatGetLocalSize(A,&m,NULL)); 158 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt)); 159 PetscCall(MatBindToCPU(AAt,pch2opus->boundtocpu)); 160 PetscCall(MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt)); 161 PetscCall(MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt)); 162 PetscCall(MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS)); 163 PetscCall(MatGetVecType(A,&vtype)); 164 PetscCall(MatShellSetVecType(AAt,vtype)); 165 PetscCall(MatNorm(AAt,NORM_1,&n)); 166 pch2opus->s0 = 1./n; 167 PetscCall(VecDestroy(&aat.w)); 168 PetscCall(MatDestroy(&AAt)); 169 PetscFunctionReturn(0); 170 } 171 172 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t) 173 { 174 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 175 176 PetscFunctionBegin; 177 if (t) PetscCall(MatMultTranspose(pch2opus->M,x,y)); 178 else PetscCall(MatMult(pch2opus->M,x,y)); 179 PetscFunctionReturn(0); 180 } 181 182 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t) 183 { 184 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 185 186 PetscFunctionBegin; 187 if (t) PetscCall(MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y)); 188 else PetscCall(MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y)); 189 PetscFunctionReturn(0); 190 } 191 192 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y) 193 { 194 PetscFunctionBegin; 195 PetscCall(PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE)); 196 PetscFunctionReturn(0); 197 } 198 199 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y) 200 { 201 PetscFunctionBegin; 202 PetscCall(PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE)); 203 PetscFunctionReturn(0); 204 } 205 206 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y) 207 { 208 PetscFunctionBegin; 209 PetscCall(PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE)); 210 PetscFunctionReturn(0); 211 } 212 213 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y) 214 { 215 PetscFunctionBegin; 216 PetscCall(PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE)); 217 PetscFunctionReturn(0); 218 } 219 220 /* used to test the norm of (M^-1 A - I) */ 221 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t) 222 { 223 PC pc; 224 Mat A; 225 PC_H2OPUS *pch2opus; 226 PetscBool sideleft = PETSC_TRUE; 227 228 PetscFunctionBegin; 229 PetscCall(MatShellGetContext(M,(void*)&pc)); 230 pch2opus = (PC_H2OPUS*)pc->data; 231 if (!pch2opus->w) PetscCall(MatCreateVecs(pch2opus->M,&pch2opus->w,NULL)); 232 A = pch2opus->A; 233 PetscCall(VecBindToCPU(pch2opus->w,pch2opus->boundtocpu)); 234 if (t) { 235 if (sideleft) { 236 PetscCall(PCApplyTranspose_H2OPUS(pc,x,pch2opus->w)); 237 PetscCall(MatMultTranspose(A,pch2opus->w,y)); 238 } else { 239 PetscCall(MatMultTranspose(A,x,pch2opus->w)); 240 PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->w,y)); 241 } 242 } else { 243 if (sideleft) { 244 PetscCall(MatMult(A,x,pch2opus->w)); 245 PetscCall(PCApply_H2OPUS(pc,pch2opus->w,y)); 246 } else { 247 PetscCall(PCApply_H2OPUS(pc,x,pch2opus->w)); 248 PetscCall(MatMult(A,pch2opus->w,y)); 249 } 250 } 251 if (!pch2opus->testMA) PetscCall(VecAXPY(y,-1.0,x)); 252 PetscFunctionReturn(0); 253 } 254 255 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y) 256 { 257 PetscFunctionBegin; 258 PetscCall(MatMultKernel_MAmI(A,x,y,PETSC_FALSE)); 259 PetscFunctionReturn(0); 260 } 261 262 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y) 263 { 264 PetscFunctionBegin; 265 PetscCall(MatMultKernel_MAmI(A,x,y,PETSC_TRUE)); 266 PetscFunctionReturn(0); 267 } 268 269 /* HyperPower kernel: 270 Y = R = x 271 for i = 1 . . . l - 1 do 272 R = (I - A * Xk) * R 273 Y = Y + R 274 Y = Xk * Y 275 */ 276 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t) 277 { 278 PC pc; 279 Mat A; 280 PC_H2OPUS *pch2opus; 281 282 PetscFunctionBegin; 283 PetscCall(MatShellGetContext(M,(void*)&pc)); 284 pch2opus = (PC_H2OPUS*)pc->data; 285 A = pch2opus->A; 286 PetscCall(MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 287 PetscCall(MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3])); 288 PetscCall(VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu)); 289 PetscCall(VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu)); 290 PetscCall(VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu)); 291 PetscCall(VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu)); 292 PetscCall(VecCopy(x,pch2opus->wns[0])); 293 PetscCall(VecCopy(x,pch2opus->wns[3])); 294 if (t) { 295 for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) { 296 PetscCall(MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1])); 297 PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2])); 298 PetscCall(VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2])); 299 PetscCall(VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0])); 300 } 301 PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y)); 302 } else { 303 for (PetscInt i=0;i<pch2opus->hyperorder-1;i++) { 304 PetscCall(PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1])); 305 PetscCall(MatMult(A,pch2opus->wns[1],pch2opus->wns[2])); 306 PetscCall(VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2])); 307 PetscCall(VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0])); 308 } 309 PetscCall(PCApply_H2OPUS(pc,pch2opus->wns[3],y)); 310 } 311 PetscFunctionReturn(0); 312 } 313 314 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y) 315 { 316 PetscFunctionBegin; 317 PetscCall(MatMultKernel_Hyper(M,x,y,PETSC_FALSE)); 318 PetscFunctionReturn(0); 319 } 320 321 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y) 322 { 323 PetscFunctionBegin; 324 PetscCall(MatMultKernel_Hyper(M,x,y,PETSC_TRUE)); 325 PetscFunctionReturn(0); 326 } 327 328 /* Hyper power kernel, MatMat version */ 329 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t) 330 { 331 PC pc; 332 Mat A; 333 PC_H2OPUS *pch2opus; 334 PetscInt i; 335 336 PetscFunctionBegin; 337 PetscCall(MatShellGetContext(M,(void*)&pc)); 338 pch2opus = (PC_H2OPUS*)pc->data; 339 A = pch2opus->A; 340 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 341 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 342 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 343 } 344 if (!pch2opus->wnsmat[0]) { 345 PetscCall(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0])); 346 PetscCall(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1])); 347 } 348 if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) { 349 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 350 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 351 } 352 if (!pch2opus->wnsmat[2]) { 353 PetscCall(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2])); 354 PetscCall(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3])); 355 } 356 PetscCall(MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN)); 357 PetscCall(MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN)); 358 if (t) { 359 for (i=0;i<pch2opus->hyperorder-1;i++) { 360 PetscCall(MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1])); 361 PetscCall(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2])); 362 PetscCall(MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN)); 363 PetscCall(MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN)); 364 } 365 PetscCall(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y)); 366 } else { 367 for (i=0;i<pch2opus->hyperorder-1;i++) { 368 PetscCall(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1])); 369 PetscCall(MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2])); 370 PetscCall(MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN)); 371 PetscCall(MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN)); 372 } 373 PetscCall(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y)); 374 } 375 PetscFunctionReturn(0); 376 } 377 378 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx) 379 { 380 PetscFunctionBegin; 381 PetscCall(MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE)); 382 PetscFunctionReturn(0); 383 } 384 385 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */ 386 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t) 387 { 388 PC pc; 389 Mat A; 390 PC_H2OPUS *pch2opus; 391 392 PetscFunctionBegin; 393 PetscCall(MatShellGetContext(M,(void*)&pc)); 394 pch2opus = (PC_H2OPUS*)pc->data; 395 A = pch2opus->A; 396 PetscCall(MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1])); 397 PetscCall(VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu)); 398 PetscCall(VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu)); 399 if (t) { 400 PetscCall(PCApplyTranspose_H2OPUS(pc,x,y)); 401 PetscCall(MatMultTranspose(A,y,pch2opus->wns[1])); 402 PetscCall(PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0])); 403 PetscCall(VecAXPBY(y,-1.,2.,pch2opus->wns[0])); 404 } else { 405 PetscCall(PCApply_H2OPUS(pc,x,y)); 406 PetscCall(MatMult(A,y,pch2opus->wns[0])); 407 PetscCall(PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1])); 408 PetscCall(VecAXPBY(y,-1.,2.,pch2opus->wns[1])); 409 } 410 PetscFunctionReturn(0); 411 } 412 413 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y) 414 { 415 PetscFunctionBegin; 416 PetscCall(MatMultKernel_NS(M,x,y,PETSC_FALSE)); 417 PetscFunctionReturn(0); 418 } 419 420 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y) 421 { 422 PetscFunctionBegin; 423 PetscCall(MatMultKernel_NS(M,x,y,PETSC_TRUE)); 424 PetscFunctionReturn(0); 425 } 426 427 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */ 428 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t) 429 { 430 PC pc; 431 Mat A; 432 PC_H2OPUS *pch2opus; 433 434 PetscFunctionBegin; 435 PetscCall(MatShellGetContext(M,(void*)&pc)); 436 pch2opus = (PC_H2OPUS*)pc->data; 437 A = pch2opus->A; 438 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 439 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 440 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 441 } 442 if (!pch2opus->wnsmat[0]) { 443 PetscCall(MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0])); 444 PetscCall(MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1])); 445 } 446 if (t) { 447 PetscCall(PCApplyTransposeMat_H2OPUS(pc,X,Y)); 448 PetscCall(MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1])); 449 PetscCall(PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0])); 450 PetscCall(MatScale(Y,2.)); 451 PetscCall(MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN)); 452 } else { 453 PetscCall(PCApplyMat_H2OPUS(pc,X,Y)); 454 PetscCall(MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0])); 455 PetscCall(PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1])); 456 PetscCall(MatScale(Y,2.)); 457 PetscCall(MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN)); 458 } 459 PetscFunctionReturn(0); 460 } 461 462 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx) 463 { 464 PetscFunctionBegin; 465 PetscCall(MatMatMultKernel_NS(M,X,Y,PETSC_FALSE)); 466 PetscFunctionReturn(0); 467 } 468 469 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc) 470 { 471 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 472 Mat A = pc->useAmat ? pc->mat : pc->pmat; 473 474 PetscFunctionBegin; 475 if (!pch2opus->S) { 476 PetscInt M,N,m,n; 477 478 PetscCall(MatGetSize(A,&M,&N)); 479 PetscCall(MatGetLocalSize(A,&m,&n)); 480 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S)); 481 PetscCall(MatSetBlockSizesFromMats(pch2opus->S,A,A)); 482 #if defined(PETSC_H2OPUS_USE_GPU) 483 PetscCall(MatShellSetVecType(pch2opus->S,VECCUDA)); 484 #endif 485 } 486 if (pch2opus->hyperorder >= 2) { 487 PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper)); 488 PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper)); 489 PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE)); 490 PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA)); 491 } else { 492 PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS)); 493 PetscCall(MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS)); 494 PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE)); 495 PetscCall(MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA)); 496 } 497 PetscCall(MatPropagateSymmetryOptions(A,pch2opus->S)); 498 PetscCall(MatBindToCPU(pch2opus->S,pch2opus->boundtocpu)); 499 /* XXX */ 500 PetscCall(MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE)); 501 PetscFunctionReturn(0); 502 } 503 504 static PetscErrorCode PCSetUp_H2OPUS(PC pc) 505 { 506 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 507 Mat A = pc->useAmat ? pc->mat : pc->pmat; 508 NormType norm = pch2opus->normtype; 509 PetscReal initerr = 0.0,err; 510 PetscReal initerrMA = 0.0,errMA; 511 PetscBool ish2opus; 512 513 PetscFunctionBegin; 514 if (!pch2opus->T) { 515 PetscInt M,N,m,n; 516 const char *prefix; 517 518 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 519 PetscCall(MatGetSize(A,&M,&N)); 520 PetscCall(MatGetLocalSize(A,&m,&n)); 521 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T)); 522 PetscCall(MatSetBlockSizesFromMats(pch2opus->T,A,A)); 523 PetscCall(MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI)); 524 PetscCall(MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI)); 525 PetscCall(MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS)); 526 #if defined(PETSC_H2OPUS_USE_GPU) 527 PetscCall(MatShellSetVecType(pch2opus->T,VECCUDA)); 528 #endif 529 PetscCall(PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T)); 530 PetscCall(MatSetOptionsPrefix(pch2opus->T,prefix)); 531 PetscCall(MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_")); 532 } 533 PetscCall(MatDestroy(&pch2opus->A)); 534 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus)); 535 if (ish2opus) { 536 PetscCall(PetscObjectReference((PetscObject)A)); 537 pch2opus->A = A; 538 } else { 539 const char *prefix; 540 PetscCall(MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A)); 541 /* XXX */ 542 PetscCall(MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE)); 543 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 544 PetscCall(MatSetOptionsPrefix(pch2opus->A,prefix)); 545 PetscCall(MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_")); 546 PetscCall(MatSetFromOptions(pch2opus->A)); 547 PetscCall(MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY)); 548 PetscCall(MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY)); 549 /* XXX */ 550 PetscCall(MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE)); 551 } 552 #if defined(PETSC_H2OPUS_USE_GPU) 553 pch2opus->boundtocpu = pch2opus->A->boundtocpu; 554 #endif 555 PetscCall(MatBindToCPU(pch2opus->T,pch2opus->boundtocpu)); 556 if (pch2opus->M) { /* see if we can reuse M as initial guess */ 557 PetscReal reuse; 558 559 PetscCall(MatBindToCPU(pch2opus->M,pch2opus->boundtocpu)); 560 PetscCall(MatNorm(pch2opus->T,norm,&reuse)); 561 if (reuse >= 1.0) PetscCall(MatDestroy(&pch2opus->M)); 562 } 563 if (!pch2opus->M) { 564 const char *prefix; 565 PetscCall(MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M)); 566 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 567 PetscCall(MatSetOptionsPrefix(pch2opus->M,prefix)); 568 PetscCall(MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_")); 569 PetscCall(MatSetFromOptions(pch2opus->M)); 570 PetscCall(PCH2OpusSetUpInit(pc)); 571 PetscCall(MatScale(pch2opus->M,pch2opus->s0)); 572 } 573 /* A and M have the same h2 matrix structure, save on reordering routines */ 574 PetscCall(MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE)); 575 PetscCall(MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE)); 576 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) { 577 PetscCall(MatNorm(pch2opus->T,norm,&initerr)); 578 pch2opus->testMA = PETSC_TRUE; 579 PetscCall(MatNorm(pch2opus->T,norm,&initerrMA)); 580 pch2opus->testMA = PETSC_FALSE; 581 } 582 if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR; 583 err = initerr; 584 errMA = initerrMA; 585 if (pch2opus->monitor) { 586 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr))); 587 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA))); 588 } 589 if (initerr > pch2opus->atol && !pc->failedreason) { 590 PetscInt i; 591 592 PetscCall(PCH2OpusSetUpSampler_Private(pc)); 593 for (i = 0; i < pch2opus->maxits; i++) { 594 Mat M; 595 const char* prefix; 596 597 PetscCall(MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M)); 598 PetscCall(MatGetOptionsPrefix(pch2opus->M,&prefix)); 599 PetscCall(MatSetOptionsPrefix(M,prefix)); 600 PetscCall(MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE)); 601 PetscCall(MatSetFromOptions(M)); 602 PetscCall(MatH2OpusSetNativeMult(M,PETSC_TRUE)); 603 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 604 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 605 606 PetscCall(MatDestroy(&pch2opus->M)); 607 pch2opus->M = M; 608 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) { 609 PetscCall(MatNorm(pch2opus->T,norm,&err)); 610 pch2opus->testMA = PETSC_TRUE; 611 PetscCall(MatNorm(pch2opus->T,norm,&errMA)); 612 pch2opus->testMA = PETSC_FALSE; 613 } 614 if (pch2opus->monitor) { 615 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr))); 616 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA))); 617 } 618 if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR; 619 if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break; 620 } 621 } 622 /* cleanup setup workspace */ 623 PetscCall(MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE)); 624 PetscCall(MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE)); 625 PetscCall(VecDestroy(&pch2opus->wns[0])); 626 PetscCall(VecDestroy(&pch2opus->wns[1])); 627 PetscCall(VecDestroy(&pch2opus->wns[2])); 628 PetscCall(VecDestroy(&pch2opus->wns[3])); 629 PetscCall(MatDestroy(&pch2opus->wnsmat[0])); 630 PetscCall(MatDestroy(&pch2opus->wnsmat[1])); 631 PetscCall(MatDestroy(&pch2opus->wnsmat[2])); 632 PetscCall(MatDestroy(&pch2opus->wnsmat[3])); 633 PetscFunctionReturn(0); 634 } 635 636 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer) 637 { 638 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 639 PetscBool isascii; 640 641 PetscFunctionBegin; 642 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 643 if (isascii) { 644 if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) { 645 PetscCall(PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n")); 646 PetscCall(PetscViewerASCIIPushTab(viewer)); 647 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL)); 648 PetscCall(MatView(pch2opus->A,viewer)); 649 PetscCall(PetscViewerPopFormat(viewer)); 650 PetscCall(PetscViewerASCIIPopTab(viewer)); 651 } 652 if (pch2opus->M) { 653 PetscCall(PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n")); 654 PetscCall(PetscViewerASCIIPushTab(viewer)); 655 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL)); 656 PetscCall(MatView(pch2opus->M,viewer)); 657 PetscCall(PetscViewerPopFormat(viewer)); 658 PetscCall(PetscViewerASCIIPopTab(viewer)); 659 } 660 PetscCall(PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0)); 661 } 662 PetscFunctionReturn(0); 663 } 664 665 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc) 666 { 667 PC_H2OPUS *pch2opus; 668 669 PetscFunctionBegin; 670 PetscCall(PetscNewLog(pc,&pch2opus)); 671 pc->data = (void*)pch2opus; 672 673 pch2opus->atol = 1.e-2; 674 pch2opus->rtol = 1.e-6; 675 pch2opus->maxits = 50; 676 pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */ 677 pch2opus->normtype = NORM_2; 678 679 /* these are needed when we are sampling the pmat */ 680 pch2opus->eta = PETSC_DECIDE; 681 pch2opus->leafsize = PETSC_DECIDE; 682 pch2opus->max_rank = PETSC_DECIDE; 683 pch2opus->bs = PETSC_DECIDE; 684 pch2opus->mrtol = PETSC_DECIDE; 685 #if defined(PETSC_H2OPUS_USE_GPU) 686 pch2opus->boundtocpu = PETSC_FALSE; 687 #else 688 pch2opus->boundtocpu = PETSC_TRUE; 689 #endif 690 pc->ops->destroy = PCDestroy_H2OPUS; 691 pc->ops->setup = PCSetUp_H2OPUS; 692 pc->ops->apply = PCApply_H2OPUS; 693 pc->ops->matapply = PCApplyMat_H2OPUS; 694 pc->ops->applytranspose = PCApplyTranspose_H2OPUS; 695 pc->ops->reset = PCReset_H2OPUS; 696 pc->ops->setfromoptions = PCSetFromOptions_H2OPUS; 697 pc->ops->view = PCView_H2OPUS; 698 699 PetscCall(PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS)); 700 PetscFunctionReturn(0); 701 } 702