1 #include <petsc/private/pcimpl.h> 2 #include <petsc/private/matimpl.h> 3 #include <h2opusconf.h> 4 5 /* Use GPU only if H2OPUS is configured for GPU */ 6 #if defined(PETSC_HAVE_CUDA) && defined(H2OPUS_USE_GPU) 7 #define PETSC_H2OPUS_USE_GPU 8 #endif 9 10 typedef struct { 11 Mat A; 12 Mat M; 13 PetscScalar s0; 14 15 /* sampler for Newton-Schultz */ 16 Mat S; 17 PetscInt hyperorder; 18 Vec wns[4]; 19 Mat wnsmat[4]; 20 21 /* convergence testing */ 22 Mat T; 23 Vec w; 24 PetscBool testMA; 25 26 /* Support for PCSetCoordinates */ 27 PetscInt sdim; 28 PetscInt nlocc; 29 PetscReal *coords; 30 31 /* Newton-Schultz customization */ 32 PetscInt maxits; 33 PetscReal rtol,atol; 34 PetscBool monitor; 35 PetscBool useapproximatenorms; 36 NormType normtype; 37 38 /* Used when pmat != MATH2OPUS */ 39 PetscReal eta; 40 PetscInt leafsize; 41 PetscInt max_rank; 42 PetscInt bs; 43 PetscReal mrtol; 44 45 PetscBool boundtocpu; 46 } PC_H2OPUS; 47 48 PETSC_EXTERN PetscErrorCode MatNorm_H2OPUS(Mat,NormType,PetscReal*); 49 50 static PetscErrorCode PCReset_H2OPUS(PC pc) 51 { 52 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 53 PetscErrorCode ierr; 54 55 PetscFunctionBegin; 56 pch2opus->sdim = 0; 57 pch2opus->nlocc = 0; 58 ierr = PetscFree(pch2opus->coords);CHKERRQ(ierr); 59 ierr = MatDestroy(&pch2opus->A);CHKERRQ(ierr); 60 ierr = MatDestroy(&pch2opus->M);CHKERRQ(ierr); 61 ierr = MatDestroy(&pch2opus->T);CHKERRQ(ierr); 62 ierr = VecDestroy(&pch2opus->w);CHKERRQ(ierr); 63 ierr = MatDestroy(&pch2opus->S);CHKERRQ(ierr); 64 ierr = VecDestroy(&pch2opus->wns[0]);CHKERRQ(ierr); 65 ierr = VecDestroy(&pch2opus->wns[1]);CHKERRQ(ierr); 66 ierr = VecDestroy(&pch2opus->wns[2]);CHKERRQ(ierr); 67 ierr = VecDestroy(&pch2opus->wns[3]);CHKERRQ(ierr); 68 ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr); 69 ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr); 70 ierr = MatDestroy(&pch2opus->wnsmat[2]);CHKERRQ(ierr); 71 ierr = MatDestroy(&pch2opus->wnsmat[3]);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 static PetscErrorCode PCSetCoordinates_H2OPUS(PC pc, PetscInt sdim, PetscInt nlocc, PetscReal *coords) 76 { 77 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 78 PetscBool reset = PETSC_TRUE; 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 if (pch2opus->sdim && sdim == pch2opus->sdim && nlocc == pch2opus->nlocc) { 83 ierr = PetscArraycmp(pch2opus->coords,coords,sdim*nlocc,&reset);CHKERRQ(ierr); 84 reset = (PetscBool)!reset; 85 } 86 ierr = MPIU_Allreduce(MPI_IN_PLACE,&reset,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRMPI(ierr); 87 if (reset) { 88 ierr = PCReset_H2OPUS(pc);CHKERRQ(ierr); 89 ierr = PetscMalloc1(sdim*nlocc,&pch2opus->coords);CHKERRQ(ierr); 90 ierr = PetscArraycpy(pch2opus->coords,coords,sdim*nlocc);CHKERRQ(ierr); 91 pch2opus->sdim = sdim; 92 pch2opus->nlocc = nlocc; 93 } 94 PetscFunctionReturn(0); 95 } 96 97 static PetscErrorCode PCDestroy_H2OPUS(PC pc) 98 { 99 PetscErrorCode ierr; 100 101 PetscFunctionBegin; 102 ierr = PCReset_H2OPUS(pc);CHKERRQ(ierr); 103 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",NULL);CHKERRQ(ierr); 104 ierr = PetscFree(pc->data);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 static PetscErrorCode PCSetFromOptions_H2OPUS(PetscOptionItems *PetscOptionsObject,PC pc) 109 { 110 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = PetscOptionsHead(PetscOptionsObject,"H2OPUS options");CHKERRQ(ierr); 115 ierr = PetscOptionsInt("-pc_h2opus_maxits","Maximum number of iterations for Newton-Schultz",NULL,pch2opus->maxits,&pch2opus->maxits,NULL);CHKERRQ(ierr); 116 ierr = PetscOptionsBool("-pc_h2opus_monitor","Monitor Newton-Schultz convergence",NULL,pch2opus->monitor,&pch2opus->monitor,NULL);CHKERRQ(ierr); 117 ierr = PetscOptionsReal("-pc_h2opus_atol","Absolute tolerance",NULL,pch2opus->atol,&pch2opus->atol,NULL);CHKERRQ(ierr); 118 ierr = PetscOptionsReal("-pc_h2opus_rtol","Relative tolerance",NULL,pch2opus->rtol,&pch2opus->rtol,NULL);CHKERRQ(ierr); 119 ierr = PetscOptionsEnum("-pc_h2opus_norm_type","Norm type for convergence monitoring",NULL,NormTypes,(PetscEnum)pch2opus->normtype,(PetscEnum*)&pch2opus->normtype,NULL);CHKERRQ(ierr); 120 ierr = PetscOptionsInt("-pc_h2opus_hyperorder","Hyper power order of sampling",NULL,pch2opus->hyperorder,&pch2opus->hyperorder,NULL);CHKERRQ(ierr); 121 ierr = PetscOptionsInt("-pc_h2opus_leafsize","Leaf size when constructed from kernel",NULL,pch2opus->leafsize,&pch2opus->leafsize,NULL);CHKERRQ(ierr); 122 ierr = PetscOptionsReal("-pc_h2opus_eta","Admissibility condition tolerance",NULL,pch2opus->eta,&pch2opus->eta,NULL);CHKERRQ(ierr); 123 ierr = PetscOptionsInt("-pc_h2opus_maxrank","Maximum rank when constructed from matvecs",NULL,pch2opus->max_rank,&pch2opus->max_rank,NULL);CHKERRQ(ierr); 124 ierr = PetscOptionsInt("-pc_h2opus_samples","Number of samples to be taken concurrently when constructing from matvecs",NULL,pch2opus->bs,&pch2opus->bs,NULL);CHKERRQ(ierr); 125 ierr = PetscOptionsReal("-pc_h2opus_mrtol","Relative tolerance for construction from sampling",NULL,pch2opus->mrtol,&pch2opus->mrtol,NULL);CHKERRQ(ierr); 126 ierr = PetscOptionsTail();CHKERRQ(ierr); 127 PetscFunctionReturn(0); 128 } 129 130 typedef struct { 131 Mat A; 132 Mat M; 133 Vec w; 134 } AAtCtx; 135 136 static PetscErrorCode MatMult_AAt(Mat A, Vec x, Vec y) 137 { 138 AAtCtx *aat; 139 PetscErrorCode ierr; 140 141 PetscFunctionBegin; 142 ierr = MatShellGetContext(A,(void*)&aat);CHKERRQ(ierr); 143 /* ierr = MatMultTranspose(aat->M,x,aat->w);CHKERRQ(ierr); */ 144 ierr = MatMultTranspose(aat->A,x,aat->w);CHKERRQ(ierr); 145 ierr = MatMult(aat->A,aat->w,y);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 static PetscErrorCode PCH2OpusSetUpInit(PC pc) 150 { 151 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 152 Mat A = pc->useAmat ? pc->mat : pc->pmat, AAt; 153 PetscInt M,m; 154 VecType vtype; 155 PetscReal n; 156 AAtCtx aat; 157 PetscErrorCode ierr; 158 159 PetscFunctionBegin; 160 aat.A = A; 161 aat.M = pch2opus->M; /* unused so far */ 162 ierr = MatCreateVecs(A,NULL,&aat.w);CHKERRQ(ierr); 163 ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr); 164 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 165 ierr = MatCreateShell(PetscObjectComm((PetscObject)A),m,m,M,M,&aat,&AAt);CHKERRQ(ierr); 166 ierr = MatBindToCPU(AAt,pch2opus->boundtocpu);CHKERRQ(ierr); 167 ierr = MatShellSetOperation(AAt,MATOP_MULT,(void (*)(void))MatMult_AAt);CHKERRQ(ierr); 168 ierr = MatShellSetOperation(AAt,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMult_AAt);CHKERRQ(ierr); 169 ierr = MatShellSetOperation(AAt,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);CHKERRQ(ierr); 170 ierr = MatGetVecType(A,&vtype);CHKERRQ(ierr); 171 ierr = MatShellSetVecType(AAt,vtype);CHKERRQ(ierr); 172 ierr = MatNorm(AAt,NORM_1,&n);CHKERRQ(ierr); 173 pch2opus->s0 = 1./n; 174 ierr = VecDestroy(&aat.w);CHKERRQ(ierr); 175 ierr = MatDestroy(&AAt);CHKERRQ(ierr); 176 PetscFunctionReturn(0); 177 } 178 179 static PetscErrorCode PCApplyKernel_H2OPUS(PC pc, Vec x, Vec y, PetscBool t) 180 { 181 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 182 PetscErrorCode ierr; 183 184 PetscFunctionBegin; 185 if (t) { 186 ierr = MatMultTranspose(pch2opus->M,x,y);CHKERRQ(ierr); 187 } else { 188 ierr = MatMult(pch2opus->M,x,y);CHKERRQ(ierr); 189 } 190 PetscFunctionReturn(0); 191 } 192 193 static PetscErrorCode PCApplyMatKernel_H2OPUS(PC pc, Mat X, Mat Y, PetscBool t) 194 { 195 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 196 PetscErrorCode ierr; 197 198 PetscFunctionBegin; 199 if (t) { 200 ierr = MatTransposeMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); 201 } else { 202 ierr = MatMatMult(pch2opus->M,X,MAT_REUSE_MATRIX,PETSC_DEFAULT,&Y);CHKERRQ(ierr); 203 } 204 PetscFunctionReturn(0); 205 } 206 207 static PetscErrorCode PCApplyMat_H2OPUS(PC pc, Mat X, Mat Y) 208 { 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 ierr = PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_FALSE);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 216 static PetscErrorCode PCApplyTransposeMat_H2OPUS(PC pc, Mat X, Mat Y) 217 { 218 PetscErrorCode ierr; 219 220 PetscFunctionBegin; 221 ierr = PCApplyMatKernel_H2OPUS(pc,X,Y,PETSC_TRUE);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 static PetscErrorCode PCApply_H2OPUS(PC pc, Vec x, Vec y) 226 { 227 PetscErrorCode ierr; 228 229 PetscFunctionBegin; 230 ierr = PCApplyKernel_H2OPUS(pc,x,y,PETSC_FALSE);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 static PetscErrorCode PCApplyTranspose_H2OPUS(PC pc, Vec x, Vec y) 235 { 236 PetscErrorCode ierr; 237 238 PetscFunctionBegin; 239 ierr = PCApplyKernel_H2OPUS(pc,x,y,PETSC_TRUE);CHKERRQ(ierr); 240 PetscFunctionReturn(0); 241 } 242 243 /* used to test the norm of (M^-1 A - I) */ 244 static PetscErrorCode MatMultKernel_MAmI(Mat M, Vec x, Vec y, PetscBool t) 245 { 246 PC pc; 247 Mat A; 248 PC_H2OPUS *pch2opus; 249 PetscErrorCode ierr; 250 PetscBool sideleft = PETSC_TRUE; 251 252 PetscFunctionBegin; 253 ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 254 pch2opus = (PC_H2OPUS*)pc->data; 255 if (!pch2opus->w) { 256 ierr = MatCreateVecs(pch2opus->M,&pch2opus->w,NULL);CHKERRQ(ierr); 257 } 258 A = pch2opus->A; 259 ierr = VecBindToCPU(pch2opus->w,pch2opus->boundtocpu);CHKERRQ(ierr); 260 if (t) { 261 if (sideleft) { 262 ierr = PCApplyTranspose_H2OPUS(pc,x,pch2opus->w);CHKERRQ(ierr); 263 ierr = MatMultTranspose(A,pch2opus->w,y);CHKERRQ(ierr); 264 } else { 265 ierr = MatMultTranspose(A,x,pch2opus->w);CHKERRQ(ierr); 266 ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->w,y);CHKERRQ(ierr); 267 } 268 } else { 269 if (sideleft) { 270 ierr = MatMult(A,x,pch2opus->w);CHKERRQ(ierr); 271 ierr = PCApply_H2OPUS(pc,pch2opus->w,y);CHKERRQ(ierr); 272 } else { 273 ierr = PCApply_H2OPUS(pc,x,pch2opus->w);CHKERRQ(ierr); 274 ierr = MatMult(A,pch2opus->w,y);CHKERRQ(ierr); 275 } 276 } 277 if (!pch2opus->testMA) { 278 ierr = VecAXPY(y,-1.0,x);CHKERRQ(ierr); 279 } 280 PetscFunctionReturn(0); 281 } 282 283 static PetscErrorCode MatMult_MAmI(Mat A, Vec x, Vec y) 284 { 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 ierr = MatMultKernel_MAmI(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 static PetscErrorCode MatMultTranspose_MAmI(Mat A, Vec x, Vec y) 293 { 294 PetscErrorCode ierr; 295 296 PetscFunctionBegin; 297 ierr = MatMultKernel_MAmI(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 298 PetscFunctionReturn(0); 299 } 300 301 /* HyperPower kernel: 302 Y = R = x 303 for i = 1 . . . l - 1 do 304 R = (I - A * Xk) * R 305 Y = Y + R 306 Y = Xk * Y 307 */ 308 static PetscErrorCode MatMultKernel_Hyper(Mat M, Vec x, Vec y, PetscBool t) 309 { 310 PC pc; 311 Mat A; 312 PC_H2OPUS *pch2opus; 313 PetscInt i; 314 PetscErrorCode ierr; 315 316 PetscFunctionBegin; 317 ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 318 pch2opus = (PC_H2OPUS*)pc->data; 319 A = pch2opus->A; 320 ierr = MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);CHKERRQ(ierr); 321 ierr = MatCreateVecs(pch2opus->M,pch2opus->wns[2] ? NULL : &pch2opus->wns[2],pch2opus->wns[3] ? NULL : &pch2opus->wns[3]);CHKERRQ(ierr); 322 ierr = VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);CHKERRQ(ierr); 323 ierr = VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);CHKERRQ(ierr); 324 ierr = VecBindToCPU(pch2opus->wns[2],pch2opus->boundtocpu);CHKERRQ(ierr); 325 ierr = VecBindToCPU(pch2opus->wns[3],pch2opus->boundtocpu);CHKERRQ(ierr); 326 ierr = VecCopy(x,pch2opus->wns[0]);CHKERRQ(ierr); 327 ierr = VecCopy(x,pch2opus->wns[3]);CHKERRQ(ierr); 328 if (t) { 329 for (i=0;i<pch2opus->hyperorder-1;i++) { 330 ierr = MatMultTranspose(A,pch2opus->wns[0],pch2opus->wns[1]);CHKERRQ(ierr); 331 ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[2]);CHKERRQ(ierr); 332 ierr = VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);CHKERRQ(ierr); 333 ierr = VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);CHKERRQ(ierr); 334 } 335 ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->wns[3],y);CHKERRQ(ierr); 336 } else { 337 for (i=0;i<pch2opus->hyperorder-1;i++) { 338 ierr = PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);CHKERRQ(ierr); 339 ierr = MatMult(A,pch2opus->wns[1],pch2opus->wns[2]);CHKERRQ(ierr); 340 ierr = VecAXPY(pch2opus->wns[0],-1.,pch2opus->wns[2]);CHKERRQ(ierr); 341 ierr = VecAXPY(pch2opus->wns[3], 1.,pch2opus->wns[0]);CHKERRQ(ierr); 342 } 343 ierr = PCApply_H2OPUS(pc,pch2opus->wns[3],y);CHKERRQ(ierr); 344 } 345 PetscFunctionReturn(0); 346 } 347 348 static PetscErrorCode MatMult_Hyper(Mat M, Vec x, Vec y) 349 { 350 PetscErrorCode ierr; 351 352 PetscFunctionBegin; 353 ierr = MatMultKernel_Hyper(M,x,y,PETSC_FALSE);CHKERRQ(ierr); 354 PetscFunctionReturn(0); 355 } 356 357 static PetscErrorCode MatMultTranspose_Hyper(Mat M, Vec x, Vec y) 358 { 359 PetscErrorCode ierr; 360 361 PetscFunctionBegin; 362 ierr = MatMultKernel_Hyper(M,x,y,PETSC_TRUE);CHKERRQ(ierr); 363 PetscFunctionReturn(0); 364 } 365 366 /* Hyper power kernel, MatMat version */ 367 static PetscErrorCode MatMatMultKernel_Hyper(Mat M, Mat X, Mat Y, PetscBool t) 368 { 369 PC pc; 370 Mat A; 371 PC_H2OPUS *pch2opus; 372 PetscInt i; 373 PetscErrorCode ierr; 374 375 PetscFunctionBegin; 376 ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 377 pch2opus = (PC_H2OPUS*)pc->data; 378 A = pch2opus->A; 379 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 380 ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr); 381 ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr); 382 } 383 if (!pch2opus->wnsmat[0]) { 384 ierr = MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);CHKERRQ(ierr); 385 ierr = MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);CHKERRQ(ierr); 386 } 387 if (pch2opus->wnsmat[2] && pch2opus->wnsmat[2]->cmap->N != X->cmap->N) { 388 ierr = MatDestroy(&pch2opus->wnsmat[2]);CHKERRQ(ierr); 389 ierr = MatDestroy(&pch2opus->wnsmat[3]);CHKERRQ(ierr); 390 } 391 if (!pch2opus->wnsmat[2]) { 392 ierr = MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[2]);CHKERRQ(ierr); 393 ierr = MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[3]);CHKERRQ(ierr); 394 } 395 ierr = MatCopy(X,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 396 ierr = MatCopy(X,pch2opus->wnsmat[3],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 397 if (t) { 398 for (i=0;i<pch2opus->hyperorder-1;i++) { 399 ierr = MatTransposeMatMult(A,pch2opus->wnsmat[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);CHKERRQ(ierr); 400 ierr = PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[2]);CHKERRQ(ierr); 401 ierr = MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 402 ierr = MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 403 } 404 ierr = PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);CHKERRQ(ierr); 405 } else { 406 for (i=0;i<pch2opus->hyperorder-1;i++) { 407 ierr = PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);CHKERRQ(ierr); 408 ierr = MatMatMult(A,pch2opus->wnsmat[1],MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[2]);CHKERRQ(ierr); 409 ierr = MatAXPY(pch2opus->wnsmat[0],-1.,pch2opus->wnsmat[2],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 410 ierr = MatAXPY(pch2opus->wnsmat[3],1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 411 } 412 ierr = PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[3],Y);CHKERRQ(ierr); 413 } 414 PetscFunctionReturn(0); 415 } 416 417 static PetscErrorCode MatMatMultNumeric_Hyper(Mat M, Mat X, Mat Y,void *ctx) 418 { 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = MatMatMultKernel_Hyper(M,X,Y,PETSC_FALSE);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M */ 427 static PetscErrorCode MatMultKernel_NS(Mat M, Vec x, Vec y, PetscBool t) 428 { 429 PC pc; 430 Mat A; 431 PC_H2OPUS *pch2opus; 432 PetscErrorCode ierr; 433 434 PetscFunctionBegin; 435 ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 436 pch2opus = (PC_H2OPUS*)pc->data; 437 A = pch2opus->A; 438 ierr = MatCreateVecs(pch2opus->M,pch2opus->wns[0] ? NULL : &pch2opus->wns[0],pch2opus->wns[1] ? NULL : &pch2opus->wns[1]);CHKERRQ(ierr); 439 ierr = VecBindToCPU(pch2opus->wns[0],pch2opus->boundtocpu);CHKERRQ(ierr); 440 ierr = VecBindToCPU(pch2opus->wns[1],pch2opus->boundtocpu);CHKERRQ(ierr); 441 if (t) { 442 ierr = PCApplyTranspose_H2OPUS(pc,x,y);CHKERRQ(ierr); 443 ierr = MatMultTranspose(A,y,pch2opus->wns[1]);CHKERRQ(ierr); 444 ierr = PCApplyTranspose_H2OPUS(pc,pch2opus->wns[1],pch2opus->wns[0]);CHKERRQ(ierr); 445 ierr = VecAXPBY(y,-1.,2.,pch2opus->wns[0]);CHKERRQ(ierr); 446 } else { 447 ierr = PCApply_H2OPUS(pc,x,y);CHKERRQ(ierr); 448 ierr = MatMult(A,y,pch2opus->wns[0]);CHKERRQ(ierr); 449 ierr = PCApply_H2OPUS(pc,pch2opus->wns[0],pch2opus->wns[1]);CHKERRQ(ierr); 450 ierr = VecAXPBY(y,-1.,2.,pch2opus->wns[1]);CHKERRQ(ierr); 451 } 452 PetscFunctionReturn(0); 453 } 454 455 static PetscErrorCode MatMult_NS(Mat M, Vec x, Vec y) 456 { 457 PetscErrorCode ierr; 458 459 PetscFunctionBegin; 460 ierr = MatMultKernel_NS(M,x,y,PETSC_FALSE);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 static PetscErrorCode MatMultTranspose_NS(Mat M, Vec x, Vec y) 465 { 466 PetscErrorCode ierr; 467 468 PetscFunctionBegin; 469 ierr = MatMultKernel_NS(M,x,y,PETSC_TRUE);CHKERRQ(ierr); 470 PetscFunctionReturn(0); 471 } 472 473 /* Basic Newton-Schultz sampler: (2 * I - M * A)*M, MatMat version */ 474 static PetscErrorCode MatMatMultKernel_NS(Mat M, Mat X, Mat Y, PetscBool t) 475 { 476 PC pc; 477 Mat A; 478 PC_H2OPUS *pch2opus; 479 PetscErrorCode ierr; 480 481 PetscFunctionBegin; 482 ierr = MatShellGetContext(M,(void*)&pc);CHKERRQ(ierr); 483 pch2opus = (PC_H2OPUS*)pc->data; 484 A = pch2opus->A; 485 if (pch2opus->wnsmat[0] && pch2opus->wnsmat[0]->cmap->N != X->cmap->N) { 486 ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr); 487 ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr); 488 } 489 if (!pch2opus->wnsmat[0]) { 490 ierr = MatDuplicate(X,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[0]);CHKERRQ(ierr); 491 ierr = MatDuplicate(Y,MAT_SHARE_NONZERO_PATTERN,&pch2opus->wnsmat[1]);CHKERRQ(ierr); 492 } 493 if (t) { 494 ierr = PCApplyTransposeMat_H2OPUS(pc,X,Y);CHKERRQ(ierr); 495 ierr = MatTransposeMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[1]);CHKERRQ(ierr); 496 ierr = PCApplyTransposeMat_H2OPUS(pc,pch2opus->wnsmat[1],pch2opus->wnsmat[0]);CHKERRQ(ierr); 497 ierr = MatScale(Y,2.);CHKERRQ(ierr); 498 ierr = MatAXPY(Y,-1.,pch2opus->wnsmat[0],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 499 } else { 500 ierr = PCApplyMat_H2OPUS(pc,X,Y);CHKERRQ(ierr); 501 ierr = MatMatMult(A,Y,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pch2opus->wnsmat[0]);CHKERRQ(ierr); 502 ierr = PCApplyMat_H2OPUS(pc,pch2opus->wnsmat[0],pch2opus->wnsmat[1]);CHKERRQ(ierr); 503 ierr = MatScale(Y,2.);CHKERRQ(ierr); 504 ierr = MatAXPY(Y,-1.,pch2opus->wnsmat[1],SAME_NONZERO_PATTERN);CHKERRQ(ierr); 505 } 506 PetscFunctionReturn(0); 507 } 508 509 static PetscErrorCode MatMatMultNumeric_NS(Mat M, Mat X, Mat Y, void *ctx) 510 { 511 PetscErrorCode ierr; 512 513 PetscFunctionBegin; 514 ierr = MatMatMultKernel_NS(M,X,Y,PETSC_FALSE);CHKERRQ(ierr); 515 PetscFunctionReturn(0); 516 } 517 518 static PetscErrorCode PCH2OpusSetUpSampler_Private(PC pc) 519 { 520 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 521 Mat A = pc->useAmat ? pc->mat : pc->pmat; 522 PetscErrorCode ierr; 523 524 PetscFunctionBegin; 525 if (!pch2opus->S) { 526 PetscInt M,N,m,n; 527 528 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 529 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 530 ierr = MatCreateShell(PetscObjectComm((PetscObject)A),m,n,M,N,pc,&pch2opus->S);CHKERRQ(ierr); 531 ierr = MatSetBlockSizesFromMats(pch2opus->S,A,A);CHKERRQ(ierr); 532 #if defined(PETSC_H2OPUS_USE_GPU) 533 ierr = MatShellSetVecType(pch2opus->S,VECCUDA);CHKERRQ(ierr); 534 #endif 535 } 536 if (pch2opus->hyperorder >= 2) { 537 ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_Hyper);CHKERRQ(ierr); 538 ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_Hyper);CHKERRQ(ierr); 539 ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSE,MATDENSE);CHKERRQ(ierr); 540 ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_Hyper,NULL,MATDENSECUDA,MATDENSECUDA);CHKERRQ(ierr); 541 } else { 542 ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT,(void (*)(void))MatMult_NS);CHKERRQ(ierr); 543 ierr = MatShellSetOperation(pch2opus->S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_NS);CHKERRQ(ierr); 544 ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSE,MATDENSE);CHKERRQ(ierr); 545 ierr = MatShellSetMatProductOperation(pch2opus->S,MATPRODUCT_AB,NULL,MatMatMultNumeric_NS,NULL,MATDENSECUDA,MATDENSECUDA);CHKERRQ(ierr); 546 } 547 ierr = MatPropagateSymmetryOptions(A,pch2opus->S);CHKERRQ(ierr); 548 ierr = MatBindToCPU(pch2opus->S,pch2opus->boundtocpu);CHKERRQ(ierr); 549 /* XXX */ 550 ierr = MatSetOption(pch2opus->S,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 551 PetscFunctionReturn(0); 552 } 553 554 static PetscErrorCode PCSetUp_H2OPUS(PC pc) 555 { 556 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 557 Mat A = pc->useAmat ? pc->mat : pc->pmat; 558 NormType norm = pch2opus->normtype; 559 PetscReal initerr = 0.0,err; 560 PetscReal initerrMA = 0.0,errMA; 561 PetscBool ish2opus; 562 PetscErrorCode ierr; 563 564 PetscFunctionBegin; 565 if (!pch2opus->T) { 566 PetscInt M,N,m,n; 567 const char *prefix; 568 569 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 570 ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr); 571 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 572 ierr = MatCreateShell(PetscObjectComm((PetscObject)pc->pmat),m,n,M,N,pc,&pch2opus->T);CHKERRQ(ierr); 573 ierr = MatSetBlockSizesFromMats(pch2opus->T,A,A);CHKERRQ(ierr); 574 ierr = MatShellSetOperation(pch2opus->T,MATOP_MULT,(void (*)(void))MatMult_MAmI);CHKERRQ(ierr); 575 ierr = MatShellSetOperation(pch2opus->T,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_MAmI);CHKERRQ(ierr); 576 ierr = MatShellSetOperation(pch2opus->T,MATOP_NORM,(void (*)(void))MatNorm_H2OPUS);CHKERRQ(ierr); 577 #if defined(PETSC_H2OPUS_USE_GPU) 578 ierr = MatShellSetVecType(pch2opus->T,VECCUDA);CHKERRQ(ierr); 579 #endif 580 ierr = PetscLogObjectParent((PetscObject)pc,(PetscObject)pch2opus->T);CHKERRQ(ierr); 581 ierr = MatSetOptionsPrefix(pch2opus->T,prefix);CHKERRQ(ierr); 582 ierr = MatAppendOptionsPrefix(pch2opus->T,"pc_h2opus_est_");CHKERRQ(ierr); 583 } 584 ierr = MatDestroy(&pch2opus->A);CHKERRQ(ierr); 585 ierr = PetscObjectTypeCompare((PetscObject)A,MATH2OPUS,&ish2opus);CHKERRQ(ierr); 586 if (ish2opus) { 587 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 588 pch2opus->A = A; 589 } else { 590 const char *prefix; 591 ierr = MatCreateH2OpusFromMat(A,pch2opus->sdim,pch2opus->coords,PETSC_FALSE,pch2opus->eta,pch2opus->leafsize,pch2opus->max_rank,pch2opus->bs,pch2opus->mrtol,&pch2opus->A);CHKERRQ(ierr); 592 /* XXX */ 593 ierr = MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 594 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 595 ierr = MatSetOptionsPrefix(pch2opus->A,prefix);CHKERRQ(ierr); 596 ierr = MatAppendOptionsPrefix(pch2opus->A,"pc_h2opus_init_");CHKERRQ(ierr); 597 ierr = MatSetFromOptions(pch2opus->A);CHKERRQ(ierr); 598 ierr = MatAssemblyBegin(pch2opus->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 599 ierr = MatAssemblyEnd(pch2opus->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 600 /* XXX */ 601 ierr = MatSetOption(pch2opus->A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 602 } 603 #if defined(PETSC_H2OPUS_USE_GPU) 604 pch2opus->boundtocpu = pch2opus->A->boundtocpu; 605 #endif 606 ierr = MatBindToCPU(pch2opus->T,pch2opus->boundtocpu);CHKERRQ(ierr); 607 if (pch2opus->M) { /* see if we can reuse M as initial guess */ 608 PetscReal reuse; 609 610 ierr = MatBindToCPU(pch2opus->M,pch2opus->boundtocpu);CHKERRQ(ierr); 611 ierr = MatNorm(pch2opus->T,norm,&reuse);CHKERRQ(ierr); 612 if (reuse >= 1.0) { 613 ierr = MatDestroy(&pch2opus->M);CHKERRQ(ierr); 614 } 615 } 616 if (!pch2opus->M) { 617 const char *prefix; 618 ierr = MatDuplicate(pch2opus->A,MAT_COPY_VALUES,&pch2opus->M);CHKERRQ(ierr); 619 ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr); 620 ierr = MatSetOptionsPrefix(pch2opus->M,prefix);CHKERRQ(ierr); 621 ierr = MatAppendOptionsPrefix(pch2opus->M,"pc_h2opus_inv_");CHKERRQ(ierr); 622 ierr = MatSetFromOptions(pch2opus->M);CHKERRQ(ierr); 623 ierr = PCH2OpusSetUpInit(pc);CHKERRQ(ierr); 624 ierr = MatScale(pch2opus->M,pch2opus->s0);CHKERRQ(ierr); 625 } 626 /* A and M have the same h2 matrix structure, save on reordering routines */ 627 ierr = MatH2OpusSetNativeMult(pch2opus->A,PETSC_TRUE);CHKERRQ(ierr); 628 ierr = MatH2OpusSetNativeMult(pch2opus->M,PETSC_TRUE);CHKERRQ(ierr); 629 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) { 630 ierr = MatNorm(pch2opus->T,norm,&initerr);CHKERRQ(ierr); 631 pch2opus->testMA = PETSC_TRUE; 632 ierr = MatNorm(pch2opus->T,norm,&initerrMA);CHKERRQ(ierr); 633 pch2opus->testMA = PETSC_FALSE; 634 } 635 if (PetscIsInfOrNanReal(initerr)) pc->failedreason = PC_SETUP_ERROR; 636 err = initerr; 637 errMA = initerrMA; 638 if (pch2opus->monitor) { 639 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)err,(double)(err/initerr));CHKERRQ(ierr); 640 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",0,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));CHKERRQ(ierr); 641 } 642 if (initerr > pch2opus->atol && !pc->failedreason) { 643 PetscInt i; 644 645 ierr = PCH2OpusSetUpSampler_Private(pc);CHKERRQ(ierr); 646 for (i = 0; i < pch2opus->maxits; i++) { 647 Mat M; 648 const char* prefix; 649 650 ierr = MatDuplicate(pch2opus->M,MAT_SHARE_NONZERO_PATTERN,&M);CHKERRQ(ierr); 651 ierr = MatGetOptionsPrefix(pch2opus->M,&prefix);CHKERRQ(ierr); 652 ierr = MatSetOptionsPrefix(M,prefix);CHKERRQ(ierr); 653 ierr = MatH2OpusSetSamplingMat(M,pch2opus->S,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 654 ierr = MatSetFromOptions(M);CHKERRQ(ierr); 655 ierr = MatH2OpusSetNativeMult(M,PETSC_TRUE);CHKERRQ(ierr); 656 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 657 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 658 659 ierr = MatDestroy(&pch2opus->M);CHKERRQ(ierr); 660 pch2opus->M = M; 661 if (norm == NORM_1 || norm == NORM_2 || norm == NORM_INFINITY) { 662 ierr = MatNorm(pch2opus->T,norm,&err);CHKERRQ(ierr); 663 pch2opus->testMA = PETSC_TRUE; 664 ierr = MatNorm(pch2opus->T,norm,&errMA);CHKERRQ(ierr); 665 pch2opus->testMA = PETSC_FALSE; 666 } 667 if (pch2opus->monitor) { 668 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A - I|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)err,(double)(err/initerr));CHKERRQ(ierr); 669 ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"%D: ||M*A|| NORM%s abs %g rel %g\n",i+1,NormTypes[norm],(double)errMA,(double)(errMA/initerrMA));CHKERRQ(ierr); 670 } 671 if (PetscIsInfOrNanReal(err)) pc->failedreason = PC_SETUP_ERROR; 672 if (err < pch2opus->atol || err < pch2opus->rtol*initerr || pc->failedreason) break; 673 } 674 } 675 /* cleanup setup workspace */ 676 ierr = MatH2OpusSetNativeMult(pch2opus->A,PETSC_FALSE);CHKERRQ(ierr); 677 ierr = MatH2OpusSetNativeMult(pch2opus->M,PETSC_FALSE);CHKERRQ(ierr); 678 ierr = VecDestroy(&pch2opus->wns[0]);CHKERRQ(ierr); 679 ierr = VecDestroy(&pch2opus->wns[1]);CHKERRQ(ierr); 680 ierr = VecDestroy(&pch2opus->wns[2]);CHKERRQ(ierr); 681 ierr = VecDestroy(&pch2opus->wns[3]);CHKERRQ(ierr); 682 ierr = MatDestroy(&pch2opus->wnsmat[0]);CHKERRQ(ierr); 683 ierr = MatDestroy(&pch2opus->wnsmat[1]);CHKERRQ(ierr); 684 ierr = MatDestroy(&pch2opus->wnsmat[2]);CHKERRQ(ierr); 685 ierr = MatDestroy(&pch2opus->wnsmat[3]);CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 689 static PetscErrorCode PCView_H2OPUS(PC pc, PetscViewer viewer) 690 { 691 PetscErrorCode ierr; 692 PC_H2OPUS *pch2opus = (PC_H2OPUS*)pc->data; 693 PetscBool isascii; 694 695 PetscFunctionBegin; 696 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr); 697 if (isascii) { 698 if (pch2opus->A && pch2opus->A != pc->mat && pch2opus->A != pc->pmat) { 699 ierr = PetscViewerASCIIPrintf(viewer,"Initial approximation matrix\n");CHKERRQ(ierr); 700 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 701 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr); 702 ierr = MatView(pch2opus->A,viewer);CHKERRQ(ierr); 703 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 704 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 705 } 706 if (pch2opus->M) { 707 ierr = PetscViewerASCIIPrintf(viewer,"Inner matrix constructed\n");CHKERRQ(ierr); 708 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 709 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr); 710 ierr = MatView(pch2opus->M,viewer);CHKERRQ(ierr); 711 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 712 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 713 } 714 ierr = PetscViewerASCIIPrintf(viewer,"Initial scaling: %g\n",pch2opus->s0);CHKERRQ(ierr); 715 } 716 PetscFunctionReturn(0); 717 } 718 719 PETSC_EXTERN PetscErrorCode PCCreate_H2OPUS(PC pc) 720 { 721 PetscErrorCode ierr; 722 PC_H2OPUS *pch2opus; 723 724 PetscFunctionBegin; 725 ierr = PetscNewLog(pc,&pch2opus);CHKERRQ(ierr); 726 pc->data = (void*)pch2opus; 727 728 pch2opus->atol = 1.e-2; 729 pch2opus->rtol = 1.e-6; 730 pch2opus->maxits = 50; 731 pch2opus->hyperorder = 1; /* defaults to basic NewtonSchultz */ 732 pch2opus->normtype = NORM_2; 733 734 /* these are needed when we are sampling the pmat */ 735 pch2opus->eta = PETSC_DECIDE; 736 pch2opus->leafsize = PETSC_DECIDE; 737 pch2opus->max_rank = PETSC_DECIDE; 738 pch2opus->bs = PETSC_DECIDE; 739 pch2opus->mrtol = PETSC_DECIDE; 740 #if defined(PETSC_H2OPUS_USE_GPU) 741 pch2opus->boundtocpu = PETSC_FALSE; 742 #else 743 pch2opus->boundtocpu = PETSC_TRUE; 744 #endif 745 pc->ops->destroy = PCDestroy_H2OPUS; 746 pc->ops->setup = PCSetUp_H2OPUS; 747 pc->ops->apply = PCApply_H2OPUS; 748 pc->ops->matapply = PCApplyMat_H2OPUS; 749 pc->ops->applytranspose = PCApplyTranspose_H2OPUS; 750 pc->ops->reset = PCReset_H2OPUS; 751 pc->ops->setfromoptions = PCSetFromOptions_H2OPUS; 752 pc->ops->view = PCView_H2OPUS; 753 754 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSetCoordinates_C",PCSetCoordinates_H2OPUS);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757