1 2 /* 3 3/99 Modified by Stephen Barnard to support SPAI version 3.0 4 */ 5 6 /* 7 Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner 8 Code written by Stephen Barnard. 9 10 Note: there is some BAD memory bleeding below! 11 12 This code needs work 13 14 1) get rid of all memory bleeding 15 2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix 16 rather than having the sp flag for PC_SPAI 17 3) fix to set the block size based on the matrix block size 18 19 */ 20 #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */ 21 22 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/ 23 #include <../src/ksp/pc/impls/spai/petscspai.h> 24 25 /* 26 These are the SPAI include files 27 */ 28 EXTERN_C_BEGIN 29 #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */ 30 #include <spai.h> 31 #include <matrix.h> 32 EXTERN_C_END 33 34 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**); 35 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*); 36 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv); 37 extern PetscErrorCode MM_to_PETSC(char*,char*,char*); 38 39 typedef struct { 40 41 matrix *B; /* matrix in SPAI format */ 42 matrix *BT; /* transpose of matrix in SPAI format */ 43 matrix *M; /* the approximate inverse in SPAI format */ 44 45 Mat PM; /* the approximate inverse PETSc format */ 46 47 double epsilon; /* tolerance */ 48 int nbsteps; /* max number of "improvement" steps per line */ 49 int max; /* max dimensions of is_I, q, etc. */ 50 int maxnew; /* max number of new entries per step */ 51 int block_size; /* constant block size */ 52 int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */ 53 int verbose; /* SPAI prints timing and statistics */ 54 55 int sp; /* symmetric nonzero pattern */ 56 MPI_Comm comm_spai; /* communicator to be used with spai */ 57 } PC_SPAI; 58 59 /**********************************************************************/ 60 61 static PetscErrorCode PCSetUp_SPAI(PC pc) 62 { 63 PC_SPAI *ispai = (PC_SPAI*)pc->data; 64 PetscErrorCode ierr; 65 Mat AT; 66 67 PetscFunctionBegin; 68 init_SPAI(); 69 70 if (ispai->sp) { 71 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr); 72 } else { 73 /* Use the transpose to get the column nonzero structure. */ 74 ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr); 75 ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr); 76 ierr = MatDestroy(&AT);CHKERRQ(ierr); 77 } 78 79 /* Destroy the transpose */ 80 /* Don't know how to do it. PETSc developers? */ 81 82 /* construct SPAI preconditioner */ 83 /* FILE *messages */ /* file for warning messages */ 84 /* double epsilon */ /* tolerance */ 85 /* int nbsteps */ /* max number of "improvement" steps per line */ 86 /* int max */ /* max dimensions of is_I, q, etc. */ 87 /* int maxnew */ /* max number of new entries per step */ 88 /* int block_size */ /* block_size == 1 specifies scalar elments 89 block_size == n specifies nxn constant-block elements 90 block_size == 0 specifies variable-block elements */ 91 /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */ 92 /* int verbose */ /* verbose == 0 specifies that SPAI is silent 93 verbose == 1 prints timing and matrix statistics */ 94 95 ierr = bspai(ispai->B,&ispai->M, 96 stdout, 97 ispai->epsilon, 98 ispai->nbsteps, 99 ispai->max, 100 ispai->maxnew, 101 ispai->block_size, 102 ispai->cache_size, 103 ispai->verbose);CHKERRQ(ierr); 104 105 ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr); 106 107 /* free the SPAI matrices */ 108 sp_free_matrix(ispai->B); 109 sp_free_matrix(ispai->M); 110 PetscFunctionReturn(0); 111 } 112 113 /**********************************************************************/ 114 115 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y) 116 { 117 PC_SPAI *ispai = (PC_SPAI*)pc->data; 118 PetscErrorCode ierr; 119 120 PetscFunctionBegin; 121 /* Now using PETSc's multiply */ 122 ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 126 /**********************************************************************/ 127 128 static PetscErrorCode PCDestroy_SPAI(PC pc) 129 { 130 PetscErrorCode ierr; 131 PC_SPAI *ispai = (PC_SPAI*)pc->data; 132 133 PetscFunctionBegin; 134 ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr); 135 ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr); 136 ierr = PetscFree(pc->data);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 /**********************************************************************/ 141 142 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer) 143 { 144 PC_SPAI *ispai = (PC_SPAI*)pc->data; 145 PetscErrorCode ierr; 146 PetscBool iascii; 147 148 PetscFunctionBegin; 149 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 150 if (iascii) { 151 ierr = PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");CHKERRQ(ierr); 152 ierr = PetscViewerASCIIPrintf(viewer," epsilon %g\n", (double)ispai->epsilon);CHKERRQ(ierr); 153 ierr = PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);CHKERRQ(ierr); 154 ierr = PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);CHKERRQ(ierr); 155 ierr = PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);CHKERRQ(ierr); 156 ierr = PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);CHKERRQ(ierr); 157 ierr = PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);CHKERRQ(ierr); 158 ierr = PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);CHKERRQ(ierr); 159 ierr = PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);CHKERRQ(ierr); 160 } 161 PetscFunctionReturn(0); 162 } 163 164 static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1) 165 { 166 PC_SPAI *ispai = (PC_SPAI*)pc->data; 167 168 PetscFunctionBegin; 169 ispai->epsilon = epsilon1; 170 PetscFunctionReturn(0); 171 } 172 173 /**********************************************************************/ 174 175 static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1) 176 { 177 PC_SPAI *ispai = (PC_SPAI*)pc->data; 178 179 PetscFunctionBegin; 180 ispai->nbsteps = nbsteps1; 181 PetscFunctionReturn(0); 182 } 183 184 /**********************************************************************/ 185 186 /* added 1/7/99 g.h. */ 187 static PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1) 188 { 189 PC_SPAI *ispai = (PC_SPAI*)pc->data; 190 191 PetscFunctionBegin; 192 ispai->max = max1; 193 PetscFunctionReturn(0); 194 } 195 196 /**********************************************************************/ 197 198 static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1) 199 { 200 PC_SPAI *ispai = (PC_SPAI*)pc->data; 201 202 PetscFunctionBegin; 203 ispai->maxnew = maxnew1; 204 PetscFunctionReturn(0); 205 } 206 207 /**********************************************************************/ 208 209 static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1) 210 { 211 PC_SPAI *ispai = (PC_SPAI*)pc->data; 212 213 PetscFunctionBegin; 214 ispai->block_size = block_size1; 215 PetscFunctionReturn(0); 216 } 217 218 /**********************************************************************/ 219 220 static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size) 221 { 222 PC_SPAI *ispai = (PC_SPAI*)pc->data; 223 224 PetscFunctionBegin; 225 ispai->cache_size = cache_size; 226 PetscFunctionReturn(0); 227 } 228 229 /**********************************************************************/ 230 231 static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose) 232 { 233 PC_SPAI *ispai = (PC_SPAI*)pc->data; 234 235 PetscFunctionBegin; 236 ispai->verbose = verbose; 237 PetscFunctionReturn(0); 238 } 239 240 /**********************************************************************/ 241 242 static PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp) 243 { 244 PC_SPAI *ispai = (PC_SPAI*)pc->data; 245 246 PetscFunctionBegin; 247 ispai->sp = sp; 248 PetscFunctionReturn(0); 249 } 250 251 /* -------------------------------------------------------------------*/ 252 253 /*@ 254 PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner 255 256 Input Parameters: 257 + pc - the preconditioner 258 - eps - epsilon (default .4) 259 260 Notes: Espilon must be between 0 and 1. It controls the 261 quality of the approximation of M to the inverse of 262 A. Higher values of epsilon lead to more work, more 263 fill, and usually better preconditioners. In many 264 cases the best choice of epsilon is the one that 265 divides the total solution time equally between the 266 preconditioner and the solver. 267 268 Level: intermediate 269 270 .seealso: PCSPAI, PCSetType() 271 @*/ 272 PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1) 273 { 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr); 278 PetscFunctionReturn(0); 279 } 280 281 /**********************************************************************/ 282 283 /*@ 284 PCSPAISetNBSteps - set maximum number of improvement steps per row in 285 the SPAI preconditioner 286 287 Input Parameters: 288 + pc - the preconditioner 289 - n - number of steps (default 5) 290 291 Notes: SPAI constructs to approximation to every column of 292 the exact inverse of A in a series of improvement 293 steps. The quality of the approximation is determined 294 by epsilon. If an approximation achieving an accuracy 295 of epsilon is not obtained after ns steps, SPAI simply 296 uses the best approximation constructed so far. 297 298 Level: intermediate 299 300 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew() 301 @*/ 302 PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1) 303 { 304 PetscErrorCode ierr; 305 306 PetscFunctionBegin; 307 ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr); 308 PetscFunctionReturn(0); 309 } 310 311 /**********************************************************************/ 312 313 /* added 1/7/99 g.h. */ 314 /*@ 315 PCSPAISetMax - set the size of various working buffers in 316 the SPAI preconditioner 317 318 Input Parameters: 319 + pc - the preconditioner 320 - n - size (default is 5000) 321 322 Level: intermediate 323 324 .seealso: PCSPAI, PCSetType() 325 @*/ 326 PetscErrorCode PCSPAISetMax(PC pc,int max1) 327 { 328 PetscErrorCode ierr; 329 330 PetscFunctionBegin; 331 ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 /**********************************************************************/ 336 337 /*@ 338 PCSPAISetMaxNew - set maximum number of new nonzero candidates per step 339 in SPAI preconditioner 340 341 Input Parameters: 342 + pc - the preconditioner 343 - n - maximum number (default 5) 344 345 Level: intermediate 346 347 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps() 348 @*/ 349 PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1) 350 { 351 PetscErrorCode ierr; 352 353 PetscFunctionBegin; 354 ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr); 355 PetscFunctionReturn(0); 356 } 357 358 /**********************************************************************/ 359 360 /*@ 361 PCSPAISetBlockSize - set the block size for the SPAI preconditioner 362 363 Input Parameters: 364 + pc - the preconditioner 365 - n - block size (default 1) 366 367 Notes: A block 368 size of 1 treats A as a matrix of scalar elements. A 369 block size of s > 1 treats A as a matrix of sxs 370 blocks. A block size of 0 treats A as a matrix with 371 variable sized blocks, which are determined by 372 searching for dense square diagonal blocks in A. 373 This can be very effective for finite-element 374 matrices. 375 376 SPAI will convert A to block form, use a block 377 version of the preconditioner algorithm, and then 378 convert the result back to scalar form. 379 380 In many cases the a block-size parameter other than 1 381 can lead to very significant improvement in 382 performance. 383 384 385 Level: intermediate 386 387 .seealso: PCSPAI, PCSetType() 388 @*/ 389 PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1) 390 { 391 PetscErrorCode ierr; 392 393 PetscFunctionBegin; 394 ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 /**********************************************************************/ 399 400 /*@ 401 PCSPAISetCacheSize - specify cache size in the SPAI preconditioner 402 403 Input Parameters: 404 + pc - the preconditioner 405 - n - cache size {0,1,2,3,4,5} (default 5) 406 407 Notes: SPAI uses a hash table to cache messages and avoid 408 redundant communication. If suggest always using 409 5. This parameter is irrelevant in the serial 410 version. 411 412 Level: intermediate 413 414 .seealso: PCSPAI, PCSetType() 415 @*/ 416 PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size) 417 { 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 /**********************************************************************/ 426 427 /*@ 428 PCSPAISetVerbose - verbosity level for the SPAI preconditioner 429 430 Input Parameters: 431 + pc - the preconditioner 432 - n - level (default 1) 433 434 Notes: print parameters, timings and matrix statistics 435 436 Level: intermediate 437 438 .seealso: PCSPAI, PCSetType() 439 @*/ 440 PetscErrorCode PCSPAISetVerbose(PC pc,int verbose) 441 { 442 PetscErrorCode ierr; 443 444 PetscFunctionBegin; 445 ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 /**********************************************************************/ 450 451 /*@ 452 PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner 453 454 Input Parameters: 455 + pc - the preconditioner 456 - n - 0 or 1 457 458 Notes: If A has a symmetric nonzero pattern use -sp 1 to 459 improve performance by eliminating some communication 460 in the parallel version. Even if A does not have a 461 symmetric nonzero pattern -sp 1 may well lead to good 462 results, but the code will not follow the published 463 SPAI algorithm exactly. 464 465 466 Level: intermediate 467 468 .seealso: PCSPAI, PCSetType() 469 @*/ 470 PetscErrorCode PCSPAISetSp(PC pc,int sp) 471 { 472 PetscErrorCode ierr; 473 474 PetscFunctionBegin; 475 ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 /**********************************************************************/ 480 481 /**********************************************************************/ 482 483 static PetscErrorCode PCSetFromOptions_SPAI(PetscOptionItems *PetscOptionsObject,PC pc) 484 { 485 PC_SPAI *ispai = (PC_SPAI*)pc->data; 486 PetscErrorCode ierr; 487 int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp; 488 double epsilon1; 489 PetscBool flg; 490 491 PetscFunctionBegin; 492 ierr = PetscOptionsHead(PetscOptionsObject,"SPAI options");CHKERRQ(ierr); 493 ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr); 494 if (flg) { 495 ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr); 496 } 497 ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr); 498 if (flg) { 499 ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr); 500 } 501 /* added 1/7/99 g.h. */ 502 ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr); 503 if (flg) { 504 ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr); 505 } 506 ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr); 507 if (flg) { 508 ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr); 509 } 510 ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr); 511 if (flg) { 512 ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr); 513 } 514 ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr); 515 if (flg) { 516 ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr); 517 } 518 ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr); 519 if (flg) { 520 ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr); 521 } 522 ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr); 523 if (flg) { 524 ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr); 525 } 526 ierr = PetscOptionsTail();CHKERRQ(ierr); 527 PetscFunctionReturn(0); 528 } 529 530 /**********************************************************************/ 531 532 /*MC 533 PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard 534 as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3) 535 536 Options Database Keys: 537 + -pc_spai_epsilon <eps> - set tolerance 538 . -pc_spai_nbstep <n> - set nbsteps 539 . -pc_spai_max <m> - set max 540 . -pc_spai_max_new <m> - set maxnew 541 . -pc_spai_block_size <n> - set block size 542 . -pc_spai_cache_size <n> - set cache size 543 . -pc_spai_sp <m> - set sp 544 - -pc_spai_set_verbose <true,false> - verbose output 545 546 Notes: This only works with AIJ matrices. 547 548 Level: beginner 549 550 Concepts: approximate inverse 551 552 .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC, 553 PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(), 554 PCSPAISetVerbose(), PCSPAISetSp() 555 M*/ 556 557 PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc) 558 { 559 PC_SPAI *ispai; 560 PetscErrorCode ierr; 561 562 PetscFunctionBegin; 563 ierr = PetscNewLog(pc,&ispai);CHKERRQ(ierr); 564 pc->data = ispai; 565 566 pc->ops->destroy = PCDestroy_SPAI; 567 pc->ops->apply = PCApply_SPAI; 568 pc->ops->applyrichardson = 0; 569 pc->ops->setup = PCSetUp_SPAI; 570 pc->ops->view = PCView_SPAI; 571 pc->ops->setfromoptions = PCSetFromOptions_SPAI; 572 573 ispai->epsilon = .4; 574 ispai->nbsteps = 5; 575 ispai->max = 5000; 576 ispai->maxnew = 5; 577 ispai->block_size = 1; 578 ispai->cache_size = 5; 579 ispai->verbose = 0; 580 581 ispai->sp = 1; 582 ierr = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr); 583 584 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C",PCSPAISetMax_SPAI);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C",PCSPAISetVerbose_SPAI);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C",PCSPAISetSp_SPAI);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 595 /**********************************************************************/ 596 597 /* 598 Converts from a PETSc matrix to an SPAI matrix 599 */ 600 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B) 601 { 602 matrix *M; 603 int i,j,col; 604 int row_indx; 605 int len,pe,local_indx,start_indx; 606 int *mapping; 607 PetscErrorCode ierr; 608 const int *cols; 609 const double *vals; 610 int n,mnl,nnl,nz,rstart,rend; 611 PetscMPIInt size,rank; 612 struct compressed_lines *rows; 613 614 PetscFunctionBegin; 615 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 616 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 617 ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr); 618 ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr); 619 620 /* 621 not sure why a barrier is required. commenting out 622 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 623 */ 624 625 M = new_matrix((SPAI_Comm)comm); 626 627 M->n = n; 628 M->bs = 1; 629 M->max_block_size = 1; 630 631 M->mnls = (int*)malloc(sizeof(int)*size); 632 M->start_indices = (int*)malloc(sizeof(int)*size); 633 M->pe = (int*)malloc(sizeof(int)*n); 634 M->block_sizes = (int*)malloc(sizeof(int)*n); 635 for (i=0; i<n; i++) M->block_sizes[i] = 1; 636 637 ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr); 638 639 M->start_indices[0] = 0; 640 for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1]; 641 642 M->mnl = M->mnls[M->myid]; 643 M->my_start_index = M->start_indices[M->myid]; 644 645 for (i=0; i<size; i++) { 646 start_indx = M->start_indices[i]; 647 for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i; 648 } 649 650 if (AT) { 651 M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr); 652 } else { 653 M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr); 654 } 655 656 rows = M->lines; 657 658 /* Determine the mapping from global indices to pointers */ 659 ierr = PetscMalloc1(M->n,&mapping);CHKERRQ(ierr); 660 pe = 0; 661 local_indx = 0; 662 for (i=0; i<M->n; i++) { 663 if (local_indx >= M->mnls[pe]) { 664 pe++; 665 local_indx = 0; 666 } 667 mapping[i] = local_indx + M->start_indices[pe]; 668 local_indx++; 669 } 670 671 /*********************************************************/ 672 /************** Set up the row structure *****************/ 673 /*********************************************************/ 674 675 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 676 for (i=rstart; i<rend; i++) { 677 row_indx = i - rstart; 678 ierr = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 679 /* allocate buffers */ 680 rows->ptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 681 rows->A[row_indx] = (double*)malloc(nz*sizeof(double)); 682 /* copy the matrix */ 683 for (j=0; j<nz; j++) { 684 col = cols[j]; 685 len = rows->len[row_indx]++; 686 687 rows->ptrs[row_indx][len] = mapping[col]; 688 rows->A[row_indx][len] = vals[j]; 689 } 690 rows->slen[row_indx] = rows->len[row_indx]; 691 692 ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr); 693 } 694 695 696 /************************************************************/ 697 /************** Set up the column structure *****************/ 698 /*********************************************************/ 699 700 if (AT) { 701 702 for (i=rstart; i<rend; i++) { 703 row_indx = i - rstart; 704 ierr = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 705 /* allocate buffers */ 706 rows->rptrs[row_indx] = (int*)malloc(nz*sizeof(int)); 707 /* copy the matrix (i.e., the structure) */ 708 for (j=0; j<nz; j++) { 709 col = cols[j]; 710 len = rows->rlen[row_indx]++; 711 712 rows->rptrs[row_indx][len] = mapping[col]; 713 } 714 ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr); 715 } 716 } 717 718 ierr = PetscFree(mapping);CHKERRQ(ierr); 719 720 order_pointers(M); 721 M->maxnz = calc_maxnz(M); 722 *B = M; 723 PetscFunctionReturn(0); 724 } 725 726 /**********************************************************************/ 727 728 /* 729 Converts from an SPAI matrix B to a PETSc matrix PB. 730 This assumes that the SPAI matrix B is stored in 731 COMPRESSED-ROW format. 732 */ 733 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB) 734 { 735 PetscMPIInt size,rank; 736 PetscErrorCode ierr; 737 int m,n,M,N; 738 int d_nz,o_nz; 739 int *d_nnz,*o_nnz; 740 int i,k,global_row,global_col,first_diag_col,last_diag_col; 741 PetscScalar val; 742 743 PetscFunctionBegin; 744 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 745 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 746 747 m = n = B->mnls[rank]; 748 d_nz = o_nz = 0; 749 750 /* Determine preallocation for MatCreateMPIAIJ */ 751 ierr = PetscMalloc1(m,&d_nnz);CHKERRQ(ierr); 752 ierr = PetscMalloc1(m,&o_nnz);CHKERRQ(ierr); 753 for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0; 754 first_diag_col = B->start_indices[rank]; 755 last_diag_col = first_diag_col + B->mnls[rank]; 756 for (i=0; i<B->mnls[rank]; i++) { 757 for (k=0; k<B->lines->len[i]; k++) { 758 global_col = B->lines->ptrs[i][k]; 759 if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++; 760 else o_nnz[i]++; 761 } 762 } 763 764 M = N = B->n; 765 /* Here we only know how to create AIJ format */ 766 ierr = MatCreate(comm,PB);CHKERRQ(ierr); 767 ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr); 768 ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr); 769 ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr); 770 ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 771 772 for (i=0; i<B->mnls[rank]; i++) { 773 global_row = B->start_indices[rank]+i; 774 for (k=0; k<B->lines->len[i]; k++) { 775 global_col = B->lines->ptrs[i][k]; 776 777 val = B->lines->A[i][k]; 778 ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr); 779 } 780 } 781 782 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 783 ierr = PetscFree(o_nnz);CHKERRQ(ierr); 784 785 ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 786 ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 787 PetscFunctionReturn(0); 788 } 789 790 /**********************************************************************/ 791 792 /* 793 Converts from an SPAI vector v to a PETSc vec Pv. 794 */ 795 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv) 796 { 797 PetscErrorCode ierr; 798 PetscMPIInt size,rank; 799 int m,M,i,*mnls,*start_indices,*global_indices; 800 801 PetscFunctionBegin; 802 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 803 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 804 805 m = v->mnl; 806 M = v->n; 807 808 809 ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr); 810 811 ierr = PetscMalloc1(size,&mnls);CHKERRQ(ierr); 812 ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr); 813 814 ierr = PetscMalloc1(size,&start_indices);CHKERRQ(ierr); 815 816 start_indices[0] = 0; 817 for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1]; 818 819 ierr = PetscMalloc1(v->mnl,&global_indices);CHKERRQ(ierr); 820 for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i; 821 822 ierr = PetscFree(mnls);CHKERRQ(ierr); 823 ierr = PetscFree(start_indices);CHKERRQ(ierr); 824 825 ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr); 826 ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr); 827 ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr); 828 829 ierr = PetscFree(global_indices);CHKERRQ(ierr); 830 PetscFunctionReturn(0); 831 } 832 833 834 835 836