1 /* 2 This file creates an MPI parallel KSP from a sequential PC that lives on MPI rank 0. 3 It is intended to allow using PETSc MPI parallel linear solvers from non-MPI codes. 4 5 That program may use OpenMP to compute the right hand side and matrix for the linear system 6 7 The code uses MPI_COMM_WORLD below but maybe it should be PETSC_COMM_WORLD 8 9 The resulting KSP and PC can only be controlled via the options database, though some common commands 10 could be passed through the server. 11 12 */ 13 #include <petsc/private/pcimpl.h> 14 #include <petsc/private/kspimpl.h> 15 16 #define PC_MPI_MAX_RANKS 256 17 #define PC_MPI_COMM_WORLD MPI_COMM_WORLD 18 19 typedef struct { 20 KSP ksps[PC_MPI_MAX_RANKS]; /* The addresses of the MPI parallel KSP on each rank, NULL when not on a rank. */ 21 PetscMPIInt sendcount[PC_MPI_MAX_RANKS],displ[PC_MPI_MAX_RANKS]; /* For scatter/gather of rhs/solution */ 22 PetscMPIInt NZ[PC_MPI_MAX_RANKS],NZdispl[PC_MPI_MAX_RANKS]; /* For scatter of nonzero values in matrix (and nonzero column indices initially */ 23 PetscInt mincntperrank; /* minimum number of desired nonzeros per active rank in MPI parallel KSP solve */ 24 PetscBool alwaysuseserver; /* for debugging use the server infrastructure even if only one MPI rank is used for the solve */ 25 } PC_MPI; 26 27 typedef enum { PCMPI_EXIT, /* exit the PC server loop, means the controlling sequential program is done */ 28 PCMPI_CREATE, 29 PCMPI_SET_MAT, /* set original matrix (or one with different nonzero pattern) */ 30 PCMPI_UPDATE_MAT_VALUES, /* update current matrix with new nonzero values */ 31 PCMPI_SOLVE, 32 PCMPI_VIEW, 33 PCMPI_DESTROY /* destroy a KSP that is no longer needed */ 34 } PCMPICommand; 35 36 static MPI_Comm PCMPIComms[PC_MPI_MAX_RANKS]; 37 static PetscBool PCMPICommSet = PETSC_FALSE; 38 static PetscInt PCMPISolveCounts[PC_MPI_MAX_RANKS],PCMPIKSPCounts[PC_MPI_MAX_RANKS],PCMPIMatCounts[PC_MPI_MAX_RANKS],PCMPISolveCountsSeq = 0, PCMPIKSPCountsSeq = 0; 39 40 static PetscErrorCode PCMPICommsCreate(void) 41 { 42 MPI_Comm comm = PC_MPI_COMM_WORLD; 43 PetscMPIInt size,rank,i; 44 45 PetscFunctionBegin; 46 PetscCallMPI(MPI_Comm_size(comm,&size)); 47 PetscCheck(size <= PC_MPI_MAX_RANKS,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for using more than PC_MPI_MAX_RANKS MPI ranks in an MPI linear solver server solve"); 48 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 49 /* comm for size 1 is useful only for debugging */ 50 for (i=0; i<size; i++) { 51 PetscMPIInt color = rank < i+1 ? 0 : MPI_UNDEFINED; 52 PetscCallMPI(MPI_Comm_split(comm,color,0,&PCMPIComms[i])); 53 PCMPISolveCounts[i] = 0; 54 PCMPIKSPCounts[i] = 0; 55 } 56 PCMPICommSet = PETSC_TRUE; 57 PetscFunctionReturn(0); 58 } 59 60 PetscErrorCode PCMPICommsDestroy(void) 61 { 62 MPI_Comm comm = PC_MPI_COMM_WORLD; 63 PetscMPIInt size,rank,i; 64 65 PetscFunctionBegin; 66 if (!PCMPICommSet) PetscFunctionReturn(0); 67 PetscCallMPI(MPI_Comm_size(comm,&size)); 68 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 69 for (i=0; i<size; i++) { 70 if (PCMPIComms[i] != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&PCMPIComms[i])); 71 } 72 PCMPICommSet = PETSC_FALSE; 73 PetscFunctionReturn(0); 74 } 75 76 static PetscErrorCode PCMPICreate(PC pc) 77 { 78 PC_MPI *km = pc ? (PC_MPI*)pc->data : NULL; 79 MPI_Comm comm = PC_MPI_COMM_WORLD; 80 KSP ksp; 81 PetscInt N[2],mincntperrank = 0; 82 PetscMPIInt size; 83 Mat sA; 84 char *prefix; 85 PetscMPIInt len = 0; 86 87 PetscFunctionBegin; 88 if (!PCMPICommSet) PetscCall(PCMPICommsCreate()); 89 PetscCallMPI(MPI_Comm_size(comm,&size)); 90 if (pc) { 91 if (size == 1) PetscCall(PetscPrintf(PETSC_COMM_SELF,"Warning: Running KSP type of MPI on a one rank MPI run, this will be less efficient then not using this type\n")); 92 PetscCall(PCGetOperators(pc,&sA,&sA)); 93 PetscCall(MatGetSize(sA,&N[0],&N[1])); 94 } 95 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 96 97 /* choose a suitable sized MPI_Comm for the problem to be solved on */ 98 if (km) mincntperrank = km->mincntperrank; 99 PetscCallMPI(MPI_Bcast(&mincntperrank, 1, MPI_INT, 0, comm)); 100 comm = PCMPIComms[PetscMin(size,PetscMax(1,N[0]/mincntperrank)) - 1]; 101 if (comm == MPI_COMM_NULL) { 102 ksp = NULL; 103 PetscFunctionReturn(0); 104 } 105 PetscCall(KSPCreate(comm,&ksp)); 106 PetscCallMPI(MPI_Gather(&ksp, 1, MPI_AINT, pc ? km->ksps : NULL, 1, MPI_AINT, 0, comm)); 107 if (pc) { 108 size_t slen; 109 110 PetscCallMPI(MPI_Comm_size(comm,&size)); 111 PCMPIKSPCounts[size]++; 112 PetscCall(PCGetOptionsPrefix(pc,(const char**)&prefix)); 113 PetscCall(PetscStrlen(prefix,&slen)); 114 len = (PetscMPIInt)slen; 115 } 116 PetscCallMPI(MPI_Bcast(&len, 1, MPI_INT, 0, comm)); 117 if (len) { 118 if (!pc) { 119 PetscCall(PetscMalloc1(len+1,&prefix)); 120 } 121 PetscCallMPI(MPI_Bcast(prefix, len+1, MPI_CHAR, 0, comm)); 122 PetscCall(KSPSetOptionsPrefix(ksp,prefix)); 123 } 124 PetscCall(KSPAppendOptionsPrefix(ksp,"mpi_")); 125 PetscFunctionReturn(0); 126 } 127 128 static PetscErrorCode PCMPISetMat(PC pc) 129 { 130 PC_MPI *km = pc ? (PC_MPI*)pc->data : NULL; 131 Mat A; 132 PetscInt N[2],n,*ia,*ja,j; 133 Mat sA; 134 MPI_Comm comm = PC_MPI_COMM_WORLD; 135 KSP ksp; 136 PetscLayout layout; 137 const PetscInt *IA = NULL,*JA = NULL; 138 const PetscInt *range; 139 PetscMPIInt *NZ = NULL,sendcounti[PC_MPI_MAX_RANKS],displi[PC_MPI_MAX_RANKS],*NZdispl = NULL,nz,size,i; 140 PetscScalar *a; 141 const PetscScalar *sa = NULL; 142 143 PetscFunctionBegin; 144 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 145 if (!ksp) PetscFunctionReturn(0); 146 PetscCall(PetscObjectGetComm((PetscObject)ksp,&comm)); 147 if (pc) { 148 PetscCallMPI(MPI_Comm_size(comm,&size)); 149 PCMPIMatCounts[size]++; 150 PetscCall(PCGetOperators(pc,&sA,&sA)); 151 PetscCall(MatGetSize(sA,&N[0],&N[1])); 152 } 153 PetscCallMPI(MPI_Bcast(N, 2, MPIU_INT, 0, comm)); 154 155 /* determine ownership ranges of matrix */ 156 PetscCall(PetscLayoutCreate(comm,&layout)); 157 PetscCall(PetscLayoutSetBlockSize(layout,1)); 158 PetscCall(PetscLayoutSetSize(layout,N[0])); 159 PetscCall(PetscLayoutSetUp(layout)); 160 PetscCall(PetscLayoutGetLocalSize(layout,&n)); 161 162 /* copy over the matrix nonzero structure and values */ 163 if (pc) { 164 NZ = km->NZ; 165 NZdispl = km->NZdispl; 166 PetscCall(PetscLayoutGetRanges(layout,&range)); 167 PetscCall(MatGetRowIJ(sA,0,PETSC_FALSE,PETSC_FALSE,NULL,&IA,&JA,NULL)); 168 for (i=0; i<size; i++) { 169 sendcounti[i] = (PetscMPIInt) (1 + range[i+1] - range[i]); 170 NZ[i] = (PetscMPIInt) (IA[range[i+1]] - IA[range[i]]); 171 } 172 displi[0] = 0; 173 NZdispl[0] = 0; 174 for (j=1; j<size; j++) { 175 displi[j] = displi[j-1] + sendcounti[j-1] - 1; 176 NZdispl[j] = NZdispl[j-1] + NZ[j-1]; 177 } 178 PetscCall(MatSeqAIJGetArrayRead(sA,&sa)); 179 } 180 PetscCall(PetscLayoutDestroy(&layout)); 181 PetscCallMPI(MPI_Scatter(NZ, 1, MPI_INT, &nz, 1, MPI_INT, 0, comm)); 182 183 PetscCall(PetscMalloc3(n+1,&ia,nz,&ja,nz,&a)); 184 PetscCallMPI(MPI_Scatterv(IA,sendcounti, displi, MPIU_INT, ia, n+1, MPIU_INT, 0,comm)); 185 PetscCallMPI(MPI_Scatterv(JA,NZ, NZdispl, MPIU_INT, ja, nz, MPIU_INT, 0,comm)); 186 PetscCallMPI(MPI_Scatterv(sa,NZ, NZdispl, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0,comm)); 187 188 if (pc) { 189 PetscCall(MatSeqAIJRestoreArrayRead(sA,&sa)); 190 PetscCall(MatRestoreRowIJ(sA,0,PETSC_FALSE,PETSC_FALSE,NULL,&IA,&JA,NULL)); 191 } 192 193 for (j=1; j<n+1; j++) { 194 ia[j] -= ia[0]; 195 } 196 ia[0] = 0; 197 PetscCall(MatCreateMPIAIJWithArrays(comm,n,n,N[0],N[0],ia,ja,a,&A)); 198 PetscCall(MatSetOptionsPrefix(A,"mpi_")); 199 200 PetscCall(PetscFree3(ia,ja,a)); 201 PetscCall(KSPSetOperators(ksp,A,A)); 202 if (!ksp->vec_sol) PetscCall(MatCreateVecs(A,&ksp->vec_sol,&ksp->vec_rhs)); 203 if (pc) { /* needed for scatterv/gatherv of rhs and solution */ 204 const PetscInt *range; 205 206 PetscCall(VecGetOwnershipRanges(ksp->vec_sol,&range)); 207 for (i=0; i<size; i++) { 208 km->sendcount[i] = (PetscMPIInt) (range[i+1] - range[i]); 209 km->displ[i] = (PetscMPIInt) range[i]; 210 } 211 } 212 PetscCall(MatDestroy(&A)); 213 PetscCall(KSPSetFromOptions(ksp)); 214 PetscFunctionReturn(0); 215 } 216 217 static PetscErrorCode PCMPIUpdateMatValues(PC pc) 218 { 219 PC_MPI *km = pc ? (PC_MPI*)pc->data : NULL; 220 KSP ksp; 221 Mat sA,A; 222 MPI_Comm comm = PC_MPI_COMM_WORLD; 223 PetscScalar *a; 224 PetscCount nz; 225 const PetscScalar *sa = NULL; 226 227 PetscFunctionBegin; 228 if (pc) { 229 PetscMPIInt size; 230 231 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 232 PCMPIMatCounts[size]++; 233 PetscCall(PCGetOperators(pc,&sA,&sA)); 234 PetscCall(MatSeqAIJGetArrayRead(sA,&sa)); 235 } 236 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 237 if (!ksp) PetscFunctionReturn(0); 238 PetscCall(PetscObjectGetComm((PetscObject)ksp,&comm)); 239 PetscCall(KSPGetOperators(ksp,NULL,&A)); 240 PetscCall(MatMPIAIJGetNumberNonzeros(A,&nz)); 241 PetscCall(PetscMalloc1(nz,&a)); 242 PetscCallMPI(MPI_Scatterv(sa,pc ? km->NZ : NULL, pc ? km->NZdispl : NULL, MPIU_SCALAR, a, nz, MPIU_SCALAR, 0,comm)); 243 if (pc) PetscCall(MatSeqAIJRestoreArrayRead(sA,&sa)); 244 PetscCall(MatUpdateMPIAIJWithArray(A,a)); 245 PetscCall(PetscFree(a)); 246 PetscFunctionReturn(0); 247 } 248 249 static PetscErrorCode PCMPISolve(PC pc,Vec B,Vec X) 250 { 251 PC_MPI *km = pc ? (PC_MPI*)pc->data : NULL; 252 KSP ksp; 253 MPI_Comm comm = PC_MPI_COMM_WORLD; 254 const PetscScalar *sb = NULL,*x; 255 PetscScalar *b,*sx = NULL; 256 PetscInt n; 257 258 PetscFunctionBegin; 259 PetscCallMPI(MPI_Scatter(pc ? km->ksps : &ksp, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 260 if (!ksp) PetscFunctionReturn(0); 261 PetscCall(PetscObjectGetComm((PetscObject)ksp,&comm)); 262 263 /* TODO: optimize code to not require building counts/displ everytime */ 264 265 /* scatterv rhs */ 266 if (pc) { 267 PetscMPIInt size; 268 269 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size)); 270 PCMPISolveCounts[size]++; 271 PetscCall(VecGetArrayRead(B,&sb)); 272 } 273 PetscCall(VecGetLocalSize(ksp->vec_rhs,&n)); 274 PetscCall(VecGetArray(ksp->vec_rhs,&b)); 275 PetscCallMPI(MPI_Scatterv(sb,pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, b, n, MPIU_SCALAR, 0,comm)); 276 PetscCall(VecRestoreArray(ksp->vec_rhs,&b)); 277 if (pc) PetscCall(VecRestoreArrayRead(B,&sb)); 278 279 PetscCall(KSPSolve(ksp,NULL,NULL)); 280 281 /* gather solution */ 282 PetscCall(VecGetArrayRead(ksp->vec_sol,&x)); 283 if (pc) PetscCall(VecGetArray(X,&sx)); 284 PetscCallMPI(MPI_Gatherv(x,n,MPIU_SCALAR, sx, pc ? km->sendcount : NULL, pc ? km->displ : NULL, MPIU_SCALAR, 0,comm)); 285 if (pc) PetscCall(VecRestoreArray(X,&sx)); 286 PetscCall(VecRestoreArrayRead(ksp->vec_sol,&x)); 287 PetscFunctionReturn(0); 288 } 289 290 static PetscErrorCode PCMPIDestroy(PC pc) 291 { 292 PC_MPI *km = pc ? (PC_MPI*)pc->data : NULL; 293 KSP ksp; 294 MPI_Comm comm = PC_MPI_COMM_WORLD; 295 296 PetscFunctionBegin; 297 PetscCallMPI(MPI_Scatter(pc ? km->ksps : NULL, 1, MPI_AINT, &ksp, 1, MPI_AINT, 0, comm)); 298 if (!ksp) PetscFunctionReturn(0); 299 PetscCall(KSPDestroy(&ksp)); 300 PetscFunctionReturn(0); 301 } 302 303 /*@C 304 PCMPIServerBegin - starts a server that runs on the rank != 0 MPI processes waiting to process requests for 305 parallel KSP solves and management of parallel KSP objects. 306 307 Logically collective on all MPI ranks except 0 308 309 Options Database: 310 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 311 - -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server 312 313 Notes: 314 This is normally started automatically in `PetscInitialize()` when the option is provided 315 316 Developer Notes: 317 When called on rank zero this sets PETSC_COMM_WORLD to PETSC_COMM_SELF to allow a main program 318 written with PETSC_COMM_WORLD to run correctly on the single rank while all the ranks 319 (that would normally be sharing PETSC_COMM_WORLD) to run the solver server. 320 321 Can this be integrated into the PetscDevice abstraction that is currently being developed? 322 323 Level: developer 324 325 .seealso: PCMPIServerEnd(), PCMPI 326 @*/ 327 PetscErrorCode PCMPIServerBegin(void) 328 { 329 PetscMPIInt rank; 330 331 PetscFunctionBegin; 332 PetscCall(PetscInfo(NULL,"Starting MPI Linear Solver Server")); 333 PetscCallMPI(MPI_Comm_rank(PC_MPI_COMM_WORLD,&rank)); 334 if (rank == 0) { 335 PETSC_COMM_WORLD = PETSC_COMM_SELF; 336 PetscFunctionReturn(0); 337 } 338 339 while (PETSC_TRUE) { 340 PCMPICommand request = PCMPI_CREATE; 341 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 342 switch (request) { 343 case PCMPI_CREATE: 344 PetscCall(PCMPICreate(NULL)); 345 break; 346 case PCMPI_SET_MAT: 347 PetscCall(PCMPISetMat(NULL)); 348 break; 349 case PCMPI_UPDATE_MAT_VALUES: 350 PetscCall(PCMPIUpdateMatValues(NULL)); 351 break; 352 case PCMPI_VIEW: 353 // PetscCall(PCMPIView(NULL)); 354 break; 355 case PCMPI_SOLVE: 356 PetscCall(PCMPISolve(NULL,NULL,NULL)); 357 break; 358 case PCMPI_DESTROY: 359 PetscCall(PCMPIDestroy(NULL)); 360 break; 361 case PCMPI_EXIT: 362 PetscCall(PetscFinalize()); 363 exit(0); /* not sure if this is a good idea, but cannot return because it will run users main program */ 364 break; 365 default: 366 break; 367 } 368 } 369 PetscFunctionReturn(0); 370 } 371 372 /*@C 373 PCMPIServerEnd - ends a server that runs on the rank != 0 MPI processes waiting to process requests for 374 parallel KSP solves and management of parallel KSP objects. 375 376 Logically collective on all MPI ranks except 0 377 378 Notes: 379 This is normally ended automatically in `PetscFinalize()` when the option is provided 380 381 Level: developer 382 383 .seealso: PCMPIServerBegin(), PCMPI 384 @*/ 385 PetscErrorCode PCMPIServerEnd(void) 386 { 387 PCMPICommand request = PCMPI_EXIT; 388 389 PetscFunctionBegin; 390 if (PetscGlobalRank == 0) { 391 PetscViewer viewer = NULL; 392 PetscViewerFormat format; 393 394 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, PC_MPI_COMM_WORLD)); 395 PETSC_COMM_WORLD = MPI_COMM_WORLD; /* could use PC_MPI_COMM_WORLD */ 396 PetscOptionsBegin(PETSC_COMM_SELF,NULL,"MPI linear solver server options",NULL); 397 PetscCall(PetscOptionsViewer("-mpi_linear_solver_server_view","View information about system solved with the server","PCMPI",&viewer,&format,NULL)); 398 PetscOptionsEnd(); 399 if (viewer) { 400 PetscBool isascii; 401 402 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 403 if (isascii) { 404 PetscMPIInt size; 405 PetscInt i, ksp = 0, mat = 0, solve = 0; 406 407 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 408 for (i=1; i<size; i++) { 409 ksp += PCMPIKSPCounts[i]; 410 mat += PCMPIMatCounts[i]; 411 solve += PCMPISolveCounts[i]; 412 } 413 PetscCall(PetscViewerASCIIPrintf(viewer,"MPI linear solver server:\n")); 414 PetscCall(PetscViewerASCIIPrintf(viewer," Parallel KSPs %" PetscInt_FMT " Mats %" PetscInt_FMT " Solves %" PetscInt_FMT "\n",ksp,mat,solve)); 415 PetscCall(PetscViewerASCIIPrintf(viewer," Sequential KSPs %" PetscInt_FMT " Solves %" PetscInt_FMT "\n",PCMPIKSPCountsSeq,PCMPISolveCountsSeq)); 416 } 417 PetscCall(PetscViewerDestroy(&viewer)); 418 } 419 } 420 PetscCall(PCMPICommsDestroy()); 421 PetscFunctionReturn(0); 422 } 423 424 /* 425 This version is used in the trivial case when the MPI parallel solver server is running on just the original MPI rank 0 426 because, for example, the problem is small. This version is more efficient because it does not require copying any data 427 */ 428 static PetscErrorCode PCSetUp_Seq(PC pc) 429 { 430 PC_MPI *km = (PC_MPI*)pc->data; 431 Mat sA; 432 const char* prefix; 433 434 PetscFunctionBegin; 435 PetscCall(PCGetOperators(pc,NULL,&sA)); 436 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 437 PetscCall(KSPCreate(PETSC_COMM_SELF,&km->ksps[0])); 438 PetscCall(KSPSetOptionsPrefix(km->ksps[0],prefix)); 439 PetscCall(KSPAppendOptionsPrefix(km->ksps[0],"mpi_")); 440 PetscCall(KSPSetOperators(km->ksps[0],sA,sA)); 441 PetscCall(KSPSetFromOptions(km->ksps[0])); 442 PetscCall(KSPSetUp(km->ksps[0])); 443 PetscInfo((PetscObject)pc,"MPI parallel linear solver system is being solved directly on rank 0 due to its small size\n"); 444 PCMPIKSPCountsSeq++; 445 PetscFunctionReturn(0); 446 } 447 448 static PetscErrorCode PCApply_Seq(PC pc,Vec b,Vec x) 449 { 450 PC_MPI *km = (PC_MPI*)pc->data; 451 452 PetscFunctionBegin; 453 PetscCall(KSPSolve(km->ksps[0],b,x)); 454 PCMPISolveCountsSeq++; 455 PetscFunctionReturn(0); 456 } 457 458 static PetscErrorCode PCView_Seq(PC pc,PetscViewer viewer) 459 { 460 PC_MPI *km = (PC_MPI*)pc->data; 461 const char *prefix; 462 463 PetscFunctionBegin; 464 PetscCall(PetscViewerASCIIPrintf(viewer,"Running MPI linear solver server directly on rank 0 due to its small size\n")); 465 PetscCall(PetscViewerASCIIPrintf(viewer,"Desired minimum number of nonzeros per rank for MPI parallel solve %d\n",(int)km->mincntperrank)); 466 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 467 if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer,"*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n",prefix)); 468 else PetscCall(PetscViewerASCIIPrintf(viewer,"*** Use -mpi_ksp_view to see the MPI KSP parameters***\n")); 469 PetscFunctionReturn(0); 470 } 471 472 static PetscErrorCode PCDestroy_Seq(PC pc) 473 { 474 PC_MPI *km = (PC_MPI*)pc->data; 475 476 PetscFunctionBegin; 477 PetscCall(KSPDestroy(&km->ksps[0])); 478 PetscCall(PetscFree(pc->data)); 479 PetscFunctionReturn(0); 480 } 481 482 /* 483 PCSetUp_MPI - Trigger the creation of the MPI parallel PC and copy parts of the matrix and 484 right hand side to the parallel PC 485 */ 486 static PetscErrorCode PCSetUp_MPI(PC pc) 487 { 488 PC_MPI *km = (PC_MPI*)pc->data; 489 PetscMPIInt rank,size; 490 PCMPICommand request; 491 PetscBool newmatrix = PETSC_FALSE; 492 493 PetscFunctionBegin; 494 PetscCallMPI(MPI_Comm_rank(MPI_COMM_WORLD,&rank)); 495 PetscCheck(rank == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"PCMPI can only be used from 0th rank of MPI_COMM_WORLD. Perhaps a missing -mpi_linear_solver_server?"); 496 PetscCallMPI(MPI_Comm_size(MPI_COMM_WORLD,&size)); 497 498 if (!pc->setupcalled) { 499 if (!km->alwaysuseserver) { 500 PetscInt n; 501 Mat sA; 502 /* short circuit for small systems */ 503 PetscCall(PCGetOperators(pc,&sA,&sA)); 504 PetscCall(MatGetSize(sA,&n,NULL)); 505 if (n < 2*km->mincntperrank-1 || size == 1) { 506 pc->ops->setup = NULL; 507 pc->ops->apply = PCApply_Seq; 508 pc->ops->destroy = PCDestroy_Seq; 509 pc->ops->view = PCView_Seq; 510 PetscCall(PCSetUp_Seq(pc)); 511 PetscFunctionReturn(0); 512 } 513 } 514 515 request = PCMPI_CREATE; 516 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 517 PetscCall(PCMPICreate(pc)); 518 newmatrix = PETSC_TRUE; 519 } if (pc->flag == DIFFERENT_NONZERO_PATTERN) newmatrix = PETSC_TRUE; 520 521 if (newmatrix) { 522 PetscInfo((PetscObject)pc,"New matrix or matrix has changed nonzero structure\n"); 523 request = PCMPI_SET_MAT; 524 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 525 PetscCall(PCMPISetMat(pc)); 526 } else { 527 PetscInfo((PetscObject)pc,"Matrix has only changed nozero values\n"); 528 request = PCMPI_UPDATE_MAT_VALUES; 529 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 530 PetscCall(PCMPIUpdateMatValues(pc)); 531 } 532 PetscFunctionReturn(0); 533 } 534 535 static PetscErrorCode PCApply_MPI(PC pc,Vec b,Vec x) 536 { 537 PCMPICommand request = PCMPI_SOLVE; 538 539 PetscFunctionBegin; 540 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 541 PetscCall(PCMPISolve(pc,b,x)); 542 PetscFunctionReturn(0); 543 } 544 545 PetscErrorCode PCDestroy_MPI(PC pc) 546 { 547 PCMPICommand request = PCMPI_DESTROY; 548 549 PetscFunctionBegin; 550 PetscCallMPI(MPI_Bcast(&request, 1, MPIU_ENUM, 0, MPI_COMM_WORLD)); 551 PetscCall(PCMPIDestroy(pc)); 552 PetscCall(PetscFree(pc->data)); 553 PetscFunctionReturn(0); 554 } 555 556 /* 557 PCView_MPI - Cannot call view on the MPI parallel KSP because other ranks do not have access to the viewer 558 */ 559 PetscErrorCode PCView_MPI(PC pc,PetscViewer viewer) 560 { 561 PC_MPI *km = (PC_MPI*)pc->data; 562 MPI_Comm comm; 563 PetscMPIInt size; 564 const char *prefix; 565 566 PetscFunctionBegin; 567 PetscCall(PetscObjectGetComm((PetscObject)km->ksps[0],&comm)); 568 PetscCallMPI(MPI_Comm_size(comm,&size)); 569 PetscCall(PetscViewerASCIIPrintf(viewer,"Size of MPI communicator used for MPI parallel KSP solve %d\n",size)); 570 PetscCall(PetscViewerASCIIPrintf(viewer,"Desired minimum number of nonzeros per rank for MPI parallel solve %d\n",(int)km->mincntperrank)); 571 PetscCall(PCGetOptionsPrefix(pc,&prefix)); 572 if (prefix) PetscCall(PetscViewerASCIIPrintf(viewer,"*** Use -%smpi_ksp_view to see the MPI KSP parameters***\n",prefix)); 573 else PetscCall(PetscViewerASCIIPrintf(viewer,"*** Use -mpi_ksp_view to see the MPI KSP parameters***\n")); 574 PetscFunctionReturn(0); 575 } 576 577 PetscErrorCode PCSetFromOptions_MPI(PetscOptionItems *PetscOptionsObject,PC pc) 578 { 579 PC_MPI *km = (PC_MPI*)pc->data; 580 581 PetscFunctionBegin; 582 PetscOptionsHeadBegin(PetscOptionsObject,"MPI linear solver server options"); 583 PetscCall(PetscOptionsInt("-pc_mpi_minimum_count_per_rank","Desired minimum number of nonzeros per rank","None",km->mincntperrank,&km->mincntperrank,NULL)); 584 PetscCall(PetscOptionsBool("-pc_mpi_always_use_server","Use the server even if only one rank is used for the solve (for debugging)","None",km->alwaysuseserver,&km->alwaysuseserver,NULL)); 585 PetscOptionsHeadEnd(); 586 PetscFunctionReturn(0); 587 } 588 589 /*MC 590 PCMPI - Calls an MPI parallel KSP to solve a linear system from user code running on one process 591 592 Level: beginner 593 594 Options Database: 595 + -mpi_linear_solver_server - causes the PETSc program to start in MPI linear solver server mode where only the first MPI rank runs user code 596 . -mpi_linear_solver_server_view - displays information about the linear systems solved by the MPI linear solver server 597 . -pc_mpi_minimum_count_per_rank - sets the minimum size of the linear system per MPI rank that the solver will strive for 598 - -pc_mpi_always_use_server - use the server solver code even if the particular system is only solved on the process, this option is only for debugging and testing purposes 599 600 Notes: 601 The options database prefix for the MPI parallel KSP and PC is -mpi_ 602 603 The MPI linear solver server will not support scaling user code to utilize extremely large numbers of MPI ranks but should give reasonable speedup for 604 potentially 4 to 8 MPI ranks depending on the linear system being solved, solver algorithm, and the hardware. 605 606 It can be particularly useful for user OpenMP code or potentially user GPU code. 607 608 When the program is running with a single MPI rank then this directly uses the provided matrix and right hand side (still in a KSP with the options prefix of -mpi) 609 and does not need to distribute the matrix and vector to the various MPI ranks; thus it incurs no extra overhead over just using the KSP directly. 610 611 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `PC`, `PCMPIServerBegin()`, `PCMPIServerEnd()` 612 613 M*/ 614 PETSC_EXTERN PetscErrorCode PCCreate_MPI(PC pc) 615 { 616 PC_MPI *km; 617 618 PetscFunctionBegin; 619 PetscCall(PetscNewLog(pc,&km)); 620 pc->data = (void*)km; 621 622 km->mincntperrank = 10000; 623 624 pc->ops->setup = PCSetUp_MPI; 625 pc->ops->apply = PCApply_MPI; 626 pc->ops->destroy = PCDestroy_MPI; 627 pc->ops->view = PCView_MPI; 628 pc->ops->setfromoptions = PCSetFromOptions_MPI; 629 PetscFunctionReturn(0); 630 } 631