1 2 static char help[] = "Test PC redistribute on matrix with load imbalance. \n\ 3 Modified from src/ksp/ksp/tutorials/ex2.c.\n\ 4 Input parameters include:\n\ 5 -random_exact_sol : use a random exact solution vector\n\ 6 -view_exact_sol : write exact solution vector to stdout\n\ 7 -n <mesh_y> : number of mesh points\n\n"; 8 /* 9 Example: 10 mpiexec -n 8 ./ex3 -n 10000 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 -log_view 11 mpiexec -n 8 ./ex3 -n 10000 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 -log_view 12 */ 13 14 #include <petscksp.h> 15 16 int main(int argc,char **args) 17 { 18 Vec x,b,u; /* approx solution, RHS, exact solution */ 19 Mat A; /* linear system matrix */ 20 KSP ksp; /* linear solver context */ 21 PetscRandom rctx; /* random number generator context */ 22 PetscReal norm; /* norm of solution error */ 23 PetscInt i,j,Ii,J,Istart,Iend,m,n = 7,its,nloc,matdistribute=0; 24 PetscBool flg = PETSC_FALSE; 25 PetscScalar v; 26 PetscMPIInt rank,size; 27 #if defined(PETSC_USE_LOG) 28 PetscLogStage stage; 29 #endif 30 31 PetscCall(PetscInitialize(&argc,&args,(char*)0,help)); 32 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 33 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 34 PetscCheck(size > 1,PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"This example requires at least 2 MPI processes!"); 35 36 PetscCall(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL)); 37 PetscCall(PetscOptionsGetInt(NULL,NULL,"-matdistribute",&matdistribute,NULL)); 38 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 39 Compute the matrix and right-hand-side vector that define 40 the linear system, Ax = b. 41 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 42 switch(matdistribute) { 43 case 1: /* very imbalanced process load for matrix A */ 44 m = (1+size)*size; 45 nloc = (rank+1)*n; 46 if (rank == size-1) { /* proc[size-1] stores all remaining rows */ 47 nloc = m*n; 48 for (i=0; i<size-1; i++) { 49 nloc -= (i+1)*n; 50 } 51 } 52 break; 53 default: /* proc[0] and proc[1] load much smaller row blocks, the rest processes have same loads */ 54 if (rank == 0 || rank == 1) { 55 nloc = n; 56 } else { 57 nloc = 10*n; /* 10x larger load */ 58 } 59 m = 2 + (size-2)*10; 60 break; 61 } 62 PetscCall(MatCreate(PETSC_COMM_WORLD,&A)); 63 PetscCall(MatSetSizes(A,nloc,nloc,PETSC_DECIDE,PETSC_DECIDE)); 64 PetscCall(MatSetFromOptions(A)); 65 PetscCall(MatMPIAIJSetPreallocation(A,5,NULL,5,NULL)); 66 PetscCall(MatSeqAIJSetPreallocation(A,5,NULL)); 67 PetscCall(MatSetUp(A)); 68 69 PetscCall(MatGetOwnershipRange(A,&Istart,&Iend)); 70 nloc = Iend-Istart; 71 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] A Istart,Iend: %D %D; nloc %D\n",rank,Istart,Iend,nloc)); 72 PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT)); 73 74 PetscCall(PetscLogStageRegister("Assembly", &stage)); 75 PetscCall(PetscLogStagePush(stage)); 76 for (Ii=Istart; Ii<Iend; Ii++) { 77 v = -1.0; i = Ii/n; j = Ii - i*n; 78 if (i>0) {J = Ii - n; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 79 if (i<m-1) {J = Ii + n; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 80 if (j>0) {J = Ii - 1; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 81 if (j<n-1) {J = Ii + 1; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES));} 82 v = 4.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES)); 83 } 84 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 85 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 86 PetscCall(PetscLogStagePop()); 87 88 /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */ 89 PetscCall(MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE)); 90 91 /* Create parallel vectors. */ 92 PetscCall(VecCreate(PETSC_COMM_WORLD,&u)); 93 PetscCall(VecSetSizes(u,nloc,PETSC_DECIDE)); 94 PetscCall(VecSetFromOptions(u)); 95 PetscCall(VecDuplicate(u,&b)); 96 PetscCall(VecDuplicate(b,&x)); 97 98 /* Set exact solution; then compute right-hand-side vector. */ 99 PetscCall(PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL)); 100 if (flg) { 101 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD,&rctx)); 102 PetscCall(PetscRandomSetFromOptions(rctx)); 103 PetscCall(VecSetRandom(u,rctx)); 104 PetscCall(PetscRandomDestroy(&rctx)); 105 } else { 106 PetscCall(VecSet(u,1.0)); 107 } 108 PetscCall(MatMult(A,u,b)); 109 110 /* View the exact solution vector if desired */ 111 flg = PETSC_FALSE; 112 PetscCall(PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL)); 113 if (flg) PetscCall(VecView(u,PETSC_VIEWER_STDOUT_WORLD)); 114 115 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 116 Create the linear solver and set various options 117 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 118 PetscCall(KSPCreate(PETSC_COMM_WORLD,&ksp)); 119 PetscCall(KSPSetOperators(ksp,A,A)); 120 PetscCall(KSPSetTolerances(ksp,1.e-2/((m+1)*(n+1)),PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT)); 121 PetscCall(KSPSetFromOptions(ksp)); 122 123 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 124 Solve the linear system 125 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 126 PetscCall(KSPSolve(ksp,b,x)); 127 128 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 129 Check solution and clean up 130 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 131 PetscCall(VecAXPY(x,-1.0,u)); 132 PetscCall(VecNorm(x,NORM_2,&norm)); 133 PetscCall(KSPGetIterationNumber(ksp,&its)); 134 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its)); 135 136 /* Free work space. */ 137 PetscCall(KSPDestroy(&ksp)); 138 PetscCall(VecDestroy(&u)); PetscCall(VecDestroy(&x)); 139 PetscCall(VecDestroy(&b)); PetscCall(MatDestroy(&A)); 140 PetscCall(PetscFinalize()); 141 return 0; 142 } 143 144 /*TEST 145 146 test: 147 nsize: 8 148 args: -n 100 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_rtol 1.e-8 149 150 test: 151 suffix: 2 152 nsize: 8 153 args: -n 100 -ksp_type preonly -pc_type redistribute -redistribute_ksp_type cg -redistribute_pc_type bjacobi -redistribute_sub_pc_type icc -redistribute_ksp_rtol 1.e-8 154 155 TEST*/ 156