static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\
Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n";

#include <petscmat.h>

/*
  Test driver for MatZeroRowsColumns() on parallel matrices.

  Overall flow:
    1. Assemble a (m*n*bs) x (m*n*bs) operator A from a 5-point stencil on an
       m x n grid with bs components per node (columns distributed as nlocal
       per rank), plus a made-up rhs vector.
    2. Build a list of "boundary" node indices (first grid row and first grid
       column), either contributed by non-owning ranks (-nonlocal_bc) or by
       the owners only.
    3. Fill x with prescribed boundary values, compute y = rhs - A*x and then
       overwrite the boundary entries of y with diag*boundary_values; this is
       the rhs that MatZeroRowsColumns() is expected to return.
    4. Call MatZeroRowsColumns(A, ..., diag, x, rhs) and verify that the rhs
       it produces matches y to within 1e-10 (inf-norm); SETERRQ otherwise.

  Command-line options: -bs, -nonlocal_bc, -diag, -convname, -zerorhs.
*/
int main(int argc,char **args)
{
  Mat         A;
  Vec         x, rhs, y;
  PetscInt    i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J;
  PetscInt    *boundary_nodes, nboundary_nodes, *boundary_indices;
  PetscMPIInt rank,size;
  PetscScalar v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values,diag = 1.0;
  PetscReal   norm;
  char        convname[64];
  /* NOTE(review): no "-upwind" option is registered below, so upwind stays
     PETSC_FALSE and the advection branch in the assembly loop is dead code
     here — confirm this is intentional (kept for manual experimentation?). */
  PetscBool   upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc,&args,(char*)0,help));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
  /* each rank contributes nlocal grid columns, so the grid is m x (nlocal*size) */
  n = nlocal*size;

  PetscCall(PetscOptionsGetInt(NULL,NULL, "-bs", &bs, NULL));
  PetscCall(PetscOptionsGetBool(NULL,NULL, "-nonlocal_bc", &nonlocalBC, NULL));
  PetscCall(PetscOptionsGetScalar(NULL,NULL, "-diag", &diag, NULL));
  /* convert is set PETSC_TRUE only if -convname was given; used as a flag below */
  PetscCall(PetscOptionsGetString(NULL,NULL,"-convname",convname,sizeof(convname),&convert));
  PetscCall(PetscOptionsGetBool(NULL,NULL, "-zerorhs", &zerorhs, NULL));

  PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
  PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSetUp(A));

  PetscCall(MatCreateVecs(A, NULL, &rhs));
  PetscCall(VecSetFromOptions(rhs));
  PetscCall(VecSetUp(rhs));

  /* Assemble A: each rank sets the rows for its own nlocal grid columns
     (global node index Ii = (j + n*i)*bs + b). */
  rhsval = 0.0;
  for (i=0; i<m; i++) {
    for (j=nlocal*rank; j<nlocal*(rank+1); j++) {
      a = a0;
      for (b=0; b<bs; b++) {
        /* let's start with a 5-point stencil diffusion term */
        v = -1.0; Ii = (j + n*i)*bs + b;
        if (i>0)   {J = Ii - n*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
        if (i<m-1) {J = Ii + n*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
        if (j>0)   {J = Ii - 1*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
        if (j<n-1) {J = Ii + 1*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
        v = 4.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES));
        if (upwind) {
          /* now add a 2nd order upwind advection term to add a little asymmetry */
          if (j>2) {
            J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a;
            PetscCall(MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES));
          } else {
            /* fall back to 1st order upwind */
            v1 = -1.0*a; v0 = 1.0*a;
          };
          if (j>1) {J = Ii-1*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES));}
          PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES));
          a /= 10.; /* use a different velocity for the next component */
          /* add a coupling to the previous and next components */
          v = 0.5;
          if (b>0)    {J = Ii - 1; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
          if (b<bs-1) {J = Ii + 1; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
        }
        /* make up some rhs */
        PetscCall(VecSetValue(rhs, Ii, rhsval, INSERT_VALUES));
        rhsval += 1.0;
      }
    }
  }
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  if (convert) { /* Test different Mat implementations */
    Mat B;

    PetscCall(MatConvert(A,convname,MAT_INITIAL_MATRIX,&B));
    PetscCall(MatDestroy(&A));
    A = B;
  }

  PetscCall(VecAssemblyBegin(rhs));
  PetscCall(VecAssemblyEnd(rhs));
  /* set rhs to zero to simplify */
  if (zerorhs) PetscCall(VecZeroEntries(rhs));

  /* Build boundary_nodes: the node indices of grid row i=0 (nodes 0..n-1) and
     of grid column j=0 (nodes n*i, i=1..m-1), split across ranks. */
  if (nonlocalBC) {
    /*version where boundary conditions are set by processes that don't necessarily own the nodes */
    if (rank == 0) {
      /* rank 0 also claims the j=0 column nodes not covered by ranks 1..m-1 */
      nboundary_nodes = size>m ? nlocal : m-size+nlocal;
      PetscCall(PetscMalloc1(nboundary_nodes,&boundary_nodes));
      k = 0;
      for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;};
    } else if (rank < m) {
      /* ranks 1..m-1 each claim one j=0 column node (n*rank) in addition to
         their own first-row nodes */
      nboundary_nodes = nlocal+1;
      PetscCall(PetscMalloc1(nboundary_nodes,&boundary_nodes));
      boundary_nodes[0] = rank*n;
      k = 1;
    } else {
      nboundary_nodes = nlocal;
      PetscCall(PetscMalloc1(nboundary_nodes,&boundary_nodes));
      k = 0;
    }
    /* every rank claims the i=0 row nodes in its own column range, even though
       it may not own those matrix rows — that is the point of this test mode */
    for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;};
  } else {
    /*version where boundary conditions are set by the node owners only */
    /* m*n is an upper bound on the boundary-node count; actual count is k */
    PetscCall(PetscMalloc1(m*n,&boundary_nodes));
    k=0;
    for (j=0; j<n; j++) {
      Ii = j;
      /* keep only nodes whose matrix rows fall in this rank's ownership range
         [rank*m*nlocal, (rank+1)*m*nlocal) */
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    for (i=1; i<m; i++) {
      Ii = n*i;
      if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
    }
    nboundary_nodes = k;
  }

  /* Expand node indices to per-component matrix indices and prescribe
     boundary values: node q gets values q, q+0.1, q+0.2, ... per component. */
  PetscCall(VecDuplicate(rhs, &x));
  PetscCall(VecZeroEntries(x));
  PetscCall(PetscMalloc2(nboundary_nodes*bs,&boundary_indices,nboundary_nodes*bs,&boundary_values));
  for (k=0; k<nboundary_nodes; k++) {
    Ii = boundary_nodes[k]*bs;
    v = 1.0*boundary_nodes[k];
    for (b=0; b<bs; b++, Ii++) {
      boundary_indices[k*bs+b] = Ii;
      boundary_values[k*bs+b] = v;
      /* echo each (rank, index, value) triple in rank order for the test output */
      PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v)));
      v += 0.1;
    }
  }
  PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL));
  PetscCall(VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES));
  PetscCall(VecAssemblyBegin(x));
  PetscCall(VecAssemblyEnd(x));

  /* We can check the rhs returned by MatZeroColumns by computing y=rhs-A*x and overwriting the boundary entries with boundary values */
  PetscCall(VecDuplicate(x, &y));
  PetscCall(MatMult(A, x, y));
  PetscCall(VecAYPX(y, -1.0, rhs));
  /* boundary rows of the expected rhs hold diag * boundary_value */
  for (k=0; k<nboundary_nodes*bs; k++) boundary_values[k] *= diag;
  PetscCall(VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES));
  PetscCall(VecAssemblyBegin(y));
  PetscCall(VecAssemblyEnd(y));

  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n"));
  PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
  PetscCall(VecView(x,PETSC_VIEWER_STDOUT_WORLD));

  /* The call under test: zero the boundary rows/columns, put diag on the
     diagonal, and fix up rhs using the values in x */
  PetscCall(MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, diag, x, rhs));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n"));
  PetscCall(VecView(rhs,PETSC_VIEWER_STDOUT_WORLD));
  /* compare against the independently computed expected rhs in y */
  PetscCall(VecAXPY(y, -1.0, rhs));
  PetscCall(VecNorm(y, NORM_INFINITY, &norm));
  if (norm > 1.0e-10) {
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm));
    PetscCall(VecView(y,PETSC_VIEWER_STDOUT_WORLD));
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns");
  }

  PetscCall(PetscFree(boundary_nodes));
  PetscCall(PetscFree2(boundary_indices,boundary_values));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&y));
  PetscCall(VecDestroy(&rhs));
  PetscCall(MatDestroy(&A));

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      diff_args: -j
      suffix: 0

   test:
      diff_args: -j
      suffix: 1
      nsize: 2

   test:
      diff_args: -j
      suffix: 10
      nsize: 2
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 11
      nsize: 7
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 12
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 13
      nsize: 2
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 14
      nsize: 7
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 2
      nsize: 7

   test:
      diff_args: -j
      suffix: 3
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 4
      nsize: 2
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 5
      nsize: 7
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 6
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 7
      nsize: 2
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 8
      nsize: 7
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 9
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 15
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 16
      nsize: 2
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 17
      args: -bs 2 -nonlocal_bc -convname dense

   testset:
      diff_args: -j
      suffix: full
      nsize: {{1 3}separate output}
      args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_1
      nsize: 1
      args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output}

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_3
      nsize: 3
      args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse

TEST*/