static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\
Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n";

#include <petscmat.h>

/*
  Builds a (m*n*bs) x (m*n*bs) matrix from a 5-point-stencil diffusion term on
  an m x n grid with bs components per node (node index = j + n*i, matrix row
  = node*bs + component), selects the nodes of the first grid row (i = 0) and
  first grid column (j = 0) as boundary nodes, and verifies the rhs returned
  by MatZeroRowsColumns() against y = rhs - A*x with the boundary entries
  overwritten by diag * (boundary value).

  Options:
    -bs <n>       : number of components (block size) per grid node
    -nonlocal_bc  : boundary rows/columns are supplied by ranks that do not
                    necessarily own them
    -diag <v>     : value placed on the zeroed diagonal entries
    -convname <t> : convert the assembled matrix to type <t> before testing
    -zerorhs <b>  : zero the rhs vector before calling MatZeroRowsColumns()
*/
int main(int argc, char **args)
{
  Mat          A;
  Vec          x, rhs, y;
  PetscInt     i, j, k, b, m = 3, n, nlocal = 2, bs = 1, Ii, J;
  PetscInt    *boundary_nodes, nboundary_nodes, *boundary_indices;
  PetscMPIInt  rank, size;
  PetscScalar  v, v0, v1, v2, a0 = 0.1, a, rhsval, *boundary_values, diag = 1.0;
  PetscReal    norm;
  char         convname[64];
  /* NOTE(review): upwind has no command-line toggle in this file, so the advection branch below is never exercised here */
  PetscBool    upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  n = nlocal * size; /* each rank assembles nlocal grid columns */

  PetscCall(PetscOptionsGetInt(NULL, NULL, "-bs", &bs, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-nonlocal_bc", &nonlocalBC, NULL));
  PetscCall(PetscOptionsGetScalar(NULL, NULL, "-diag", &diag, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-convname", convname, sizeof(convname), &convert));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-zerorhs", &zerorhs, NULL));

  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n * bs, m * n * bs));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSetUp(A));

  PetscCall(MatCreateVecs(A, NULL, &rhs));
  PetscCall(VecSetFromOptions(rhs));
  PetscCall(VecSetUp(rhs));

  /* Each rank assembles the rows for its own grid columns j; the stencil
     neighbours may be off-rank entries, which MatSetValues communicates. */
  rhsval = 0.0;
  for (i = 0; i < m; i++) {
    for (j = nlocal * rank; j < nlocal * (rank + 1); j++) {
      a = a0;
      for (b = 0; b < bs; b++) {
        /* let's start with a 5-point stencil diffusion term */
        v  = -1.0;
        Ii = (j + n * i) * bs + b;
        if (i > 0) {
          J = Ii - n * bs;
          PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
        }
        if (i < m - 1) {
          J = Ii + n * bs;
          PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
        }
        if (j > 0) {
          J = Ii - 1 * bs;
          PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
        }
        if (j < n - 1) {
          J = Ii + 1 * bs;
          PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
        }
        v = 4.0;
        PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
        if (upwind) { /* disabled: upwind is hard-wired to PETSC_FALSE above */
          /* now add a 2nd order upwind advection term to add a little asymmetry */
          if (j > 2) {
            J  = Ii - 2 * bs;
            v2 = 0.5 * a;
            v1 = -2.0 * a;
            v0 = 1.5 * a;
            PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v2, ADD_VALUES));
          } else {
            /* fall back to 1st order upwind */
            v1 = -1.0 * a;
            v0 = 1.0 * a;
          }
          if (j > 1) {
            J = Ii - 1 * bs;
            PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v1, ADD_VALUES));
          }
          PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v0, ADD_VALUES));
          a /= 10.; /* use a different velocity for the next component */
          /* add a coupling to the previous and next components */
          v = 0.5;
          if (b > 0) {
            J = Ii - 1;
            PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
          }
          if (b < bs - 1) {
            J = Ii + 1;
            PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
          }
        }
        /* make up some rhs */
        PetscCall(VecSetValue(rhs, Ii, rhsval, INSERT_VALUES));
        rhsval += 1.0;
      }
    }
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  if (convert) { /* Test different Mat implementations */
    Mat B;

    PetscCall(MatConvert(A, convname, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatDestroy(&A));
    A = B;
  }

  PetscCall(VecAssemblyBegin(rhs));
  PetscCall(VecAssemblyEnd(rhs));
  /* set rhs to zero to simplify */
  if (zerorhs) PetscCall(VecZeroEntries(rhs));

  /* Collect the boundary grid nodes: first grid column (node n*i) and this
     rank's grid rows j.  boundary_nodes holds node indices; they are expanded
     to matrix indices (x bs) further below. */
  if (nonlocalBC) {
    /* version where boundary conditions are set by processes that don't necessarily own the nodes */
    if (rank == 0) {
      nboundary_nodes = size > m ? nlocal : m - size + nlocal;
      PetscCall(PetscMalloc1(nboundary_nodes, &boundary_nodes));
      k = 0;
      /* rank 0 also supplies the first-column nodes not covered by ranks 1..m-1 below */
      for (i = size; i < m; i++, k++) boundary_nodes[k] = n * i;
    } else if (rank < m) {
      nboundary_nodes = nlocal + 1;
      PetscCall(PetscMalloc1(nboundary_nodes, &boundary_nodes));
      boundary_nodes[0] = rank * n; /* a first-column node this rank does not necessarily own */
      k                 = 1;
    } else {
      nboundary_nodes = nlocal;
      PetscCall(PetscMalloc1(nboundary_nodes, &boundary_nodes));
      k = 0;
    }
    /* every rank adds its own first-row nodes (i = 0, node index = j) */
    for (j = nlocal * rank; j < nlocal * (rank + 1); j++, k++) boundary_nodes[k] = j;
  } else {
    /* version where boundary conditions are set by the node owners only */
    PetscCall(PetscMalloc1(m * n, &boundary_nodes));
    k = 0;
    for (j = 0; j < n; j++) {
      Ii = j;
      /* keep only nodes in this rank's contiguous ownership range [rank*m*nlocal, (rank+1)*m*nlocal) */
      if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii;
    }
    for (i = 1; i < m; i++) {
      Ii = n * i;
      if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii;
    }
    nboundary_nodes = k;
  }

  /* Expand node indices to per-component matrix indices and make up boundary
     values: node value + 0.1 per extra component. */
  PetscCall(VecDuplicate(rhs, &x));
  PetscCall(VecZeroEntries(x));
  PetscCall(PetscMalloc2(nboundary_nodes * bs, &boundary_indices, nboundary_nodes * bs, &boundary_values));
  for (k = 0; k < nboundary_nodes; k++) {
    Ii = boundary_nodes[k] * bs;
    v  = 1.0 * boundary_nodes[k];
    for (b = 0; b < bs; b++, Ii++) {
      boundary_indices[k * bs + b] = Ii;
      boundary_values[k * bs + b]  = v;
      PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v)));
      v += 0.1;
    }
  }
  PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL));
  PetscCall(VecSetValues(x, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES));
  PetscCall(VecAssemblyBegin(x));
  PetscCall(VecAssemblyEnd(x));

  /* We can check the rhs returned by MatZeroColumns by computing y=rhs-A*x and overwriting the boundary entries with boundary values */
  PetscCall(VecDuplicate(x, &y));
  PetscCall(MatMult(A, x, y));
  PetscCall(VecAYPX(y, -1.0, rhs));
  /* boundary entries of the expected rhs are diag * boundary value */
  for (k = 0; k < nboundary_nodes * bs; k++) boundary_values[k] *= diag;
  PetscCall(VecSetValues(y, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES));
  PetscCall(VecAssemblyBegin(y));
  PetscCall(VecAssemblyEnd(y));

  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n"));
  PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
  PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));

  PetscCall(MatZeroRowsColumns(A, nboundary_nodes * bs, boundary_indices, diag, x, rhs));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n"));
  PetscCall(VecView(rhs, PETSC_VIEWER_STDOUT_WORLD));
  /* compare the returned rhs with the independently computed expectation y */
  PetscCall(VecAXPY(y, -1.0, rhs));
  PetscCall(VecNorm(y, NORM_INFINITY, &norm));
  if (norm > 1.0e-10) {
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm));
    PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD));
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns");
  }

  PetscCall(PetscFree(boundary_nodes));
  PetscCall(PetscFree2(boundary_indices, boundary_values));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&y));
  PetscCall(VecDestroy(&rhs));
  PetscCall(MatDestroy(&A));

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      diff_args: -j
      suffix: 0

   test:
      diff_args: -j
      suffix: 1
      nsize: 2

   test:
      diff_args: -j
      suffix: 10
      nsize: 2
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 11
      nsize: 7
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 12
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 13
      nsize: 2
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 14
      nsize: 7
      args: -bs 2 -nonlocal_bc -mat_type baij

   test:
      diff_args: -j
      suffix: 2
      nsize: 7

   test:
      diff_args: -j
      suffix: 3
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 4
      nsize: 2
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 5
      nsize: 7
      args: -mat_type baij

   test:
      diff_args: -j
      suffix: 6
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 7
      nsize: 2
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 8
      nsize: 7
      args: -bs 2 -mat_type baij

   test:
      diff_args: -j
      suffix: 9
      args: -bs 2 -nonlocal_bc

   test:
      diff_args: -j
      suffix: 15
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 16
      nsize: 2
      args: -bs 2 -nonlocal_bc -convname shell

   test:
      diff_args: -j
      suffix: 17
      args: -bs 2 -nonlocal_bc -convname dense

   testset:
      diff_args: -j
      suffix: full
      nsize: {{1 3}separate output}
      args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_1
      nsize: 1
      args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output}

   test:
      diff_args: -j
      requires: cuda
      suffix: cusparse_3
      nsize: 3
      args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse

TEST*/