1 static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\ 2 Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n"; 3 4 #include <petscmat.h> 5 6 int main(int argc, char **args) { 7 Mat A; 8 Vec x, rhs, y; 9 PetscInt i, j, k, b, m = 3, n, nlocal = 2, bs = 1, Ii, J; 10 PetscInt *boundary_nodes, nboundary_nodes, *boundary_indices; 11 PetscMPIInt rank, size; 12 PetscScalar v, v0, v1, v2, a0 = 0.1, a, rhsval, *boundary_values, diag = 1.0; 13 PetscReal norm; 14 char convname[64]; 15 PetscBool upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE; 16 17 PetscFunctionBeginUser; 18 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 19 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 20 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 21 n = nlocal * size; 22 23 PetscCall(PetscOptionsGetInt(NULL, NULL, "-bs", &bs, NULL)); 24 PetscCall(PetscOptionsGetBool(NULL, NULL, "-nonlocal_bc", &nonlocalBC, NULL)); 25 PetscCall(PetscOptionsGetScalar(NULL, NULL, "-diag", &diag, NULL)); 26 PetscCall(PetscOptionsGetString(NULL, NULL, "-convname", convname, sizeof(convname), &convert)); 27 PetscCall(PetscOptionsGetBool(NULL, NULL, "-zerorhs", &zerorhs, NULL)); 28 29 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 30 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n * bs, m * n * bs)); 31 PetscCall(MatSetFromOptions(A)); 32 PetscCall(MatSetUp(A)); 33 34 PetscCall(MatCreateVecs(A, NULL, &rhs)); 35 PetscCall(VecSetFromOptions(rhs)); 36 PetscCall(VecSetUp(rhs)); 37 38 rhsval = 0.0; 39 for (i = 0; i < m; i++) { 40 for (j = nlocal * rank; j < nlocal * (rank + 1); j++) { 41 a = a0; 42 for (b = 0; b < bs; b++) { 43 /* let's start with a 5-point stencil diffusion term */ 44 v = -1.0; 45 Ii = (j + n * i) * bs + b; 46 if (i > 0) { 47 J = Ii - n * bs; 48 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 49 } 50 if (i < m - 1) { 51 J = Ii + n * bs; 52 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 53 } 54 if (j > 0) { 55 J = Ii - 1 * bs; 56 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 57 } 58 if (j < n - 1) { 59 J = Ii + 1 * bs; 60 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 61 } 62 v = 4.0; 63 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES)); 64 if (upwind) { 65 /* now add a 2nd order upwind advection term to add a little asymmetry */ 66 if (j > 2) { 67 J = Ii - 2 * bs; 68 v2 = 0.5 * a; 69 v1 = -2.0 * a; 70 v0 = 1.5 * a; 71 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v2, ADD_VALUES)); 72 } else { 73 /* fall back to 1st order upwind */ 74 v1 = -1.0 * a; 75 v0 = 1.0 * a; 76 }; 77 if (j > 1) { 78 J = Ii - 1 * bs; 79 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v1, ADD_VALUES)); 80 } 81 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v0, ADD_VALUES)); 82 a /= 10.; /* use a different velocity for the next component */ 83 /* add a coupling to the previous and next components */ 84 v = 0.5; 85 if (b > 0) { 86 J = Ii - 1; 87 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 88 } 89 if (b < bs - 1) { 90 J = Ii + 1; 91 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES)); 92 } 93 } 94 /* make up some rhs */ 95 PetscCall(VecSetValue(rhs, Ii, rhsval, INSERT_VALUES)); 96 rhsval += 1.0; 97 } 98 } 99 } 100 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 101 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 102 103 if (convert) { /* Test different Mat implementations */ 104 Mat B; 105 106 PetscCall(MatConvert(A, convname, MAT_INITIAL_MATRIX, &B)); 107 PetscCall(MatDestroy(&A)); 108 A = B; 109 } 110 111 PetscCall(VecAssemblyBegin(rhs)); 112 PetscCall(VecAssemblyEnd(rhs)); 113 /* set rhs to zero to simplify */ 114 if (zerorhs) PetscCall(VecZeroEntries(rhs)); 115 116 if (nonlocalBC) { 117 /*version where boundary conditions are set by processes that don't necessarily own the nodes */ 118 if (rank == 0) { 119 nboundary_nodes = size > m ? nlocal : m - size + nlocal; 120 PetscCall(PetscMalloc1(nboundary_nodes, &boundary_nodes)); 121 k = 0; 122 for (i = size; i < m; i++, k++) { boundary_nodes[k] = n * i; }; 123 } else if (rank < m) { 124 nboundary_nodes = nlocal + 1; 125 PetscCall(PetscMalloc1(nboundary_nodes, &boundary_nodes)); 126 boundary_nodes[0] = rank * n; 127 k = 1; 128 } else { 129 nboundary_nodes = nlocal; 130 PetscCall(PetscMalloc1(nboundary_nodes, &boundary_nodes)); 131 k = 0; 132 } 133 for (j = nlocal * rank; j < nlocal * (rank + 1); j++, k++) { boundary_nodes[k] = j; }; 134 } else { 135 /*version where boundary conditions are set by the node owners only */ 136 PetscCall(PetscMalloc1(m * n, &boundary_nodes)); 137 k = 0; 138 for (j = 0; j < n; j++) { 139 Ii = j; 140 if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii; 141 } 142 for (i = 1; i < m; i++) { 143 Ii = n * i; 144 if (Ii >= rank * m * nlocal && Ii < (rank + 1) * m * nlocal) boundary_nodes[k++] = Ii; 145 } 146 nboundary_nodes = k; 147 } 148 149 PetscCall(VecDuplicate(rhs, &x)); 150 PetscCall(VecZeroEntries(x)); 151 PetscCall(PetscMalloc2(nboundary_nodes * bs, &boundary_indices, nboundary_nodes * bs, &boundary_values)); 152 for (k = 0; k < nboundary_nodes; k++) { 153 Ii = boundary_nodes[k] * bs; 154 v = 1.0 * boundary_nodes[k]; 155 for (b = 0; b < bs; b++, Ii++) { 156 boundary_indices[k * bs + b] = Ii; 157 boundary_values[k * bs + b] = v; 158 PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v))); 159 v += 0.1; 160 } 161 } 162 PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL)); 163 PetscCall(VecSetValues(x, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES)); 164 PetscCall(VecAssemblyBegin(x)); 165 PetscCall(VecAssemblyEnd(x)); 166 167 /* We can check the rhs returned by MatZeroColumns by computing y=rhs-A*x and overwriting the boundary entries with boundary values */ 168 PetscCall(VecDuplicate(x, &y)); 169 PetscCall(MatMult(A, x, y)); 170 PetscCall(VecAYPX(y, -1.0, rhs)); 171 for (k = 0; k < nboundary_nodes * bs; k++) boundary_values[k] *= diag; 172 PetscCall(VecSetValues(y, nboundary_nodes * bs, boundary_indices, boundary_values, INSERT_VALUES)); 173 PetscCall(VecAssemblyBegin(y)); 174 PetscCall(VecAssemblyEnd(y)); 175 176 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n")); 177 PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 178 PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); 179 180 PetscCall(MatZeroRowsColumns(A, nboundary_nodes * bs, boundary_indices, diag, x, rhs)); 181 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n")); 182 PetscCall(VecView(rhs, PETSC_VIEWER_STDOUT_WORLD)); 183 PetscCall(VecAXPY(y, -1.0, rhs)); 184 PetscCall(VecNorm(y, NORM_INFINITY, &norm)); 185 if (norm > 1.0e-10) { 186 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm)); 187 PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD)); 188 SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns"); 189 } 190 191 PetscCall(PetscFree(boundary_nodes)); 192 PetscCall(PetscFree2(boundary_indices, boundary_values)); 193 PetscCall(VecDestroy(&x)); 194 PetscCall(VecDestroy(&y)); 195 PetscCall(VecDestroy(&rhs)); 196 PetscCall(MatDestroy(&A)); 197 198 PetscCall(PetscFinalize()); 199 return 0; 200 } 201 202 /*TEST 203 204 test: 205 diff_args: -j 206 suffix: 0 207 208 test: 209 diff_args: -j 210 suffix: 1 211 nsize: 2 212 213 test: 214 diff_args: -j 215 suffix: 10 216 nsize: 2 217 args: -bs 2 -nonlocal_bc 218 219 test: 220 diff_args: -j 221 suffix: 11 222 nsize: 7 223 args: -bs 2 -nonlocal_bc 224 225 test: 226 diff_args: -j 227 suffix: 12 228 args: -bs 2 -nonlocal_bc -mat_type baij 229 230 test: 231 diff_args: -j 232 suffix: 13 233 nsize: 2 234 args: -bs 2 -nonlocal_bc -mat_type baij 235 236 test: 237 diff_args: -j 238 suffix: 14 239 nsize: 7 240 args: -bs 2 -nonlocal_bc -mat_type baij 241 242 test: 243 diff_args: -j 244 suffix: 2 245 nsize: 7 246 247 test: 248 diff_args: -j 249 suffix: 3 250 args: -mat_type baij 251 252 test: 253 diff_args: -j 254 suffix: 4 255 nsize: 2 256 args: -mat_type baij 257 258 test: 259 diff_args: -j 260 suffix: 5 261 nsize: 7 262 args: -mat_type baij 263 264 test: 265 diff_args: -j 266 suffix: 6 267 args: -bs 2 -mat_type baij 268 269 test: 270 diff_args: -j 271 suffix: 7 272 nsize: 2 273 args: -bs 2 -mat_type baij 274 275 test: 276 diff_args: -j 277 suffix: 8 278 nsize: 7 279 args: -bs 2 -mat_type baij 280 281 test: 282 diff_args: -j 283 suffix: 9 284 args: -bs 2 -nonlocal_bc 285 286 test: 287 diff_args: -j 288 suffix: 15 289 args: -bs 2 -nonlocal_bc -convname shell 290 291 test: 292 diff_args: -j 293 suffix: 16 294 nsize: 2 295 args: -bs 2 -nonlocal_bc -convname shell 296 297 test: 298 diff_args: -j 299 suffix: 17 300 args: -bs 2 -nonlocal_bc -convname dense 301 302 testset: 303 diff_args: -j 304 suffix: full 305 nsize: {{1 3}separate output} 306 args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0 307 308 test: 309 diff_args: -j 310 requires: cuda 311 suffix: cusparse_1 312 nsize: 1 313 args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output} 314 315 test: 316 diff_args: -j 317 requires: cuda 318 suffix: cusparse_3 319 nsize: 3 320 args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse 321 322 TEST*/ 323