1 static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\ 2 Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n"; 3 4 #include <petscmat.h> 5 6 int main(int argc,char **args) 7 { 8 Mat A; 9 Vec x, rhs, y; 10 PetscInt i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J; 11 PetscInt *boundary_nodes, nboundary_nodes, *boundary_indices; 12 PetscMPIInt rank,size; 13 PetscErrorCode ierr; 14 PetscScalar v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values,diag = 1.0; 15 PetscReal norm; 16 char convname[64]; 17 PetscBool upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE; 18 19 ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 20 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 21 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); 22 n = nlocal*size; 23 24 ierr = PetscOptionsGetInt(NULL,NULL, "-bs", &bs, NULL);CHKERRQ(ierr); 25 ierr = PetscOptionsGetBool(NULL,NULL, "-nonlocal_bc", &nonlocalBC, NULL);CHKERRQ(ierr); 26 ierr = PetscOptionsGetScalar(NULL,NULL, "-diag", &diag, NULL);CHKERRQ(ierr); 27 ierr = PetscOptionsGetString(NULL,NULL,"-convname",convname,sizeof(convname),&convert);CHKERRQ(ierr); 28 ierr = PetscOptionsGetBool(NULL,NULL, "-zerorhs", &zerorhs, NULL);CHKERRQ(ierr); 29 30 ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 31 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs);CHKERRQ(ierr); 32 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 33 ierr = MatSetUp(A);CHKERRQ(ierr); 34 35 ierr = MatCreateVecs(A, NULL, &rhs);CHKERRQ(ierr); 36 ierr = VecSetFromOptions(rhs);CHKERRQ(ierr); 37 ierr = VecSetUp(rhs);CHKERRQ(ierr); 38 39 rhsval = 0.0; 40 for (i=0; i<m; i++) { 41 for (j=nlocal*rank; j<nlocal*(rank+1); j++) { 42 a = a0; 43 for (b=0; b<bs; b++) { 44 /* let's start with a 5-point stencil diffusion term */ 45 v = -1.0; Ii = (j + n*i)*bs + b; 46 if (i>0) {J = Ii - n*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 47 if (i<m-1) {J = Ii + n*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 48 if (j>0) {J = Ii - 1*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 49 if (j<n-1) {J = Ii + 1*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 50 v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); 51 if (upwind) { 52 /* now add a 2nd order upwind advection term to add a little asymmetry */ 53 if (j>2) { 54 J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a; 55 ierr = MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES);CHKERRQ(ierr); 56 } else { 57 /* fall back to 1st order upwind */ 58 v1 = -1.0*a; v0 = 1.0*a; 59 }; 60 if (j>1) {J = Ii-1*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES);CHKERRQ(ierr);} 61 ierr = MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES);CHKERRQ(ierr); 62 a /= 10.; /* use a different velocity for the next component */ 63 /* add a coupling to the previous and next components */ 64 v = 0.5; 65 if (b>0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 66 if (b<bs-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 67 } 68 /* make up some rhs */ 69 ierr = VecSetValue(rhs, Ii, rhsval, INSERT_VALUES);CHKERRQ(ierr); 70 rhsval += 1.0; 71 } 72 } 73 } 74 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 75 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 76 77 if (convert) { /* Test different Mat implementations */ 78 Mat B; 79 80 ierr = MatConvert(A,convname,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 81 ierr = MatDestroy(&A);CHKERRQ(ierr); 82 A = B; 83 } 84 85 ierr = VecAssemblyBegin(rhs);CHKERRQ(ierr); 86 ierr = VecAssemblyEnd(rhs);CHKERRQ(ierr); 87 /* set rhs to zero to simplify */ 88 if (zerorhs) { 89 ierr = VecZeroEntries(rhs);CHKERRQ(ierr); 90 } 91 92 if (nonlocalBC) { 93 /*version where boundary conditions are set by processes that don't necessarily own the nodes */ 94 if (!rank) { 95 nboundary_nodes = size>m ? nlocal : m-size+nlocal; 96 ierr = PetscMalloc1(nboundary_nodes,&boundary_nodes);CHKERRQ(ierr); 97 k = 0; 98 for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;}; 99 } else if (rank < m) { 100 nboundary_nodes = nlocal+1; 101 ierr = PetscMalloc1(nboundary_nodes,&boundary_nodes);CHKERRQ(ierr); 102 boundary_nodes[0] = rank*n; 103 k = 1; 104 } else { 105 nboundary_nodes = nlocal; 106 ierr = PetscMalloc1(nboundary_nodes,&boundary_nodes);CHKERRQ(ierr); 107 k = 0; 108 } 109 for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;}; 110 } else { 111 /*version where boundary conditions are set by the node owners only */ 112 ierr = PetscMalloc1(m*n,&boundary_nodes);CHKERRQ(ierr); 113 k=0; 114 for (j=0; j<n; j++) { 115 Ii = j; 116 if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii; 117 } 118 for (i=1; i<m; i++) { 119 Ii = n*i; 120 if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii; 121 } 122 nboundary_nodes = k; 123 } 124 125 ierr = VecDuplicate(rhs, &x);CHKERRQ(ierr); 126 ierr = VecZeroEntries(x);CHKERRQ(ierr); 127 ierr = PetscMalloc2(nboundary_nodes*bs,&boundary_indices,nboundary_nodes*bs,&boundary_values);CHKERRQ(ierr); 128 for (k=0; k<nboundary_nodes; k++) { 129 Ii = boundary_nodes[k]*bs; 130 v = 1.0*boundary_nodes[k]; 131 for (b=0; b<bs; b++, Ii++) { 132 boundary_indices[k*bs+b] = Ii; 133 boundary_values[k*bs+b] = v; 134 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %D %f\n", rank, Ii, (double)PetscRealPart(v));CHKERRQ(ierr); 135 v += 0.1; 136 } 137 } 138 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL);CHKERRQ(ierr); 139 ierr = VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);CHKERRQ(ierr); 140 ierr = VecAssemblyBegin(x);CHKERRQ(ierr); 141 ierr = VecAssemblyEnd(x);CHKERRQ(ierr); 142 143 /* We can check the rhs returned by MatZeroColumns by computing y=rhs-A*x and overwriting the boundary entries with boundary values */ 144 ierr = VecDuplicate(x, &y);CHKERRQ(ierr); 145 ierr = MatMult(A, x, y);CHKERRQ(ierr); 146 ierr = VecAYPX(y, -1.0, rhs);CHKERRQ(ierr); 147 for (k=0; k<nboundary_nodes*bs; k++) boundary_values[k] *= diag; 148 ierr = VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);CHKERRQ(ierr); 149 ierr = VecAssemblyBegin(y);CHKERRQ(ierr); 150 ierr = VecAssemblyEnd(y);CHKERRQ(ierr); 151 152 ierr = PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n");CHKERRQ(ierr); 153 ierr = MatView(A, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 154 ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 155 156 ierr = MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, diag, x, rhs);CHKERRQ(ierr); 157 ierr = PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n");CHKERRQ(ierr); 158 ierr = VecView(rhs,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 159 ierr = VecAXPY(y, -1.0, rhs);CHKERRQ(ierr); 160 ierr = VecNorm(y, NORM_INFINITY, &norm);CHKERRQ(ierr); 161 if (norm > 1.0e-10) { 162 ierr = PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm);CHKERRQ(ierr); 163 ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 164 SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns"); 165 } 166 167 ierr = PetscFree(boundary_nodes);CHKERRQ(ierr); 168 ierr = PetscFree2(boundary_indices,boundary_values);CHKERRQ(ierr); 169 ierr = VecDestroy(&x);CHKERRQ(ierr); 170 ierr = VecDestroy(&y);CHKERRQ(ierr); 171 ierr = VecDestroy(&rhs);CHKERRQ(ierr); 172 ierr = MatDestroy(&A);CHKERRQ(ierr); 173 174 ierr = PetscFinalize(); 175 return ierr; 176 } 177 178 179 /*TEST 180 181 test: 182 diff_args: -j 183 suffix: 0 184 185 test: 186 diff_args: -j 187 suffix: 1 188 nsize: 2 189 190 test: 191 diff_args: -j 192 suffix: 10 193 nsize: 2 194 args: -bs 2 -nonlocal_bc 195 196 test: 197 diff_args: -j 198 suffix: 11 199 nsize: 7 200 args: -bs 2 -nonlocal_bc 201 202 test: 203 diff_args: -j 204 suffix: 12 205 args: -bs 2 -nonlocal_bc -mat_type baij 206 207 test: 208 diff_args: -j 209 suffix: 13 210 nsize: 2 211 args: -bs 2 -nonlocal_bc -mat_type baij 212 213 test: 214 diff_args: -j 215 suffix: 14 216 nsize: 7 217 args: -bs 2 -nonlocal_bc -mat_type baij 218 219 test: 220 diff_args: -j 221 suffix: 2 222 nsize: 7 223 224 test: 225 diff_args: -j 226 suffix: 3 227 args: -mat_type baij 228 229 test: 230 diff_args: -j 231 suffix: 4 232 nsize: 2 233 args: -mat_type baij 234 235 test: 236 diff_args: -j 237 suffix: 5 238 nsize: 7 239 args: -mat_type baij 240 241 test: 242 diff_args: -j 243 suffix: 6 244 args: -bs 2 -mat_type baij 245 246 test: 247 diff_args: -j 248 suffix: 7 249 nsize: 2 250 args: -bs 2 -mat_type baij 251 252 test: 253 diff_args: -j 254 suffix: 8 255 nsize: 7 256 args: -bs 2 -mat_type baij 257 258 test: 259 diff_args: -j 260 suffix: 9 261 args: -bs 2 -nonlocal_bc 262 263 test: 264 diff_args: -j 265 suffix: 15 266 args: -bs 2 -nonlocal_bc -convname shell 267 268 test: 269 diff_args: -j 270 suffix: 16 271 nsize: 2 272 args: -bs 2 -nonlocal_bc -convname shell 273 274 test: 275 diff_args: -j 276 suffix: 17 277 args: -bs 2 -nonlocal_bc -convname dense 278 279 testset: 280 diff_args: -j 281 suffix: full 282 nsize: {{1 3}separate output} 283 args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0 284 285 test: 286 diff_args: -j 287 requires: cuda 288 suffix: cusparse_1 289 nsize: 1 290 args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output} 291 292 test: 293 diff_args: -j 294 requires: cuda 295 suffix: cusparse_3 296 nsize: 3 297 args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse 298 299 TEST*/ 300