1 static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\ 2 Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n"; 3 4 #include <petscmat.h> 5 6 int main(int argc,char **args) 7 { 8 Mat A; 9 Vec x, rhs, y; 10 PetscInt i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J; 11 PetscInt *boundary_nodes, nboundary_nodes, *boundary_indices; 12 PetscMPIInt rank,size; 13 PetscErrorCode ierr; 14 PetscScalar v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values,diag = 1.0; 15 PetscReal norm; 16 char convname[64]; 17 PetscBool upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE; 18 19 ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 20 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr); 21 ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr); 22 n = nlocal*size; 23 24 ierr = PetscOptionsGetInt(NULL,NULL, "-bs", &bs, NULL);CHKERRQ(ierr); 25 ierr = PetscOptionsGetBool(NULL,NULL, "-nonlocal_bc", &nonlocalBC, NULL);CHKERRQ(ierr); 26 ierr = PetscOptionsGetScalar(NULL,NULL, "-diag", &diag, NULL);CHKERRQ(ierr); 27 ierr = PetscOptionsGetString(NULL,NULL,"-convname",convname,sizeof(convname),&convert);CHKERRQ(ierr); 28 ierr = PetscOptionsGetBool(NULL,NULL, "-zerorhs", &zerorhs, NULL);CHKERRQ(ierr); 29 30 ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); 31 ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs);CHKERRQ(ierr); 32 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 33 ierr = MatSetUp(A);CHKERRQ(ierr); 34 35 ierr = MatCreateVecs(A, NULL, &rhs);CHKERRQ(ierr); 36 ierr = VecSetFromOptions(rhs);CHKERRQ(ierr); 37 ierr = VecSetUp(rhs);CHKERRQ(ierr); 38 39 rhsval = 0.0; 40 for (i=0; i<m; i++) { 41 for (j=nlocal*rank; j<nlocal*(rank+1); j++) { 42 a = a0; 43 for (b=0; b<bs; b++) { 44 /* let's start with a 5-point stencil diffusion term */ 45 v = -1.0; Ii = (j + n*i)*bs + b; 46 if (i>0) {J = Ii - n*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 47 if (i<m-1) {J = Ii + n*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 48 if (j>0) {J = Ii - 1*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 49 if (j<n-1) {J = Ii + 1*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 50 v = 4.0; ierr = MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);CHKERRQ(ierr); 51 if (upwind) { 52 /* now add a 2nd order upwind advection term to add a little asymmetry */ 53 if (j>2) { 54 J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a; 55 ierr = MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES);CHKERRQ(ierr); 56 } else { 57 /* fall back to 1st order upwind */ 58 v1 = -1.0*a; v0 = 1.0*a; 59 }; 60 if (j>1) {J = Ii-1*bs; ierr = MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES);CHKERRQ(ierr);} 61 ierr = MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES);CHKERRQ(ierr); 62 a /= 10.; /* use a different velocity for the next component */ 63 /* add a coupling to the previous and next components */ 64 v = 0.5; 65 if (b>0) {J = Ii - 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 66 if (b<bs-1) {J = Ii + 1; ierr = MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);CHKERRQ(ierr);} 67 } 68 /* make up some rhs */ 69 ierr = VecSetValue(rhs, Ii, rhsval, INSERT_VALUES);CHKERRQ(ierr); 70 rhsval += 1.0; 71 } 72 } 73 } 74 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 75 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 76 77 if (convert) { /* Test different Mat implementations */ 78 Mat B; 79 80 ierr = MatConvert(A,convname,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 81 ierr = MatDestroy(&A);CHKERRQ(ierr); 82 A = B; 83 } 84 85 ierr = VecAssemblyBegin(rhs);CHKERRQ(ierr); 86 ierr = VecAssemblyEnd(rhs);CHKERRQ(ierr); 87 /* set rhs to zero to simplify */ 88 if (zerorhs) { 89 ierr = VecZeroEntries(rhs);CHKERRQ(ierr); 90 } 91 92 if (nonlocalBC) { 93 /*version where boundary conditions are set by processes that don't necessarily own the nodes */ 94 if (!rank) { 95 nboundary_nodes = size>m ? nlocal : m-size+nlocal; 96 ierr = PetscMalloc1(nboundary_nodes,&boundary_nodes);CHKERRQ(ierr); 97 k = 0; 98 for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;}; 99 } else if (rank < m) { 100 nboundary_nodes = nlocal+1; 101 ierr = PetscMalloc1(nboundary_nodes,&boundary_nodes);CHKERRQ(ierr); 102 boundary_nodes[0] = rank*n; 103 k = 1; 104 } else { 105 nboundary_nodes = nlocal; 106 ierr = PetscMalloc1(nboundary_nodes,&boundary_nodes);CHKERRQ(ierr); 107 k = 0; 108 } 109 for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;}; 110 } else { 111 /*version where boundary conditions are set by the node owners only */ 112 ierr = PetscMalloc1(m*n,&boundary_nodes);CHKERRQ(ierr); 113 k=0; 114 for (j=0; j<n; j++) { 115 Ii = j; 116 if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii; 117 } 118 for (i=1; i<m; i++) { 119 Ii = n*i; 120 if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii; 121 } 122 nboundary_nodes = k; 123 } 124 125 ierr = VecDuplicate(rhs, &x);CHKERRQ(ierr); 126 ierr = VecZeroEntries(x);CHKERRQ(ierr); 127 ierr = PetscMalloc2(nboundary_nodes*bs,&boundary_indices,nboundary_nodes*bs,&boundary_values);CHKERRQ(ierr); 128 for (k=0; k<nboundary_nodes; k++) { 129 Ii = boundary_nodes[k]*bs; 130 v = 1.0*boundary_nodes[k]; 131 for (b=0; b<bs; b++, Ii++) { 132 boundary_indices[k*bs+b] = Ii; 133 boundary_values[k*bs+b] = v; 134 ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %D %f\n", rank, Ii, (double)PetscRealPart(v));CHKERRQ(ierr); 135 v += 0.1; 136 } 137 } 138 ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL);CHKERRQ(ierr); 139 ierr = VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);CHKERRQ(ierr); 140 ierr = VecAssemblyBegin(x);CHKERRQ(ierr); 141 ierr = VecAssemblyEnd(x);CHKERRQ(ierr); 142 143 /* We can check the rhs returned by MatZeroColumns by computing y=rhs-A*x and overwriting the boundary entries with boundary values */ 144 ierr = VecDuplicate(x, &y);CHKERRQ(ierr); 145 ierr = MatMult(A, x, y);CHKERRQ(ierr); 146 ierr = VecAYPX(y, -1.0, rhs);CHKERRQ(ierr); 147 for (k=0; k<nboundary_nodes*bs; k++) boundary_values[k] *= diag; 148 ierr = VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES);CHKERRQ(ierr); 149 ierr = VecAssemblyBegin(y);CHKERRQ(ierr); 150 ierr = VecAssemblyEnd(y);CHKERRQ(ierr); 151 152 ierr = PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n");CHKERRQ(ierr); 153 ierr = MatView(A, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 154 ierr = VecView(x,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 155 156 ierr = MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, diag, x, rhs);CHKERRQ(ierr); 157 ierr = PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n");CHKERRQ(ierr); 158 ierr = VecView(rhs,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 159 ierr = VecAXPY(y, -1.0, rhs);CHKERRQ(ierr); 160 ierr = VecNorm(y, NORM_INFINITY, &norm);CHKERRQ(ierr); 161 if (norm > 1.0e-10) { 162 ierr = PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm);CHKERRQ(ierr); 163 ierr = VecView(y,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 164 SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns"); 165 } 166 167 ierr = PetscFree(boundary_nodes);CHKERRQ(ierr); 168 ierr = PetscFree2(boundary_indices,boundary_values);CHKERRQ(ierr); 169 ierr = VecDestroy(&x);CHKERRQ(ierr); 170 ierr = VecDestroy(&y);CHKERRQ(ierr); 171 ierr = VecDestroy(&rhs);CHKERRQ(ierr); 172 ierr = MatDestroy(&A);CHKERRQ(ierr); 173 174 ierr = PetscFinalize(); 175 return ierr; 176 } 177 178 /*TEST 179 180 test: 181 diff_args: -j 182 suffix: 0 183 184 test: 185 diff_args: -j 186 suffix: 1 187 nsize: 2 188 189 test: 190 diff_args: -j 191 suffix: 10 192 nsize: 2 193 args: -bs 2 -nonlocal_bc 194 195 test: 196 diff_args: -j 197 suffix: 11 198 nsize: 7 199 args: -bs 2 -nonlocal_bc 200 201 test: 202 diff_args: -j 203 suffix: 12 204 args: -bs 2 -nonlocal_bc -mat_type baij 205 206 test: 207 diff_args: -j 208 suffix: 13 209 nsize: 2 210 args: -bs 2 -nonlocal_bc -mat_type baij 211 212 test: 213 diff_args: -j 214 suffix: 14 215 nsize: 7 216 args: -bs 2 -nonlocal_bc -mat_type baij 217 218 test: 219 diff_args: -j 220 suffix: 2 221 nsize: 7 222 223 test: 224 diff_args: -j 225 suffix: 3 226 args: -mat_type baij 227 228 test: 229 diff_args: -j 230 suffix: 4 231 nsize: 2 232 args: -mat_type baij 233 234 test: 235 diff_args: -j 236 suffix: 5 237 nsize: 7 238 args: -mat_type baij 239 240 test: 241 diff_args: -j 242 suffix: 6 243 args: -bs 2 -mat_type baij 244 245 test: 246 diff_args: -j 247 suffix: 7 248 nsize: 2 249 args: -bs 2 -mat_type baij 250 251 test: 252 diff_args: -j 253 suffix: 8 254 nsize: 7 255 args: -bs 2 -mat_type baij 256 257 test: 258 diff_args: -j 259 suffix: 9 260 args: -bs 2 -nonlocal_bc 261 262 test: 263 diff_args: -j 264 suffix: 15 265 args: -bs 2 -nonlocal_bc -convname shell 266 267 test: 268 diff_args: -j 269 suffix: 16 270 nsize: 2 271 args: -bs 2 -nonlocal_bc -convname shell 272 273 test: 274 diff_args: -j 275 suffix: 17 276 args: -bs 2 -nonlocal_bc -convname dense 277 278 testset: 279 diff_args: -j 280 suffix: full 281 nsize: {{1 3}separate output} 282 args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0 283 284 test: 285 diff_args: -j 286 requires: cuda 287 suffix: cusparse_1 288 nsize: 1 289 args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output} 290 291 test: 292 diff_args: -j 293 requires: cuda 294 suffix: cusparse_3 295 nsize: 3 296 args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse 297 298 TEST*/ 299