xref: /petsc/src/mat/tests/ex18.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
/* Usage/help string printed by -help; states the purpose of this test. */
static char help[] = "Tests the use of MatZeroRowsColumns() for parallel matrices.\n\
Contributed-by: Stephan Kramer <s.kramer@imperial.ac.uk>\n\n";
3 
4 #include <petscmat.h>
5 
6 int main(int argc,char **args)
7 {
8   Mat            A;
9   Vec            x, rhs, y;
10   PetscInt       i,j,k,b,m = 3,n,nlocal=2,bs=1,Ii,J;
11   PetscInt       *boundary_nodes, nboundary_nodes, *boundary_indices;
12   PetscMPIInt    rank,size;
13   PetscScalar    v,v0,v1,v2,a0=0.1,a,rhsval, *boundary_values,diag = 1.0;
14   PetscReal      norm;
15   char           convname[64];
16   PetscBool      upwind = PETSC_FALSE, nonlocalBC = PETSC_FALSE, zerorhs = PETSC_TRUE, convert = PETSC_FALSE;
17 
18   PetscFunctionBeginUser;
19   PetscCall(PetscInitialize(&argc,&args,(char*)0,help));
20   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank));
21   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
22   n = nlocal*size;
23 
24   PetscCall(PetscOptionsGetInt(NULL,NULL, "-bs", &bs, NULL));
25   PetscCall(PetscOptionsGetBool(NULL,NULL, "-nonlocal_bc", &nonlocalBC, NULL));
26   PetscCall(PetscOptionsGetScalar(NULL,NULL, "-diag", &diag, NULL));
27   PetscCall(PetscOptionsGetString(NULL,NULL,"-convname",convname,sizeof(convname),&convert));
28   PetscCall(PetscOptionsGetBool(NULL,NULL, "-zerorhs", &zerorhs, NULL));
29 
30   PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
31   PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n*bs,m*n*bs));
32   PetscCall(MatSetFromOptions(A));
33   PetscCall(MatSetUp(A));
34 
35   PetscCall(MatCreateVecs(A, NULL, &rhs));
36   PetscCall(VecSetFromOptions(rhs));
37   PetscCall(VecSetUp(rhs));
38 
39   rhsval = 0.0;
40   for (i=0; i<m; i++) {
41     for (j=nlocal*rank; j<nlocal*(rank+1); j++) {
42       a = a0;
43       for (b=0; b<bs; b++) {
44         /* let's start with a 5-point stencil diffusion term */
45         v = -1.0;  Ii = (j + n*i)*bs + b;
46         if (i>0)   {J = Ii - n*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
47         if (i<m-1) {J = Ii + n*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
48         if (j>0)   {J = Ii - 1*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
49         if (j<n-1) {J = Ii + 1*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
50         v = 4.0; PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES));
51         if (upwind) {
52           /* now add a 2nd order upwind advection term to add a little asymmetry */
53           if (j>2) {
54             J = Ii-2*bs; v2 = 0.5*a; v1 = -2.0*a; v0 = 1.5*a;
55             PetscCall(MatSetValues(A,1,&Ii,1,&J,&v2,ADD_VALUES));
56           } else {
57             /* fall back to 1st order upwind */
58             v1 = -1.0*a; v0 = 1.0*a;
59           };
60           if (j>1) {J = Ii-1*bs; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v1,ADD_VALUES));}
61           PetscCall(MatSetValues(A,1,&Ii,1,&Ii,&v0,ADD_VALUES));
62           a /= 10.; /* use a different velocity for the next component */
63           /* add a coupling to the previous and next components */
64           v = 0.5;
65           if (b>0) {J = Ii - 1; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
66           if (b<bs-1) {J = Ii + 1; PetscCall(MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES));}
67         }
68         /* make up some rhs */
69         PetscCall(VecSetValue(rhs, Ii, rhsval, INSERT_VALUES));
70         rhsval += 1.0;
71       }
72     }
73   }
74   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
75   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
76 
77   if (convert) { /* Test different Mat implementations */
78     Mat B;
79 
80     PetscCall(MatConvert(A,convname,MAT_INITIAL_MATRIX,&B));
81     PetscCall(MatDestroy(&A));
82     A    = B;
83   }
84 
85   PetscCall(VecAssemblyBegin(rhs));
86   PetscCall(VecAssemblyEnd(rhs));
87   /* set rhs to zero to simplify */
88   if (zerorhs) PetscCall(VecZeroEntries(rhs));
89 
90   if (nonlocalBC) {
91     /*version where boundary conditions are set by processes that don't necessarily own the nodes */
92     if (rank == 0) {
93       nboundary_nodes = size>m ? nlocal : m-size+nlocal;
94       PetscCall(PetscMalloc1(nboundary_nodes,&boundary_nodes));
95       k = 0;
96       for (i=size; i<m; i++,k++) {boundary_nodes[k] = n*i;};
97     } else if (rank < m) {
98       nboundary_nodes = nlocal+1;
99       PetscCall(PetscMalloc1(nboundary_nodes,&boundary_nodes));
100       boundary_nodes[0] = rank*n;
101       k = 1;
102     } else {
103       nboundary_nodes = nlocal;
104       PetscCall(PetscMalloc1(nboundary_nodes,&boundary_nodes));
105       k = 0;
106     }
107     for (j=nlocal*rank; j<nlocal*(rank+1); j++,k++) {boundary_nodes[k] = j;};
108   } else {
109     /*version where boundary conditions are set by the node owners only */
110     PetscCall(PetscMalloc1(m*n,&boundary_nodes));
111     k=0;
112     for (j=0; j<n; j++) {
113       Ii = j;
114       if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
115     }
116     for (i=1; i<m; i++) {
117       Ii = n*i;
118       if (Ii>=rank*m*nlocal && Ii<(rank+1)*m*nlocal) boundary_nodes[k++] = Ii;
119     }
120     nboundary_nodes = k;
121   }
122 
123   PetscCall(VecDuplicate(rhs, &x));
124   PetscCall(VecZeroEntries(x));
125   PetscCall(PetscMalloc2(nboundary_nodes*bs,&boundary_indices,nboundary_nodes*bs,&boundary_values));
126   for (k=0; k<nboundary_nodes; k++) {
127     Ii = boundary_nodes[k]*bs;
128     v = 1.0*boundary_nodes[k];
129     for (b=0; b<bs; b++, Ii++) {
130       boundary_indices[k*bs+b] = Ii;
131       boundary_values[k*bs+b] = v;
132       PetscCall(PetscSynchronizedPrintf(PETSC_COMM_WORLD, "%d %" PetscInt_FMT " %f\n", rank, Ii, (double)PetscRealPart(v)));
133       v += 0.1;
134     }
135   }
136   PetscCall(PetscSynchronizedFlush(PETSC_COMM_WORLD, NULL));
137   PetscCall(VecSetValues(x, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES));
138   PetscCall(VecAssemblyBegin(x));
139   PetscCall(VecAssemblyEnd(x));
140 
141   /* We can check the rhs returned by MatZeroColumns by computing y=rhs-A*x  and overwriting the boundary entries with boundary values */
142   PetscCall(VecDuplicate(x, &y));
143   PetscCall(MatMult(A, x, y));
144   PetscCall(VecAYPX(y, -1.0, rhs));
145   for (k=0; k<nboundary_nodes*bs; k++) boundary_values[k] *= diag;
146   PetscCall(VecSetValues(y, nboundary_nodes*bs, boundary_indices, boundary_values, INSERT_VALUES));
147   PetscCall(VecAssemblyBegin(y));
148   PetscCall(VecAssemblyEnd(y));
149 
150   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Matrix A and vector x:\n"));
151   PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD));
152   PetscCall(VecView(x,PETSC_VIEWER_STDOUT_WORLD));
153 
154   PetscCall(MatZeroRowsColumns(A, nboundary_nodes*bs, boundary_indices, diag, x, rhs));
155   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Vector rhs returned by MatZeroRowsColumns\n"));
156   PetscCall(VecView(rhs,PETSC_VIEWER_STDOUT_WORLD));
157   PetscCall(VecAXPY(y, -1.0, rhs));
158   PetscCall(VecNorm(y, NORM_INFINITY, &norm));
159   if (norm > 1.0e-10) {
160     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "*** Difference between rhs and y, inf-norm: %f\n", (double)norm));
161     PetscCall(VecView(y,PETSC_VIEWER_STDOUT_WORLD));
162     SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Bug in MatZeroRowsColumns");
163   }
164 
165   PetscCall(PetscFree(boundary_nodes));
166   PetscCall(PetscFree2(boundary_indices,boundary_values));
167   PetscCall(VecDestroy(&x));
168   PetscCall(VecDestroy(&y));
169   PetscCall(VecDestroy(&rhs));
170   PetscCall(MatDestroy(&A));
171 
172   PetscCall(PetscFinalize());
173   return 0;
174 }
175 
176 /*TEST
177 
178    test:
179       diff_args: -j
180       suffix: 0
181 
182    test:
183       diff_args: -j
184       suffix: 1
185       nsize: 2
186 
187    test:
188       diff_args: -j
189       suffix: 10
190       nsize: 2
191       args: -bs 2 -nonlocal_bc
192 
193    test:
194       diff_args: -j
195       suffix: 11
196       nsize: 7
197       args: -bs 2 -nonlocal_bc
198 
199    test:
200       diff_args: -j
201       suffix: 12
202       args: -bs 2 -nonlocal_bc -mat_type baij
203 
204    test:
205       diff_args: -j
206       suffix: 13
207       nsize: 2
208       args: -bs 2 -nonlocal_bc -mat_type baij
209 
210    test:
211       diff_args: -j
212       suffix: 14
213       nsize: 7
214       args: -bs 2 -nonlocal_bc -mat_type baij
215 
216    test:
217       diff_args: -j
218       suffix: 2
219       nsize: 7
220 
221    test:
222       diff_args: -j
223       suffix: 3
224       args: -mat_type baij
225 
226    test:
227       diff_args: -j
228       suffix: 4
229       nsize: 2
230       args: -mat_type baij
231 
232    test:
233       diff_args: -j
234       suffix: 5
235       nsize: 7
236       args: -mat_type baij
237 
238    test:
239       diff_args: -j
240       suffix: 6
241       args: -bs 2 -mat_type baij
242 
243    test:
244       diff_args: -j
245       suffix: 7
246       nsize: 2
247       args: -bs 2 -mat_type baij
248 
249    test:
250       diff_args: -j
251       suffix: 8
252       nsize: 7
253       args: -bs 2 -mat_type baij
254 
255    test:
256       diff_args: -j
257       suffix: 9
258       args: -bs 2 -nonlocal_bc
259 
260    test:
261       diff_args: -j
262       suffix: 15
263       args: -bs 2 -nonlocal_bc -convname shell
264 
265    test:
266       diff_args: -j
267       suffix: 16
268       nsize: 2
269       args: -bs 2 -nonlocal_bc -convname shell
270 
271    test:
272       diff_args: -j
273       suffix: 17
274       args: -bs 2 -nonlocal_bc -convname dense
275 
276    testset:
277       diff_args: -j
278       suffix: full
279       nsize: {{1 3}separate output}
280       args: -diag {{0.12 -0.13}separate output} -convname {{aij shell baij}separate output} -zerorhs 0
281 
282    test:
283       diff_args: -j
284       requires: cuda
285       suffix: cusparse_1
286       nsize: 1
287       args: -diag {{0.12 -0.13}separate output} -convname {{seqaijcusparse mpiaijcusparse}separate output} -zerorhs 0 -mat_type {{seqaijcusparse mpiaijcusparse}separate output}
288 
289    test:
290       diff_args: -j
291       requires: cuda
292       suffix: cusparse_3
293       nsize: 3
294       args: -diag {{0.12 -0.13}separate output} -convname mpiaijcusparse -zerorhs 0 -mat_type mpiaijcusparse
295 
296 TEST*/
297