xref: /petsc/src/mat/tests/ex226.c (revision 732aec7a18f2199fb53bb9a2f3aef439a834ce31)
1 static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
2 
3 #include <petscmat.h>
4 
5 /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
global_index(PetscInt i,PetscInt j,PetscInt k,PetscInt m,PetscInt n)6 PetscInt global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
7 {
8   return i + j * m + k * m * n;
9 }
10 
main(int argc,char ** argv)11 int main(int argc, char **argv)
12 {
13   Mat           A, B, C, PtAP, PtAP_copy, PtAP_squared;
14   PetscInt      i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
15   PetscScalar   v;
16   PetscBool     equal = PETSC_FALSE, mat_view = PETSC_FALSE;
17   char          stencil[PETSC_MAX_PATH_LEN];
18   PetscLogStage fullMatMatMultStage;
19 
20   PetscFunctionBeginUser;
21   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
22   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
23   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
24   PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL));
25   PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view));
26   PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL));
27 
28   /* Create a aij matrix A */
29   M = N = m * n * o;
30   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
31   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
32   PetscCall(MatSetType(A, MATAIJ));
33   PetscCall(MatSetFromOptions(A));
34 
35   /* Consistency checks */
36   PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!");
37 
38   /************ 2D stencils ***************/
39   PetscCall(PetscStrcmp(stencil, "2d5point", &equal));
40   if (equal) { /* 5-point stencil, 2D */
41     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
42     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
43     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
44     for (Ii = Istart; Ii < Iend; Ii++) {
45       v = -1.0;
46       k = Ii / (m * n);
47       j = (Ii - k * m * n) / m;
48       i = (Ii - k * m * n - j * m);
49       if (i > 0) {
50         J = global_index(i - 1, j, k, m, n);
51         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
52       }
53       if (i < m - 1) {
54         J = global_index(i + 1, j, k, m, n);
55         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
56       }
57       if (j > 0) {
58         J = global_index(i, j - 1, k, m, n);
59         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
60       }
61       if (j < n - 1) {
62         J = global_index(i, j + 1, k, m, n);
63         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
64       }
65       v = 4.0;
66       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
67     }
68   }
69   PetscCall(PetscStrcmp(stencil, "2d9point", &equal));
70   if (equal) { /* 9-point stencil, 2D */
71     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
72     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
73     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
74     for (Ii = Istart; Ii < Iend; Ii++) {
75       v = -1.0;
76       k = Ii / (m * n);
77       j = (Ii - k * m * n) / m;
78       i = (Ii - k * m * n - j * m);
79       if (i > 0) {
80         J = global_index(i - 1, j, k, m, n);
81         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
82       }
83       if (i > 0 && j > 0) {
84         J = global_index(i - 1, j - 1, k, m, n);
85         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
86       }
87       if (j > 0) {
88         J = global_index(i, j - 1, k, m, n);
89         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
90       }
91       if (i < m - 1 && j > 0) {
92         J = global_index(i + 1, j - 1, k, m, n);
93         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
94       }
95       if (i < m - 1) {
96         J = global_index(i + 1, j, k, m, n);
97         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
98       }
99       if (i < m - 1 && j < n - 1) {
100         J = global_index(i + 1, j + 1, k, m, n);
101         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
102       }
103       if (j < n - 1) {
104         J = global_index(i, j + 1, k, m, n);
105         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
106       }
107       if (i > 0 && j < n - 1) {
108         J = global_index(i - 1, j + 1, k, m, n);
109         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
110       }
111       v = 8.0;
112       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
113     }
114   }
115   PetscCall(PetscStrcmp(stencil, "2d9point2", &equal));
116   if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
117     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
118     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
119     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
120     for (Ii = Istart; Ii < Iend; Ii++) {
121       v = -1.0;
122       k = Ii / (m * n);
123       j = (Ii - k * m * n) / m;
124       i = (Ii - k * m * n - j * m);
125       if (i > 0) {
126         J = global_index(i - 1, j, k, m, n);
127         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
128       }
129       if (i > 1) {
130         J = global_index(i - 2, j, k, m, n);
131         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
132       }
133       if (i < m - 1) {
134         J = global_index(i + 1, j, k, m, n);
135         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
136       }
137       if (i < m - 2) {
138         J = global_index(i + 2, j, k, m, n);
139         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
140       }
141       if (j > 0) {
142         J = global_index(i, j - 1, k, m, n);
143         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
144       }
145       if (j > 1) {
146         J = global_index(i, j - 2, k, m, n);
147         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
148       }
149       if (j < n - 1) {
150         J = global_index(i, j + 1, k, m, n);
151         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
152       }
153       if (j < n - 2) {
154         J = global_index(i, j + 2, k, m, n);
155         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
156       }
157       v = 8.0;
158       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
159     }
160   }
161   PetscCall(PetscStrcmp(stencil, "2d13point", &equal));
162   if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
163     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
164     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
165     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
166     for (Ii = Istart; Ii < Iend; Ii++) {
167       v = -1.0;
168       k = Ii / (m * n);
169       j = (Ii - k * m * n) / m;
170       i = (Ii - k * m * n - j * m);
171       if (i > 0) {
172         J = global_index(i - 1, j, k, m, n);
173         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
174       }
175       if (i > 1) {
176         J = global_index(i - 2, j, k, m, n);
177         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
178       }
179       if (i > 2) {
180         J = global_index(i - 3, j, k, m, n);
181         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
182       }
183       if (i < m - 1) {
184         J = global_index(i + 1, j, k, m, n);
185         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
186       }
187       if (i < m - 2) {
188         J = global_index(i + 2, j, k, m, n);
189         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
190       }
191       if (i < m - 3) {
192         J = global_index(i + 3, j, k, m, n);
193         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
194       }
195       if (j > 0) {
196         J = global_index(i, j - 1, k, m, n);
197         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
198       }
199       if (j > 1) {
200         J = global_index(i, j - 2, k, m, n);
201         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
202       }
203       if (j > 2) {
204         J = global_index(i, j - 3, k, m, n);
205         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
206       }
207       if (j < n - 1) {
208         J = global_index(i, j + 1, k, m, n);
209         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
210       }
211       if (j < n - 2) {
212         J = global_index(i, j + 2, k, m, n);
213         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
214       }
215       if (j < n - 3) {
216         J = global_index(i, j + 3, k, m, n);
217         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
218       }
219       v = 12.0;
220       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
221     }
222   }
223   /************ 3D stencils ***************/
224   PetscCall(PetscStrcmp(stencil, "3d7point", &equal));
225   if (equal) { /* 7-point stencil, 3D */
226     PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL));
227     PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL));
228     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
229     for (Ii = Istart; Ii < Iend; Ii++) {
230       v = -1.0;
231       k = Ii / (m * n);
232       j = (Ii - k * m * n) / m;
233       i = (Ii - k * m * n - j * m);
234       if (i > 0) {
235         J = global_index(i - 1, j, k, m, n);
236         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
237       }
238       if (i < m - 1) {
239         J = global_index(i + 1, j, k, m, n);
240         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
241       }
242       if (j > 0) {
243         J = global_index(i, j - 1, k, m, n);
244         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
245       }
246       if (j < n - 1) {
247         J = global_index(i, j + 1, k, m, n);
248         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
249       }
250       if (k > 0) {
251         J = global_index(i, j, k - 1, m, n);
252         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
253       }
254       if (k < o - 1) {
255         J = global_index(i, j, k + 1, m, n);
256         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
257       }
258       v = 6.0;
259       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
260     }
261   }
262   PetscCall(PetscStrcmp(stencil, "3d13point", &equal));
263   if (equal) { /* 13-point stencil, 3D */
264     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
265     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
266     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
267     for (Ii = Istart; Ii < Iend; Ii++) {
268       v = -1.0;
269       k = Ii / (m * n);
270       j = (Ii - k * m * n) / m;
271       i = (Ii - k * m * n - j * m);
272       if (i > 0) {
273         J = global_index(i - 1, j, k, m, n);
274         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
275       }
276       if (i > 1) {
277         J = global_index(i - 2, j, k, m, n);
278         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
279       }
280       if (i < m - 1) {
281         J = global_index(i + 1, j, k, m, n);
282         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
283       }
284       if (i < m - 2) {
285         J = global_index(i + 2, j, k, m, n);
286         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
287       }
288       if (j > 0) {
289         J = global_index(i, j - 1, k, m, n);
290         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
291       }
292       if (j > 1) {
293         J = global_index(i, j - 2, k, m, n);
294         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
295       }
296       if (j < n - 1) {
297         J = global_index(i, j + 1, k, m, n);
298         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
299       }
300       if (j < n - 2) {
301         J = global_index(i, j + 2, k, m, n);
302         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
303       }
304       if (k > 0) {
305         J = global_index(i, j, k - 1, m, n);
306         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
307       }
308       if (k > 1) {
309         J = global_index(i, j, k - 2, m, n);
310         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
311       }
312       if (k < o - 1) {
313         J = global_index(i, j, k + 1, m, n);
314         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
315       }
316       if (k < o - 2) {
317         J = global_index(i, j, k + 2, m, n);
318         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
319       }
320       v = 12.0;
321       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
322     }
323   }
324   PetscCall(PetscStrcmp(stencil, "3d19point", &equal));
325   if (equal) { /* 19-point stencil, 3D */
326     PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL));
327     PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL));
328     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
329     for (Ii = Istart; Ii < Iend; Ii++) {
330       v = -1.0;
331       k = Ii / (m * n);
332       j = (Ii - k * m * n) / m;
333       i = (Ii - k * m * n - j * m);
334       /* one hop */
335       if (i > 0) {
336         J = global_index(i - 1, j, k, m, n);
337         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
338       }
339       if (i < m - 1) {
340         J = global_index(i + 1, j, k, m, n);
341         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
342       }
343       if (j > 0) {
344         J = global_index(i, j - 1, k, m, n);
345         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
346       }
347       if (j < n - 1) {
348         J = global_index(i, j + 1, k, m, n);
349         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
350       }
351       if (k > 0) {
352         J = global_index(i, j, k - 1, m, n);
353         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
354       }
355       if (k < o - 1) {
356         J = global_index(i, j, k + 1, m, n);
357         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
358       }
359       /* two hops */
360       if (i > 0 && j > 0) {
361         J = global_index(i - 1, j - 1, k, m, n);
362         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
363       }
364       if (i > 0 && k > 0) {
365         J = global_index(i - 1, j, k - 1, m, n);
366         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
367       }
368       if (i > 0 && j < n - 1) {
369         J = global_index(i - 1, j + 1, k, m, n);
370         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
371       }
372       if (i > 0 && k < o - 1) {
373         J = global_index(i - 1, j, k + 1, m, n);
374         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
375       }
376       if (i < m - 1 && j > 0) {
377         J = global_index(i + 1, j - 1, k, m, n);
378         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
379       }
380       if (i < m - 1 && k > 0) {
381         J = global_index(i + 1, j, k - 1, m, n);
382         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
383       }
384       if (i < m - 1 && j < n - 1) {
385         J = global_index(i + 1, j + 1, k, m, n);
386         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
387       }
388       if (i < m - 1 && k < o - 1) {
389         J = global_index(i + 1, j, k + 1, m, n);
390         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
391       }
392       if (j > 0 && k > 0) {
393         J = global_index(i, j - 1, k - 1, m, n);
394         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
395       }
396       if (j > 0 && k < o - 1) {
397         J = global_index(i, j - 1, k + 1, m, n);
398         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
399       }
400       if (j < n - 1 && k > 0) {
401         J = global_index(i, j + 1, k - 1, m, n);
402         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
403       }
404       if (j < n - 1 && k < o - 1) {
405         J = global_index(i, j + 1, k + 1, m, n);
406         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
407       }
408       v = 18.0;
409       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
410     }
411   }
412   PetscCall(PetscStrcmp(stencil, "3d27point", &equal));
413   if (equal) { /* 27-point stencil, 3D */
414     PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL));
415     PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL));
416     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
417     for (Ii = Istart; Ii < Iend; Ii++) {
418       v = -1.0;
419       k = Ii / (m * n);
420       j = (Ii - k * m * n) / m;
421       i = (Ii - k * m * n - j * m);
422       if (k > 0) {
423         if (j > 0) {
424           if (i > 0) {
425             J = global_index(i - 1, j - 1, k - 1, m, n);
426             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
427           }
428           J = global_index(i, j - 1, k - 1, m, n);
429           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
430           if (i < m - 1) {
431             J = global_index(i + 1, j - 1, k - 1, m, n);
432             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
433           }
434         }
435         {
436           if (i > 0) {
437             J = global_index(i - 1, j, k - 1, m, n);
438             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
439           }
440           J = global_index(i, j, k - 1, m, n);
441           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
442           if (i < m - 1) {
443             J = global_index(i + 1, j, k - 1, m, n);
444             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
445           }
446         }
447         if (j < n - 1) {
448           if (i > 0) {
449             J = global_index(i - 1, j + 1, k - 1, m, n);
450             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
451           }
452           J = global_index(i, j + 1, k - 1, m, n);
453           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
454           if (i < m - 1) {
455             J = global_index(i + 1, j + 1, k - 1, m, n);
456             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
457           }
458         }
459       }
460       {
461         if (j > 0) {
462           if (i > 0) {
463             J = global_index(i - 1, j - 1, k, m, n);
464             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
465           }
466           J = global_index(i, j - 1, k, m, n);
467           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
468           if (i < m - 1) {
469             J = global_index(i + 1, j - 1, k, m, n);
470             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
471           }
472         }
473         {
474           if (i > 0) {
475             J = global_index(i - 1, j, k, m, n);
476             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
477           }
478           J = global_index(i, j, k, m, n);
479           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
480           if (i < m - 1) {
481             J = global_index(i + 1, j, k, m, n);
482             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
483           }
484         }
485         if (j < n - 1) {
486           if (i > 0) {
487             J = global_index(i - 1, j + 1, k, m, n);
488             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
489           }
490           J = global_index(i, j + 1, k, m, n);
491           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
492           if (i < m - 1) {
493             J = global_index(i + 1, j + 1, k, m, n);
494             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
495           }
496         }
497       }
498       if (k < o - 1) {
499         if (j > 0) {
500           if (i > 0) {
501             J = global_index(i - 1, j - 1, k + 1, m, n);
502             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
503           }
504           J = global_index(i, j - 1, k + 1, m, n);
505           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
506           if (i < m - 1) {
507             J = global_index(i + 1, j - 1, k + 1, m, n);
508             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
509           }
510         }
511         {
512           if (i > 0) {
513             J = global_index(i - 1, j, k + 1, m, n);
514             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
515           }
516           J = global_index(i, j, k + 1, m, n);
517           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
518           if (i < m - 1) {
519             J = global_index(i + 1, j, k + 1, m, n);
520             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
521           }
522         }
523         if (j < n - 1) {
524           if (i > 0) {
525             J = global_index(i - 1, j + 1, k + 1, m, n);
526             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
527           }
528           J = global_index(i, j + 1, k + 1, m, n);
529           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
530           if (i < m - 1) {
531             J = global_index(i + 1, j + 1, k + 1, m, n);
532             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
533           }
534         }
535       }
536       v = 26.0;
537       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
538     }
539   }
540   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
541   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
542 
543   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
544   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
545 
546   PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage));
547 
548   /* Test C = A*B */
549   PetscCall(PetscLogStagePush(fullMatMatMultStage));
550   PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &C));
551 
552   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
553   PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP));
554   PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy));
555   PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP_squared));
556 
557   PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
558   PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD));
559 
560   PetscCall(MatDestroy(&PtAP_squared));
561   PetscCall(MatDestroy(&PtAP_copy));
562   PetscCall(MatDestroy(&PtAP));
563   PetscCall(MatDestroy(&C));
564   PetscCall(MatDestroy(&B));
565   PetscCall(MatDestroy(&A));
566   PetscCall(PetscFinalize());
567   return 0;
568 }
569 
570 /*TEST
571 
572  test:
573       suffix: 1
574       nsize: 1
575       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
576 
577  test:
578        suffix: 2
579        nsize: 1
580        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
581 
582  test:
583       suffix: 3
584       nsize: 4
585       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
586 
587 TEST*/
588