xref: /petsc/src/mat/tests/ex226.c (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
2 
3 #include <petscmat.h>
4 
5 /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
6 int global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
7 {
8   return i + j * m + k * m * n;
9 }
10 
11 int main(int argc, char **argv)
12 {
13   Mat         A, B, C, PtAP, PtAP_copy, PtAP_squared;
14   PetscInt    i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
15   PetscScalar v;
16   PetscBool   equal = PETSC_FALSE, mat_view = PETSC_FALSE;
17   char        stencil[PETSC_MAX_PATH_LEN];
18 #if defined(PETSC_USE_LOG)
19   PetscLogStage fullMatMatMultStage;
20 #endif
21 
22   PetscFunctionBeginUser;
23   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
24   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
25   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
26   PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL));
27   PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view));
28   PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL));
29 
30   /* Create a aij matrix A */
31   M = N = m * n * o;
32   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
33   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
34   PetscCall(MatSetType(A, MATAIJ));
35   PetscCall(MatSetFromOptions(A));
36 
37   /* Consistency checks */
38   PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!");
39 
40   /************ 2D stencils ***************/
41   PetscCall(PetscStrcmp(stencil, "2d5point", &equal));
42   if (equal) { /* 5-point stencil, 2D */
43     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
44     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
45     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
46     for (Ii = Istart; Ii < Iend; Ii++) {
47       v = -1.0;
48       k = Ii / (m * n);
49       j = (Ii - k * m * n) / m;
50       i = (Ii - k * m * n - j * m);
51       if (i > 0) {
52         J = global_index(i - 1, j, k, m, n);
53         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
54       }
55       if (i < m - 1) {
56         J = global_index(i + 1, j, k, m, n);
57         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
58       }
59       if (j > 0) {
60         J = global_index(i, j - 1, k, m, n);
61         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
62       }
63       if (j < n - 1) {
64         J = global_index(i, j + 1, k, m, n);
65         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
66       }
67       v = 4.0;
68       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
69     }
70   }
71   PetscCall(PetscStrcmp(stencil, "2d9point", &equal));
72   if (equal) { /* 9-point stencil, 2D */
73     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
74     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
75     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
76     for (Ii = Istart; Ii < Iend; Ii++) {
77       v = -1.0;
78       k = Ii / (m * n);
79       j = (Ii - k * m * n) / m;
80       i = (Ii - k * m * n - j * m);
81       if (i > 0) {
82         J = global_index(i - 1, j, k, m, n);
83         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
84       }
85       if (i > 0 && j > 0) {
86         J = global_index(i - 1, j - 1, k, m, n);
87         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
88       }
89       if (j > 0) {
90         J = global_index(i, j - 1, k, m, n);
91         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
92       }
93       if (i < m - 1 && j > 0) {
94         J = global_index(i + 1, j - 1, k, m, n);
95         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
96       }
97       if (i < m - 1) {
98         J = global_index(i + 1, j, k, m, n);
99         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
100       }
101       if (i < m - 1 && j < n - 1) {
102         J = global_index(i + 1, j + 1, k, m, n);
103         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
104       }
105       if (j < n - 1) {
106         J = global_index(i, j + 1, k, m, n);
107         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
108       }
109       if (i > 0 && j < n - 1) {
110         J = global_index(i - 1, j + 1, k, m, n);
111         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
112       }
113       v = 8.0;
114       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
115     }
116   }
117   PetscCall(PetscStrcmp(stencil, "2d9point2", &equal));
118   if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
119     PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
120     PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
121     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
122     for (Ii = Istart; Ii < Iend; Ii++) {
123       v = -1.0;
124       k = Ii / (m * n);
125       j = (Ii - k * m * n) / m;
126       i = (Ii - k * m * n - j * m);
127       if (i > 0) {
128         J = global_index(i - 1, j, k, m, n);
129         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
130       }
131       if (i > 1) {
132         J = global_index(i - 2, j, k, m, n);
133         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
134       }
135       if (i < m - 1) {
136         J = global_index(i + 1, j, k, m, n);
137         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
138       }
139       if (i < m - 2) {
140         J = global_index(i + 2, j, k, m, n);
141         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
142       }
143       if (j > 0) {
144         J = global_index(i, j - 1, k, m, n);
145         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
146       }
147       if (j > 1) {
148         J = global_index(i, j - 2, k, m, n);
149         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
150       }
151       if (j < n - 1) {
152         J = global_index(i, j + 1, k, m, n);
153         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
154       }
155       if (j < n - 2) {
156         J = global_index(i, j + 2, k, m, n);
157         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
158       }
159       v = 8.0;
160       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
161     }
162   }
163   PetscCall(PetscStrcmp(stencil, "2d13point", &equal));
164   if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
165     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
166     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
167     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
168     for (Ii = Istart; Ii < Iend; Ii++) {
169       v = -1.0;
170       k = Ii / (m * n);
171       j = (Ii - k * m * n) / m;
172       i = (Ii - k * m * n - j * m);
173       if (i > 0) {
174         J = global_index(i - 1, j, k, m, n);
175         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
176       }
177       if (i > 1) {
178         J = global_index(i - 2, j, k, m, n);
179         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
180       }
181       if (i > 2) {
182         J = global_index(i - 3, j, k, m, n);
183         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
184       }
185       if (i < m - 1) {
186         J = global_index(i + 1, j, k, m, n);
187         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
188       }
189       if (i < m - 2) {
190         J = global_index(i + 2, j, k, m, n);
191         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
192       }
193       if (i < m - 3) {
194         J = global_index(i + 3, j, k, m, n);
195         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
196       }
197       if (j > 0) {
198         J = global_index(i, j - 1, k, m, n);
199         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
200       }
201       if (j > 1) {
202         J = global_index(i, j - 2, k, m, n);
203         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
204       }
205       if (j > 2) {
206         J = global_index(i, j - 3, k, m, n);
207         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
208       }
209       if (j < n - 1) {
210         J = global_index(i, j + 1, k, m, n);
211         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
212       }
213       if (j < n - 2) {
214         J = global_index(i, j + 2, k, m, n);
215         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
216       }
217       if (j < n - 3) {
218         J = global_index(i, j + 3, k, m, n);
219         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
220       }
221       v = 12.0;
222       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
223     }
224   }
225   /************ 3D stencils ***************/
226   PetscCall(PetscStrcmp(stencil, "3d7point", &equal));
227   if (equal) { /* 7-point stencil, 3D */
228     PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL));
229     PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL));
230     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
231     for (Ii = Istart; Ii < Iend; Ii++) {
232       v = -1.0;
233       k = Ii / (m * n);
234       j = (Ii - k * m * n) / m;
235       i = (Ii - k * m * n - j * m);
236       if (i > 0) {
237         J = global_index(i - 1, j, k, m, n);
238         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
239       }
240       if (i < m - 1) {
241         J = global_index(i + 1, j, k, m, n);
242         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
243       }
244       if (j > 0) {
245         J = global_index(i, j - 1, k, m, n);
246         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
247       }
248       if (j < n - 1) {
249         J = global_index(i, j + 1, k, m, n);
250         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
251       }
252       if (k > 0) {
253         J = global_index(i, j, k - 1, m, n);
254         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
255       }
256       if (k < o - 1) {
257         J = global_index(i, j, k + 1, m, n);
258         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
259       }
260       v = 6.0;
261       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
262     }
263   }
264   PetscCall(PetscStrcmp(stencil, "3d13point", &equal));
265   if (equal) { /* 13-point stencil, 3D */
266     PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
267     PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
268     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
269     for (Ii = Istart; Ii < Iend; Ii++) {
270       v = -1.0;
271       k = Ii / (m * n);
272       j = (Ii - k * m * n) / m;
273       i = (Ii - k * m * n - j * m);
274       if (i > 0) {
275         J = global_index(i - 1, j, k, m, n);
276         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
277       }
278       if (i > 1) {
279         J = global_index(i - 2, j, k, m, n);
280         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
281       }
282       if (i < m - 1) {
283         J = global_index(i + 1, j, k, m, n);
284         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
285       }
286       if (i < m - 2) {
287         J = global_index(i + 2, j, k, m, n);
288         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
289       }
290       if (j > 0) {
291         J = global_index(i, j - 1, k, m, n);
292         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
293       }
294       if (j > 1) {
295         J = global_index(i, j - 2, k, m, n);
296         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
297       }
298       if (j < n - 1) {
299         J = global_index(i, j + 1, k, m, n);
300         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
301       }
302       if (j < n - 2) {
303         J = global_index(i, j + 2, k, m, n);
304         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
305       }
306       if (k > 0) {
307         J = global_index(i, j, k - 1, m, n);
308         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
309       }
310       if (k > 1) {
311         J = global_index(i, j, k - 2, m, n);
312         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
313       }
314       if (k < o - 1) {
315         J = global_index(i, j, k + 1, m, n);
316         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
317       }
318       if (k < o - 2) {
319         J = global_index(i, j, k + 2, m, n);
320         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
321       }
322       v = 12.0;
323       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
324     }
325   }
326   PetscCall(PetscStrcmp(stencil, "3d19point", &equal));
327   if (equal) { /* 19-point stencil, 3D */
328     PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL));
329     PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL));
330     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
331     for (Ii = Istart; Ii < Iend; Ii++) {
332       v = -1.0;
333       k = Ii / (m * n);
334       j = (Ii - k * m * n) / m;
335       i = (Ii - k * m * n - j * m);
336       /* one hop */
337       if (i > 0) {
338         J = global_index(i - 1, j, k, m, n);
339         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
340       }
341       if (i < m - 1) {
342         J = global_index(i + 1, j, k, m, n);
343         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
344       }
345       if (j > 0) {
346         J = global_index(i, j - 1, k, m, n);
347         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
348       }
349       if (j < n - 1) {
350         J = global_index(i, j + 1, k, m, n);
351         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
352       }
353       if (k > 0) {
354         J = global_index(i, j, k - 1, m, n);
355         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
356       }
357       if (k < o - 1) {
358         J = global_index(i, j, k + 1, m, n);
359         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
360       }
361       /* two hops */
362       if (i > 0 && j > 0) {
363         J = global_index(i - 1, j - 1, k, m, n);
364         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
365       }
366       if (i > 0 && k > 0) {
367         J = global_index(i - 1, j, k - 1, m, n);
368         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
369       }
370       if (i > 0 && j < n - 1) {
371         J = global_index(i - 1, j + 1, k, m, n);
372         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
373       }
374       if (i > 0 && k < o - 1) {
375         J = global_index(i - 1, j, k + 1, m, n);
376         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
377       }
378       if (i < m - 1 && j > 0) {
379         J = global_index(i + 1, j - 1, k, m, n);
380         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
381       }
382       if (i < m - 1 && k > 0) {
383         J = global_index(i + 1, j, k - 1, m, n);
384         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
385       }
386       if (i < m - 1 && j < n - 1) {
387         J = global_index(i + 1, j + 1, k, m, n);
388         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
389       }
390       if (i < m - 1 && k < o - 1) {
391         J = global_index(i + 1, j, k + 1, m, n);
392         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
393       }
394       if (j > 0 && k > 0) {
395         J = global_index(i, j - 1, k - 1, m, n);
396         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
397       }
398       if (j > 0 && k < o - 1) {
399         J = global_index(i, j - 1, k + 1, m, n);
400         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
401       }
402       if (j < n - 1 && k > 0) {
403         J = global_index(i, j + 1, k - 1, m, n);
404         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
405       }
406       if (j < n - 1 && k < o - 1) {
407         J = global_index(i, j + 1, k + 1, m, n);
408         PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
409       }
410       v = 18.0;
411       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
412     }
413   }
414   PetscCall(PetscStrcmp(stencil, "3d27point", &equal));
415   if (equal) { /* 27-point stencil, 3D */
416     PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL));
417     PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL));
418     PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
419     for (Ii = Istart; Ii < Iend; Ii++) {
420       v = -1.0;
421       k = Ii / (m * n);
422       j = (Ii - k * m * n) / m;
423       i = (Ii - k * m * n - j * m);
424       if (k > 0) {
425         if (j > 0) {
426           if (i > 0) {
427             J = global_index(i - 1, j - 1, k - 1, m, n);
428             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
429           }
430           J = global_index(i, j - 1, k - 1, m, n);
431           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
432           if (i < m - 1) {
433             J = global_index(i + 1, j - 1, k - 1, m, n);
434             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
435           }
436         }
437         {
438           if (i > 0) {
439             J = global_index(i - 1, j, k - 1, m, n);
440             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
441           }
442           J = global_index(i, j, k - 1, m, n);
443           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
444           if (i < m - 1) {
445             J = global_index(i + 1, j, k - 1, m, n);
446             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
447           }
448         }
449         if (j < n - 1) {
450           if (i > 0) {
451             J = global_index(i - 1, j + 1, k - 1, m, n);
452             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
453           }
454           J = global_index(i, j + 1, k - 1, m, n);
455           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
456           if (i < m - 1) {
457             J = global_index(i + 1, j + 1, k - 1, m, n);
458             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
459           }
460         }
461       }
462       {
463         if (j > 0) {
464           if (i > 0) {
465             J = global_index(i - 1, j - 1, k, m, n);
466             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
467           }
468           J = global_index(i, j - 1, k, m, n);
469           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
470           if (i < m - 1) {
471             J = global_index(i + 1, j - 1, k, m, n);
472             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
473           }
474         }
475         {
476           if (i > 0) {
477             J = global_index(i - 1, j, k, m, n);
478             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
479           }
480           J = global_index(i, j, k, m, n);
481           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
482           if (i < m - 1) {
483             J = global_index(i + 1, j, k, m, n);
484             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
485           }
486         }
487         if (j < n - 1) {
488           if (i > 0) {
489             J = global_index(i - 1, j + 1, k, m, n);
490             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
491           }
492           J = global_index(i, j + 1, k, m, n);
493           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
494           if (i < m - 1) {
495             J = global_index(i + 1, j + 1, k, m, n);
496             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
497           }
498         }
499       }
500       if (k < o - 1) {
501         if (j > 0) {
502           if (i > 0) {
503             J = global_index(i - 1, j - 1, k + 1, m, n);
504             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
505           }
506           J = global_index(i, j - 1, k + 1, m, n);
507           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
508           if (i < m - 1) {
509             J = global_index(i + 1, j - 1, k + 1, m, n);
510             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
511           }
512         }
513         {
514           if (i > 0) {
515             J = global_index(i - 1, j, k + 1, m, n);
516             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
517           }
518           J = global_index(i, j, k + 1, m, n);
519           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
520           if (i < m - 1) {
521             J = global_index(i + 1, j, k + 1, m, n);
522             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
523           }
524         }
525         if (j < n - 1) {
526           if (i > 0) {
527             J = global_index(i - 1, j + 1, k + 1, m, n);
528             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
529           }
530           J = global_index(i, j + 1, k + 1, m, n);
531           PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
532           if (i < m - 1) {
533             J = global_index(i + 1, j + 1, k + 1, m, n);
534             PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
535           }
536         }
537       }
538       v = 26.0;
539       PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
540     }
541   }
542   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
543   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
544 
545   /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
546   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
547 
548   PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage));
549 
550   /* Test C = A*B */
551   PetscCall(PetscLogStagePush(fullMatMatMultStage));
552   PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));
553 
554   /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C)  */
555   PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP));
556   PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy));
557   PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &PtAP_squared));
558 
559   PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
560   PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD));
561 
562   PetscCall(MatDestroy(&PtAP_squared));
563   PetscCall(MatDestroy(&PtAP_copy));
564   PetscCall(MatDestroy(&PtAP));
565   PetscCall(MatDestroy(&C));
566   PetscCall(MatDestroy(&B));
567   PetscCall(MatDestroy(&A));
568   PetscCall(PetscFinalize());
569   return 0;
570 }
571 
572 /*TEST
573 
574  test:
575       suffix: 1
576       nsize: 1
577       args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
578 
579  test:
580        suffix: 2
581        nsize: 1
582        args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
583 
584  test:
585       suffix: 3
586       nsize: 4
587       args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
588 
589 TEST*/
590