1 static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
2
3 #include <petscmat.h>
4
5 /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
global_index(PetscInt i,PetscInt j,PetscInt k,PetscInt m,PetscInt n)6 PetscInt global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
7 {
8 return i + j * m + k * m * n;
9 }
10
main(int argc,char ** argv)11 int main(int argc, char **argv)
12 {
13 Mat A, B, C, PtAP, PtAP_copy, PtAP_squared;
14 PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
15 PetscScalar v;
16 PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE;
17 char stencil[PETSC_MAX_PATH_LEN];
18 PetscLogStage fullMatMatMultStage;
19
20 PetscFunctionBeginUser;
21 PetscCall(PetscInitialize(&argc, &argv, NULL, help));
22 PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
23 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
24 PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL));
25 PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view));
26 PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL));
27
28 /* Create a aij matrix A */
29 M = N = m * n * o;
30 PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
31 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
32 PetscCall(MatSetType(A, MATAIJ));
33 PetscCall(MatSetFromOptions(A));
34
35 /* Consistency checks */
36 PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!");
37
38 /************ 2D stencils ***************/
39 PetscCall(PetscStrcmp(stencil, "2d5point", &equal));
40 if (equal) { /* 5-point stencil, 2D */
41 PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
42 PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
43 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
44 for (Ii = Istart; Ii < Iend; Ii++) {
45 v = -1.0;
46 k = Ii / (m * n);
47 j = (Ii - k * m * n) / m;
48 i = (Ii - k * m * n - j * m);
49 if (i > 0) {
50 J = global_index(i - 1, j, k, m, n);
51 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
52 }
53 if (i < m - 1) {
54 J = global_index(i + 1, j, k, m, n);
55 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
56 }
57 if (j > 0) {
58 J = global_index(i, j - 1, k, m, n);
59 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
60 }
61 if (j < n - 1) {
62 J = global_index(i, j + 1, k, m, n);
63 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
64 }
65 v = 4.0;
66 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
67 }
68 }
69 PetscCall(PetscStrcmp(stencil, "2d9point", &equal));
70 if (equal) { /* 9-point stencil, 2D */
71 PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
72 PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
73 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
74 for (Ii = Istart; Ii < Iend; Ii++) {
75 v = -1.0;
76 k = Ii / (m * n);
77 j = (Ii - k * m * n) / m;
78 i = (Ii - k * m * n - j * m);
79 if (i > 0) {
80 J = global_index(i - 1, j, k, m, n);
81 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
82 }
83 if (i > 0 && j > 0) {
84 J = global_index(i - 1, j - 1, k, m, n);
85 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
86 }
87 if (j > 0) {
88 J = global_index(i, j - 1, k, m, n);
89 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
90 }
91 if (i < m - 1 && j > 0) {
92 J = global_index(i + 1, j - 1, k, m, n);
93 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
94 }
95 if (i < m - 1) {
96 J = global_index(i + 1, j, k, m, n);
97 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
98 }
99 if (i < m - 1 && j < n - 1) {
100 J = global_index(i + 1, j + 1, k, m, n);
101 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
102 }
103 if (j < n - 1) {
104 J = global_index(i, j + 1, k, m, n);
105 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
106 }
107 if (i > 0 && j < n - 1) {
108 J = global_index(i - 1, j + 1, k, m, n);
109 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
110 }
111 v = 8.0;
112 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
113 }
114 }
115 PetscCall(PetscStrcmp(stencil, "2d9point2", &equal));
116 if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
117 PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
118 PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
119 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
120 for (Ii = Istart; Ii < Iend; Ii++) {
121 v = -1.0;
122 k = Ii / (m * n);
123 j = (Ii - k * m * n) / m;
124 i = (Ii - k * m * n - j * m);
125 if (i > 0) {
126 J = global_index(i - 1, j, k, m, n);
127 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
128 }
129 if (i > 1) {
130 J = global_index(i - 2, j, k, m, n);
131 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
132 }
133 if (i < m - 1) {
134 J = global_index(i + 1, j, k, m, n);
135 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
136 }
137 if (i < m - 2) {
138 J = global_index(i + 2, j, k, m, n);
139 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
140 }
141 if (j > 0) {
142 J = global_index(i, j - 1, k, m, n);
143 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
144 }
145 if (j > 1) {
146 J = global_index(i, j - 2, k, m, n);
147 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
148 }
149 if (j < n - 1) {
150 J = global_index(i, j + 1, k, m, n);
151 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
152 }
153 if (j < n - 2) {
154 J = global_index(i, j + 2, k, m, n);
155 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
156 }
157 v = 8.0;
158 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
159 }
160 }
161 PetscCall(PetscStrcmp(stencil, "2d13point", &equal));
162 if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
163 PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
164 PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
165 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
166 for (Ii = Istart; Ii < Iend; Ii++) {
167 v = -1.0;
168 k = Ii / (m * n);
169 j = (Ii - k * m * n) / m;
170 i = (Ii - k * m * n - j * m);
171 if (i > 0) {
172 J = global_index(i - 1, j, k, m, n);
173 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
174 }
175 if (i > 1) {
176 J = global_index(i - 2, j, k, m, n);
177 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
178 }
179 if (i > 2) {
180 J = global_index(i - 3, j, k, m, n);
181 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
182 }
183 if (i < m - 1) {
184 J = global_index(i + 1, j, k, m, n);
185 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
186 }
187 if (i < m - 2) {
188 J = global_index(i + 2, j, k, m, n);
189 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
190 }
191 if (i < m - 3) {
192 J = global_index(i + 3, j, k, m, n);
193 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
194 }
195 if (j > 0) {
196 J = global_index(i, j - 1, k, m, n);
197 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
198 }
199 if (j > 1) {
200 J = global_index(i, j - 2, k, m, n);
201 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
202 }
203 if (j > 2) {
204 J = global_index(i, j - 3, k, m, n);
205 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
206 }
207 if (j < n - 1) {
208 J = global_index(i, j + 1, k, m, n);
209 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
210 }
211 if (j < n - 2) {
212 J = global_index(i, j + 2, k, m, n);
213 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
214 }
215 if (j < n - 3) {
216 J = global_index(i, j + 3, k, m, n);
217 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
218 }
219 v = 12.0;
220 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
221 }
222 }
223 /************ 3D stencils ***************/
224 PetscCall(PetscStrcmp(stencil, "3d7point", &equal));
225 if (equal) { /* 7-point stencil, 3D */
226 PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL));
227 PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL));
228 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
229 for (Ii = Istart; Ii < Iend; Ii++) {
230 v = -1.0;
231 k = Ii / (m * n);
232 j = (Ii - k * m * n) / m;
233 i = (Ii - k * m * n - j * m);
234 if (i > 0) {
235 J = global_index(i - 1, j, k, m, n);
236 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
237 }
238 if (i < m - 1) {
239 J = global_index(i + 1, j, k, m, n);
240 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
241 }
242 if (j > 0) {
243 J = global_index(i, j - 1, k, m, n);
244 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
245 }
246 if (j < n - 1) {
247 J = global_index(i, j + 1, k, m, n);
248 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
249 }
250 if (k > 0) {
251 J = global_index(i, j, k - 1, m, n);
252 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
253 }
254 if (k < o - 1) {
255 J = global_index(i, j, k + 1, m, n);
256 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
257 }
258 v = 6.0;
259 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
260 }
261 }
262 PetscCall(PetscStrcmp(stencil, "3d13point", &equal));
263 if (equal) { /* 13-point stencil, 3D */
264 PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
265 PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
266 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
267 for (Ii = Istart; Ii < Iend; Ii++) {
268 v = -1.0;
269 k = Ii / (m * n);
270 j = (Ii - k * m * n) / m;
271 i = (Ii - k * m * n - j * m);
272 if (i > 0) {
273 J = global_index(i - 1, j, k, m, n);
274 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
275 }
276 if (i > 1) {
277 J = global_index(i - 2, j, k, m, n);
278 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
279 }
280 if (i < m - 1) {
281 J = global_index(i + 1, j, k, m, n);
282 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
283 }
284 if (i < m - 2) {
285 J = global_index(i + 2, j, k, m, n);
286 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
287 }
288 if (j > 0) {
289 J = global_index(i, j - 1, k, m, n);
290 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
291 }
292 if (j > 1) {
293 J = global_index(i, j - 2, k, m, n);
294 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
295 }
296 if (j < n - 1) {
297 J = global_index(i, j + 1, k, m, n);
298 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
299 }
300 if (j < n - 2) {
301 J = global_index(i, j + 2, k, m, n);
302 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
303 }
304 if (k > 0) {
305 J = global_index(i, j, k - 1, m, n);
306 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
307 }
308 if (k > 1) {
309 J = global_index(i, j, k - 2, m, n);
310 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
311 }
312 if (k < o - 1) {
313 J = global_index(i, j, k + 1, m, n);
314 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
315 }
316 if (k < o - 2) {
317 J = global_index(i, j, k + 2, m, n);
318 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
319 }
320 v = 12.0;
321 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
322 }
323 }
324 PetscCall(PetscStrcmp(stencil, "3d19point", &equal));
325 if (equal) { /* 19-point stencil, 3D */
326 PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL));
327 PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL));
328 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
329 for (Ii = Istart; Ii < Iend; Ii++) {
330 v = -1.0;
331 k = Ii / (m * n);
332 j = (Ii - k * m * n) / m;
333 i = (Ii - k * m * n - j * m);
334 /* one hop */
335 if (i > 0) {
336 J = global_index(i - 1, j, k, m, n);
337 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
338 }
339 if (i < m - 1) {
340 J = global_index(i + 1, j, k, m, n);
341 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
342 }
343 if (j > 0) {
344 J = global_index(i, j - 1, k, m, n);
345 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
346 }
347 if (j < n - 1) {
348 J = global_index(i, j + 1, k, m, n);
349 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
350 }
351 if (k > 0) {
352 J = global_index(i, j, k - 1, m, n);
353 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
354 }
355 if (k < o - 1) {
356 J = global_index(i, j, k + 1, m, n);
357 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
358 }
359 /* two hops */
360 if (i > 0 && j > 0) {
361 J = global_index(i - 1, j - 1, k, m, n);
362 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
363 }
364 if (i > 0 && k > 0) {
365 J = global_index(i - 1, j, k - 1, m, n);
366 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
367 }
368 if (i > 0 && j < n - 1) {
369 J = global_index(i - 1, j + 1, k, m, n);
370 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
371 }
372 if (i > 0 && k < o - 1) {
373 J = global_index(i - 1, j, k + 1, m, n);
374 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
375 }
376 if (i < m - 1 && j > 0) {
377 J = global_index(i + 1, j - 1, k, m, n);
378 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
379 }
380 if (i < m - 1 && k > 0) {
381 J = global_index(i + 1, j, k - 1, m, n);
382 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
383 }
384 if (i < m - 1 && j < n - 1) {
385 J = global_index(i + 1, j + 1, k, m, n);
386 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
387 }
388 if (i < m - 1 && k < o - 1) {
389 J = global_index(i + 1, j, k + 1, m, n);
390 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
391 }
392 if (j > 0 && k > 0) {
393 J = global_index(i, j - 1, k - 1, m, n);
394 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
395 }
396 if (j > 0 && k < o - 1) {
397 J = global_index(i, j - 1, k + 1, m, n);
398 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
399 }
400 if (j < n - 1 && k > 0) {
401 J = global_index(i, j + 1, k - 1, m, n);
402 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
403 }
404 if (j < n - 1 && k < o - 1) {
405 J = global_index(i, j + 1, k + 1, m, n);
406 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
407 }
408 v = 18.0;
409 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
410 }
411 }
412 PetscCall(PetscStrcmp(stencil, "3d27point", &equal));
413 if (equal) { /* 27-point stencil, 3D */
414 PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL));
415 PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL));
416 PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
417 for (Ii = Istart; Ii < Iend; Ii++) {
418 v = -1.0;
419 k = Ii / (m * n);
420 j = (Ii - k * m * n) / m;
421 i = (Ii - k * m * n - j * m);
422 if (k > 0) {
423 if (j > 0) {
424 if (i > 0) {
425 J = global_index(i - 1, j - 1, k - 1, m, n);
426 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
427 }
428 J = global_index(i, j - 1, k - 1, m, n);
429 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
430 if (i < m - 1) {
431 J = global_index(i + 1, j - 1, k - 1, m, n);
432 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
433 }
434 }
435 {
436 if (i > 0) {
437 J = global_index(i - 1, j, k - 1, m, n);
438 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
439 }
440 J = global_index(i, j, k - 1, m, n);
441 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
442 if (i < m - 1) {
443 J = global_index(i + 1, j, k - 1, m, n);
444 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
445 }
446 }
447 if (j < n - 1) {
448 if (i > 0) {
449 J = global_index(i - 1, j + 1, k - 1, m, n);
450 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
451 }
452 J = global_index(i, j + 1, k - 1, m, n);
453 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
454 if (i < m - 1) {
455 J = global_index(i + 1, j + 1, k - 1, m, n);
456 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
457 }
458 }
459 }
460 {
461 if (j > 0) {
462 if (i > 0) {
463 J = global_index(i - 1, j - 1, k, m, n);
464 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
465 }
466 J = global_index(i, j - 1, k, m, n);
467 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
468 if (i < m - 1) {
469 J = global_index(i + 1, j - 1, k, m, n);
470 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
471 }
472 }
473 {
474 if (i > 0) {
475 J = global_index(i - 1, j, k, m, n);
476 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
477 }
478 J = global_index(i, j, k, m, n);
479 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
480 if (i < m - 1) {
481 J = global_index(i + 1, j, k, m, n);
482 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
483 }
484 }
485 if (j < n - 1) {
486 if (i > 0) {
487 J = global_index(i - 1, j + 1, k, m, n);
488 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
489 }
490 J = global_index(i, j + 1, k, m, n);
491 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
492 if (i < m - 1) {
493 J = global_index(i + 1, j + 1, k, m, n);
494 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
495 }
496 }
497 }
498 if (k < o - 1) {
499 if (j > 0) {
500 if (i > 0) {
501 J = global_index(i - 1, j - 1, k + 1, m, n);
502 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
503 }
504 J = global_index(i, j - 1, k + 1, m, n);
505 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
506 if (i < m - 1) {
507 J = global_index(i + 1, j - 1, k + 1, m, n);
508 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
509 }
510 }
511 {
512 if (i > 0) {
513 J = global_index(i - 1, j, k + 1, m, n);
514 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
515 }
516 J = global_index(i, j, k + 1, m, n);
517 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
518 if (i < m - 1) {
519 J = global_index(i + 1, j, k + 1, m, n);
520 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
521 }
522 }
523 if (j < n - 1) {
524 if (i > 0) {
525 J = global_index(i - 1, j + 1, k + 1, m, n);
526 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
527 }
528 J = global_index(i, j + 1, k + 1, m, n);
529 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
530 if (i < m - 1) {
531 J = global_index(i + 1, j + 1, k + 1, m, n);
532 PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
533 }
534 }
535 }
536 v = 26.0;
537 PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
538 }
539 }
540 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
541 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
542
543 /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
544 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
545
546 PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage));
547
548 /* Test C = A*B */
549 PetscCall(PetscLogStagePush(fullMatMatMultStage));
550 PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &C));
551
552 /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */
553 PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP));
554 PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy));
555 PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP_squared));
556
557 PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
558 PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD));
559
560 PetscCall(MatDestroy(&PtAP_squared));
561 PetscCall(MatDestroy(&PtAP_copy));
562 PetscCall(MatDestroy(&PtAP));
563 PetscCall(MatDestroy(&C));
564 PetscCall(MatDestroy(&B));
565 PetscCall(MatDestroy(&A));
566 PetscCall(PetscFinalize());
567 return 0;
568 }
569
570 /*TEST
571
572 test:
573 suffix: 1
574 nsize: 1
575 args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
576
577 test:
578 suffix: 2
579 nsize: 1
580 args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
581
582 test:
583 suffix: 3
584 nsize: 4
585 args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
586
587 TEST*/
588