1c4762a1bSJed Brown static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n";
2c4762a1bSJed Brown
3c4762a1bSJed Brown #include <petscmat.h>
4c4762a1bSJed Brown
5c4762a1bSJed Brown /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */
global_index(PetscInt i,PetscInt j,PetscInt k,PetscInt m,PetscInt n)66497c311SBarry Smith PetscInt global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n)
7d71ae5a4SJacob Faibussowitsch {
89371c9d4SSatish Balay return i + j * m + k * m * n;
99371c9d4SSatish Balay }
10c4762a1bSJed Brown
main(int argc,char ** argv)11d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv)
12d71ae5a4SJacob Faibussowitsch {
13c4762a1bSJed Brown Mat A, B, C, PtAP, PtAP_copy, PtAP_squared;
14c4762a1bSJed Brown PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1;
15c4762a1bSJed Brown PetscScalar v;
16c4762a1bSJed Brown PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE;
17c4762a1bSJed Brown char stencil[PETSC_MAX_PATH_LEN];
18c4762a1bSJed Brown PetscLogStage fullMatMatMultStage;
19c4762a1bSJed Brown
20327415f7SBarry Smith PetscFunctionBeginUser;
21*c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help));
229566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
239566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
249566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL));
259566063dSJacob Faibussowitsch PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view));
269566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL));
27c4762a1bSJed Brown
28c4762a1bSJed Brown /* Create a aij matrix A */
29c4762a1bSJed Brown M = N = m * n * o;
309566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
319566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
329566063dSJacob Faibussowitsch PetscCall(MatSetType(A, MATAIJ));
339566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(A));
34c4762a1bSJed Brown
35c4762a1bSJed Brown /* Consistency checks */
36bc30f867SBarry Smith PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!");
37c4762a1bSJed Brown
38c4762a1bSJed Brown /************ 2D stencils ***************/
399566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d5point", &equal));
40c4762a1bSJed Brown if (equal) { /* 5-point stencil, 2D */
419566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
429566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
439566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
44c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) {
459371c9d4SSatish Balay v = -1.0;
469371c9d4SSatish Balay k = Ii / (m * n);
479371c9d4SSatish Balay j = (Ii - k * m * n) / m;
489371c9d4SSatish Balay i = (Ii - k * m * n - j * m);
499371c9d4SSatish Balay if (i > 0) {
509371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n);
519371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
529371c9d4SSatish Balay }
539371c9d4SSatish Balay if (i < m - 1) {
549371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n);
559371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
569371c9d4SSatish Balay }
579371c9d4SSatish Balay if (j > 0) {
589371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n);
599371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
609371c9d4SSatish Balay }
619371c9d4SSatish Balay if (j < n - 1) {
629371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n);
639371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
649371c9d4SSatish Balay }
659371c9d4SSatish Balay v = 4.0;
669371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
67c4762a1bSJed Brown }
68c4762a1bSJed Brown }
699566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point", &equal));
70c4762a1bSJed Brown if (equal) { /* 9-point stencil, 2D */
719566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
729566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
739566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
74c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) {
759371c9d4SSatish Balay v = -1.0;
769371c9d4SSatish Balay k = Ii / (m * n);
779371c9d4SSatish Balay j = (Ii - k * m * n) / m;
789371c9d4SSatish Balay i = (Ii - k * m * n - j * m);
799371c9d4SSatish Balay if (i > 0) {
809371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n);
819371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
829371c9d4SSatish Balay }
839371c9d4SSatish Balay if (i > 0 && j > 0) {
849371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n);
859371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
869371c9d4SSatish Balay }
879371c9d4SSatish Balay if (j > 0) {
889371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n);
899371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
909371c9d4SSatish Balay }
919371c9d4SSatish Balay if (i < m - 1 && j > 0) {
929371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n);
939371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
949371c9d4SSatish Balay }
959371c9d4SSatish Balay if (i < m - 1) {
969371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n);
979371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
989371c9d4SSatish Balay }
999371c9d4SSatish Balay if (i < m - 1 && j < n - 1) {
1009371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n);
1019371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1029371c9d4SSatish Balay }
1039371c9d4SSatish Balay if (j < n - 1) {
1049371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n);
1059371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1069371c9d4SSatish Balay }
1079371c9d4SSatish Balay if (i > 0 && j < n - 1) {
1089371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n);
1099371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1109371c9d4SSatish Balay }
1119371c9d4SSatish Balay v = 8.0;
1129371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
113c4762a1bSJed Brown }
114c4762a1bSJed Brown }
1159566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d9point2", &equal));
116c4762a1bSJed Brown if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */
1179566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL));
1189566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL));
1199566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
120c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) {
1219371c9d4SSatish Balay v = -1.0;
1229371c9d4SSatish Balay k = Ii / (m * n);
1239371c9d4SSatish Balay j = (Ii - k * m * n) / m;
1249371c9d4SSatish Balay i = (Ii - k * m * n - j * m);
1259371c9d4SSatish Balay if (i > 0) {
1269371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n);
1279371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1289371c9d4SSatish Balay }
1299371c9d4SSatish Balay if (i > 1) {
1309371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n);
1319371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1329371c9d4SSatish Balay }
1339371c9d4SSatish Balay if (i < m - 1) {
1349371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n);
1359371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1369371c9d4SSatish Balay }
1379371c9d4SSatish Balay if (i < m - 2) {
1389371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n);
1399371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1409371c9d4SSatish Balay }
1419371c9d4SSatish Balay if (j > 0) {
1429371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n);
1439371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1449371c9d4SSatish Balay }
1459371c9d4SSatish Balay if (j > 1) {
1469371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n);
1479371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1489371c9d4SSatish Balay }
1499371c9d4SSatish Balay if (j < n - 1) {
1509371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n);
1519371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1529371c9d4SSatish Balay }
1539371c9d4SSatish Balay if (j < n - 2) {
1549371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n);
1559371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1569371c9d4SSatish Balay }
1579371c9d4SSatish Balay v = 8.0;
1589371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
159c4762a1bSJed Brown }
160c4762a1bSJed Brown }
1619566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "2d13point", &equal));
162c4762a1bSJed Brown if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */
1639566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
1649566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
1659566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
166c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) {
1679371c9d4SSatish Balay v = -1.0;
1689371c9d4SSatish Balay k = Ii / (m * n);
1699371c9d4SSatish Balay j = (Ii - k * m * n) / m;
1709371c9d4SSatish Balay i = (Ii - k * m * n - j * m);
1719371c9d4SSatish Balay if (i > 0) {
1729371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n);
1739371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1749371c9d4SSatish Balay }
1759371c9d4SSatish Balay if (i > 1) {
1769371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n);
1779371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1789371c9d4SSatish Balay }
1799371c9d4SSatish Balay if (i > 2) {
1809371c9d4SSatish Balay J = global_index(i - 3, j, k, m, n);
1819371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1829371c9d4SSatish Balay }
1839371c9d4SSatish Balay if (i < m - 1) {
1849371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n);
1859371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1869371c9d4SSatish Balay }
1879371c9d4SSatish Balay if (i < m - 2) {
1889371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n);
1899371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1909371c9d4SSatish Balay }
1919371c9d4SSatish Balay if (i < m - 3) {
1929371c9d4SSatish Balay J = global_index(i + 3, j, k, m, n);
1939371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1949371c9d4SSatish Balay }
1959371c9d4SSatish Balay if (j > 0) {
1969371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n);
1979371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
1989371c9d4SSatish Balay }
1999371c9d4SSatish Balay if (j > 1) {
2009371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n);
2019371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2029371c9d4SSatish Balay }
2039371c9d4SSatish Balay if (j > 2) {
2049371c9d4SSatish Balay J = global_index(i, j - 3, k, m, n);
2059371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2069371c9d4SSatish Balay }
2079371c9d4SSatish Balay if (j < n - 1) {
2089371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n);
2099371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2109371c9d4SSatish Balay }
2119371c9d4SSatish Balay if (j < n - 2) {
2129371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n);
2139371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2149371c9d4SSatish Balay }
2159371c9d4SSatish Balay if (j < n - 3) {
2169371c9d4SSatish Balay J = global_index(i, j + 3, k, m, n);
2179371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2189371c9d4SSatish Balay }
2199371c9d4SSatish Balay v = 12.0;
2209371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
221c4762a1bSJed Brown }
222c4762a1bSJed Brown }
223c4762a1bSJed Brown /************ 3D stencils ***************/
2249566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d7point", &equal));
225c4762a1bSJed Brown if (equal) { /* 7-point stencil, 3D */
2269566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL));
2279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL));
2289566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
229c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) {
2309371c9d4SSatish Balay v = -1.0;
2319371c9d4SSatish Balay k = Ii / (m * n);
2329371c9d4SSatish Balay j = (Ii - k * m * n) / m;
2339371c9d4SSatish Balay i = (Ii - k * m * n - j * m);
2349371c9d4SSatish Balay if (i > 0) {
2359371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n);
2369371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2379371c9d4SSatish Balay }
2389371c9d4SSatish Balay if (i < m - 1) {
2399371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n);
2409371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2419371c9d4SSatish Balay }
2429371c9d4SSatish Balay if (j > 0) {
2439371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n);
2449371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2459371c9d4SSatish Balay }
2469371c9d4SSatish Balay if (j < n - 1) {
2479371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n);
2489371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2499371c9d4SSatish Balay }
2509371c9d4SSatish Balay if (k > 0) {
2519371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n);
2529371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2539371c9d4SSatish Balay }
2549371c9d4SSatish Balay if (k < o - 1) {
2559371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n);
2569371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2579371c9d4SSatish Balay }
2589371c9d4SSatish Balay v = 6.0;
2599371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
260c4762a1bSJed Brown }
261c4762a1bSJed Brown }
2629566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d13point", &equal));
263c4762a1bSJed Brown if (equal) { /* 13-point stencil, 3D */
2649566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL));
2659566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL));
2669566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
267c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) {
2689371c9d4SSatish Balay v = -1.0;
2699371c9d4SSatish Balay k = Ii / (m * n);
2709371c9d4SSatish Balay j = (Ii - k * m * n) / m;
2719371c9d4SSatish Balay i = (Ii - k * m * n - j * m);
2729371c9d4SSatish Balay if (i > 0) {
2739371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n);
2749371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2759371c9d4SSatish Balay }
2769371c9d4SSatish Balay if (i > 1) {
2779371c9d4SSatish Balay J = global_index(i - 2, j, k, m, n);
2789371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2799371c9d4SSatish Balay }
2809371c9d4SSatish Balay if (i < m - 1) {
2819371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n);
2829371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2839371c9d4SSatish Balay }
2849371c9d4SSatish Balay if (i < m - 2) {
2859371c9d4SSatish Balay J = global_index(i + 2, j, k, m, n);
2869371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2879371c9d4SSatish Balay }
2889371c9d4SSatish Balay if (j > 0) {
2899371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n);
2909371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2919371c9d4SSatish Balay }
2929371c9d4SSatish Balay if (j > 1) {
2939371c9d4SSatish Balay J = global_index(i, j - 2, k, m, n);
2949371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2959371c9d4SSatish Balay }
2969371c9d4SSatish Balay if (j < n - 1) {
2979371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n);
2989371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
2999371c9d4SSatish Balay }
3009371c9d4SSatish Balay if (j < n - 2) {
3019371c9d4SSatish Balay J = global_index(i, j + 2, k, m, n);
3029371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3039371c9d4SSatish Balay }
3049371c9d4SSatish Balay if (k > 0) {
3059371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n);
3069371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3079371c9d4SSatish Balay }
3089371c9d4SSatish Balay if (k > 1) {
3099371c9d4SSatish Balay J = global_index(i, j, k - 2, m, n);
3109371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3119371c9d4SSatish Balay }
3129371c9d4SSatish Balay if (k < o - 1) {
3139371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n);
3149371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3159371c9d4SSatish Balay }
3169371c9d4SSatish Balay if (k < o - 2) {
3179371c9d4SSatish Balay J = global_index(i, j, k + 2, m, n);
3189371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3199371c9d4SSatish Balay }
3209371c9d4SSatish Balay v = 12.0;
3219371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
322c4762a1bSJed Brown }
323c4762a1bSJed Brown }
3249566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d19point", &equal));
325c4762a1bSJed Brown if (equal) { /* 19-point stencil, 3D */
3269566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL));
3279566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL));
3289566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
329c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) {
3309371c9d4SSatish Balay v = -1.0;
3319371c9d4SSatish Balay k = Ii / (m * n);
3329371c9d4SSatish Balay j = (Ii - k * m * n) / m;
3339371c9d4SSatish Balay i = (Ii - k * m * n - j * m);
334c4762a1bSJed Brown /* one hop */
3359371c9d4SSatish Balay if (i > 0) {
3369371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n);
3379371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3389371c9d4SSatish Balay }
3399371c9d4SSatish Balay if (i < m - 1) {
3409371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n);
3419371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3429371c9d4SSatish Balay }
3439371c9d4SSatish Balay if (j > 0) {
3449371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n);
3459371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3469371c9d4SSatish Balay }
3479371c9d4SSatish Balay if (j < n - 1) {
3489371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n);
3499371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3509371c9d4SSatish Balay }
3519371c9d4SSatish Balay if (k > 0) {
3529371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n);
3539371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3549371c9d4SSatish Balay }
3559371c9d4SSatish Balay if (k < o - 1) {
3569371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n);
3579371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3589371c9d4SSatish Balay }
359c4762a1bSJed Brown /* two hops */
3609371c9d4SSatish Balay if (i > 0 && j > 0) {
3619371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n);
3629371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3639371c9d4SSatish Balay }
3649371c9d4SSatish Balay if (i > 0 && k > 0) {
3659371c9d4SSatish Balay J = global_index(i - 1, j, k - 1, m, n);
3669371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3679371c9d4SSatish Balay }
3689371c9d4SSatish Balay if (i > 0 && j < n - 1) {
3699371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n);
3709371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3719371c9d4SSatish Balay }
3729371c9d4SSatish Balay if (i > 0 && k < o - 1) {
3739371c9d4SSatish Balay J = global_index(i - 1, j, k + 1, m, n);
3749371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3759371c9d4SSatish Balay }
3769371c9d4SSatish Balay if (i < m - 1 && j > 0) {
3779371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n);
3789371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3799371c9d4SSatish Balay }
3809371c9d4SSatish Balay if (i < m - 1 && k > 0) {
3819371c9d4SSatish Balay J = global_index(i + 1, j, k - 1, m, n);
3829371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3839371c9d4SSatish Balay }
3849371c9d4SSatish Balay if (i < m - 1 && j < n - 1) {
3859371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n);
3869371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3879371c9d4SSatish Balay }
3889371c9d4SSatish Balay if (i < m - 1 && k < o - 1) {
3899371c9d4SSatish Balay J = global_index(i + 1, j, k + 1, m, n);
3909371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3919371c9d4SSatish Balay }
3929371c9d4SSatish Balay if (j > 0 && k > 0) {
3939371c9d4SSatish Balay J = global_index(i, j - 1, k - 1, m, n);
3949371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3959371c9d4SSatish Balay }
3969371c9d4SSatish Balay if (j > 0 && k < o - 1) {
3979371c9d4SSatish Balay J = global_index(i, j - 1, k + 1, m, n);
3989371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
3999371c9d4SSatish Balay }
4009371c9d4SSatish Balay if (j < n - 1 && k > 0) {
4019371c9d4SSatish Balay J = global_index(i, j + 1, k - 1, m, n);
4029371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4039371c9d4SSatish Balay }
4049371c9d4SSatish Balay if (j < n - 1 && k < o - 1) {
4059371c9d4SSatish Balay J = global_index(i, j + 1, k + 1, m, n);
4069371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4079371c9d4SSatish Balay }
4089371c9d4SSatish Balay v = 18.0;
4099371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
410c4762a1bSJed Brown }
411c4762a1bSJed Brown }
4129566063dSJacob Faibussowitsch PetscCall(PetscStrcmp(stencil, "3d27point", &equal));
413c4762a1bSJed Brown if (equal) { /* 27-point stencil, 3D */
4149566063dSJacob Faibussowitsch PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL));
4159566063dSJacob Faibussowitsch PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL));
4169566063dSJacob Faibussowitsch PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
417c4762a1bSJed Brown for (Ii = Istart; Ii < Iend; Ii++) {
4189371c9d4SSatish Balay v = -1.0;
4199371c9d4SSatish Balay k = Ii / (m * n);
4209371c9d4SSatish Balay j = (Ii - k * m * n) / m;
4219371c9d4SSatish Balay i = (Ii - k * m * n - j * m);
422c4762a1bSJed Brown if (k > 0) {
423c4762a1bSJed Brown if (j > 0) {
4249371c9d4SSatish Balay if (i > 0) {
4259371c9d4SSatish Balay J = global_index(i - 1, j - 1, k - 1, m, n);
4269371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4279371c9d4SSatish Balay }
4289371c9d4SSatish Balay J = global_index(i, j - 1, k - 1, m, n);
4299371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4309371c9d4SSatish Balay if (i < m - 1) {
4319371c9d4SSatish Balay J = global_index(i + 1, j - 1, k - 1, m, n);
4329371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4339371c9d4SSatish Balay }
434c4762a1bSJed Brown }
435c4762a1bSJed Brown {
4369371c9d4SSatish Balay if (i > 0) {
4379371c9d4SSatish Balay J = global_index(i - 1, j, k - 1, m, n);
4389371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4399371c9d4SSatish Balay }
4409371c9d4SSatish Balay J = global_index(i, j, k - 1, m, n);
4419371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4429371c9d4SSatish Balay if (i < m - 1) {
4439371c9d4SSatish Balay J = global_index(i + 1, j, k - 1, m, n);
4449371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4459371c9d4SSatish Balay }
446c4762a1bSJed Brown }
447c4762a1bSJed Brown if (j < n - 1) {
4489371c9d4SSatish Balay if (i > 0) {
4499371c9d4SSatish Balay J = global_index(i - 1, j + 1, k - 1, m, n);
4509371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4519371c9d4SSatish Balay }
4529371c9d4SSatish Balay J = global_index(i, j + 1, k - 1, m, n);
4539371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4549371c9d4SSatish Balay if (i < m - 1) {
4559371c9d4SSatish Balay J = global_index(i + 1, j + 1, k - 1, m, n);
4569371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4579371c9d4SSatish Balay }
458c4762a1bSJed Brown }
459c4762a1bSJed Brown }
460c4762a1bSJed Brown {
461c4762a1bSJed Brown if (j > 0) {
4629371c9d4SSatish Balay if (i > 0) {
4639371c9d4SSatish Balay J = global_index(i - 1, j - 1, k, m, n);
4649371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4659371c9d4SSatish Balay }
4669371c9d4SSatish Balay J = global_index(i, j - 1, k, m, n);
4679371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4689371c9d4SSatish Balay if (i < m - 1) {
4699371c9d4SSatish Balay J = global_index(i + 1, j - 1, k, m, n);
4709371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4719371c9d4SSatish Balay }
472c4762a1bSJed Brown }
473c4762a1bSJed Brown {
4749371c9d4SSatish Balay if (i > 0) {
4759371c9d4SSatish Balay J = global_index(i - 1, j, k, m, n);
4769371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4779371c9d4SSatish Balay }
4789371c9d4SSatish Balay J = global_index(i, j, k, m, n);
4799371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4809371c9d4SSatish Balay if (i < m - 1) {
4819371c9d4SSatish Balay J = global_index(i + 1, j, k, m, n);
4829371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4839371c9d4SSatish Balay }
484c4762a1bSJed Brown }
485c4762a1bSJed Brown if (j < n - 1) {
4869371c9d4SSatish Balay if (i > 0) {
4879371c9d4SSatish Balay J = global_index(i - 1, j + 1, k, m, n);
4889371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4899371c9d4SSatish Balay }
4909371c9d4SSatish Balay J = global_index(i, j + 1, k, m, n);
4919371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4929371c9d4SSatish Balay if (i < m - 1) {
4939371c9d4SSatish Balay J = global_index(i + 1, j + 1, k, m, n);
4949371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
4959371c9d4SSatish Balay }
496c4762a1bSJed Brown }
497c4762a1bSJed Brown }
498c4762a1bSJed Brown if (k < o - 1) {
499c4762a1bSJed Brown if (j > 0) {
5009371c9d4SSatish Balay if (i > 0) {
5019371c9d4SSatish Balay J = global_index(i - 1, j - 1, k + 1, m, n);
5029371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5039371c9d4SSatish Balay }
5049371c9d4SSatish Balay J = global_index(i, j - 1, k + 1, m, n);
5059371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5069371c9d4SSatish Balay if (i < m - 1) {
5079371c9d4SSatish Balay J = global_index(i + 1, j - 1, k + 1, m, n);
5089371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5099371c9d4SSatish Balay }
510c4762a1bSJed Brown }
511c4762a1bSJed Brown {
5129371c9d4SSatish Balay if (i > 0) {
5139371c9d4SSatish Balay J = global_index(i - 1, j, k + 1, m, n);
5149371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5159371c9d4SSatish Balay }
5169371c9d4SSatish Balay J = global_index(i, j, k + 1, m, n);
5179371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5189371c9d4SSatish Balay if (i < m - 1) {
5199371c9d4SSatish Balay J = global_index(i + 1, j, k + 1, m, n);
5209371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5219371c9d4SSatish Balay }
522c4762a1bSJed Brown }
523c4762a1bSJed Brown if (j < n - 1) {
5249371c9d4SSatish Balay if (i > 0) {
5259371c9d4SSatish Balay J = global_index(i - 1, j + 1, k + 1, m, n);
5269371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5279371c9d4SSatish Balay }
5289371c9d4SSatish Balay J = global_index(i, j + 1, k + 1, m, n);
5299371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
5309371c9d4SSatish Balay if (i < m - 1) {
5319371c9d4SSatish Balay J = global_index(i + 1, j + 1, k + 1, m, n);
5329371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES));
533c4762a1bSJed Brown }
534c4762a1bSJed Brown }
5359371c9d4SSatish Balay }
5369371c9d4SSatish Balay v = 26.0;
5379371c9d4SSatish Balay PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES));
538c4762a1bSJed Brown }
539c4762a1bSJed Brown }
5409566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
5419566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
542c4762a1bSJed Brown
543c4762a1bSJed Brown /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */
5449566063dSJacob Faibussowitsch PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
545c4762a1bSJed Brown
5469566063dSJacob Faibussowitsch PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage));
547c4762a1bSJed Brown
548c4762a1bSJed Brown /* Test C = A*B */
5499566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(fullMatMatMultStage));
550fb842aefSJose E. Roman PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &C));
551c4762a1bSJed Brown
552c4762a1bSJed Brown /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */
553fb842aefSJose E. Roman PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP));
5549566063dSJacob Faibussowitsch PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy));
555fb842aefSJose E. Roman PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP_squared));
556c4762a1bSJed Brown
5579566063dSJacob Faibussowitsch PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD));
5589566063dSJacob Faibussowitsch PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD));
559c4762a1bSJed Brown
5609566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_squared));
5619566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP_copy));
5629566063dSJacob Faibussowitsch PetscCall(MatDestroy(&PtAP));
5639566063dSJacob Faibussowitsch PetscCall(MatDestroy(&C));
5649566063dSJacob Faibussowitsch PetscCall(MatDestroy(&B));
5659566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A));
5669566063dSJacob Faibussowitsch PetscCall(PetscFinalize());
567b122ec5aSJacob Faibussowitsch return 0;
568c4762a1bSJed Brown }
569c4762a1bSJed Brown
570c4762a1bSJed Brown /*TEST
571c4762a1bSJed Brown
572c4762a1bSJed Brown test:
573c4762a1bSJed Brown suffix: 1
574c4762a1bSJed Brown nsize: 1
575c20d7725SJed Brown args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted
576c4762a1bSJed Brown
577c4762a1bSJed Brown test:
578c4762a1bSJed Brown suffix: 2
579c4762a1bSJed Brown nsize: 1
580c4762a1bSJed Brown args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge
581c4762a1bSJed Brown
582c4762a1bSJed Brown test:
583c4762a1bSJed Brown suffix: 3
584c4762a1bSJed Brown nsize: 4
585c4762a1bSJed Brown args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi
586c4762a1bSJed Brown
587c4762a1bSJed Brown TEST*/
588