static char help[] = "Benchmark for MatMatMult() of AIJ matrices using different 2d finite-difference stencils.\n\n"; #include /* Converts 3d grid coordinates (i,j,k) for a grid of size m \times n to global indexing. Pass k = 0 for a 2d grid. */ PetscInt global_index(PetscInt i, PetscInt j, PetscInt k, PetscInt m, PetscInt n) { return i + j * m + k * m * n; } int main(int argc, char **argv) { Mat A, B, C, PtAP, PtAP_copy, PtAP_squared; PetscInt i, M, N, Istart, Iend, n = 7, j, J, Ii, m = 8, k, o = 1; PetscScalar v; PetscBool equal = PETSC_FALSE, mat_view = PETSC_FALSE; char stencil[PETSC_MAX_PATH_LEN]; PetscLogStage fullMatMatMultStage; PetscFunctionBeginUser; PetscCall(PetscInitialize(&argc, &argv, NULL, help)); PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL)); PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL)); PetscCall(PetscOptionsGetInt(NULL, NULL, "-o", &o, NULL)); PetscCall(PetscOptionsHasName(NULL, NULL, "-result_view", &mat_view)); PetscCall(PetscOptionsGetString(NULL, NULL, "-stencil", stencil, sizeof(stencil), NULL)); /* Create a aij matrix A */ M = N = m * n * o; PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N)); PetscCall(MatSetType(A, MATAIJ)); PetscCall(MatSetFromOptions(A)); /* Consistency checks */ PetscCheck(o >= 1 && m > 1 && n >= 1, PETSC_COMM_WORLD, PETSC_ERR_USER, "Dimensions need to be larger than zero!"); /************ 2D stencils ***************/ PetscCall(PetscStrcmp(stencil, "2d5point", &equal)); if (equal) { /* 5-point stencil, 2D */ PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL)); PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL)); PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); for (Ii = Istart; Ii < Iend; Ii++) { v = -1.0; k = Ii / (m * n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i > 0) { J = global_index(i - 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1) { J = global_index(i + 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 0) { J = global_index(i, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 1) { J = global_index(i, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } v = 4.0; PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); } } PetscCall(PetscStrcmp(stencil, "2d9point", &equal)); if (equal) { /* 9-point stencil, 2D */ PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); for (Ii = Istart; Ii < Iend; Ii++) { v = -1.0; k = Ii / (m * n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i > 0) { J = global_index(i - 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i > 0 && j > 0) { J = global_index(i - 1, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 0) { J = global_index(i, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1 && j > 0) { J = global_index(i + 1, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1) { J = global_index(i + 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1 && j < n - 1) { J = global_index(i + 1, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 1) { J = global_index(i, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i > 0 && j < n - 1) { J = global_index(i - 1, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } v = 8.0; PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); } } PetscCall(PetscStrcmp(stencil, "2d9point2", &equal)); if (equal) { /* 9-point Cartesian stencil (width 2 per coordinate), 2D */ PetscCall(MatMPIAIJSetPreallocation(A, 9, NULL, 9, NULL)); PetscCall(MatSeqAIJSetPreallocation(A, 9, NULL)); PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); for (Ii = Istart; Ii < Iend; Ii++) { v = -1.0; k = Ii / (m * n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i > 0) { J = global_index(i - 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i > 1) { J = global_index(i - 2, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1) { J = global_index(i + 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 2) { J = global_index(i + 2, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 0) { J = global_index(i, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 1) { J = global_index(i, j - 2, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 1) { J = global_index(i, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 2) { J = global_index(i, j + 2, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } v = 8.0; PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); } } PetscCall(PetscStrcmp(stencil, "2d13point", &equal)); if (equal) { /* 13-point Cartesian stencil (width 3 per coordinate), 2D */ PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); for (Ii = Istart; Ii < Iend; Ii++) { v = -1.0; k = Ii / (m * n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i > 0) { J = global_index(i - 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i > 1) { J = global_index(i - 2, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i > 2) { J = global_index(i - 3, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1) { J = global_index(i + 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 2) { J = global_index(i + 2, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 3) { J = global_index(i + 3, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 0) { J = global_index(i, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 1) { J = global_index(i, j - 2, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 2) { J = global_index(i, j - 3, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 1) { J = global_index(i, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 2) { J = global_index(i, j + 2, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 3) { J = global_index(i, j + 3, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } v = 12.0; PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); } } /************ 3D stencils ***************/ PetscCall(PetscStrcmp(stencil, "3d7point", &equal)); if (equal) { /* 7-point stencil, 3D */ PetscCall(MatMPIAIJSetPreallocation(A, 7, NULL, 7, NULL)); PetscCall(MatSeqAIJSetPreallocation(A, 7, NULL)); PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); for (Ii = Istart; Ii < Iend; Ii++) { v = -1.0; k = Ii / (m * n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i > 0) { J = global_index(i - 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1) { J = global_index(i + 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 0) { J = global_index(i, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 1) { J = global_index(i, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (k > 0) { J = global_index(i, j, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (k < o - 1) { J = global_index(i, j, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } v = 6.0; PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); } } PetscCall(PetscStrcmp(stencil, "3d13point", &equal)); if (equal) { /* 13-point stencil, 3D */ PetscCall(MatMPIAIJSetPreallocation(A, 13, NULL, 13, NULL)); PetscCall(MatSeqAIJSetPreallocation(A, 13, NULL)); PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); for (Ii = Istart; Ii < Iend; Ii++) { v = -1.0; k = Ii / (m * n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (i > 0) { J = global_index(i - 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i > 1) { J = global_index(i - 2, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1) { J = global_index(i + 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 2) { J = global_index(i + 2, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 0) { J = global_index(i, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 1) { J = global_index(i, j - 2, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 1) { J = global_index(i, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 2) { J = global_index(i, j + 2, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (k > 0) { J = global_index(i, j, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (k > 1) { J = global_index(i, j, k - 2, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (k < o - 1) { J = global_index(i, j, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (k < o - 2) { J = global_index(i, j, k + 2, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } v = 12.0; PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); } } PetscCall(PetscStrcmp(stencil, "3d19point", &equal)); if (equal) { /* 19-point stencil, 3D */ PetscCall(MatMPIAIJSetPreallocation(A, 19, NULL, 19, NULL)); PetscCall(MatSeqAIJSetPreallocation(A, 19, NULL)); PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); for (Ii = Istart; Ii < Iend; Ii++) { v = -1.0; k = Ii / (m * n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); /* one hop */ if (i > 0) { J = global_index(i - 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1) { J = global_index(i + 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 0) { J = global_index(i, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 1) { J = global_index(i, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (k > 0) { J = global_index(i, j, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (k < o - 1) { J = global_index(i, j, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } /* two hops */ if (i > 0 && j > 0) { J = global_index(i - 1, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i > 0 && k > 0) { J = global_index(i - 1, j, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i > 0 && j < n - 1) { J = global_index(i - 1, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i > 0 && k < o - 1) { J = global_index(i - 1, j, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1 && j > 0) { J = global_index(i + 1, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1 && k > 0) { J = global_index(i + 1, j, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1 && j < n - 1) { J = global_index(i + 1, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (i < m - 1 && k < o - 1) { J = global_index(i + 1, j, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 0 && k > 0) { J = global_index(i, j - 1, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j > 0 && k < o - 1) { J = global_index(i, j - 1, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 1 && k > 0) { J = global_index(i, j + 1, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } if (j < n - 1 && k < o - 1) { J = global_index(i, j + 1, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } v = 18.0; PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); } } PetscCall(PetscStrcmp(stencil, "3d27point", &equal)); if (equal) { /* 27-point stencil, 3D */ PetscCall(MatMPIAIJSetPreallocation(A, 27, NULL, 27, NULL)); PetscCall(MatSeqAIJSetPreallocation(A, 27, NULL)); PetscCall(MatGetOwnershipRange(A, &Istart, &Iend)); for (Ii = Istart; Ii < Iend; Ii++) { v = -1.0; k = Ii / (m * n); j = (Ii - k * m * n) / m; i = (Ii - k * m * n - j * m); if (k > 0) { if (j > 0) { if (i > 0) { J = global_index(i - 1, j - 1, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } J = global_index(i, j - 1, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); if (i < m - 1) { J = global_index(i + 1, j - 1, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } } { if (i > 0) { J = global_index(i - 1, j, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } J = global_index(i, j, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); if (i < m - 1) { J = global_index(i + 1, j, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } } if (j < n - 1) { if (i > 0) { J = global_index(i - 1, j + 1, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } J = global_index(i, j + 1, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); if (i < m - 1) { J = global_index(i + 1, j + 1, k - 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } } } { if (j > 0) { if (i > 0) { J = global_index(i - 1, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } J = global_index(i, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); if (i < m - 1) { J = global_index(i + 1, j - 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } } { if (i > 0) { J = global_index(i - 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } J = global_index(i, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); if (i < m - 1) { J = global_index(i + 1, j, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } } if (j < n - 1) { if (i > 0) { J = global_index(i - 1, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } J = global_index(i, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); if (i < m - 1) { J = global_index(i + 1, j + 1, k, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } } } if (k < o - 1) { if (j > 0) { if (i > 0) { J = global_index(i - 1, j - 1, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } J = global_index(i, j - 1, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); if (i < m - 1) { J = global_index(i + 1, j - 1, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } } { if (i > 0) { J = global_index(i - 1, j, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } J = global_index(i, j, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); if (i < m - 1) { J = global_index(i + 1, j, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } } if (j < n - 1) { if (i > 0) { J = global_index(i - 1, j + 1, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } J = global_index(i, j + 1, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); if (i < m - 1) { J = global_index(i + 1, j + 1, k + 1, m, n); PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES)); } } } v = 26.0; PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES)); } } PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); /* Copy A into B in order to have a more representative benchmark (A*A has more cache hits than A*B) */ PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); PetscCall(PetscLogStageRegister("Full MatMatMult", &fullMatMatMultStage)); /* Test C = A*B */ PetscCall(PetscLogStagePush(fullMatMatMultStage)); PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &C)); /* Test PtAP_squared = PtAP(C,C)*PtAP(C,C) */ PetscCall(MatPtAP(C, C, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP)); PetscCall(MatDuplicate(PtAP, MAT_COPY_VALUES, &PtAP_copy)); PetscCall(MatMatMult(PtAP, PtAP_copy, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &PtAP_squared)); PetscCall(MatView(C, PETSC_VIEWER_STDOUT_WORLD)); PetscCall(MatView(PtAP_squared, PETSC_VIEWER_STDOUT_WORLD)); PetscCall(MatDestroy(&PtAP_squared)); PetscCall(MatDestroy(&PtAP_copy)); PetscCall(MatDestroy(&PtAP)); PetscCall(MatDestroy(&C)); PetscCall(MatDestroy(&B)); PetscCall(MatDestroy(&A)); PetscCall(PetscFinalize()); return 0; } /*TEST test: suffix: 1 nsize: 1 args: -m 8 -n 8 -stencil 2d5point -matmatmult_via sorted test: suffix: 2 nsize: 1 args: -m 5 -n 5 -o 5 -stencil 3d27point -matmatmult_via rowmerge test: suffix: 3 nsize: 4 args: -m 6 -n 6 -stencil 2d5point -matmatmult_via seqmpi TEST*/