xref: /petsc/src/mat/tests/ex237.c (revision 28b400f66ebc7ae0049166a2294dfcd3df27e64b)
1 static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";
2 
3 /*
4   See the paper below for more information
5 
6    "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
7    P. Jolivet, J. E. Roman, and S. Zampini (2020).
8 */
9 
10 #include <petsc.h>
11 
12 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
13 #include <mkl.h>
14 #define PetscStackCallMKLSparse(func, args) do {               \
15     sparse_status_t __ierr;                                    \
16     PetscStackPush(#func);                                     \
17     __ierr = func args;                                        \
18     PetscStackPop;                                             \
19     PetscCheckFalse(__ierr != SPARSE_STATUS_SUCCESS,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
20   } while (0)
21 #else
22 #define PetscStackCallMKLSparse(func, args) do {               \
23     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
24   } while (0)
25 #endif
26 
27 int main(int argc, char** argv)
28 {
29   Mat            A, C, D, E;
30   PetscInt       nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
31   PetscViewer    viewer;
32   PetscInt       bs[10], N[8];
33   char           *type[10];
34   PetscMPIInt    size;
35   PetscBool      flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl;
36   char           file[PETSC_MAX_PATH_LEN];
37   PetscErrorCode ierr;
38 
39   ierr = PetscInitialize(&argc, &argv, NULL, help);if (ierr) return ierr;
40   if (ierr) return ierr;
41   CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
42   PetscCheckFalse(size != 1,PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
43   CHKERRQ(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg));
44   PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
45   CHKERRQ(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL));
46   CHKERRQ(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg));
47   if (!flg) {
48     nbs = 1;
49     bs[0] = 1;
50   }
51   CHKERRQ(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg));
52   if (!flg) {
53     nN = 8;
54     N[0] = 1;  N[1] = 2;  N[2] = 4;  N[3] = 8;
55     N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128;
56   }
57   CHKERRQ(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg));
58   if (!flg) {
59     ntype = 1;
60     CHKERRQ(PetscStrallocpy(MATSEQAIJ, &type[0]));
61   }
62   CHKERRQ(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL));
63   CHKERRQ(PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL));
64   CHKERRQ(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL));
65   for (PetscInt j = 0; j < nbs; ++j) {
66     CHKERRQ(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
67     CHKERRQ(MatCreate(PETSC_COMM_WORLD, &A));
68     CHKERRQ(MatSetFromOptions(A));
69     CHKERRQ(MatLoad(A, viewer));
70     CHKERRQ(PetscViewerDestroy(&viewer));
71     CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
72     PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
73     CHKERRQ(MatGetSize(A, &m, &M));
74     if (m == M) {
75       Mat oA;
76       CHKERRQ(MatTranspose(A, MAT_INITIAL_MATRIX, &oA));
77       CHKERRQ(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN));
78       CHKERRQ(MatDestroy(&oA));
79     }
80     CHKERRQ(MatGetLocalSize(A, &m, NULL));
81     CHKERRQ(MatGetSize(A, &M, NULL));
82     if (bs[j] > 1) {
83       Mat               T, Tt, B;
84       const PetscScalar *ptr;
85       PetscScalar       *val, *Aa;
86       const PetscInt    *Ai, *Aj;
87       PetscInt          An, i, k;
88       PetscBool         done;
89 
90       CHKERRQ(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T));
91       CHKERRQ(MatSetRandom(T, NULL));
92       CHKERRQ(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt));
93       CHKERRQ(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN));
94       CHKERRQ(MatDestroy(&Tt));
95       CHKERRQ(MatDenseGetArrayRead(T, &ptr));
96       CHKERRQ(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
97       PetscCheckFalse(!done || An != m,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
98       CHKERRQ(MatSeqAIJGetArray(A, &Aa));
99       CHKERRQ(MatCreate(PETSC_COMM_WORLD, &B));
100       CHKERRQ(MatSetType(B, MATSEQBAIJ));
101       CHKERRQ(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE));
102       CHKERRQ(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val));
103       for (i = 0; i < Ai[An]; ++i)
104         for (k = 0; k < bs[j] * bs[j]; ++k)
105           val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
106       CHKERRQ(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE));
107       CHKERRQ(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val));
108       CHKERRQ(PetscFree(val));
109       CHKERRQ(MatSeqAIJRestoreArray(A, &Aa));
110       CHKERRQ(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
111       CHKERRQ(MatDenseRestoreArrayRead(T, &ptr));
112       CHKERRQ(MatDestroy(&T));
113       CHKERRQ(MatDestroy(&A));
114       A    = B;
115     }
116     /* reconvert back to SeqAIJ before converting to the desired type later */
117     if (!convert) E = A;
118     CHKERRQ(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E));
119     CHKERRQ(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE));
120     for (PetscInt i = 0; i < ntype; ++i) {
121       char        *tmp;
122       PetscInt    *ia_ptr, *ja_ptr, k;
123       PetscScalar *a_ptr;
124 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
125       struct matrix_descr descr;
126       sparse_matrix_t     spr;
127       descr.type = SPARSE_MATRIX_TYPE_GENERAL;
128       descr.diag = SPARSE_DIAG_NON_UNIT;
129 #endif
130       if (convert) {
131         CHKERRQ(MatDestroy(&A));
132       }
133       CHKERRQ(PetscStrstr(type[i], "mkl", &tmp));
134       if (tmp) {
135         size_t mlen, tlen;
136         char base[256];
137 
138         mkl  = PETSC_TRUE;
139         CHKERRQ(PetscStrlen(tmp, &mlen));
140         CHKERRQ(PetscStrlen(type[i], &tlen));
141         CHKERRQ(PetscStrncpy(base, type[i], tlen-mlen + 1));
142         CHKERRQ(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
143       } else {
144         mkl  = PETSC_FALSE;
145         CHKERRQ(PetscStrstr(type[i], "maij", &tmp));
146         if (!tmp) {
147           CHKERRQ(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
148         } else {
149           CHKERRQ(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
150           maij = PETSC_TRUE;
151         }
152       }
153       CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
154       if (mkl) {
155         const PetscInt *Ai, *Aj;
156         PetscInt       An;
157         PetscBool      done;
158 
159         CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, ""));
160         PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
161         CHKERRQ(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
162         CHKERRQ(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
163         PetscCheck(done,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
164         CHKERRQ(PetscMalloc1(An + 1, &ia_ptr));
165         CHKERRQ(PetscMalloc1(Ai[An], &ja_ptr));
166         if (flg) { /* SeqAIJ */
167           for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
168           for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
169           CHKERRQ(MatSeqAIJGetArray(A, &a_ptr));
170           PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
171         } else {
172           CHKERRQ(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg));
173           if (flg) {
174             for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
175             for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
176             CHKERRQ(MatSeqBAIJGetArray(A, &a_ptr));
177             PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
178           } else {
179             CHKERRQ(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg));
180             if (flg) {
181               for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
182               for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
183               CHKERRQ(MatSeqSBAIJGetArray(A, &a_ptr));
184               PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
185 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
186               descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
187               descr.mode = SPARSE_FILL_MODE_UPPER;
188               descr.diag = SPARSE_DIAG_NON_UNIT;
189 #endif
190             }
191           }
192         }
193         CHKERRQ(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
194         CHKERRQ(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
195       }
196 
197       CHKERRQ(MatViewFromOptions(A, NULL, "-A_view"));
198 
199       for (k = 0; k < nN; ++k) {
200         MatType       Atype, Ctype;
201         PetscInt      AM, AN, CM, CN, t;
202 #if defined(PETSC_USE_LOG)
203         PetscLogStage stage, tstage;
204         char          stage_s[256];
205 #endif
206 
207         CHKERRQ(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C));
208         CHKERRQ(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D));
209         CHKERRQ(MatSetRandom(C, NULL));
210         if (cuda) { /* convert to GPU if needed */
211           CHKERRQ(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C));
212           CHKERRQ(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D));
213         }
214         if (mkl) {
215           if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
216           else          PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
217           PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
218           PetscStackCallMKLSparse(mkl_sparse_optimize, (spr));
219         }
220         CHKERRQ(MatGetType(A, &Atype));
221         CHKERRQ(MatGetType(C, &Ctype));
222         CHKERRQ(MatGetSize(A, &AM, &AN));
223         CHKERRQ(MatGetSize(C, &CM, &CN));
224 
225 #if defined(PETSC_USE_LOG)
226         if (!maij || N[k] > 1) {
227           CHKERRQ(PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
228           CHKERRQ(PetscLogStageRegister(stage_s, &stage));
229         }
230         if (trans && N[k] > 1) {
231           CHKERRQ(PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
232           CHKERRQ(PetscLogStageRegister(stage_s, &tstage));
233         }
234 #endif
235         /* A*B */
236         if (N[k] > 1) {
237           if (!maij) {
238             CHKERRQ(MatProductCreateWithMat(A, C, NULL, D));
239             CHKERRQ(MatProductSetType(D, MATPRODUCT_AB));
240             CHKERRQ(MatProductSetFromOptions(D));
241             CHKERRQ(MatProductSymbolic(D));
242           }
243 
244           if (!mkl) {
245             if (!maij) {
246               CHKERRQ(MatProductNumeric(D));
247               CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN));
248               CHKERRQ(PetscLogStagePush(stage));
249               for (t = 0; t < trial; ++t) {
250                 CHKERRQ(MatProductNumeric(D));
251               }
252               CHKERRQ(PetscLogStagePop());
253             } else {
254               Mat               E, Ct, Dt;
255               Vec               cC, cD;
256               const PetscScalar *c_ptr;
257               PetscScalar       *d_ptr;
258               CHKERRQ(MatCreateMAIJ(A, N[k], &E));
259               CHKERRQ(MatDenseGetLocalMatrix(C, &Ct));
260               CHKERRQ(MatDenseGetLocalMatrix(D, &Dt));
261               CHKERRQ(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
262               CHKERRQ(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
263               CHKERRQ(MatDenseGetArrayRead(Ct, &c_ptr));
264               CHKERRQ(MatDenseGetArrayWrite(Dt, &d_ptr));
265               CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC));
266               CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD));
267               CHKERRQ(MatMult(E, cC, cD));
268               CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1));
269               CHKERRQ(PetscLogStagePush(stage));
270               for (t = 0; t < trial; ++t) {
271                 CHKERRQ(MatMult(E, cC, cD));
272               }
273               CHKERRQ(PetscLogStagePop());
274               CHKERRQ(VecDestroy(&cD));
275               CHKERRQ(VecDestroy(&cC));
276               CHKERRQ(MatDestroy(&E));
277               CHKERRQ(MatDenseRestoreArrayWrite(Dt, &d_ptr));
278               CHKERRQ(MatDenseRestoreArrayRead(Ct, &c_ptr));
279               CHKERRQ(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
280               CHKERRQ(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
281             }
282           } else {
283             const PetscScalar *c_ptr;
284             PetscScalar       *d_ptr;
285 
286             CHKERRQ(MatDenseGetArrayRead(C, &c_ptr));
287             CHKERRQ(MatDenseGetArrayWrite(D, &d_ptr));
288             PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
289             CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
290             CHKERRQ(PetscLogStagePush(stage));
291             for (t = 0; t < trial; ++t) {
292               PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
293             }
294             CHKERRQ(PetscLogStagePop());
295             CHKERRQ(MatDenseRestoreArrayWrite(D, &d_ptr));
296             CHKERRQ(MatDenseRestoreArrayRead(C, &c_ptr));
297           }
298         } else if (maij) {
299           CHKERRQ(MatDestroy(&C));
300           CHKERRQ(MatDestroy(&D));
301           continue;
302         } else if (!mkl) {
303           Vec cC, cD;
304 
305           CHKERRQ(MatDenseGetColumnVecRead(C, 0, &cC));
306           CHKERRQ(MatDenseGetColumnVecWrite(D, 0, &cD));
307           CHKERRQ(MatMult(A, cC, cD));
308           CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
309           CHKERRQ(PetscLogStagePush(stage));
310           for (t = 0; t < trial; ++t) {
311             CHKERRQ(MatMult(A, cC, cD));
312           }
313           CHKERRQ(PetscLogStagePop());
314           CHKERRQ(MatDenseRestoreColumnVecRead(C, 0, &cC));
315           CHKERRQ(MatDenseRestoreColumnVecWrite(D, 0, &cD));
316         } else {
317           const PetscScalar *c_ptr;
318           PetscScalar       *d_ptr;
319 
320           CHKERRQ(MatDenseGetArrayRead(C, &c_ptr));
321           CHKERRQ(MatDenseGetArrayWrite(D, &d_ptr));
322           CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
323           PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
324           CHKERRQ(PetscLogStagePush(stage));
325           for (t = 0; t < trial; ++t) {
326             PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
327           }
328           CHKERRQ(PetscLogStagePop());
329           CHKERRQ(MatDenseRestoreArrayWrite(D, &d_ptr));
330           CHKERRQ(MatDenseRestoreArrayRead(C, &c_ptr));
331         }
332 
333         if (check) {
334           CHKERRQ(MatMatMultEqual(A, C, D, 10, &flg));
335           if (!flg) {
336             MatType Dtype;
337 
338             CHKERRQ(MatGetType(D, &Dtype));
339             CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
340           }
341         }
342 
343         /* MKL implementation seems buggy for ABt */
344         /* A*Bt */
345         if (!mkl && trans && N[k] > 1) {
346           CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
347           if (flg) {
348             CHKERRQ(MatTranspose(C, MAT_INPLACE_MATRIX, &C));
349             CHKERRQ(MatGetType(C, &Ctype));
350             if (!mkl) {
351               CHKERRQ(MatProductCreateWithMat(A, C, NULL, D));
352               CHKERRQ(MatProductSetType(D, MATPRODUCT_ABt));
353               CHKERRQ(MatProductSetFromOptions(D));
354               CHKERRQ(MatProductSymbolic(D));
355               CHKERRQ(MatProductNumeric(D));
356               CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN));
357               CHKERRQ(PetscLogStagePush(tstage));
358               for (t = 0; t < trial; ++t) {
359                 CHKERRQ(MatProductNumeric(D));
360               }
361               CHKERRQ(PetscLogStagePop());
362             } else {
363               const PetscScalar *c_ptr;
364               PetscScalar       *d_ptr;
365 
366               PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
367               PetscStackCallMKLSparse(mkl_sparse_optimize, (spr));
368               CHKERRQ(MatDenseGetArrayRead(C, &c_ptr));
369               CHKERRQ(MatDenseGetArrayWrite(D, &d_ptr));
370               CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
371               PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
372               CHKERRQ(PetscLogStagePush(stage));
373               for (t = 0; t < trial; ++t) {
374                 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
375               }
376               CHKERRQ(PetscLogStagePop());
377               CHKERRQ(MatDenseRestoreArrayWrite(D, &d_ptr));
378               CHKERRQ(MatDenseRestoreArrayRead(C, &c_ptr));
379             }
380           }
381         }
382 
383         if (!mkl && trans && N[k] > 1 && flg && check) {
384           CHKERRQ(MatMatTransposeMultEqual(A, C, D, 10, &flg));
385           if (!flg) {
386             MatType Dtype;
387             CHKERRQ(MatGetType(D, &Dtype));
388             CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
389           }
390         }
391         CHKERRQ(MatDestroy(&C));
392         CHKERRQ(MatDestroy(&D));
393       }
394       if (mkl) {
395         PetscStackCallMKLSparse(mkl_sparse_destroy, (spr));
396         CHKERRQ(PetscFree(ia_ptr));
397         CHKERRQ(PetscFree(ja_ptr));
398       }
399       if (cuda && i != ntype - 1) {
400         CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n"));
401         break;
402       }
403     }
404     if (E != A) {
405       CHKERRQ(MatDestroy(&E));
406     }
407     CHKERRQ(MatDestroy(&A));
408   }
409   for (m = 0; m < ntype; ++m) {
410     CHKERRQ(PetscFree(type[m]));
411   }
412   ierr = PetscFinalize();
413   return 0;
414 }
415 
416 /*TEST
417    build:
418      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
419 
420    testset:
421      nsize: 1
422      filter: sed "/Benchmarking/d"
423      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output}
424      test:
425        suffix: basic
426        args: -type aij,sbaij,baij
427        output_file: output/ex237.out
428      test:
429        suffix: maij
430        args: -type aij,maij
431        output_file: output/ex237.out
432      test:
433        suffix: cuda
434        requires: cuda
435        args: -type aij,aijcusparse
436        output_file: output/ex237.out
437      test:
438        suffix: mkl
439        requires: mkl_sparse_optimize
440        args: -type aij,aijmkl,baijmkl,sbaijmkl
441        output_file: output/ex237.out
442 
443 TEST*/
444