xref: /petsc/src/mat/tests/ex237.c (revision 71383f7570b59a18517995bd8cb04ed04c3be442)
1 static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";
2 
3 /*
4   See the paper below for more information
5 
6    "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
7    P. Jolivet, J. E. Roman, and S. Zampini (2020).
8 */
9 
10 #include <petsc.h>
11 
12 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
13   #include <mkl.h>
14   #define PetscCallMKLSparse(func, args) \
15     do { \
16       sparse_status_t __ierr; \
17       PetscStackPushExternal(#func); \
18       __ierr = func args; \
19       PetscStackPop; \
20       PetscCheck(__ierr == SPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
21     } while (0)
22 #else
23   #define PetscCallMKLSparse(func, args) \
24     do { \
25       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
26     } while (0)
27 #endif
28 
main(int argc,char ** argv)29 int main(int argc, char **argv)
30 {
31   Mat         A, C, D, E;
32   PetscInt    nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
33   PetscViewer viewer;
34   PetscInt    bs[10], N[8];
35   char       *type[10];
36   PetscMPIInt size;
37   PetscBool   flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, abt = PETSC_FALSE, atb = PETSC_FALSE, convert = PETSC_FALSE, mkl;
38   char        file[PETSC_MAX_PATH_LEN];
39 
40   PetscFunctionBeginUser;
41   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
42   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
43   PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
44   PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg));
45   PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
46   PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL));
47   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg));
48   if (!flg) {
49     nbs   = 1;
50     bs[0] = 1;
51   }
52   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg));
53   if (!flg) {
54     nN   = 8;
55     N[0] = 1;
56     N[1] = 2;
57     N[2] = 4;
58     N[3] = 8;
59     N[4] = 16;
60     N[5] = 32;
61     N[6] = 64;
62     N[7] = 128;
63   }
64   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg));
65   if (!flg) {
66     ntype = 1;
67     PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0]));
68   }
69   PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL));
70   PetscCall(PetscOptionsGetBool(NULL, NULL, "-ABt", &abt, NULL));
71   PetscCall(PetscOptionsGetBool(NULL, NULL, "-AtB", &atb, NULL));
72   PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL));
73   for (PetscInt j = 0; j < nbs; ++j) {
74     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
75     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
76     PetscCall(MatSetFromOptions(A));
77     PetscCall(MatLoad(A, viewer));
78     PetscCall(PetscViewerDestroy(&viewer));
79     PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
80     PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
81     PetscCall(MatGetSize(A, &m, &M));
82     if (m == M) {
83       Mat oA;
84       PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA));
85       PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN));
86       PetscCall(MatDestroy(&oA));
87     }
88     PetscCall(MatGetLocalSize(A, &m, NULL));
89     PetscCall(MatGetSize(A, &M, NULL));
90     if (bs[j] > 1) {
91       Mat                T, Tt, B;
92       const PetscScalar *ptr;
93       PetscScalar       *val, *Aa;
94       const PetscInt    *Ai, *Aj;
95       PetscInt           An, i, k;
96       PetscBool          done;
97 
98       PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T));
99       PetscCall(MatSetRandom(T, NULL));
100       PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt));
101       PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN));
102       PetscCall(MatDestroy(&Tt));
103       PetscCall(MatDenseGetArrayRead(T, &ptr));
104       PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
105       PetscCheck(done && An == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
106       PetscCall(MatSeqAIJGetArray(A, &Aa));
107       PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
108       PetscCall(MatSetType(B, MATSEQBAIJ));
109       PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE));
110       PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val));
111       for (i = 0; i < Ai[An]; ++i)
112         for (k = 0; k < bs[j] * bs[j]; ++k) val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
113       PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE));
114       PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val));
115       PetscCall(PetscFree(val));
116       PetscCall(MatSeqAIJRestoreArray(A, &Aa));
117       PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
118       PetscCall(MatDenseRestoreArrayRead(T, &ptr));
119       PetscCall(MatDestroy(&T));
120       PetscCall(MatDestroy(&A));
121       A = B;
122     }
123     /* reconvert back to SeqAIJ before converting to the desired type later */
124     if (!convert) E = A;
125     PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E));
126     PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE));
127     for (PetscInt i = 0; i < ntype; ++i) {
128       char        *tmp = NULL;
129       PetscInt    *ia_ptr, *ja_ptr, k;
130       PetscScalar *a_ptr;
131 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
132       struct matrix_descr descr;
133       sparse_matrix_t     spr;
134       descr.type = SPARSE_MATRIX_TYPE_GENERAL;
135       descr.diag = SPARSE_DIAG_NON_UNIT;
136 #endif
137       if (convert) PetscCall(MatDestroy(&A));
138       PetscCall(PetscStrstr(type[i], "mkl", &tmp));
139       if (tmp) {
140         size_t mlen, tlen;
141         char   base[256];
142 
143         mkl = PETSC_TRUE;
144         PetscCall(PetscStrlen(tmp, &mlen));
145         PetscCall(PetscStrlen(type[i], &tlen));
146         PetscCall(PetscStrncpy(base, type[i], tlen - mlen + 1));
147         PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
148       } else {
149         mkl = PETSC_FALSE;
150         PetscCall(PetscStrstr(type[i], "maij", &tmp));
151         if (!tmp) {
152           PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
153         } else {
154           PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
155           maij = PETSC_TRUE;
156         }
157       }
158       PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
159       if (mkl) {
160         const PetscInt *Ai, *Aj;
161         PetscInt        An;
162         PetscBool       done;
163 
164         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, ""));
165         PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
166         PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
167         PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
168         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
169         PetscCall(PetscMalloc1(An + 1, &ia_ptr));
170         PetscCall(PetscMalloc1(Ai[An], &ja_ptr));
171         if (flg) { /* SeqAIJ */
172           for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
173           for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
174           PetscCall(MatSeqAIJGetArray(A, &a_ptr));
175           PetscCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
176         } else {
177           PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg));
178           if (flg) {
179             for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
180             for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
181             PetscCall(MatSeqBAIJGetArray(A, &a_ptr));
182             PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
183           } else {
184             PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg));
185             if (flg) {
186               for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
187               for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
188               PetscCall(MatSeqSBAIJGetArray(A, &a_ptr));
189               PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
190 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
191               descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
192               descr.mode = SPARSE_FILL_MODE_UPPER;
193               descr.diag = SPARSE_DIAG_NON_UNIT;
194 #endif
195             }
196           }
197         }
198         PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
199         PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
200       }
201 
202       PetscCall(MatViewFromOptions(A, NULL, "-A_view"));
203 
204       for (k = 0; k < nN; ++k) {
205         MatType       Atype, Ctype;
206         PetscInt      AM, AN, CM, CN, t;
207         PetscBool     set = PETSC_FALSE;
208         PetscLogStage ab_stage, abt_stage, atb_stage;
209         char          stage_s[256];
210 
211         PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C));
212         PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D));
213         PetscCall(MatSetRandom(C, NULL));
214         if (cuda) { /* convert to GPU if needed */
215           PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C));
216           PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D));
217         }
218         if (mkl) {
219           if (N[k] > 1) PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
220           else PetscCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
221           PetscCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
222           PetscCallMKLSparse(mkl_sparse_optimize, (spr));
223         }
224         PetscCall(MatGetType(A, &Atype));
225         PetscCall(MatGetType(C, &Ctype));
226         PetscCall(MatGetSize(A, &AM, &AN));
227         PetscCall(MatGetSize(C, &CM, &CN));
228 
229         if (!maij || N[k] > 1) {
230           PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "ab_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
231           PetscCall(PetscLogStageRegister(stage_s, &ab_stage));
232         }
233         if (abt && N[k] > 1) {
234           PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "abt_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
235           PetscCall(PetscLogStageRegister(stage_s, &abt_stage));
236         }
237         if (atb) {
238           PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "atb_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
239           PetscCall(PetscLogStageRegister(stage_s, &atb_stage));
240         }
241         /* A*B */
242         if (N[k] > 1) {
243           if (!maij) {
244             PetscCall(MatProductCreateWithMat(A, C, NULL, D));
245             PetscCall(MatProductSetType(D, MATPRODUCT_AB));
246             PetscCall(MatProductSetFromOptions(D));
247             PetscCall(MatProductSymbolic(D));
248           }
249 
250           if (!mkl) {
251             if (!maij) {
252               PetscCall(MatProductNumeric(D));
253               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN));
254               PetscCall(PetscLogStagePush(ab_stage));
255               for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
256               PetscCall(PetscLogStagePop());
257             } else {
258               Mat                E, Ct, Dt;
259               Vec                cC, cD;
260               const PetscScalar *c_ptr;
261               PetscScalar       *d_ptr;
262               PetscCall(MatCreateMAIJ(A, N[k], &E));
263               PetscCall(MatDenseGetLocalMatrix(C, &Ct));
264               PetscCall(MatDenseGetLocalMatrix(D, &Dt));
265               PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
266               PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
267               PetscCall(MatDenseGetArrayRead(Ct, &c_ptr));
268               PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr));
269               PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC));
270               PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD));
271               PetscCall(MatMult(E, cC, cD));
272               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1));
273               PetscCall(PetscLogStagePush(ab_stage));
274               for (t = 0; t < trial; ++t) PetscCall(MatMult(E, cC, cD));
275               PetscCall(PetscLogStagePop());
276               PetscCall(VecDestroy(&cD));
277               PetscCall(VecDestroy(&cC));
278               PetscCall(MatDestroy(&E));
279               PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr));
280               PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr));
281               PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
282               PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
283             }
284           } else {
285             const PetscScalar *c_ptr;
286             PetscScalar       *d_ptr;
287 
288             PetscCall(MatDenseGetArrayRead(C, &c_ptr));
289             PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
290             PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
291             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
292             PetscCall(PetscLogStagePush(ab_stage));
293             for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
294             PetscCall(PetscLogStagePop());
295             PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
296             PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
297           }
298         } else if (maij) {
299           PetscCall(MatDestroy(&C));
300           PetscCall(MatDestroy(&D));
301           continue;
302         } else if (!mkl) {
303           Vec cC, cD;
304 
305           PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
306           PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
307           PetscCall(MatMult(A, cC, cD));
308           PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
309           PetscCall(PetscLogStagePush(ab_stage));
310           for (t = 0; t < trial; ++t) PetscCall(MatMult(A, cC, cD));
311           PetscCall(PetscLogStagePop());
312           PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
313           PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
314         } else {
315           const PetscScalar *c_ptr;
316           PetscScalar       *d_ptr;
317 
318           PetscCall(MatDenseGetArrayRead(C, &c_ptr));
319           PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
320           PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
321           PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
322           PetscCall(PetscLogStagePush(ab_stage));
323           for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
324           PetscCall(PetscLogStagePop());
325           PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
326           PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
327         }
328 
329         if (check) {
330           PetscCall(MatMatMultEqual(A, C, D, 10, &flg));
331           if (!flg) {
332             MatType Dtype;
333 
334             PetscCall(MatGetType(D, &Dtype));
335             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
336           }
337         }
338 
339         /* MKL implementation seems buggy for ABt */
340         /* A*Bt */
341         if (!mkl && abt && N[k] > 1) {
342           PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
343           if (flg) {
344             PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C));
345             PetscCall(MatGetType(C, &Ctype));
346             if (!mkl) {
347               PetscCall(MatProductCreateWithMat(A, C, NULL, D));
348               PetscCall(MatProductSetType(D, MATPRODUCT_ABt));
349               PetscCall(MatProductSetFromOptions(D));
350               PetscCall(MatProductSymbolic(D));
351               PetscCall(MatProductNumeric(D));
352               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN));
353               PetscCall(PetscLogStagePush(abt_stage));
354               for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
355               PetscCall(PetscLogStagePop());
356             } else {
357               const PetscScalar *c_ptr;
358               PetscScalar       *d_ptr;
359 
360               PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
361               PetscCallMKLSparse(mkl_sparse_optimize, (spr));
362               PetscCall(MatDenseGetArrayRead(C, &c_ptr));
363               PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
364               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
365               PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
366               PetscCall(PetscLogStagePush(ab_stage));
367               for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
368               PetscCall(PetscLogStagePop());
369               PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
370               PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
371             }
372           }
373         }
374 
375         if (!mkl && abt && N[k] > 1 && flg && check) {
376           PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg));
377           if (!flg) {
378             MatType Dtype;
379             PetscCall(MatGetType(D, &Dtype));
380             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
381           }
382         }
383 
384         /* At*B */
385         if (!mkl && atb) {
386           PetscCall(MatIsSymmetricKnown(A, &set, &flg));
387           set = (PetscBool)(set && flg);
388           PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_FALSE));
389           if (N[k] > 1) {
390             PetscCall(MatProductCreateWithMat(A, C, NULL, D));
391             PetscCall(MatProductSetType(D, MATPRODUCT_AtB));
392             PetscCall(MatProductSetFromOptions(D));
393             PetscCall(MatProductSymbolic(D));
394             PetscCall(MatProductNumeric(D));
395             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with At %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AtB], Atype, AM, AN, Ctype, CM, CN));
396             PetscCall(PetscLogStagePush(atb_stage));
397             for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
398             PetscCall(PetscLogStagePop());
399           } else {
400             Vec cC, cD;
401 
402             PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
403             PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
404             PetscCall(MatMultTranspose(A, cC, cD));
405             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMultTranspose: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
406             PetscCall(PetscLogStagePush(atb_stage));
407             for (t = 0; t < trial; ++t) PetscCall(MatMultTranspose(A, cC, cD));
408             PetscCall(PetscLogStagePop());
409             PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
410             PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
411           }
412         }
413 
414         if (!mkl && atb && N[k] > 1 && check) {
415           PetscCall(MatTransposeMatMultEqual(A, C, D, 10, &flg));
416           if (!flg) {
417             MatType Dtype;
418             PetscCall(MatGetType(D, &Dtype));
419             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
420           }
421         }
422         if (set) PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
423         PetscCall(MatDestroy(&C));
424         PetscCall(MatDestroy(&D));
425       }
426       if (mkl) {
427         PetscCallMKLSparse(mkl_sparse_destroy, (spr));
428         PetscCall(PetscFree(ia_ptr));
429         PetscCall(PetscFree(ja_ptr));
430       }
431       if (cuda && i != ntype - 1) {
432         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n"));
433         break;
434       }
435     }
436     if (E != A) PetscCall(MatDestroy(&E));
437     PetscCall(MatDestroy(&A));
438   }
439   for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m]));
440   PetscCall(PetscFinalize());
441   return 0;
442 }
443 
444 /*TEST
445    build:
446      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
447 
448    testset:
449      nsize: 1
450      filter: sed "/Benchmarking/d"
451      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -ABt -convert_aij {{false true}shared output}
452      output_file: output/empty.out
453      test:
454        suffix: basic
455        args: -type aij,sbaij,baij
456      test:
457        suffix: maij
458        args: -type aij,maij
459      test:
460        suffix: cuda
461        requires: cuda
462        args: -type aij,aijcusparse
463      test:
464        suffix: mkl
465        requires: mkl_sparse_optimize
466        args: -type aij,aijmkl,baijmkl,sbaijmkl
467 
468    test:
469      nsize: 1
470      filter: sed "/Benchmarking/d"
471      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3,4,5,6 -N 1,8 -check -AtB -type baij
472      output_file: output/empty.out
473 
474 TEST*/
475