xref: /petsc/src/mat/tests/ex237.c (revision daa037dfd3c3bec8dc8659548d2b20b07c1dc6de)
1 static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";
2 
3 /*
4   See the paper below for more information
5 
6    "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
7    P. Jolivet, J. E. Roman, and S. Zampini (2020).
8 */
9 
10 #include <petsc.h>
11 
12 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
13 #include <mkl.h>
14 #define PetscStackCallMKLSparse(func, args) do {               \
15     sparse_status_t __ierr;                                    \
16     PetscStackPush(#func);                                     \
17     __ierr = func args;                                        \
18     PetscStackPop;                                             \
19     PetscCheckFalse(__ierr != SPARSE_STATUS_SUCCESS,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
20   } while (0)
21 #else
22 #define PetscStackCallMKLSparse(func, args) do {               \
23     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
24   } while (0)
25 #endif
26 
27 int main(int argc, char** argv)
28 {
29   Mat          A, C, D, E;
30   PetscInt     nbs             = 10, ntype = 10, nN = 8, m, M, trial = 5;
31   PetscViewer  viewer;
32   PetscInt     bs[10], N[8];
33   char        *type[10];
34   PetscMPIInt  size;
35   PetscBool    flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl;
36   char         file[PETSC_MAX_PATH_LEN];
37 
38   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
39   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
40   PetscCheck(size == 1,PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
41   PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg));
42   PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
43   PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL));
44   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg));
45   if (!flg) {
46     nbs = 1;
47     bs[0] = 1;
48   }
49   PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg));
50   if (!flg) {
51     nN = 8;
52     N[0] = 1;  N[1] = 2;  N[2] = 4;  N[3] = 8;
53     N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128;
54   }
55   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg));
56   if (!flg) {
57     ntype = 1;
58     PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0]));
59   }
60   PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL));
61   PetscCall(PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL));
62   PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL));
63   for (PetscInt j = 0; j < nbs; ++j) {
64     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
65     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
66     PetscCall(MatSetFromOptions(A));
67     PetscCall(MatLoad(A, viewer));
68     PetscCall(PetscViewerDestroy(&viewer));
69     PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
70     PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
71     PetscCall(MatGetSize(A, &m, &M));
72     if (m == M) {
73       Mat oA;
74       PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA));
75       PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN));
76       PetscCall(MatDestroy(&oA));
77     }
78     PetscCall(MatGetLocalSize(A, &m, NULL));
79     PetscCall(MatGetSize(A, &M, NULL));
80     if (bs[j] > 1) {
81       Mat               T, Tt, B;
82       const PetscScalar *ptr;
83       PetscScalar       *val, *Aa;
84       const PetscInt    *Ai, *Aj;
85       PetscInt          An, i, k;
86       PetscBool         done;
87 
88       PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T));
89       PetscCall(MatSetRandom(T, NULL));
90       PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt));
91       PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN));
92       PetscCall(MatDestroy(&Tt));
93       PetscCall(MatDenseGetArrayRead(T, &ptr));
94       PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
95       PetscCheck(done && An == m,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
96       PetscCall(MatSeqAIJGetArray(A, &Aa));
97       PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
98       PetscCall(MatSetType(B, MATSEQBAIJ));
99       PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE));
100       PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val));
101       for (i = 0; i < Ai[An]; ++i)
102         for (k = 0; k < bs[j] * bs[j]; ++k)
103           val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
104       PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE));
105       PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val));
106       PetscCall(PetscFree(val));
107       PetscCall(MatSeqAIJRestoreArray(A, &Aa));
108       PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
109       PetscCall(MatDenseRestoreArrayRead(T, &ptr));
110       PetscCall(MatDestroy(&T));
111       PetscCall(MatDestroy(&A));
112       A    = B;
113     }
114     /* reconvert back to SeqAIJ before converting to the desired type later */
115     if (!convert) E = A;
116     PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E));
117     PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE));
118     for (PetscInt i = 0; i < ntype; ++i) {
119       char        *tmp;
120       PetscInt    *ia_ptr, *ja_ptr, k;
121       PetscScalar *a_ptr;
122 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
123       struct matrix_descr descr;
124       sparse_matrix_t     spr;
125       descr.type = SPARSE_MATRIX_TYPE_GENERAL;
126       descr.diag = SPARSE_DIAG_NON_UNIT;
127 #endif
128       if (convert) {
129         PetscCall(MatDestroy(&A));
130       }
131       PetscCall(PetscStrstr(type[i], "mkl", &tmp));
132       if (tmp) {
133         size_t mlen, tlen;
134         char base[256];
135 
136         mkl  = PETSC_TRUE;
137         PetscCall(PetscStrlen(tmp, &mlen));
138         PetscCall(PetscStrlen(type[i], &tlen));
139         PetscCall(PetscStrncpy(base, type[i], tlen-mlen + 1));
140         PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
141       } else {
142         mkl  = PETSC_FALSE;
143         PetscCall(PetscStrstr(type[i], "maij", &tmp));
144         if (!tmp) {
145           PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
146         } else {
147           PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
148           maij = PETSC_TRUE;
149         }
150       }
151       PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
152       if (mkl) {
153         const PetscInt *Ai, *Aj;
154         PetscInt       An;
155         PetscBool      done;
156 
157         PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, ""));
158         PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
159         PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
160         PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
161         PetscCheck(done,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
162         PetscCall(PetscMalloc1(An + 1, &ia_ptr));
163         PetscCall(PetscMalloc1(Ai[An], &ja_ptr));
164         if (flg) { /* SeqAIJ */
165           for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
166           for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
167           PetscCall(MatSeqAIJGetArray(A, &a_ptr));
168           PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
169         } else {
170           PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg));
171           if (flg) {
172             for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
173             for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
174             PetscCall(MatSeqBAIJGetArray(A, &a_ptr));
175             PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
176           } else {
177             PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg));
178             if (flg) {
179               for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
180               for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
181               PetscCall(MatSeqSBAIJGetArray(A, &a_ptr));
182               PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
183 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
184               descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
185               descr.mode = SPARSE_FILL_MODE_UPPER;
186               descr.diag = SPARSE_DIAG_NON_UNIT;
187 #endif
188             }
189           }
190         }
191         PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
192         PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
193       }
194 
195       PetscCall(MatViewFromOptions(A, NULL, "-A_view"));
196 
197       for (k = 0; k < nN; ++k) {
198         MatType       Atype, Ctype;
199         PetscInt      AM, AN, CM, CN, t;
200 #if defined(PETSC_USE_LOG)
201         PetscLogStage stage, tstage;
202         char          stage_s[256];
203 #endif
204 
205         PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C));
206         PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D));
207         PetscCall(MatSetRandom(C, NULL));
208         if (cuda) { /* convert to GPU if needed */
209           PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C));
210           PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D));
211         }
212         if (mkl) {
213           if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
214           else          PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
215           PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
216           PetscStackCallMKLSparse(mkl_sparse_optimize, (spr));
217         }
218         PetscCall(MatGetType(A, &Atype));
219         PetscCall(MatGetType(C, &Ctype));
220         PetscCall(MatGetSize(A, &AM, &AN));
221         PetscCall(MatGetSize(C, &CM, &CN));
222 
223 #if defined(PETSC_USE_LOG)
224         if (!maij || N[k] > 1) {
225           PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
226           PetscCall(PetscLogStageRegister(stage_s, &stage));
227         }
228         if (trans && N[k] > 1) {
229           PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
230           PetscCall(PetscLogStageRegister(stage_s, &tstage));
231         }
232 #endif
233         /* A*B */
234         if (N[k] > 1) {
235           if (!maij) {
236             PetscCall(MatProductCreateWithMat(A, C, NULL, D));
237             PetscCall(MatProductSetType(D, MATPRODUCT_AB));
238             PetscCall(MatProductSetFromOptions(D));
239             PetscCall(MatProductSymbolic(D));
240           }
241 
242           if (!mkl) {
243             if (!maij) {
244               PetscCall(MatProductNumeric(D));
245               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN));
246               PetscCall(PetscLogStagePush(stage));
247               for (t = 0; t < trial; ++t) {
248                 PetscCall(MatProductNumeric(D));
249               }
250               PetscCall(PetscLogStagePop());
251             } else {
252               Mat               E, Ct, Dt;
253               Vec               cC, cD;
254               const PetscScalar *c_ptr;
255               PetscScalar       *d_ptr;
256               PetscCall(MatCreateMAIJ(A, N[k], &E));
257               PetscCall(MatDenseGetLocalMatrix(C, &Ct));
258               PetscCall(MatDenseGetLocalMatrix(D, &Dt));
259               PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
260               PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
261               PetscCall(MatDenseGetArrayRead(Ct, &c_ptr));
262               PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr));
263               PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC));
264               PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD));
265               PetscCall(MatMult(E, cC, cD));
266               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1));
267               PetscCall(PetscLogStagePush(stage));
268               for (t = 0; t < trial; ++t) {
269                 PetscCall(MatMult(E, cC, cD));
270               }
271               PetscCall(PetscLogStagePop());
272               PetscCall(VecDestroy(&cD));
273               PetscCall(VecDestroy(&cC));
274               PetscCall(MatDestroy(&E));
275               PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr));
276               PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr));
277               PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
278               PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
279             }
280           } else {
281             const PetscScalar *c_ptr;
282             PetscScalar       *d_ptr;
283 
284             PetscCall(MatDenseGetArrayRead(C, &c_ptr));
285             PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
286             PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
287             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
288             PetscCall(PetscLogStagePush(stage));
289             for (t = 0; t < trial; ++t) {
290               PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
291             }
292             PetscCall(PetscLogStagePop());
293             PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
294             PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
295           }
296         } else if (maij) {
297           PetscCall(MatDestroy(&C));
298           PetscCall(MatDestroy(&D));
299           continue;
300         } else if (!mkl) {
301           Vec cC, cD;
302 
303           PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
304           PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
305           PetscCall(MatMult(A, cC, cD));
306           PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
307           PetscCall(PetscLogStagePush(stage));
308           for (t = 0; t < trial; ++t) {
309             PetscCall(MatMult(A, cC, cD));
310           }
311           PetscCall(PetscLogStagePop());
312           PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
313           PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
314         } else {
315           const PetscScalar *c_ptr;
316           PetscScalar       *d_ptr;
317 
318           PetscCall(MatDenseGetArrayRead(C, &c_ptr));
319           PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
320           PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
321           PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
322           PetscCall(PetscLogStagePush(stage));
323           for (t = 0; t < trial; ++t) {
324             PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
325           }
326           PetscCall(PetscLogStagePop());
327           PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
328           PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
329         }
330 
331         if (check) {
332           PetscCall(MatMatMultEqual(A, C, D, 10, &flg));
333           if (!flg) {
334             MatType Dtype;
335 
336             PetscCall(MatGetType(D, &Dtype));
337             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
338           }
339         }
340 
341         /* MKL implementation seems buggy for ABt */
342         /* A*Bt */
343         if (!mkl && trans && N[k] > 1) {
344           PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
345           if (flg) {
346             PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C));
347             PetscCall(MatGetType(C, &Ctype));
348             if (!mkl) {
349               PetscCall(MatProductCreateWithMat(A, C, NULL, D));
350               PetscCall(MatProductSetType(D, MATPRODUCT_ABt));
351               PetscCall(MatProductSetFromOptions(D));
352               PetscCall(MatProductSymbolic(D));
353               PetscCall(MatProductNumeric(D));
354               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN));
355               PetscCall(PetscLogStagePush(tstage));
356               for (t = 0; t < trial; ++t) {
357                 PetscCall(MatProductNumeric(D));
358               }
359               PetscCall(PetscLogStagePop());
360             } else {
361               const PetscScalar *c_ptr;
362               PetscScalar       *d_ptr;
363 
364               PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
365               PetscStackCallMKLSparse(mkl_sparse_optimize, (spr));
366               PetscCall(MatDenseGetArrayRead(C, &c_ptr));
367               PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
368               PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
369               PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
370               PetscCall(PetscLogStagePush(stage));
371               for (t = 0; t < trial; ++t) {
372                 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
373               }
374               PetscCall(PetscLogStagePop());
375               PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
376               PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
377             }
378           }
379         }
380 
381         if (!mkl && trans && N[k] > 1 && flg && check) {
382           PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg));
383           if (!flg) {
384             MatType Dtype;
385             PetscCall(MatGetType(D, &Dtype));
386             PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
387           }
388         }
389         PetscCall(MatDestroy(&C));
390         PetscCall(MatDestroy(&D));
391       }
392       if (mkl) {
393         PetscStackCallMKLSparse(mkl_sparse_destroy, (spr));
394         PetscCall(PetscFree(ia_ptr));
395         PetscCall(PetscFree(ja_ptr));
396       }
397       if (cuda && i != ntype - 1) {
398         PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n"));
399         break;
400       }
401     }
402     if (E != A) PetscCall(MatDestroy(&E));
403     PetscCall(MatDestroy(&A));
404   }
405   for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m]));
406   PetscCall(PetscFinalize());
407   return 0;
408 }
409 
410 /*TEST
411    build:
412      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
413 
414    testset:
415      nsize: 1
416      filter: sed "/Benchmarking/d"
417      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output}
418      test:
419        suffix: basic
420        args: -type aij,sbaij,baij
421        output_file: output/ex237.out
422      test:
423        suffix: maij
424        args: -type aij,maij
425        output_file: output/ex237.out
426      test:
427        suffix: cuda
428        requires: cuda
429        args: -type aij,aijcusparse
430        output_file: output/ex237.out
431      test:
432        suffix: mkl
433        requires: mkl_sparse_optimize
434        args: -type aij,aijmkl,baijmkl,sbaijmkl
435        output_file: output/ex237.out
436 
437 TEST*/
438