1 static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";
2
3 /*
4 See the paper below for more information
5
6 "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
7 P. Jolivet, J. E. Roman, and S. Zampini (2020).
8 */
9
10 #include <petsc.h>
11
12 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
13 #include <mkl.h>
14 #define PetscCallMKLSparse(func, args) \
15 do { \
16 sparse_status_t __ierr; \
17 PetscStackPushExternal(#func); \
18 __ierr = func args; \
19 PetscStackPop; \
20 PetscCheck(__ierr == SPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
21 } while (0)
22 #else
23 #define PetscCallMKLSparse(func, args) \
24 do { \
25 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
26 } while (0)
27 #endif
28
main(int argc,char ** argv)29 int main(int argc, char **argv)
30 {
31 Mat A, C, D, E;
32 PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
33 PetscViewer viewer;
34 PetscInt bs[10], N[8];
35 char *type[10];
36 PetscMPIInt size;
37 PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, abt = PETSC_FALSE, atb = PETSC_FALSE, convert = PETSC_FALSE, mkl;
38 char file[PETSC_MAX_PATH_LEN];
39
40 PetscFunctionBeginUser;
41 PetscCall(PetscInitialize(&argc, &argv, NULL, help));
42 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
43 PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
44 PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg));
45 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
46 PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL));
47 PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg));
48 if (!flg) {
49 nbs = 1;
50 bs[0] = 1;
51 }
52 PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg));
53 if (!flg) {
54 nN = 8;
55 N[0] = 1;
56 N[1] = 2;
57 N[2] = 4;
58 N[3] = 8;
59 N[4] = 16;
60 N[5] = 32;
61 N[6] = 64;
62 N[7] = 128;
63 }
64 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg));
65 if (!flg) {
66 ntype = 1;
67 PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0]));
68 }
69 PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL));
70 PetscCall(PetscOptionsGetBool(NULL, NULL, "-ABt", &abt, NULL));
71 PetscCall(PetscOptionsGetBool(NULL, NULL, "-AtB", &atb, NULL));
72 PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL));
73 for (PetscInt j = 0; j < nbs; ++j) {
74 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
75 PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
76 PetscCall(MatSetFromOptions(A));
77 PetscCall(MatLoad(A, viewer));
78 PetscCall(PetscViewerDestroy(&viewer));
79 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
80 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
81 PetscCall(MatGetSize(A, &m, &M));
82 if (m == M) {
83 Mat oA;
84 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA));
85 PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN));
86 PetscCall(MatDestroy(&oA));
87 }
88 PetscCall(MatGetLocalSize(A, &m, NULL));
89 PetscCall(MatGetSize(A, &M, NULL));
90 if (bs[j] > 1) {
91 Mat T, Tt, B;
92 const PetscScalar *ptr;
93 PetscScalar *val, *Aa;
94 const PetscInt *Ai, *Aj;
95 PetscInt An, i, k;
96 PetscBool done;
97
98 PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T));
99 PetscCall(MatSetRandom(T, NULL));
100 PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt));
101 PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN));
102 PetscCall(MatDestroy(&Tt));
103 PetscCall(MatDenseGetArrayRead(T, &ptr));
104 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
105 PetscCheck(done && An == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
106 PetscCall(MatSeqAIJGetArray(A, &Aa));
107 PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
108 PetscCall(MatSetType(B, MATSEQBAIJ));
109 PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE));
110 PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val));
111 for (i = 0; i < Ai[An]; ++i)
112 for (k = 0; k < bs[j] * bs[j]; ++k) val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
113 PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE));
114 PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val));
115 PetscCall(PetscFree(val));
116 PetscCall(MatSeqAIJRestoreArray(A, &Aa));
117 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
118 PetscCall(MatDenseRestoreArrayRead(T, &ptr));
119 PetscCall(MatDestroy(&T));
120 PetscCall(MatDestroy(&A));
121 A = B;
122 }
123 /* reconvert back to SeqAIJ before converting to the desired type later */
124 if (!convert) E = A;
125 PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E));
126 PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE));
127 for (PetscInt i = 0; i < ntype; ++i) {
128 char *tmp = NULL;
129 PetscInt *ia_ptr, *ja_ptr, k;
130 PetscScalar *a_ptr;
131 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
132 struct matrix_descr descr;
133 sparse_matrix_t spr;
134 descr.type = SPARSE_MATRIX_TYPE_GENERAL;
135 descr.diag = SPARSE_DIAG_NON_UNIT;
136 #endif
137 if (convert) PetscCall(MatDestroy(&A));
138 PetscCall(PetscStrstr(type[i], "mkl", &tmp));
139 if (tmp) {
140 size_t mlen, tlen;
141 char base[256];
142
143 mkl = PETSC_TRUE;
144 PetscCall(PetscStrlen(tmp, &mlen));
145 PetscCall(PetscStrlen(type[i], &tlen));
146 PetscCall(PetscStrncpy(base, type[i], tlen - mlen + 1));
147 PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
148 } else {
149 mkl = PETSC_FALSE;
150 PetscCall(PetscStrstr(type[i], "maij", &tmp));
151 if (!tmp) {
152 PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
153 } else {
154 PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
155 maij = PETSC_TRUE;
156 }
157 }
158 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
159 if (mkl) {
160 const PetscInt *Ai, *Aj;
161 PetscInt An;
162 PetscBool done;
163
164 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, ""));
165 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
166 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
167 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
168 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
169 PetscCall(PetscMalloc1(An + 1, &ia_ptr));
170 PetscCall(PetscMalloc1(Ai[An], &ja_ptr));
171 if (flg) { /* SeqAIJ */
172 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
173 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
174 PetscCall(MatSeqAIJGetArray(A, &a_ptr));
175 PetscCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
176 } else {
177 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg));
178 if (flg) {
179 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
180 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
181 PetscCall(MatSeqBAIJGetArray(A, &a_ptr));
182 PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
183 } else {
184 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg));
185 if (flg) {
186 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
187 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
188 PetscCall(MatSeqSBAIJGetArray(A, &a_ptr));
189 PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
190 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
191 descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
192 descr.mode = SPARSE_FILL_MODE_UPPER;
193 descr.diag = SPARSE_DIAG_NON_UNIT;
194 #endif
195 }
196 }
197 }
198 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
199 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
200 }
201
202 PetscCall(MatViewFromOptions(A, NULL, "-A_view"));
203
204 for (k = 0; k < nN; ++k) {
205 MatType Atype, Ctype;
206 PetscInt AM, AN, CM, CN, t;
207 PetscBool set = PETSC_FALSE;
208 PetscLogStage ab_stage, abt_stage, atb_stage;
209 char stage_s[256];
210
211 PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C));
212 PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D));
213 PetscCall(MatSetRandom(C, NULL));
214 if (cuda) { /* convert to GPU if needed */
215 PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C));
216 PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D));
217 }
218 if (mkl) {
219 if (N[k] > 1) PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
220 else PetscCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
221 PetscCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
222 PetscCallMKLSparse(mkl_sparse_optimize, (spr));
223 }
224 PetscCall(MatGetType(A, &Atype));
225 PetscCall(MatGetType(C, &Ctype));
226 PetscCall(MatGetSize(A, &AM, &AN));
227 PetscCall(MatGetSize(C, &CM, &CN));
228
229 if (!maij || N[k] > 1) {
230 PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "ab_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
231 PetscCall(PetscLogStageRegister(stage_s, &ab_stage));
232 }
233 if (abt && N[k] > 1) {
234 PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "abt_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
235 PetscCall(PetscLogStageRegister(stage_s, &abt_stage));
236 }
237 if (atb) {
238 PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "atb_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
239 PetscCall(PetscLogStageRegister(stage_s, &atb_stage));
240 }
241 /* A*B */
242 if (N[k] > 1) {
243 if (!maij) {
244 PetscCall(MatProductCreateWithMat(A, C, NULL, D));
245 PetscCall(MatProductSetType(D, MATPRODUCT_AB));
246 PetscCall(MatProductSetFromOptions(D));
247 PetscCall(MatProductSymbolic(D));
248 }
249
250 if (!mkl) {
251 if (!maij) {
252 PetscCall(MatProductNumeric(D));
253 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN));
254 PetscCall(PetscLogStagePush(ab_stage));
255 for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
256 PetscCall(PetscLogStagePop());
257 } else {
258 Mat E, Ct, Dt;
259 Vec cC, cD;
260 const PetscScalar *c_ptr;
261 PetscScalar *d_ptr;
262 PetscCall(MatCreateMAIJ(A, N[k], &E));
263 PetscCall(MatDenseGetLocalMatrix(C, &Ct));
264 PetscCall(MatDenseGetLocalMatrix(D, &Dt));
265 PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
266 PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
267 PetscCall(MatDenseGetArrayRead(Ct, &c_ptr));
268 PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr));
269 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC));
270 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD));
271 PetscCall(MatMult(E, cC, cD));
272 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1));
273 PetscCall(PetscLogStagePush(ab_stage));
274 for (t = 0; t < trial; ++t) PetscCall(MatMult(E, cC, cD));
275 PetscCall(PetscLogStagePop());
276 PetscCall(VecDestroy(&cD));
277 PetscCall(VecDestroy(&cC));
278 PetscCall(MatDestroy(&E));
279 PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr));
280 PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr));
281 PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
282 PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
283 }
284 } else {
285 const PetscScalar *c_ptr;
286 PetscScalar *d_ptr;
287
288 PetscCall(MatDenseGetArrayRead(C, &c_ptr));
289 PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
290 PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
291 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
292 PetscCall(PetscLogStagePush(ab_stage));
293 for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
294 PetscCall(PetscLogStagePop());
295 PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
296 PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
297 }
298 } else if (maij) {
299 PetscCall(MatDestroy(&C));
300 PetscCall(MatDestroy(&D));
301 continue;
302 } else if (!mkl) {
303 Vec cC, cD;
304
305 PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
306 PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
307 PetscCall(MatMult(A, cC, cD));
308 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
309 PetscCall(PetscLogStagePush(ab_stage));
310 for (t = 0; t < trial; ++t) PetscCall(MatMult(A, cC, cD));
311 PetscCall(PetscLogStagePop());
312 PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
313 PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
314 } else {
315 const PetscScalar *c_ptr;
316 PetscScalar *d_ptr;
317
318 PetscCall(MatDenseGetArrayRead(C, &c_ptr));
319 PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
320 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
321 PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
322 PetscCall(PetscLogStagePush(ab_stage));
323 for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
324 PetscCall(PetscLogStagePop());
325 PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
326 PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
327 }
328
329 if (check) {
330 PetscCall(MatMatMultEqual(A, C, D, 10, &flg));
331 if (!flg) {
332 MatType Dtype;
333
334 PetscCall(MatGetType(D, &Dtype));
335 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
336 }
337 }
338
339 /* MKL implementation seems buggy for ABt */
340 /* A*Bt */
341 if (!mkl && abt && N[k] > 1) {
342 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
343 if (flg) {
344 PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C));
345 PetscCall(MatGetType(C, &Ctype));
346 if (!mkl) {
347 PetscCall(MatProductCreateWithMat(A, C, NULL, D));
348 PetscCall(MatProductSetType(D, MATPRODUCT_ABt));
349 PetscCall(MatProductSetFromOptions(D));
350 PetscCall(MatProductSymbolic(D));
351 PetscCall(MatProductNumeric(D));
352 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN));
353 PetscCall(PetscLogStagePush(abt_stage));
354 for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
355 PetscCall(PetscLogStagePop());
356 } else {
357 const PetscScalar *c_ptr;
358 PetscScalar *d_ptr;
359
360 PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
361 PetscCallMKLSparse(mkl_sparse_optimize, (spr));
362 PetscCall(MatDenseGetArrayRead(C, &c_ptr));
363 PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
364 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
365 PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
366 PetscCall(PetscLogStagePush(ab_stage));
367 for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
368 PetscCall(PetscLogStagePop());
369 PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
370 PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
371 }
372 }
373 }
374
375 if (!mkl && abt && N[k] > 1 && flg && check) {
376 PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg));
377 if (!flg) {
378 MatType Dtype;
379 PetscCall(MatGetType(D, &Dtype));
380 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
381 }
382 }
383
384 /* At*B */
385 if (!mkl && atb) {
386 PetscCall(MatIsSymmetricKnown(A, &set, &flg));
387 set = (PetscBool)(set && flg);
388 PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_FALSE));
389 if (N[k] > 1) {
390 PetscCall(MatProductCreateWithMat(A, C, NULL, D));
391 PetscCall(MatProductSetType(D, MATPRODUCT_AtB));
392 PetscCall(MatProductSetFromOptions(D));
393 PetscCall(MatProductSymbolic(D));
394 PetscCall(MatProductNumeric(D));
395 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with At %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AtB], Atype, AM, AN, Ctype, CM, CN));
396 PetscCall(PetscLogStagePush(atb_stage));
397 for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
398 PetscCall(PetscLogStagePop());
399 } else {
400 Vec cC, cD;
401
402 PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
403 PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
404 PetscCall(MatMultTranspose(A, cC, cD));
405 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMultTranspose: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
406 PetscCall(PetscLogStagePush(atb_stage));
407 for (t = 0; t < trial; ++t) PetscCall(MatMultTranspose(A, cC, cD));
408 PetscCall(PetscLogStagePop());
409 PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
410 PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
411 }
412 }
413
414 if (!mkl && atb && N[k] > 1 && check) {
415 PetscCall(MatTransposeMatMultEqual(A, C, D, 10, &flg));
416 if (!flg) {
417 MatType Dtype;
418 PetscCall(MatGetType(D, &Dtype));
419 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
420 }
421 }
422 if (set) PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
423 PetscCall(MatDestroy(&C));
424 PetscCall(MatDestroy(&D));
425 }
426 if (mkl) {
427 PetscCallMKLSparse(mkl_sparse_destroy, (spr));
428 PetscCall(PetscFree(ia_ptr));
429 PetscCall(PetscFree(ja_ptr));
430 }
431 if (cuda && i != ntype - 1) {
432 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n"));
433 break;
434 }
435 }
436 if (E != A) PetscCall(MatDestroy(&E));
437 PetscCall(MatDestroy(&A));
438 }
439 for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m]));
440 PetscCall(PetscFinalize());
441 return 0;
442 }
443
444 /*TEST
445 build:
446 requires: double !complex !defined(PETSC_USE_64BIT_INDICES)
447
448 testset:
449 nsize: 1
450 filter: sed "/Benchmarking/d"
451 args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -ABt -convert_aij {{false true}shared output}
452 output_file: output/empty.out
453 test:
454 suffix: basic
455 args: -type aij,sbaij,baij
456 test:
457 suffix: maij
458 args: -type aij,maij
459 test:
460 suffix: cuda
461 requires: cuda
462 args: -type aij,aijcusparse
463 test:
464 suffix: mkl
465 requires: mkl_sparse_optimize
466 args: -type aij,aijmkl,baijmkl,sbaijmkl
467
468 test:
469 nsize: 1
470 filter: sed "/Benchmarking/d"
471 args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3,4,5,6 -N 1,8 -check -AtB -type baij
472 output_file: output/empty.out
473
474 TEST*/
475