1 static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; 2 3 /* 4 See the paper below for more information 5 6 "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", 7 P. Jolivet, J. E. Roman, and S. Zampini (2020). 8 */ 9 10 #include <petsc.h> 11 12 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 13 #include <mkl.h> 14 #define PetscStackCallMKLSparse(func, args) do { \ 15 sparse_status_t __ierr; \ 16 PetscStackPush(#func); \ 17 __ierr = func args; \ 18 PetscStackPop; \ 19 PetscCheckFalse(__ierr != SPARSE_STATUS_SUCCESS,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ 20 } while (0) 21 #else 22 #define PetscStackCallMKLSparse(func, args) do { \ 23 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ 24 } while (0) 25 #endif 26 27 int main(int argc, char** argv) 28 { 29 Mat A, C, D, E; 30 PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; 31 PetscViewer viewer; 32 PetscInt bs[10], N[8]; 33 char *type[10]; 34 PetscMPIInt size; 35 PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl; 36 char file[PETSC_MAX_PATH_LEN]; 37 38 PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 39 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 40 PetscCheck(size == 1,PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); 41 PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg)); 42 PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); 43 PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL)); 44 PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg)); 45 if (!flg) { 46 nbs = 1; 47 bs[0] = 1; 48 } 49 PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg)); 50 if (!flg) { 51 nN = 8; 52 N[0] = 1; N[1] = 2; N[2] = 4; N[3] = 8; 53 N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128; 54 } 55 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg)); 56 if (!flg) { 57 ntype = 1; 58 PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0])); 59 } 60 PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL)); 61 PetscCall(PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL)); 62 PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL)); 63 for (PetscInt j = 0; j < nbs; ++j) { 64 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer)); 65 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 66 PetscCall(MatSetFromOptions(A)); 67 PetscCall(MatLoad(A, viewer)); 68 PetscCall(PetscViewerDestroy(&viewer)); 69 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 70 PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); 71 PetscCall(MatGetSize(A, &m, &M)); 72 if (m == M) { 73 Mat oA; 74 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA)); 75 PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN)); 76 PetscCall(MatDestroy(&oA)); 77 } 78 PetscCall(MatGetLocalSize(A, &m, NULL)); 79 PetscCall(MatGetSize(A, &M, NULL)); 80 if (bs[j] > 1) { 81 Mat T, Tt, B; 82 const PetscScalar *ptr; 83 PetscScalar *val, *Aa; 84 const PetscInt *Ai, *Aj; 85 PetscInt An, i, k; 86 PetscBool done; 87 88 PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T)); 89 PetscCall(MatSetRandom(T, NULL)); 90 PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt)); 91 PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN)); 92 PetscCall(MatDestroy(&Tt)); 93 PetscCall(MatDenseGetArrayRead(T, &ptr)); 94 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 95 PetscCheck(done && An == m,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 96 PetscCall(MatSeqAIJGetArray(A, &Aa)); 97 PetscCall(MatCreate(PETSC_COMM_WORLD, &B)); 98 PetscCall(MatSetType(B, MATSEQBAIJ)); 99 PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE)); 100 PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val)); 101 for (i = 0; i < Ai[An]; ++i) 102 for (k = 0; k < bs[j] * bs[j]; ++k) 103 val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; 104 PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE)); 105 PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val)); 106 PetscCall(PetscFree(val)); 107 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 108 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 109 PetscCall(MatDenseRestoreArrayRead(T, &ptr)); 110 PetscCall(MatDestroy(&T)); 111 PetscCall(MatDestroy(&A)); 112 A = B; 113 } 114 /* reconvert back to SeqAIJ before converting to the desired type later */ 115 if (!convert) E = A; 116 PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E)); 117 PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE)); 118 for (PetscInt i = 0; i < ntype; ++i) { 119 char *tmp; 120 PetscInt *ia_ptr, *ja_ptr, k; 121 PetscScalar *a_ptr; 122 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 123 struct matrix_descr descr; 124 sparse_matrix_t spr; 125 descr.type = SPARSE_MATRIX_TYPE_GENERAL; 126 descr.diag = SPARSE_DIAG_NON_UNIT; 127 #endif 128 if (convert) { 129 PetscCall(MatDestroy(&A)); 130 } 131 PetscCall(PetscStrstr(type[i], "mkl", &tmp)); 132 if (tmp) { 133 size_t mlen, tlen; 134 char base[256]; 135 136 mkl = PETSC_TRUE; 137 PetscCall(PetscStrlen(tmp, &mlen)); 138 PetscCall(PetscStrlen(type[i], &tlen)); 139 PetscCall(PetscStrncpy(base, type[i], tlen-mlen + 1)); 140 PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 141 } else { 142 mkl = PETSC_FALSE; 143 PetscCall(PetscStrstr(type[i], "maij", &tmp)); 144 if (!tmp) { 145 PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 146 } else { 147 PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 148 maij = PETSC_TRUE; 149 } 150 } 151 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 152 if (mkl) { 153 const PetscInt *Ai, *Aj; 154 PetscInt An; 155 PetscBool done; 156 157 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "")); 158 PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); 159 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 160 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 161 PetscCheck(done,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 162 PetscCall(PetscMalloc1(An + 1, &ia_ptr)); 163 PetscCall(PetscMalloc1(Ai[An], &ja_ptr)); 164 if (flg) { /* SeqAIJ */ 165 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; 166 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; 167 PetscCall(MatSeqAIJGetArray(A, &a_ptr)); 168 PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 169 } else { 170 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg)); 171 if (flg) { 172 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 173 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 174 PetscCall(MatSeqBAIJGetArray(A, &a_ptr)); 175 PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 176 } else { 177 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg)); 178 if (flg) { 179 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 180 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 181 PetscCall(MatSeqSBAIJGetArray(A, &a_ptr)); 182 PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 183 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 184 descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 185 descr.mode = SPARSE_FILL_MODE_UPPER; 186 descr.diag = SPARSE_DIAG_NON_UNIT; 187 #endif 188 } 189 } 190 } 191 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 192 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 193 } 194 195 PetscCall(MatViewFromOptions(A, NULL, "-A_view")); 196 197 for (k = 0; k < nN; ++k) { 198 MatType Atype, Ctype; 199 PetscInt AM, AN, CM, CN, t; 200 #if defined(PETSC_USE_LOG) 201 PetscLogStage stage, tstage; 202 char stage_s[256]; 203 #endif 204 205 PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C)); 206 PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D)); 207 PetscCall(MatSetRandom(C, NULL)); 208 if (cuda) { /* convert to GPU if needed */ 209 PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C)); 210 PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D)); 211 } 212 if (mkl) { 213 if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); 214 else PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); 215 PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); 216 PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 217 } 218 PetscCall(MatGetType(A, &Atype)); 219 PetscCall(MatGetType(C, &Ctype)); 220 PetscCall(MatGetSize(A, &AM, &AN)); 221 PetscCall(MatGetSize(C, &CM, &CN)); 222 223 #if defined(PETSC_USE_LOG) 224 if (!maij || N[k] > 1) { 225 PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 226 PetscCall(PetscLogStageRegister(stage_s, &stage)); 227 } 228 if (trans && N[k] > 1) { 229 PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 230 PetscCall(PetscLogStageRegister(stage_s, &tstage)); 231 } 232 #endif 233 /* A*B */ 234 if (N[k] > 1) { 235 if (!maij) { 236 PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 237 PetscCall(MatProductSetType(D, MATPRODUCT_AB)); 238 PetscCall(MatProductSetFromOptions(D)); 239 PetscCall(MatProductSymbolic(D)); 240 } 241 242 if (!mkl) { 243 if (!maij) { 244 PetscCall(MatProductNumeric(D)); 245 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN)); 246 PetscCall(PetscLogStagePush(stage)); 247 for (t = 0; t < trial; ++t) { 248 PetscCall(MatProductNumeric(D)); 249 } 250 PetscCall(PetscLogStagePop()); 251 } else { 252 Mat E, Ct, Dt; 253 Vec cC, cD; 254 const PetscScalar *c_ptr; 255 PetscScalar *d_ptr; 256 PetscCall(MatCreateMAIJ(A, N[k], &E)); 257 PetscCall(MatDenseGetLocalMatrix(C, &Ct)); 258 PetscCall(MatDenseGetLocalMatrix(D, &Dt)); 259 PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 260 PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 261 PetscCall(MatDenseGetArrayRead(Ct, &c_ptr)); 262 PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr)); 263 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC)); 264 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD)); 265 PetscCall(MatMult(E, cC, cD)); 266 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1)); 267 PetscCall(PetscLogStagePush(stage)); 268 for (t = 0; t < trial; ++t) { 269 PetscCall(MatMult(E, cC, cD)); 270 } 271 PetscCall(PetscLogStagePop()); 272 PetscCall(VecDestroy(&cD)); 273 PetscCall(VecDestroy(&cC)); 274 PetscCall(MatDestroy(&E)); 275 PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr)); 276 PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr)); 277 PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 278 PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 279 } 280 } else { 281 const PetscScalar *c_ptr; 282 PetscScalar *d_ptr; 283 284 PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 285 PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 286 PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 287 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 288 PetscCall(PetscLogStagePush(stage)); 289 for (t = 0; t < trial; ++t) { 290 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 291 } 292 PetscCall(PetscLogStagePop()); 293 PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 294 PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 295 } 296 } else if (maij) { 297 PetscCall(MatDestroy(&C)); 298 PetscCall(MatDestroy(&D)); 299 continue; 300 } else if (!mkl) { 301 Vec cC, cD; 302 303 PetscCall(MatDenseGetColumnVecRead(C, 0, &cC)); 304 PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD)); 305 PetscCall(MatMult(A, cC, cD)); 306 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 307 PetscCall(PetscLogStagePush(stage)); 308 for (t = 0; t < trial; ++t) { 309 PetscCall(MatMult(A, cC, cD)); 310 } 311 PetscCall(PetscLogStagePop()); 312 PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC)); 313 PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD)); 314 } else { 315 const PetscScalar *c_ptr; 316 PetscScalar *d_ptr; 317 318 PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 319 PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 320 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 321 PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 322 PetscCall(PetscLogStagePush(stage)); 323 for (t = 0; t < trial; ++t) { 324 PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 325 } 326 PetscCall(PetscLogStagePop()); 327 PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 328 PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 329 } 330 331 if (check) { 332 PetscCall(MatMatMultEqual(A, C, D, 10, &flg)); 333 if (!flg) { 334 MatType Dtype; 335 336 PetscCall(MatGetType(D, &Dtype)); 337 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 338 } 339 } 340 341 /* MKL implementation seems buggy for ABt */ 342 /* A*Bt */ 343 if (!mkl && trans && N[k] > 1) { 344 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 345 if (flg) { 346 PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C)); 347 PetscCall(MatGetType(C, &Ctype)); 348 if (!mkl) { 349 PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 350 PetscCall(MatProductSetType(D, MATPRODUCT_ABt)); 351 PetscCall(MatProductSetFromOptions(D)); 352 PetscCall(MatProductSymbolic(D)); 353 PetscCall(MatProductNumeric(D)); 354 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN)); 355 PetscCall(PetscLogStagePush(tstage)); 356 for (t = 0; t < trial; ++t) { 357 PetscCall(MatProductNumeric(D)); 358 } 359 PetscCall(PetscLogStagePop()); 360 } else { 361 const PetscScalar *c_ptr; 362 PetscScalar *d_ptr; 363 364 PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); 365 PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 366 PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 367 PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 368 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 369 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 370 PetscCall(PetscLogStagePush(stage)); 371 for (t = 0; t < trial; ++t) { 372 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 373 } 374 PetscCall(PetscLogStagePop()); 375 PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 376 PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 377 } 378 } 379 } 380 381 if (!mkl && trans && N[k] > 1 && flg && check) { 382 PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg)); 383 if (!flg) { 384 MatType Dtype; 385 PetscCall(MatGetType(D, &Dtype)); 386 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 387 } 388 } 389 PetscCall(MatDestroy(&C)); 390 PetscCall(MatDestroy(&D)); 391 } 392 if (mkl) { 393 PetscStackCallMKLSparse(mkl_sparse_destroy, (spr)); 394 PetscCall(PetscFree(ia_ptr)); 395 PetscCall(PetscFree(ja_ptr)); 396 } 397 if (cuda && i != ntype - 1) { 398 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n")); 399 break; 400 } 401 } 402 if (E != A) PetscCall(MatDestroy(&E)); 403 PetscCall(MatDestroy(&A)); 404 } 405 for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m])); 406 PetscCall(PetscFinalize()); 407 return 0; 408 } 409 410 /*TEST 411 build: 412 requires: double !complex !defined(PETSC_USE_64BIT_INDICES) 413 414 testset: 415 nsize: 1 416 filter: sed "/Benchmarking/d" 417 args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output} 418 test: 419 suffix: basic 420 args: -type aij,sbaij,baij 421 output_file: output/ex237.out 422 test: 423 suffix: maij 424 args: -type aij,maij 425 output_file: output/ex237.out 426 test: 427 suffix: cuda 428 requires: cuda 429 args: -type aij,aijcusparse 430 output_file: output/ex237.out 431 test: 432 suffix: mkl 433 requires: mkl_sparse_optimize 434 args: -type aij,aijmkl,baijmkl,sbaijmkl 435 output_file: output/ex237.out 436 437 TEST*/ 438