static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";

/*
   See the paper below for more information

   "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
   P. Jolivet, J. E. Roman, and S. Zampini (2020).
*/

#include <petsc.h>

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
#include <mkl.h>
/* Wrap a call into the MKL inspector-executor sparse BLAS: push the callee name on the
   PETSc error stack, invoke it, and turn any non-SPARSE_STATUS_SUCCESS return code into
   a PETSC_ERR_LIB error carrying the function name and status code. */
#define PetscCallMKLSparse(func, args) \
  do { \
    sparse_status_t __ierr; \
    PetscStackPushExternal(#func); \
    __ierr = func args; \
    PetscStackPop; \
    PetscCheck(__ierr == SPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
  } while (0)
#else
/* Without MKL, any attempted MKL call is an immediate "not supported" error. */
#define PetscCallMKLSparse(func, args) \
  do { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); } while (0)
#endif

/*
   Benchmark driver. Command-line options read below:
     -f <file>        (required) binary file holding an AIJ matrix to load
     -trial <n>       number of timed repetitions per kernel (default 5)
     -bs <list>       block sizes to test (default {1}; up to 10 values)
     -N <list>        numbers of right-hand-side columns (default {1,2,4,8,16,32,64,128}; up to 8 values)
     -type <list>     matrix types to benchmark (default {seqaij}; up to 10 values;
                      names containing "mkl" route to raw MKL kernels, "maij" to MatMAIJ)
     -check           verify products with MatMatMultEqual()/MatMatTransposeMultEqual()
     -trans           also benchmark the A*B^T product
     -convert_aij     use MAT_INITIAL_MATRIX (fresh copies) instead of in-place conversions
   Uniprocessor only (checked below).
*/
int main(int argc, char **argv)
{
  Mat         A, C, D, E;
  PetscInt    nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
  PetscViewer viewer;
  PetscInt    bs[10], N[8];
  char       *type[10];
  PetscMPIInt size;
  PetscBool   flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl;
  char        file[PETSC_MAX_PATH_LEN];

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
  /* the input matrix file is mandatory; everything else has defaults */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg));
  PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL));
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg));
  if (!flg) {
    nbs   = 1;
    bs[0] = 1;
  }
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg));
  if (!flg) {
    /* default sweep over powers of two for the number of RHS columns */
    nN   = 8;
    N[0] = 1;
    N[1] = 2;
    N[2] = 4;
    N[3] = 8;
    N[4] = 16;
    N[5] = 32;
    N[6] = 64;
    N[7] = 128;
  }
  PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg));
  if (!flg) {
    ntype = 1;
    PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0]));
  }
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL));
  /* outer loop: one full benchmark pass per requested block size */
  for (PetscInt j = 0; j < nbs; ++j) {
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatLoad(A, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
    PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
    PetscCall(MatGetSize(A, &m, &M));
    if (m == M) {
      /* square input: symmetrize as A += A^T (needed so SBAIJ variants are meaningful) */
      Mat oA;
      PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA));
      PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&oA));
    }
    PetscCall(MatGetLocalSize(A, &m, NULL));
    PetscCall(MatGetSize(A, &M, NULL));
    if (bs[j] > 1) {
      /* inflate the scalar AIJ matrix into a BAIJ matrix of block size bs[j]:
         each nonzero a_ij is replaced by a_ij * T, where T is a random symmetric
         bs[j] x bs[j] dense block shared by all entries */
      Mat                T, Tt, B;
      const PetscScalar *ptr;
      PetscScalar       *val, *Aa;
      const PetscInt    *Ai, *Aj;
      PetscInt           An, i, k;
      PetscBool          done;

      PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T));
      PetscCall(MatSetRandom(T, NULL));
      /* symmetrize the random block: T += T^T */
      PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt));
      PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN));
      PetscCall(MatDestroy(&Tt));
      PetscCall(MatDenseGetArrayRead(T, &ptr));
      PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
      PetscCheck(done && An == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
      PetscCall(MatSeqAIJGetArray(A, &Aa));
      PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
      PetscCall(MatSetType(B, MATSEQBAIJ));
      PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE));
      /* one dense bs*bs block per original nonzero */
      PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val));
      for (i = 0; i < Ai[An]; ++i)
        for (k = 0; k < bs[j] * bs[j]; ++k) val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
      /* blocks are filled in the dense matrix's (column-oriented) layout */
      PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE));
      PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val));
      PetscCall(PetscFree(val));
      PetscCall(MatSeqAIJRestoreArray(A, &Aa));
      PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
      PetscCall(MatDenseRestoreArrayRead(T, &ptr));
      PetscCall(MatDestroy(&T));
      /* replace A by its inflated BAIJ version */
      PetscCall(MatDestroy(&A));
      A = B;
    }
    /* reconvert back to SeqAIJ before converting to the desired type later */
    /* NOTE: with -convert_aij false the conversion is in place, so E and A alias the
       same Mat afterwards (the "if (E != A)" cleanup below relies on this); with
       -convert_aij true, E is a fresh SeqAIJ copy used as the conversion source */
    if (!convert) E = A;
    PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E));
    PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE));
    /* middle loop: one pass per requested matrix type */
    for (PetscInt i = 0; i < ntype; ++i) {
      char        *tmp;
      PetscInt    *ia_ptr, *ja_ptr, k;
      PetscScalar *a_ptr;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
      struct matrix_descr descr;
      sparse_matrix_t     spr;
      descr.type = SPARSE_MATRIX_TYPE_GENERAL;
      descr.diag = SPARSE_DIAG_NON_UNIT;
#endif
      if (convert) PetscCall(MatDestroy(&A));
      /* a type name containing "mkl" (e.g. "aijmkl") means: convert to the PETSc base
         type obtained by stripping the "mkl" suffix, then benchmark raw MKL kernels */
      PetscCall(PetscStrstr(type[i], "mkl", &tmp));
      if (tmp) {
        size_t mlen, tlen;
        char   base[256];

        mkl = PETSC_TRUE;
        PetscCall(PetscStrlen(tmp, &mlen));
        PetscCall(PetscStrlen(type[i], &tlen));
        /* base = type[i] without its trailing "mkl..." suffix */
        PetscCall(PetscStrncpy(base, type[i], tlen - mlen + 1));
        PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
      } else {
        mkl = PETSC_FALSE;
        PetscCall(PetscStrstr(type[i], "maij", &tmp));
        if (!tmp) {
          PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
        } else {
          /* "maij": keep A as AIJ and benchmark via MatCreateMAIJ() further below */
          PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
          maij = PETSC_TRUE;
        }
      }
      PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
      if (mkl) {
        /* build an MKL inspector-executor handle (spr) sharing A's values but with
           copied index arrays, since MKL may want a different index base */
        const PetscInt *Ai, *Aj;
        PetscInt        An;
        PetscBool       done;

        PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, ""));
        PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
        PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
        /* for (S)BAIJ, request block compressed storage (blockcompressed = PETSC_TRUE) */
        PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
        PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
        PetscCall(PetscMalloc1(An + 1, &ia_ptr));
        PetscCall(PetscMalloc1(Ai[An], &ja_ptr));
        if (flg) { /* SeqAIJ */
          /* zero-based CSR handle */
          for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
          for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
          PetscCall(MatSeqAIJGetArray(A, &a_ptr));
          PetscCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
        } else {
          PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg));
          if (flg) {
            for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
            for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
            PetscCall(MatSeqBAIJGetArray(A, &a_ptr));
            PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
          } else {
            PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg));
            if (flg) {
              for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
              for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
              PetscCall(MatSeqSBAIJGetArray(A, &a_ptr));
              PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
              /* SBAIJ stores only the upper triangle; tell MKL the handle is symmetric */
              descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
              descr.mode = SPARSE_FILL_MODE_UPPER;
              descr.diag = SPARSE_DIAG_NON_UNIT;
#endif
            }
          }
        }
        /* restore with the same blockcompressed flag used above */
        PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
        PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
      }

      PetscCall(MatViewFromOptions(A, NULL, "-A_view"));

      /* inner loop: one timed run per requested number of RHS columns */
      for (k = 0; k < nN; ++k) {
        MatType  Atype, Ctype;
        PetscInt AM, AN, CM, CN, t;
#if defined(PETSC_USE_LOG)
        PetscLogStage stage, tstage;
        char          stage_s[256];
#endif

        /* C: random dense input block, D: output block, both bs[j]*M x N[k] */
        PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C));
        PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D));
        PetscCall(MatSetRandom(C, NULL));
        if (cuda) { /* convert to GPU if needed */
          PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C));
          PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D));
        }
        if (mkl) {
          /* give MKL the expected call count (warm-up + trials) so optimize() can specialize */
          if (N[k] > 1) PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
          else PetscCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
          PetscCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
          PetscCallMKLSparse(mkl_sparse_optimize, (spr));
        }
        PetscCall(MatGetType(A, &Atype));
        PetscCall(MatGetType(C, &Ctype));
        PetscCall(MatGetSize(A, &AM, &AN));
        PetscCall(MatGetSize(C, &CM, &CN));

#if defined(PETSC_USE_LOG)
        /* one log stage per (type, bs, N) combination, plus one for the transposed product */
        if (!maij || N[k] > 1) {
          PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
          PetscCall(PetscLogStageRegister(stage_s, &stage));
        }
        if (trans && N[k] > 1) {
          PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
          PetscCall(PetscLogStageRegister(stage_s, &tstage));
        }
#endif
        /* A*B */
        if (N[k] > 1) {
          if (!maij) {
            /* set up the product once; only the numeric phase is timed */
            PetscCall(MatProductCreateWithMat(A, C, NULL, D));
            PetscCall(MatProductSetType(D, MATPRODUCT_AB));
            PetscCall(MatProductSetFromOptions(D));
            PetscCall(MatProductSymbolic(D));
          }

          if (!mkl) {
            if (!maij) {
              /* un-timed warm-up call, then `trial` timed repetitions */
              PetscCall(MatProductNumeric(D));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN));
              PetscCall(PetscLogStagePush(stage));
              for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
              PetscCall(PetscLogStagePop());
            } else {
              /* MAIJ path: view the dense blocks as long vectors and time MatMult()
                 NOTE(review): this local E shadows the outer SeqAIJ Mat E declared in main() */
              Mat                E, Ct, Dt;
              Vec                cC, cD;
              const PetscScalar *c_ptr;
              PetscScalar       *d_ptr;
              PetscCall(MatCreateMAIJ(A, N[k], &E));
              PetscCall(MatDenseGetLocalMatrix(C, &Ct));
              PetscCall(MatDenseGetLocalMatrix(D, &Dt));
              /* MAIJ expects interleaved components, hence the in-place transposes
                 (undone after the benchmark) */
              PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
              PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
              PetscCall(MatDenseGetArrayRead(Ct, &c_ptr));
              PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr));
              PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC));
              PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD));
              PetscCall(MatMult(E, cC, cD));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1));
              PetscCall(PetscLogStagePush(stage));
              for (t = 0; t < trial; ++t) PetscCall(MatMult(E, cC, cD));
              PetscCall(PetscLogStagePop());
              PetscCall(VecDestroy(&cD));
              PetscCall(VecDestroy(&cC));
              PetscCall(MatDestroy(&E));
              PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr));
              PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr));
              PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
              PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
            }
          } else {
            /* raw MKL sparse*dense product, column-major layout */
            const PetscScalar *c_ptr;
            PetscScalar       *d_ptr;

            PetscCall(MatDenseGetArrayRead(C, &c_ptr));
            PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
            /* warm-up call, then timed repetitions */
            PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
            PetscCall(PetscLogStagePush(stage));
            for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
            PetscCall(PetscLogStagePop());
            PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
            PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
          }
        } else if (maij) {
          /* MAIJ with a single column is just a plain MatMult; nothing extra to measure */
          PetscCall(MatDestroy(&C));
          PetscCall(MatDestroy(&D));
          continue;
        } else if (!mkl) {
          /* N == 1: benchmark a plain matrix-vector product on the first columns */
          Vec cC, cD;

          PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
          PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
          PetscCall(MatMult(A, cC, cD));
          PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
          PetscCall(PetscLogStagePush(stage));
          for (t = 0; t < trial; ++t) PetscCall(MatMult(A, cC, cD));
          PetscCall(PetscLogStagePop());
          PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
          PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
        } else {
          /* N == 1 with MKL: raw sparse matrix-vector product */
          const PetscScalar *c_ptr;
          PetscScalar       *d_ptr;

          PetscCall(MatDenseGetArrayRead(C, &c_ptr));
          PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
          PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
          PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
          PetscCall(PetscLogStagePush(stage));
          for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
          PetscCall(PetscLogStagePop());
          PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
          PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
        }

        if (check) {
          /* verify D == A*C against a reference implementation */
          PetscCall(MatMatMultEqual(A, C, D, 10, &flg));
          if (!flg) {
            MatType Dtype;

            PetscCall(MatGetType(D, &Dtype));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
          }
        }

        /* MKL implementation seems buggy for ABt */
        /* A*Bt */
        if (!mkl && trans && N[k] > 1) {
          PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
          if (flg) {
            /* C is transposed in place so D = A*C^T has the right shape */
            PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C));
            PetscCall(MatGetType(C, &Ctype));
            if (!mkl) {
              PetscCall(MatProductCreateWithMat(A, C, NULL, D));
              PetscCall(MatProductSetType(D, MATPRODUCT_ABt));
              PetscCall(MatProductSetFromOptions(D));
              PetscCall(MatProductSymbolic(D));
              PetscCall(MatProductNumeric(D));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN));
              PetscCall(PetscLogStagePush(tstage));
              for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D));
              PetscCall(PetscLogStagePop());
            } else {
              /* NOTE(review): unreachable — the enclosing "if (!mkl && trans && ...)"
                 guarantees mkl is false here; kept for symmetry with the A*B section */
              const PetscScalar *c_ptr;
              PetscScalar       *d_ptr;

              PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
              PetscCallMKLSparse(mkl_sparse_optimize, (spr));
              PetscCall(MatDenseGetArrayRead(C, &c_ptr));
              PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
              PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
              PetscCall(PetscLogStagePush(stage));
              for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
              PetscCall(PetscLogStagePop());
              PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
              PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
            }
          }
        }

        /* verify D == A*C^T (flg still holds the AIJ-type test from the block above) */
        if (!mkl && trans && N[k] > 1 && flg && check) {
          PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg));
          if (!flg) {
            MatType Dtype;
            PetscCall(MatGetType(D, &Dtype));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
          }
        }
        PetscCall(MatDestroy(&C));
        PetscCall(MatDestroy(&D));
      }
      if (mkl) {
        /* release the MKL handle and the index copies it referenced */
        PetscCallMKLSparse(mkl_sparse_destroy, (spr));
        PetscCall(PetscFree(ia_ptr));
        PetscCall(PetscFree(ja_ptr));
      }
      if (cuda && i != ntype - 1) {
        PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n"));
        break;
      }
    }
    /* E aliases A when conversions were done in place; only destroy it when distinct */
    if (E != A) PetscCall(MatDestroy(&E));
    PetscCall(MatDestroy(&A));
  }
  /* free the strings allocated by PetscOptionsGetStringArray()/PetscStrallocpy() */
  for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m]));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST
   build:
     requires: double !complex !defined(PETSC_USE_64BIT_INDICES)

   testset:
     nsize: 1
     filter: sed "/Benchmarking/d"
     args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output}
     test:
       suffix: basic
       args: -type aij,sbaij,baij
       output_file: output/ex237.out
     test:
       suffix: maij
       args: -type aij,maij
       output_file: output/ex237.out
     test:
       suffix: cuda
       requires: cuda
       args: -type aij,aijcusparse
       output_file: output/ex237.out
     test:
       suffix: mkl
       requires: mkl_sparse_optimize
       args: -type aij,aijmkl,baijmkl,sbaijmkl
       output_file: output/ex237.out

TEST*/