static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n";

/*
  See the paper below for more information

  "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods",
  P. Jolivet, J. E. Roman, and S. Zampini (2020).
*/

#include <petsc.h>

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
#include <mkl.h>
/* Wrapper around MKL inspector-executor sparse BLAS calls: pushes the callee on the
   PETSc stack for tracebacks and converts a non-SPARSE_STATUS_SUCCESS return into a
   PETSc error. Multi-statement macro, hence the do { } while (0) idiom. */
#define PetscCallMKLSparse(func, args) do { \
    sparse_status_t __ierr; \
    PetscStackPushExternal(#func); \
    __ierr = func args; \
    PetscStackPop; \
    PetscCheck(__ierr == SPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \
  } while (0)
#else
/* Without MKL support any attempted MKL path is a hard error. */
#define PetscCallMKLSparse(func, args) do { \
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \
  } while (0)
#endif

/*
  Benchmark driver: loads a (Seq/MPI)AIJ matrix from a binary file, symmetrizes it,
  then for every requested block size (-bs), matrix type (-type), and number of dense
  columns (-N), times sparse-times-dense products (MatProduct AB / ABt, MatMult,
  MAIJ MatMult, or MKL mkl_sparse_d_mm/mkl_sparse_d_mv) under dedicated PETSc log
  stages. Uniprocessor only.
*/
int main(int argc, char** argv)
{
  Mat A, C, D, E;
  /* nbs/ntype/nN are in-out capacities for the option-array getters below */
  PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5;
  PetscViewer viewer;
  PetscInt bs[10], N[8];
  char *type[10]; /* entries allocated by PetscOptionsGetStringArray or PetscStrallocpy; freed at the end */
  PetscMPIInt size;
  PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl;
  char file[PETSC_MAX_PATH_LEN];

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only");
  /* -f is mandatory: path of the binary matrix to benchmark */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg));
  PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option");
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL));
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg));
  if (!flg) {
    /* default: a single run with block size 1 (no BAIJ inflation) */
    nbs = 1;
    bs[0] = 1;
  }
  PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg));
  if (!flg) {
    /* default numbers of right-hand-side columns: powers of two from 1 to 128 */
    nN = 8;
    N[0] = 1; N[1] = 2; N[2] = 4; N[3] = 8;
    N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128;
  }
  PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg));
  if (!flg) {
    /* default: benchmark plain SeqAIJ only */
    ntype = 1;
    PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0]));
  }
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL));
  /* outer loop: one full benchmark campaign per requested block size */
  for (PetscInt j = 0; j < nbs; ++j) {
    /* the matrix is reloaded from disk for every block size */
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatLoad(A, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
    PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix");
    PetscCall(MatGetSize(A, &m, &M));
    if (m == M) {
      /* square input: symmetrize as A + A^T so the SBAIJ/MKL-symmetric paths are valid */
      Mat oA;
      PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA));
      PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&oA));
    }
    PetscCall(MatGetLocalSize(A, &m, NULL));
    PetscCall(MatGetSize(A, &M, NULL));
    if (bs[j] > 1) {
      /* inflate the scalar AIJ matrix into a SeqBAIJ with bs x bs blocks: each nonzero
         a_ij is replaced by a_ij * T, where T is a random symmetric bs x bs dense block
         (a Kronecker-product-like construction reusing the AIJ CSR pattern) */
      Mat T, Tt, B;
      const PetscScalar *ptr;
      PetscScalar *val, *Aa;
      const PetscInt *Ai, *Aj;
      PetscInt An, i, k;
      PetscBool done;

      PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T));
      PetscCall(MatSetRandom(T, NULL));
      /* T <- T + T^T makes the dense block symmetric */
      PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt));
      PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN));
      PetscCall(MatDestroy(&Tt));
      PetscCall(MatDenseGetArrayRead(T, &ptr));
      PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
      PetscCheck(done && An == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
      PetscCall(MatSeqAIJGetArray(A, &Aa));
      PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
      PetscCall(MatSetType(B, MATSEQBAIJ));
      PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE));
      /* Ai[An] = number of nonzeros of A; each one expands to a bs x bs block */
      PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val));
      for (i = 0; i < Ai[An]; ++i)
        for (k = 0; k < bs[j] * bs[j]; ++k)
          val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k];
      /* block values follow the dense array layout of T, hence column-oriented blocks */
      PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE));
      PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val));
      PetscCall(PetscFree(val));
      PetscCall(MatSeqAIJRestoreArray(A, &Aa));
      PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done));
      PetscCall(MatDenseRestoreArrayRead(T, &ptr));
      PetscCall(MatDestroy(&T));
      /* replace A by its inflated BAIJ version */
      PetscCall(MatDestroy(&A));
      A = B;
    }
    /* reconvert back to SeqAIJ before converting to the desired type later;
       with -convert_aij, E is a separate copy, otherwise E aliases A (in-place) */
    if (!convert) E = A;
    PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E));
    PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE));
    /* loop over the requested matrix types (-type); "mkl"/"maij" substrings select special paths */
    for (PetscInt i = 0; i < ntype; ++i) {
      char *tmp;
      PetscInt *ia_ptr, *ja_ptr, k;
      PetscScalar *a_ptr;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
      struct matrix_descr descr;
      sparse_matrix_t spr;
      descr.type = SPARSE_MATRIX_TYPE_GENERAL;
      descr.diag = SPARSE_DIAG_NON_UNIT;
#endif
      if (convert) {
        /* each iteration creates a fresh A from E, so drop the previous one */
        PetscCall(MatDestroy(&A));
      }
      PetscCall(PetscStrstr(type[i], "mkl", &tmp));
      if (tmp) {
        size_t mlen, tlen;
        char base[256];

        mkl = PETSC_TRUE;
        /* strip the trailing "mkl" suffix to obtain the base PETSc type
           (e.g. "baijmkl" -> "baij"); the MKL handle is built manually below */
        PetscCall(PetscStrlen(tmp, &mlen));
        PetscCall(PetscStrlen(type[i], &tlen));
        PetscCall(PetscStrncpy(base, type[i], tlen - mlen + 1));
        PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
      } else {
        mkl = PETSC_FALSE;
        PetscCall(PetscStrstr(type[i], "maij", &tmp));
        if (!tmp) {
          PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
        } else {
          /* MAIJ path: keep A as AIJ and wrap it with MatCreateMAIJ() at benchmark time */
          PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A));
          maij = PETSC_TRUE;
        }
      }
      PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
      if (mkl) {
        /* build an MKL inspector-executor handle (spr) sharing A's values but with
           copied index arrays, since the index base may need to be shifted */
        const PetscInt *Ai, *Aj;
        PetscInt An;
        PetscBool done;

        PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, ""));
        PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented");
        PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
        /* for (S)BAIJ, request block compressed row indices (compressed = PETSC_TRUE) */
        PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
        PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes");
        PetscCall(PetscMalloc1(An + 1, &ia_ptr));
        PetscCall(PetscMalloc1(Ai[An], &ja_ptr));
        if (flg) { /* SeqAIJ */
          for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k];
          for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k];
          PetscCall(MatSeqAIJGetArray(A, &a_ptr));
          PetscCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
        } else {
          PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg));
          if (flg) {
            for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
            for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
            PetscCall(MatSeqBAIJGetArray(A, &a_ptr));
            PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
          } else {
            PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg));
            if (flg) {
              for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
              for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */
              PetscCall(MatSeqSBAIJGetArray(A, &a_ptr));
              PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr));
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
              /* PETSc SBAIJ stores the upper triangle, so tell MKL the matrix is symmetric upper */
              descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
              descr.mode = SPARSE_FILL_MODE_UPPER;
              descr.diag = SPARSE_DIAG_NON_UNIT;
#endif
            }
          }
        }
        /* restore with the same compressed flag used in MatGetRowIJ() above */
        PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
        PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done));
      }

      PetscCall(MatViewFromOptions(A, NULL, "-A_view"));

      /* innermost loop: one timed benchmark per number of right-hand-side columns N[k] */
      for (k = 0; k < nN; ++k) {
        MatType Atype, Ctype;
        PetscInt AM, AN, CM, CN, t;
#if defined(PETSC_USE_LOG)
        PetscLogStage stage, tstage;
        char stage_s[256];
#endif

        /* C: random dense input, D: dense output, both (bs*M) x N[k] */
        PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C));
        PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D));
        PetscCall(MatSetRandom(C, NULL));
        if (cuda) { /* convert to GPU if needed */
          PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C));
          PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D));
        }
        if (mkl) {
          /* give MKL hints for the upcoming 1 + trial identical products, then optimize */
          if (N[k] > 1) PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial));
          else PetscCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial));
          PetscCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE));
          PetscCallMKLSparse(mkl_sparse_optimize, (spr));
        }
        PetscCall(MatGetType(A, &Atype));
        PetscCall(MatGetType(C, &Ctype));
        PetscCall(MatGetSize(A, &AM, &AN));
        PetscCall(MatGetSize(C, &CM, &CN));

#if defined(PETSC_USE_LOG)
        /* one log stage per (type, bs, N) combination so -log_view separates the timings */
        if (!maij || N[k] > 1) {
          PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
          PetscCall(PetscLogStageRegister(stage_s, &stage));
        }
        if (trans && N[k] > 1) {
          PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k]));
          PetscCall(PetscLogStageRegister(stage_s, &tstage));
        }
#endif
        /* A*B */
        if (N[k] > 1) {
          if (!maij) {
            /* symbolic phase done once outside the timed loop */
            PetscCall(MatProductCreateWithMat(A, C, NULL, D));
            PetscCall(MatProductSetType(D, MATPRODUCT_AB));
            PetscCall(MatProductSetFromOptions(D));
            PetscCall(MatProductSymbolic(D));
          }

          if (!mkl) {
            if (!maij) {
              /* warm-up call before the timed loop */
              PetscCall(MatProductNumeric(D));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN));
              PetscCall(PetscLogStagePush(stage));
              for (t = 0; t < trial; ++t) {
                PetscCall(MatProductNumeric(D));
              }
              PetscCall(PetscLogStagePop());
            } else {
              /* MAIJ path: view the N[k]-column dense matrices as long vectors and
                 benchmark MatMult with the MAIJ wrapper; the dense local matrices are
                 transposed in place so that the N[k] values of a row are contiguous,
                 and transposed back once the raw arrays have been restored */
              Mat E, Ct, Dt;
              Vec cC, cD;
              const PetscScalar *c_ptr;
              PetscScalar *d_ptr;
              PetscCall(MatCreateMAIJ(A, N[k], &E));
              PetscCall(MatDenseGetLocalMatrix(C, &Ct));
              PetscCall(MatDenseGetLocalMatrix(D, &Dt));
              PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
              PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
              PetscCall(MatDenseGetArrayRead(Ct, &c_ptr));
              PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr));
              PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC));
              PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD));
              /* warm-up call before the timed loop */
              PetscCall(MatMult(E, cC, cD));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1));
              PetscCall(PetscLogStagePush(stage));
              for (t = 0; t < trial; ++t) {
                PetscCall(MatMult(E, cC, cD));
              }
              PetscCall(PetscLogStagePop());
              PetscCall(VecDestroy(&cD));
              PetscCall(VecDestroy(&cC));
              PetscCall(MatDestroy(&E));
              PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr));
              PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr));
              PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct));
              PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt));
            }
          } else {
            /* MKL path: D = A*C through mkl_sparse_d_mm on the raw dense arrays */
            const PetscScalar *c_ptr;
            PetscScalar *d_ptr;

            PetscCall(MatDenseGetArrayRead(C, &c_ptr));
            PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
            /* warm-up call before the timed loop */
            PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
            PetscCall(PetscLogStagePush(stage));
            for (t = 0; t < trial; ++t) {
              PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
            }
            PetscCall(PetscLogStagePop());
            PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
            PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
          }
        } else if (maij) {
          /* MAIJ with a single column is pointless, skip this N */
          PetscCall(MatDestroy(&C));
          PetscCall(MatDestroy(&D));
          continue;
        } else if (!mkl) {
          /* N == 1, plain PETSc: benchmark MatMult on the first (only) columns of C and D */
          Vec cC, cD;

          PetscCall(MatDenseGetColumnVecRead(C, 0, &cC));
          PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD));
          /* warm-up call before the timed loop */
          PetscCall(MatMult(A, cC, cD));
          PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
          PetscCall(PetscLogStagePush(stage));
          for (t = 0; t < trial; ++t) {
            PetscCall(MatMult(A, cC, cD));
          }
          PetscCall(PetscLogStagePop());
          PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC));
          PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD));
        } else {
          /* N == 1, MKL: benchmark mkl_sparse_d_mv on the raw dense arrays */
          const PetscScalar *c_ptr;
          PetscScalar *d_ptr;

          PetscCall(MatDenseGetArrayRead(C, &c_ptr));
          PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
          PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN));
          /* warm-up call before the timed loop */
          PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
          PetscCall(PetscLogStagePush(stage));
          for (t = 0; t < trial; ++t) {
            PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr));
          }
          PetscCall(PetscLogStagePop());
          PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
          PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
        }

        if (check) {
          /* verify D == A*C against a reference product; only reports, does not abort */
          PetscCall(MatMatMultEqual(A, C, D, 10, &flg));
          if (!flg) {
            MatType Dtype;

            PetscCall(MatGetType(D, &Dtype));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
          }
        }

        /* MKL implementation seems buggy for ABt */
        /* A*Bt */
        if (!mkl && trans && N[k] > 1) {
          PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, ""));
          if (flg) {
            /* C is transposed in place so that D = A*C^T has the right dimensions */
            PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C));
            PetscCall(MatGetType(C, &Ctype));
            if (!mkl) { /* always true here; kept to mirror the A*B structure above */
              PetscCall(MatProductCreateWithMat(A, C, NULL, D));
              PetscCall(MatProductSetType(D, MATPRODUCT_ABt));
              PetscCall(MatProductSetFromOptions(D));
              PetscCall(MatProductSymbolic(D));
              /* warm-up call before the timed loop */
              PetscCall(MatProductNumeric(D));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN));
              PetscCall(PetscLogStagePush(tstage));
              for (t = 0; t < trial; ++t) {
                PetscCall(MatProductNumeric(D));
              }
              PetscCall(PetscLogStagePop());
            } else {
              /* dead branch under the current !mkl guard; would time a row-major
                 mkl_sparse_d_mm as the ABt equivalent */
              const PetscScalar *c_ptr;
              PetscScalar *d_ptr;

              PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial));
              PetscCallMKLSparse(mkl_sparse_optimize, (spr));
              PetscCall(MatDenseGetArrayRead(C, &c_ptr));
              PetscCall(MatDenseGetArrayWrite(D, &d_ptr));
              PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN));
              PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
              PetscCall(PetscLogStagePush(stage));
              for (t = 0; t < trial; ++t) {
                PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM));
              }
              PetscCall(PetscLogStagePop());
              PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr));
              PetscCall(MatDenseRestoreArrayRead(C, &c_ptr));
            }
          }
        }

        /* flg still holds the AIJ-type test from the ABt block, so this only runs
           when the ABt product above was actually benchmarked */
        if (!mkl && trans && N[k] > 1 && flg && check) {
          PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg));
          if (!flg) {
            MatType Dtype;
            PetscCall(MatGetType(D, &Dtype));
            PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]));
          }
        }
        PetscCall(MatDestroy(&C));
        PetscCall(MatDestroy(&D));
      }
      if (mkl) {
        /* release the MKL handle and the index copies created for it */
        PetscCallMKLSparse(mkl_sparse_destroy, (spr));
        PetscCall(PetscFree(ia_ptr));
        PetscCall(PetscFree(ja_ptr));
      }
      if (cuda && i != ntype - 1) {
        PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n"));
        break;
      }
    }
    /* E aliases A when -convert_aij is false, in which case a single destroy suffices */
    if (E != A) PetscCall(MatDestroy(&E));
    PetscCall(MatDestroy(&A));
  }
  for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m]));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST
   build:
      requires: double !complex !defined(PETSC_USE_64BIT_INDICES)

   testset:
      nsize: 1
      filter: sed "/Benchmarking/d"
      args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output}
      test:
         suffix: basic
         args: -type aij,sbaij,baij
         output_file: output/ex237.out
      test:
         suffix: maij
         args: -type aij,maij
         output_file: output/ex237.out
      test:
         suffix: cuda
         requires: cuda
         args: -type aij,aijcusparse
         output_file: output/ex237.out
      test:
         suffix: mkl
         requires: mkl_sparse_optimize
         args: -type aij,aijmkl,baijmkl,sbaijmkl
         output_file: output/ex237.out

TEST*/