1 static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; 2 3 /* 4 See the paper below for more information 5 6 "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", 7 P. Jolivet, J. E. Roman, and S. Zampini (2020). 8 */ 9 10 #include <petsc.h> 11 12 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 13 #include <mkl.h> 14 #define PetscStackCallMKLSparse(func, args) do { \ 15 sparse_status_t __ierr; \ 16 PetscStackPush(#func); \ 17 __ierr = func args; \ 18 PetscStackPop; \ 19 PetscCheckFalse(__ierr != SPARSE_STATUS_SUCCESS,PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ 20 } while (0) 21 #else 22 #define PetscStackCallMKLSparse(func, args) do { \ 23 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ 24 } while (0) 25 #endif 26 27 int main(int argc, char** argv) 28 { 29 Mat A, C, D, E; 30 PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; 31 PetscViewer viewer; 32 PetscInt bs[10], N[8]; 33 char *type[10]; 34 PetscMPIInt size; 35 PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl; 36 char file[PETSC_MAX_PATH_LEN]; 37 PetscErrorCode ierr; 38 39 ierr = PetscInitialize(&argc, &argv, NULL, help);if (ierr) return ierr; 40 if (ierr) return ierr; 41 CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 42 PetscCheckFalse(size != 1,PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); 43 CHKERRQ(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg)); 44 PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); 45 CHKERRQ(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL)); 46 CHKERRQ(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg)); 47 if (!flg) { 48 nbs = 1; 49 bs[0] = 1; 50 } 51 CHKERRQ(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg)); 52 if (!flg) { 53 nN = 8; 54 N[0] = 1; N[1] = 2; N[2] = 4; N[3] = 8; 55 N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128; 56 } 57 CHKERRQ(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg)); 58 if (!flg) { 59 ntype = 1; 60 CHKERRQ(PetscStrallocpy(MATSEQAIJ, &type[0])); 61 } 62 CHKERRQ(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL)); 63 CHKERRQ(PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL)); 64 CHKERRQ(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL)); 65 for (PetscInt j = 0; j < nbs; ++j) { 66 CHKERRQ(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer)); 67 CHKERRQ(MatCreate(PETSC_COMM_WORLD, &A)); 68 CHKERRQ(MatSetFromOptions(A)); 69 CHKERRQ(MatLoad(A, viewer)); 70 CHKERRQ(PetscViewerDestroy(&viewer)); 71 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 72 PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); 73 CHKERRQ(MatGetSize(A, &m, &M)); 74 if (m == M) { 75 Mat oA; 76 CHKERRQ(MatTranspose(A, MAT_INITIAL_MATRIX, &oA)); 77 CHKERRQ(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN)); 78 CHKERRQ(MatDestroy(&oA)); 79 } 80 CHKERRQ(MatGetLocalSize(A, &m, NULL)); 81 CHKERRQ(MatGetSize(A, &M, NULL)); 82 if (bs[j] > 1) { 83 Mat T, Tt, B; 84 const PetscScalar *ptr; 85 PetscScalar *val, *Aa; 86 const PetscInt *Ai, *Aj; 87 PetscInt An, i, k; 88 PetscBool done; 89 90 CHKERRQ(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T)); 91 CHKERRQ(MatSetRandom(T, NULL)); 92 CHKERRQ(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt)); 93 CHKERRQ(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN)); 94 CHKERRQ(MatDestroy(&Tt)); 95 CHKERRQ(MatDenseGetArrayRead(T, &ptr)); 96 CHKERRQ(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 97 PetscCheckFalse(!done || An != m,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 98 CHKERRQ(MatSeqAIJGetArray(A, &Aa)); 99 CHKERRQ(MatCreate(PETSC_COMM_WORLD, &B)); 100 CHKERRQ(MatSetType(B, MATSEQBAIJ)); 101 CHKERRQ(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE)); 102 CHKERRQ(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val)); 103 for (i = 0; i < Ai[An]; ++i) 104 for (k = 0; k < bs[j] * bs[j]; ++k) 105 val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; 106 CHKERRQ(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE)); 107 CHKERRQ(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val)); 108 CHKERRQ(PetscFree(val)); 109 CHKERRQ(MatSeqAIJRestoreArray(A, &Aa)); 110 CHKERRQ(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 111 CHKERRQ(MatDenseRestoreArrayRead(T, &ptr)); 112 CHKERRQ(MatDestroy(&T)); 113 CHKERRQ(MatDestroy(&A)); 114 A = B; 115 } 116 /* reconvert back to SeqAIJ before converting to the desired type later */ 117 if (!convert) E = A; 118 CHKERRQ(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E)); 119 CHKERRQ(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE)); 120 for (PetscInt i = 0; i < ntype; ++i) { 121 char *tmp; 122 PetscInt *ia_ptr, *ja_ptr, k; 123 PetscScalar *a_ptr; 124 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 125 struct matrix_descr descr; 126 sparse_matrix_t spr; 127 descr.type = SPARSE_MATRIX_TYPE_GENERAL; 128 descr.diag = SPARSE_DIAG_NON_UNIT; 129 #endif 130 if (convert) { 131 CHKERRQ(MatDestroy(&A)); 132 } 133 CHKERRQ(PetscStrstr(type[i], "mkl", &tmp)); 134 if (tmp) { 135 size_t mlen, tlen; 136 char base[256]; 137 138 mkl = PETSC_TRUE; 139 CHKERRQ(PetscStrlen(tmp, &mlen)); 140 CHKERRQ(PetscStrlen(type[i], &tlen)); 141 CHKERRQ(PetscStrncpy(base, type[i], tlen-mlen + 1)); 142 CHKERRQ(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 143 } else { 144 mkl = PETSC_FALSE; 145 CHKERRQ(PetscStrstr(type[i], "maij", &tmp)); 146 if (!tmp) { 147 CHKERRQ(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 148 } else { 149 CHKERRQ(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 150 maij = PETSC_TRUE; 151 } 152 } 153 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 154 if (mkl) { 155 const PetscInt *Ai, *Aj; 156 PetscInt An; 157 PetscBool done; 158 159 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "")); 160 PetscCheck(flg,PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); 161 CHKERRQ(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 162 CHKERRQ(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 163 PetscCheck(done,PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 164 CHKERRQ(PetscMalloc1(An + 1, &ia_ptr)); 165 CHKERRQ(PetscMalloc1(Ai[An], &ja_ptr)); 166 if (flg) { /* SeqAIJ */ 167 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; 168 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; 169 CHKERRQ(MatSeqAIJGetArray(A, &a_ptr)); 170 PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 171 } else { 172 CHKERRQ(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg)); 173 if (flg) { 174 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 175 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 176 CHKERRQ(MatSeqBAIJGetArray(A, &a_ptr)); 177 PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 178 } else { 179 CHKERRQ(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg)); 180 if (flg) { 181 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 182 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 183 CHKERRQ(MatSeqSBAIJGetArray(A, &a_ptr)); 184 PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 185 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 186 descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 187 descr.mode = SPARSE_FILL_MODE_UPPER; 188 descr.diag = SPARSE_DIAG_NON_UNIT; 189 #endif 190 } 191 } 192 } 193 CHKERRQ(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 194 CHKERRQ(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 195 } 196 197 CHKERRQ(MatViewFromOptions(A, NULL, "-A_view")); 198 199 for (k = 0; k < nN; ++k) { 200 MatType Atype, Ctype; 201 PetscInt AM, AN, CM, CN, t; 202 #if defined(PETSC_USE_LOG) 203 PetscLogStage stage, tstage; 204 char stage_s[256]; 205 #endif 206 207 CHKERRQ(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C)); 208 CHKERRQ(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D)); 209 CHKERRQ(MatSetRandom(C, NULL)); 210 if (cuda) { /* convert to GPU if needed */ 211 CHKERRQ(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C)); 212 CHKERRQ(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D)); 213 } 214 if (mkl) { 215 if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); 216 else PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); 217 PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); 218 PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 219 } 220 CHKERRQ(MatGetType(A, &Atype)); 221 CHKERRQ(MatGetType(C, &Ctype)); 222 CHKERRQ(MatGetSize(A, &AM, &AN)); 223 CHKERRQ(MatGetSize(C, &CM, &CN)); 224 225 #if defined(PETSC_USE_LOG) 226 if (!maij || N[k] > 1) { 227 CHKERRQ(PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 228 CHKERRQ(PetscLogStageRegister(stage_s, &stage)); 229 } 230 if (trans && N[k] > 1) { 231 CHKERRQ(PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 232 CHKERRQ(PetscLogStageRegister(stage_s, &tstage)); 233 } 234 #endif 235 /* A*B */ 236 if (N[k] > 1) { 237 if (!maij) { 238 CHKERRQ(MatProductCreateWithMat(A, C, NULL, D)); 239 CHKERRQ(MatProductSetType(D, MATPRODUCT_AB)); 240 CHKERRQ(MatProductSetFromOptions(D)); 241 CHKERRQ(MatProductSymbolic(D)); 242 } 243 244 if (!mkl) { 245 if (!maij) { 246 CHKERRQ(MatProductNumeric(D)); 247 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN)); 248 CHKERRQ(PetscLogStagePush(stage)); 249 for (t = 0; t < trial; ++t) { 250 CHKERRQ(MatProductNumeric(D)); 251 } 252 CHKERRQ(PetscLogStagePop()); 253 } else { 254 Mat E, Ct, Dt; 255 Vec cC, cD; 256 const PetscScalar *c_ptr; 257 PetscScalar *d_ptr; 258 CHKERRQ(MatCreateMAIJ(A, N[k], &E)); 259 CHKERRQ(MatDenseGetLocalMatrix(C, &Ct)); 260 CHKERRQ(MatDenseGetLocalMatrix(D, &Dt)); 261 CHKERRQ(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 262 CHKERRQ(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 263 CHKERRQ(MatDenseGetArrayRead(Ct, &c_ptr)); 264 CHKERRQ(MatDenseGetArrayWrite(Dt, &d_ptr)); 265 CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC)); 266 CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD)); 267 CHKERRQ(MatMult(E, cC, cD)); 268 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1)); 269 CHKERRQ(PetscLogStagePush(stage)); 270 for (t = 0; t < trial; ++t) { 271 CHKERRQ(MatMult(E, cC, cD)); 272 } 273 CHKERRQ(PetscLogStagePop()); 274 CHKERRQ(VecDestroy(&cD)); 275 CHKERRQ(VecDestroy(&cC)); 276 CHKERRQ(MatDestroy(&E)); 277 CHKERRQ(MatDenseRestoreArrayWrite(Dt, &d_ptr)); 278 CHKERRQ(MatDenseRestoreArrayRead(Ct, &c_ptr)); 279 CHKERRQ(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 280 CHKERRQ(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 281 } 282 } else { 283 const PetscScalar *c_ptr; 284 PetscScalar *d_ptr; 285 286 CHKERRQ(MatDenseGetArrayRead(C, &c_ptr)); 287 CHKERRQ(MatDenseGetArrayWrite(D, &d_ptr)); 288 PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 289 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 290 CHKERRQ(PetscLogStagePush(stage)); 291 for (t = 0; t < trial; ++t) { 292 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 293 } 294 CHKERRQ(PetscLogStagePop()); 295 CHKERRQ(MatDenseRestoreArrayWrite(D, &d_ptr)); 296 CHKERRQ(MatDenseRestoreArrayRead(C, &c_ptr)); 297 } 298 } else if (maij) { 299 CHKERRQ(MatDestroy(&C)); 300 CHKERRQ(MatDestroy(&D)); 301 continue; 302 } else if (!mkl) { 303 Vec cC, cD; 304 305 CHKERRQ(MatDenseGetColumnVecRead(C, 0, &cC)); 306 CHKERRQ(MatDenseGetColumnVecWrite(D, 0, &cD)); 307 CHKERRQ(MatMult(A, cC, cD)); 308 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 309 CHKERRQ(PetscLogStagePush(stage)); 310 for (t = 0; t < trial; ++t) { 311 CHKERRQ(MatMult(A, cC, cD)); 312 } 313 CHKERRQ(PetscLogStagePop()); 314 CHKERRQ(MatDenseRestoreColumnVecRead(C, 0, &cC)); 315 CHKERRQ(MatDenseRestoreColumnVecWrite(D, 0, &cD)); 316 } else { 317 const PetscScalar *c_ptr; 318 PetscScalar *d_ptr; 319 320 CHKERRQ(MatDenseGetArrayRead(C, &c_ptr)); 321 CHKERRQ(MatDenseGetArrayWrite(D, &d_ptr)); 322 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 323 PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 324 CHKERRQ(PetscLogStagePush(stage)); 325 for (t = 0; t < trial; ++t) { 326 PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 327 } 328 CHKERRQ(PetscLogStagePop()); 329 CHKERRQ(MatDenseRestoreArrayWrite(D, &d_ptr)); 330 CHKERRQ(MatDenseRestoreArrayRead(C, &c_ptr)); 331 } 332 333 if (check) { 334 CHKERRQ(MatMatMultEqual(A, C, D, 10, &flg)); 335 if (!flg) { 336 MatType Dtype; 337 338 CHKERRQ(MatGetType(D, &Dtype)); 339 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 340 } 341 } 342 343 /* MKL implementation seems buggy for ABt */ 344 /* A*Bt */ 345 if (!mkl && trans && N[k] > 1) { 346 CHKERRQ(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 347 if (flg) { 348 CHKERRQ(MatTranspose(C, MAT_INPLACE_MATRIX, &C)); 349 CHKERRQ(MatGetType(C, &Ctype)); 350 if (!mkl) { 351 CHKERRQ(MatProductCreateWithMat(A, C, NULL, D)); 352 CHKERRQ(MatProductSetType(D, MATPRODUCT_ABt)); 353 CHKERRQ(MatProductSetFromOptions(D)); 354 CHKERRQ(MatProductSymbolic(D)); 355 CHKERRQ(MatProductNumeric(D)); 356 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN)); 357 CHKERRQ(PetscLogStagePush(tstage)); 358 for (t = 0; t < trial; ++t) { 359 CHKERRQ(MatProductNumeric(D)); 360 } 361 CHKERRQ(PetscLogStagePop()); 362 } else { 363 const PetscScalar *c_ptr; 364 PetscScalar *d_ptr; 365 366 PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); 367 PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 368 CHKERRQ(MatDenseGetArrayRead(C, &c_ptr)); 369 CHKERRQ(MatDenseGetArrayWrite(D, &d_ptr)); 370 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 371 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 372 CHKERRQ(PetscLogStagePush(stage)); 373 for (t = 0; t < trial; ++t) { 374 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 375 } 376 CHKERRQ(PetscLogStagePop()); 377 CHKERRQ(MatDenseRestoreArrayWrite(D, &d_ptr)); 378 CHKERRQ(MatDenseRestoreArrayRead(C, &c_ptr)); 379 } 380 } 381 } 382 383 if (!mkl && trans && N[k] > 1 && flg && check) { 384 CHKERRQ(MatMatTransposeMultEqual(A, C, D, 10, &flg)); 385 if (!flg) { 386 MatType Dtype; 387 CHKERRQ(MatGetType(D, &Dtype)); 388 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 389 } 390 } 391 CHKERRQ(MatDestroy(&C)); 392 CHKERRQ(MatDestroy(&D)); 393 } 394 if (mkl) { 395 PetscStackCallMKLSparse(mkl_sparse_destroy, (spr)); 396 CHKERRQ(PetscFree(ia_ptr)); 397 CHKERRQ(PetscFree(ja_ptr)); 398 } 399 if (cuda && i != ntype - 1) { 400 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n")); 401 break; 402 } 403 } 404 if (E != A) { 405 CHKERRQ(MatDestroy(&E)); 406 } 407 CHKERRQ(MatDestroy(&A)); 408 } 409 for (m = 0; m < ntype; ++m) { 410 CHKERRQ(PetscFree(type[m])); 411 } 412 ierr = PetscFinalize(); 413 return 0; 414 } 415 416 /*TEST 417 build: 418 requires: double !complex !defined(PETSC_USE_64BIT_INDICES) 419 420 testset: 421 nsize: 1 422 filter: sed "/Benchmarking/d" 423 args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output} 424 test: 425 suffix: basic 426 args: -type aij,sbaij,baij 427 output_file: output/ex237.out 428 test: 429 suffix: maij 430 args: -type aij,maij 431 output_file: output/ex237.out 432 test: 433 suffix: cuda 434 requires: cuda 435 args: -type aij,aijcusparse 436 output_file: output/ex237.out 437 test: 438 suffix: mkl 439 requires: mkl_sparse_optimize 440 args: -type aij,aijmkl,baijmkl,sbaijmkl 441 output_file: output/ex237.out 442 443 TEST*/ 444