1 static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; 2 3 /* 4 See the paper below for more information 5 6 "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", 7 P. Jolivet, J. E. Roman, and S. Zampini (2020). 8 */ 9 10 #include <petsc.h> 11 12 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 13 #include <mkl.h> 14 #define PetscCallMKLSparse(func, args) \ 15 do { \ 16 sparse_status_t __ierr; \ 17 PetscStackPushExternal(#func); \ 18 __ierr = func args; \ 19 PetscStackPop; \ 20 PetscCheck(__ierr == SPARSE_STATUS_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ 21 } while (0) 22 #else 23 #define PetscCallMKLSparse(func, args) \ 24 do { \ 25 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ 26 } while (0) 27 #endif 28 29 int main(int argc, char **argv) 30 { 31 Mat A, C, D, E; 32 PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; 33 PetscViewer viewer; 34 PetscInt bs[10], N[8]; 35 char *type[10]; 36 PetscMPIInt size; 37 PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, abt = PETSC_FALSE, atb = PETSC_FALSE, convert = PETSC_FALSE, mkl; 38 char file[PETSC_MAX_PATH_LEN]; 39 40 PetscFunctionBeginUser; 41 PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 42 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 43 PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); 44 PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg)); 45 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); 46 PetscCall(PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL)); 47 PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg)); 48 if (!flg) { 49 nbs = 1; 50 bs[0] = 1; 51 } 52 PetscCall(PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg)); 53 if (!flg) { 54 nN = 8; 55 N[0] = 1; 56 N[1] = 2; 57 N[2] = 4; 58 N[3] = 8; 59 N[4] = 16; 60 N[5] = 32; 61 N[6] = 64; 62 N[7] = 128; 63 } 64 PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg)); 65 if (!flg) { 66 ntype = 1; 67 PetscCall(PetscStrallocpy(MATSEQAIJ, &type[0])); 68 } 69 PetscCall(PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL)); 70 PetscCall(PetscOptionsGetBool(NULL, NULL, "-ABt", &abt, NULL)); 71 PetscCall(PetscOptionsGetBool(NULL, NULL, "-AtB", &atb, NULL)); 72 PetscCall(PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL)); 73 for (PetscInt j = 0; j < nbs; ++j) { 74 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer)); 75 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 76 PetscCall(MatSetFromOptions(A)); 77 PetscCall(MatLoad(A, viewer)); 78 PetscCall(PetscViewerDestroy(&viewer)); 79 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 80 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); 81 PetscCall(MatGetSize(A, &m, &M)); 82 if (m == M) { 83 Mat oA; 84 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &oA)); 85 PetscCall(MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN)); 86 PetscCall(MatDestroy(&oA)); 87 } 88 PetscCall(MatGetLocalSize(A, &m, NULL)); 89 PetscCall(MatGetSize(A, &M, NULL)); 90 if (bs[j] > 1) { 91 Mat T, Tt, B; 92 const PetscScalar *ptr; 93 PetscScalar *val, *Aa; 94 const PetscInt *Ai, *Aj; 95 PetscInt An, i, k; 96 PetscBool done; 97 98 PetscCall(MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T)); 99 PetscCall(MatSetRandom(T, NULL)); 100 PetscCall(MatTranspose(T, MAT_INITIAL_MATRIX, &Tt)); 101 PetscCall(MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN)); 102 PetscCall(MatDestroy(&Tt)); 103 PetscCall(MatDenseGetArrayRead(T, &ptr)); 104 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 105 PetscCheck(done && An == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 106 PetscCall(MatSeqAIJGetArray(A, &Aa)); 107 PetscCall(MatCreate(PETSC_COMM_WORLD, &B)); 108 PetscCall(MatSetType(B, MATSEQBAIJ)); 109 PetscCall(MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE)); 110 PetscCall(PetscMalloc1(Ai[An] * bs[j] * bs[j], &val)); 111 for (i = 0; i < Ai[An]; ++i) 112 for (k = 0; k < bs[j] * bs[j]; ++k) val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; 113 PetscCall(MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE)); 114 PetscCall(MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val)); 115 PetscCall(PetscFree(val)); 116 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 117 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done)); 118 PetscCall(MatDenseRestoreArrayRead(T, &ptr)); 119 PetscCall(MatDestroy(&T)); 120 PetscCall(MatDestroy(&A)); 121 A = B; 122 } 123 /* reconvert back to SeqAIJ before converting to the desired type later */ 124 if (!convert) E = A; 125 PetscCall(MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E)); 126 PetscCall(MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE)); 127 for (PetscInt i = 0; i < ntype; ++i) { 128 char *tmp = NULL; 129 PetscInt *ia_ptr, *ja_ptr, k; 130 PetscScalar *a_ptr; 131 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 132 struct matrix_descr descr; 133 sparse_matrix_t spr; 134 descr.type = SPARSE_MATRIX_TYPE_GENERAL; 135 descr.diag = SPARSE_DIAG_NON_UNIT; 136 #endif 137 if (convert) PetscCall(MatDestroy(&A)); 138 PetscCall(PetscStrstr(type[i], "mkl", &tmp)); 139 if (tmp) { 140 size_t mlen, tlen; 141 char base[256]; 142 143 mkl = PETSC_TRUE; 144 PetscCall(PetscStrlen(tmp, &mlen)); 145 PetscCall(PetscStrlen(type[i], &tlen)); 146 PetscCall(PetscStrncpy(base, type[i], tlen - mlen + 1)); 147 PetscCall(MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 148 } else { 149 mkl = PETSC_FALSE; 150 PetscCall(PetscStrstr(type[i], "maij", &tmp)); 151 if (!tmp) { 152 PetscCall(MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 153 } else { 154 PetscCall(MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A)); 155 maij = PETSC_TRUE; 156 } 157 } 158 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 159 if (mkl) { 160 const PetscInt *Ai, *Aj; 161 PetscInt An; 162 PetscBool done; 163 164 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "")); 165 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); 166 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 167 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 168 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 169 PetscCall(PetscMalloc1(An + 1, &ia_ptr)); 170 PetscCall(PetscMalloc1(Ai[An], &ja_ptr)); 171 if (flg) { /* SeqAIJ */ 172 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; 173 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; 174 PetscCall(MatSeqAIJGetArray(A, &a_ptr)); 175 PetscCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 176 } else { 177 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg)); 178 if (flg) { 179 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 180 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 181 PetscCall(MatSeqBAIJGetArray(A, &a_ptr)); 182 PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 183 } else { 184 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg)); 185 if (flg) { 186 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 187 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 188 PetscCall(MatSeqSBAIJGetArray(A, &a_ptr)); 189 PetscCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 190 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 191 descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 192 descr.mode = SPARSE_FILL_MODE_UPPER; 193 descr.diag = SPARSE_DIAG_NON_UNIT; 194 #endif 195 } 196 } 197 } 198 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 199 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done)); 200 } 201 202 PetscCall(MatViewFromOptions(A, NULL, "-A_view")); 203 204 for (k = 0; k < nN; ++k) { 205 MatType Atype, Ctype; 206 PetscInt AM, AN, CM, CN, t; 207 PetscBool set = PETSC_FALSE; 208 PetscLogStage ab_stage, abt_stage, atb_stage; 209 char stage_s[256]; 210 211 PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C)); 212 PetscCall(MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D)); 213 PetscCall(MatSetRandom(C, NULL)); 214 if (cuda) { /* convert to GPU if needed */ 215 PetscCall(MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C)); 216 PetscCall(MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D)); 217 } 218 if (mkl) { 219 if (N[k] > 1) PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); 220 else PetscCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); 221 PetscCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); 222 PetscCallMKLSparse(mkl_sparse_optimize, (spr)); 223 } 224 PetscCall(MatGetType(A, &Atype)); 225 PetscCall(MatGetType(C, &Ctype)); 226 PetscCall(MatGetSize(A, &AM, &AN)); 227 PetscCall(MatGetSize(C, &CM, &CN)); 228 229 if (!maij || N[k] > 1) { 230 PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "ab_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 231 PetscCall(PetscLogStageRegister(stage_s, &ab_stage)); 232 } 233 if (abt && N[k] > 1) { 234 PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "abt_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 235 PetscCall(PetscLogStageRegister(stage_s, &abt_stage)); 236 } 237 if (atb) { 238 PetscCall(PetscSNPrintf(stage_s, sizeof(stage_s), "atb_type_%s-bs_%" PetscInt_FMT "-N_%02d", type[i], bs[j], (int)N[k])); 239 PetscCall(PetscLogStageRegister(stage_s, &atb_stage)); 240 } 241 /* A*B */ 242 if (N[k] > 1) { 243 if (!maij) { 244 PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 245 PetscCall(MatProductSetType(D, MATPRODUCT_AB)); 246 PetscCall(MatProductSetFromOptions(D)); 247 PetscCall(MatProductSymbolic(D)); 248 } 249 250 if (!mkl) { 251 if (!maij) { 252 PetscCall(MatProductNumeric(D)); 253 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN)); 254 PetscCall(PetscLogStagePush(ab_stage)); 255 for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D)); 256 PetscCall(PetscLogStagePop()); 257 } else { 258 Mat E, Ct, Dt; 259 Vec cC, cD; 260 const PetscScalar *c_ptr; 261 PetscScalar *d_ptr; 262 PetscCall(MatCreateMAIJ(A, N[k], &E)); 263 PetscCall(MatDenseGetLocalMatrix(C, &Ct)); 264 PetscCall(MatDenseGetLocalMatrix(D, &Dt)); 265 PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 266 PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 267 PetscCall(MatDenseGetArrayRead(Ct, &c_ptr)); 268 PetscCall(MatDenseGetArrayWrite(Dt, &d_ptr)); 269 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC)); 270 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD)); 271 PetscCall(MatMult(E, cC, cD)); 272 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1)); 273 PetscCall(PetscLogStagePush(ab_stage)); 274 for (t = 0; t < trial; ++t) PetscCall(MatMult(E, cC, cD)); 275 PetscCall(PetscLogStagePop()); 276 PetscCall(VecDestroy(&cD)); 277 PetscCall(VecDestroy(&cC)); 278 PetscCall(MatDestroy(&E)); 279 PetscCall(MatDenseRestoreArrayWrite(Dt, &d_ptr)); 280 PetscCall(MatDenseRestoreArrayRead(Ct, &c_ptr)); 281 PetscCall(MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct)); 282 PetscCall(MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt)); 283 } 284 } else { 285 const PetscScalar *c_ptr; 286 PetscScalar *d_ptr; 287 288 PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 289 PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 290 PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 291 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 292 PetscCall(PetscLogStagePush(ab_stage)); 293 for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 294 PetscCall(PetscLogStagePop()); 295 PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 296 PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 297 } 298 } else if (maij) { 299 PetscCall(MatDestroy(&C)); 300 PetscCall(MatDestroy(&D)); 301 continue; 302 } else if (!mkl) { 303 Vec cC, cD; 304 305 PetscCall(MatDenseGetColumnVecRead(C, 0, &cC)); 306 PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD)); 307 PetscCall(MatMult(A, cC, cD)); 308 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 309 PetscCall(PetscLogStagePush(ab_stage)); 310 for (t = 0; t < trial; ++t) PetscCall(MatMult(A, cC, cD)); 311 PetscCall(PetscLogStagePop()); 312 PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC)); 313 PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD)); 314 } else { 315 const PetscScalar *c_ptr; 316 PetscScalar *d_ptr; 317 318 PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 319 PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 320 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 321 PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 322 PetscCall(PetscLogStagePush(ab_stage)); 323 for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 324 PetscCall(PetscLogStagePop()); 325 PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 326 PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 327 } 328 329 if (check) { 330 PetscCall(MatMatMultEqual(A, C, D, 10, &flg)); 331 if (!flg) { 332 MatType Dtype; 333 334 PetscCall(MatGetType(D, &Dtype)); 335 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 336 } 337 } 338 339 /* MKL implementation seems buggy for ABt */ 340 /* A*Bt */ 341 if (!mkl && abt && N[k] > 1) { 342 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "")); 343 if (flg) { 344 PetscCall(MatTranspose(C, MAT_INPLACE_MATRIX, &C)); 345 PetscCall(MatGetType(C, &Ctype)); 346 if (!mkl) { 347 PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 348 PetscCall(MatProductSetType(D, MATPRODUCT_ABt)); 349 PetscCall(MatProductSetFromOptions(D)); 350 PetscCall(MatProductSymbolic(D)); 351 PetscCall(MatProductNumeric(D)); 352 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and Bt %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN)); 353 PetscCall(PetscLogStagePush(abt_stage)); 354 for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D)); 355 PetscCall(PetscLogStagePop()); 356 } else { 357 const PetscScalar *c_ptr; 358 PetscScalar *d_ptr; 359 360 PetscCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); 361 PetscCallMKLSparse(mkl_sparse_optimize, (spr)); 362 PetscCall(MatDenseGetArrayRead(C, &c_ptr)); 363 PetscCall(MatDenseGetArrayWrite(D, &d_ptr)); 364 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN, Ctype, CM, CN)); 365 PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 366 PetscCall(PetscLogStagePush(ab_stage)); 367 for (t = 0; t < trial; ++t) PetscCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 368 PetscCall(PetscLogStagePop()); 369 PetscCall(MatDenseRestoreArrayWrite(D, &d_ptr)); 370 PetscCall(MatDenseRestoreArrayRead(C, &c_ptr)); 371 } 372 } 373 } 374 375 if (!mkl && abt && N[k] > 1 && flg && check) { 376 PetscCall(MatMatTransposeMultEqual(A, C, D, 10, &flg)); 377 if (!flg) { 378 MatType Dtype; 379 PetscCall(MatGetType(D, &Dtype)); 380 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 381 } 382 } 383 384 /* At*B */ 385 if (!mkl && atb) { 386 PetscCall(MatIsSymmetricKnown(A, &set, &flg)); 387 set = (PetscBool)(set && flg); 388 PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_FALSE)); 389 if (N[k] > 1) { 390 PetscCall(MatProductCreateWithMat(A, C, NULL, D)); 391 PetscCall(MatProductSetType(D, MATPRODUCT_AtB)); 392 PetscCall(MatProductSetFromOptions(D)); 393 PetscCall(MatProductSymbolic(D)); 394 PetscCall(MatProductNumeric(D)); 395 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with At %s %" PetscInt_FMT "x%" PetscInt_FMT " and B %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", MatProductTypes[MATPRODUCT_AtB], Atype, AM, AN, Ctype, CM, CN)); 396 PetscCall(PetscLogStagePush(atb_stage)); 397 for (t = 0; t < trial; ++t) PetscCall(MatProductNumeric(D)); 398 PetscCall(PetscLogStagePop()); 399 } else { 400 Vec cC, cD; 401 402 PetscCall(MatDenseGetColumnVecRead(C, 0, &cC)); 403 PetscCall(MatDenseGetColumnVecWrite(D, 0, &cD)); 404 PetscCall(MatMultTranspose(A, cC, cD)); 405 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMultTranspose: with A %s %" PetscInt_FMT "x%" PetscInt_FMT "\n", Atype, AM, AN)); 406 PetscCall(PetscLogStagePush(atb_stage)); 407 for (t = 0; t < trial; ++t) PetscCall(MatMultTranspose(A, cC, cD)); 408 PetscCall(PetscLogStagePop()); 409 PetscCall(MatDenseRestoreColumnVecRead(C, 0, &cC)); 410 PetscCall(MatDenseRestoreColumnVecWrite(D, 0, &cD)); 411 } 412 } 413 414 if (!mkl && atb && N[k] > 1 && check) { 415 PetscCall(MatTransposeMatMultEqual(A, C, D, 10, &flg)); 416 if (!flg) { 417 MatType Dtype; 418 PetscCall(MatGetType(D, &Dtype)); 419 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %" PetscInt_FMT "\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k])); 420 } 421 } 422 if (set) PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); 423 PetscCall(MatDestroy(&C)); 424 PetscCall(MatDestroy(&D)); 425 } 426 if (mkl) { 427 PetscCallMKLSparse(mkl_sparse_destroy, (spr)); 428 PetscCall(PetscFree(ia_ptr)); 429 PetscCall(PetscFree(ja_ptr)); 430 } 431 if (cuda && i != ntype - 1) { 432 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n")); 433 break; 434 } 435 } 436 if (E != A) PetscCall(MatDestroy(&E)); 437 PetscCall(MatDestroy(&A)); 438 } 439 for (m = 0; m < ntype; ++m) PetscCall(PetscFree(type[m])); 440 PetscCall(PetscFinalize()); 441 return 0; 442 } 443 444 /*TEST 445 build: 446 requires: double !complex !defined(PETSC_USE_64BIT_INDICES) 447 448 testset: 449 nsize: 1 450 filter: sed "/Benchmarking/d" 451 args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -ABt -convert_aij {{false true}shared output} 452 output_file: output/empty.out 453 test: 454 suffix: basic 455 args: -type aij,sbaij,baij 456 test: 457 suffix: maij 458 args: -type aij,maij 459 test: 460 suffix: cuda 461 requires: cuda 462 args: -type aij,aijcusparse 463 test: 464 suffix: mkl 465 requires: mkl_sparse_optimize 466 args: -type aij,aijmkl,baijmkl,sbaijmkl 467 468 test: 469 nsize: 1 470 filter: sed "/Benchmarking/d" 471 args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3,4,5,6 -N 1,8 -check -AtB -type baij 472 output_file: output/empty.out 473 474 TEST*/ 475