1 static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; 2 3 /* 4 See the paper below for more information 5 6 "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", 7 P. Jolivet, J. E. Roman, and S. Zampini (2020). 8 */ 9 10 #include <petsc.h> 11 12 #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 13 #include <mkl.h> 14 #define PetscStackCallMKLSparse(func, args) do { \ 15 sparse_status_t __ierr; \ 16 PetscStackPush(#func); \ 17 __ierr = func args; \ 18 PetscStackPop; \ 19 if (__ierr != SPARSE_STATUS_SUCCESS) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ 20 } while (0) 21 #else 22 #define PetscStackCallMKLSparse(func, args) do { \ 23 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ 24 } while (0) 25 #endif 26 27 int main(int argc, char** argv) 28 { 29 Mat A, C, D, E; 30 PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; 31 PetscViewer viewer; 32 PetscInt bs[10], N[8]; 33 char *type[10]; 34 PetscMPIInt size; 35 PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl; 36 char file[PETSC_MAX_PATH_LEN]; 37 PetscErrorCode ierr; 38 39 ierr = PetscInitialize(&argc, &argv, NULL, help);if (ierr) return ierr; 40 if (ierr) return ierr; 41 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRMPI(ierr); 42 if (size != 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); 43 ierr = PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg);CHKERRQ(ierr); 44 if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); 45 ierr = PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL);CHKERRQ(ierr); 46 ierr = PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg);CHKERRQ(ierr); 47 if (!flg) { 48 nbs = 1; 49 bs[0] = 1; 50 } 51 ierr = PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg);CHKERRQ(ierr); 52 if (!flg) { 53 nN = 8; 54 N[0] = 1; N[1] = 2; N[2] = 4; N[3] = 8; 55 N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128; 56 } 57 ierr = PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg);CHKERRQ(ierr); 58 if (!flg) { 59 ntype = 1; 60 ierr = PetscStrallocpy(MATSEQAIJ, &type[0]);CHKERRQ(ierr); 61 } 62 ierr = PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL);CHKERRQ(ierr); 63 ierr = PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL);CHKERRQ(ierr); 64 ierr = PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL);CHKERRQ(ierr); 65 for (PetscInt j = 0; j < nbs; ++j) { 66 ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer);CHKERRQ(ierr); 67 ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr); 68 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 69 ierr = MatLoad(A, viewer);CHKERRQ(ierr); 70 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 71 ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); 72 if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); 73 ierr = MatGetSize(A, &m, &M);CHKERRQ(ierr); 74 if (m == M) { 75 Mat oA; 76 ierr = MatTranspose(A, MAT_INITIAL_MATRIX, &oA);CHKERRQ(ierr); 77 ierr = MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 78 ierr = MatDestroy(&oA);CHKERRQ(ierr); 79 } 80 ierr = MatGetLocalSize(A, &m, NULL);CHKERRQ(ierr); 81 ierr = MatGetSize(A, &M, NULL);CHKERRQ(ierr); 82 if (bs[j] > 1) { 83 Mat T, Tt, B; 84 const PetscScalar *ptr; 85 PetscScalar *val, *Aa; 86 const PetscInt *Ai, *Aj; 87 PetscInt An, i, k; 88 PetscBool done; 89 90 ierr = MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T);CHKERRQ(ierr); 91 ierr = MatSetRandom(T, NULL);CHKERRQ(ierr); 92 ierr = MatTranspose(T, MAT_INITIAL_MATRIX, &Tt);CHKERRQ(ierr); 93 ierr = MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN);CHKERRQ(ierr); 94 ierr = MatDestroy(&Tt);CHKERRQ(ierr); 95 ierr = MatDenseGetArrayRead(T, &ptr);CHKERRQ(ierr); 96 ierr = MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 97 if (!done || An != m) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 98 ierr = MatSeqAIJGetArray(A, &Aa);CHKERRQ(ierr); 99 ierr = MatCreate(PETSC_COMM_WORLD, &B);CHKERRQ(ierr); 100 ierr = MatSetType(B, MATSEQBAIJ);CHKERRQ(ierr); 101 ierr = MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr); 102 ierr = PetscMalloc1(Ai[An] * bs[j] * bs[j], &val);CHKERRQ(ierr); 103 for (i = 0; i < Ai[An]; ++i) 104 for (k = 0; k < bs[j] * bs[j]; ++k) 105 val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; 106 ierr = MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE);CHKERRQ(ierr); 107 ierr = MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val);CHKERRQ(ierr); 108 ierr = PetscFree(val);CHKERRQ(ierr); 109 ierr = MatSeqAIJRestoreArray(A, &Aa);CHKERRQ(ierr); 110 ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 111 ierr = MatDenseRestoreArrayRead(T, &ptr);CHKERRQ(ierr); 112 ierr = MatDestroy(&T);CHKERRQ(ierr); 113 ierr = MatDestroy(&A);CHKERRQ(ierr); 114 A = B; 115 } 116 /* reconvert back to SeqAIJ before converting to the desired type later */ 117 if (!convert) E = A; 118 ierr = MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E);CHKERRQ(ierr); 119 ierr = MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE);CHKERRQ(ierr); 120 for (PetscInt i = 0; i < ntype; ++i) { 121 char *tmp; 122 PetscInt *ia_ptr, *ja_ptr, k; 123 PetscScalar *a_ptr; 124 #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 125 struct matrix_descr descr; 126 sparse_matrix_t spr; 127 descr.type = SPARSE_MATRIX_TYPE_GENERAL; 128 descr.diag = SPARSE_DIAG_NON_UNIT; 129 #endif 130 if (convert) { 131 ierr = MatDestroy(&A);CHKERRQ(ierr); 132 } 133 ierr = PetscStrstr(type[i], "mkl", &tmp);CHKERRQ(ierr); 134 if (tmp) { 135 size_t mlen, tlen; 136 char base[256]; 137 138 mkl = PETSC_TRUE; 139 ierr = PetscStrlen(tmp, &mlen);CHKERRQ(ierr); 140 ierr = PetscStrlen(type[i], &tlen);CHKERRQ(ierr); 141 ierr = PetscStrncpy(base, type[i], tlen-mlen + 1);CHKERRQ(ierr); 142 ierr = MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 143 } else { 144 mkl = PETSC_FALSE; 145 ierr = PetscStrstr(type[i], "maij", &tmp);CHKERRQ(ierr); 146 if (!tmp) { 147 ierr = MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 148 } else { 149 ierr = MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 150 maij = PETSC_TRUE; 151 } 152 } 153 ierr = PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");CHKERRQ(ierr); 154 if (mkl) { 155 const PetscInt *Ai, *Aj; 156 PetscInt An; 157 PetscBool done; 158 159 ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "");CHKERRQ(ierr); 160 if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); 161 ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); 162 ierr = MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 163 if (!done) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 164 ierr = PetscMalloc1(An + 1, &ia_ptr);CHKERRQ(ierr); 165 ierr = PetscMalloc1(Ai[An], &ja_ptr);CHKERRQ(ierr); 166 if (flg) { /* SeqAIJ */ 167 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; 168 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; 169 ierr = MatSeqAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 170 PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 171 } else { 172 ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg);CHKERRQ(ierr); 173 if (flg) { 174 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 175 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 176 ierr = MatSeqBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 177 PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 178 } else { 179 ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg);CHKERRQ(ierr); 180 if (flg) { 181 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 182 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 183 ierr = MatSeqSBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 184 PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 185 #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 186 descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 187 descr.mode = SPARSE_FILL_MODE_UPPER; 188 descr.diag = SPARSE_DIAG_NON_UNIT; 189 #endif 190 } 191 } 192 } 193 ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); 194 ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 195 } 196 197 ierr = MatViewFromOptions(A, NULL, "-A_view");CHKERRQ(ierr); 198 199 for (k = 0; k < nN; ++k) { 200 MatType Atype, Ctype; 201 PetscInt AM, AN, CM, CN, t; 202 #if defined(PETSC_USE_LOG) 203 PetscLogStage stage, tstage; 204 char stage_s[256]; 205 #endif 206 207 ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C);CHKERRQ(ierr); 208 ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D);CHKERRQ(ierr); 209 ierr = MatSetRandom(C, NULL);CHKERRQ(ierr); 210 if (cuda) { /* convert to GPU if needed */ 211 ierr = MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); 212 ierr = MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D);CHKERRQ(ierr); 213 } 214 if (mkl) { 215 if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); 216 else PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); 217 PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); 218 PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 219 } 220 ierr = MatGetType(A, &Atype);CHKERRQ(ierr); 221 ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); 222 ierr = MatGetSize(A, &AM, &AN);CHKERRQ(ierr); 223 ierr = MatGetSize(C, &CM, &CN);CHKERRQ(ierr); 224 225 #if defined(PETSC_USE_LOG) 226 if (!maij || N[k] > 1) { 227 ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); 228 ierr = PetscLogStageRegister(stage_s, &stage);CHKERRQ(ierr); 229 } 230 if (trans && N[k] > 1) { 231 ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); 232 ierr = PetscLogStageRegister(stage_s, &tstage);CHKERRQ(ierr); 233 } 234 #endif 235 /* A*B */ 236 if (N[k] > 1) { 237 if (!maij) { 238 ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); 239 ierr = MatProductSetType(D, MATPRODUCT_AB);CHKERRQ(ierr); 240 ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); 241 ierr = MatProductSymbolic(D);CHKERRQ(ierr); 242 } 243 244 if (!mkl) { 245 if (!maij) { 246 ierr = MatProductNumeric(D);CHKERRQ(ierr); 247 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and B %s %Dx%D\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); 248 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 249 for (t = 0; t < trial; ++t) { 250 ierr = MatProductNumeric(D);CHKERRQ(ierr); 251 } 252 ierr = PetscLogStagePop();CHKERRQ(ierr); 253 } else { 254 Mat E, Ct, Dt; 255 Vec cC, cD; 256 const PetscScalar *c_ptr; 257 PetscScalar *d_ptr; 258 ierr = MatCreateMAIJ(A, N[k], &E);CHKERRQ(ierr); 259 ierr = MatDenseGetLocalMatrix(C, &Ct);CHKERRQ(ierr); 260 ierr = MatDenseGetLocalMatrix(D, &Dt);CHKERRQ(ierr); 261 ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); 262 ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); 263 ierr = MatDenseGetArrayRead(Ct, &c_ptr);CHKERRQ(ierr); 264 ierr = MatDenseGetArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); 265 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC);CHKERRQ(ierr); 266 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD);CHKERRQ(ierr); 267 ierr = MatMult(E, cC, cD);CHKERRQ(ierr); 268 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D and B %s %Dx%D\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1);CHKERRQ(ierr); 269 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 270 for (t = 0; t < trial; ++t) { 271 ierr = MatMult(E, cC, cD);CHKERRQ(ierr); 272 } 273 ierr = PetscLogStagePop();CHKERRQ(ierr); 274 ierr = VecDestroy(&cD);CHKERRQ(ierr); 275 ierr = VecDestroy(&cC);CHKERRQ(ierr); 276 ierr = MatDestroy(&E);CHKERRQ(ierr); 277 ierr = MatDenseRestoreArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); 278 ierr = MatDenseRestoreArrayRead(Ct, &c_ptr);CHKERRQ(ierr); 279 ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); 280 ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); 281 } 282 } else { 283 const PetscScalar *c_ptr; 284 PetscScalar *d_ptr; 285 286 ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 287 ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 288 PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 289 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); 290 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 291 for (t = 0; t < trial; ++t) { 292 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 293 } 294 ierr = PetscLogStagePop();CHKERRQ(ierr); 295 ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 296 ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 297 } 298 } else if (maij) { 299 ierr = MatDestroy(&C);CHKERRQ(ierr); 300 ierr = MatDestroy(&D);CHKERRQ(ierr); 301 continue; 302 } else if (!mkl) { 303 Vec cC, cD; 304 305 ierr = MatDenseGetColumnVecRead(C, 0, &cC);CHKERRQ(ierr); 306 ierr = MatDenseGetColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); 307 ierr = MatMult(A, cC, cD);CHKERRQ(ierr); 308 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D\n", Atype, AM, AN);CHKERRQ(ierr); 309 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 310 for (t = 0; t < trial; ++t) { 311 ierr = MatMult(A, cC, cD);CHKERRQ(ierr); 312 } 313 ierr = PetscLogStagePop();CHKERRQ(ierr); 314 ierr = MatDenseRestoreColumnVecRead(C, 0, &cC);CHKERRQ(ierr); 315 ierr = MatDenseRestoreColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); 316 } else { 317 const PetscScalar *c_ptr; 318 PetscScalar *d_ptr; 319 320 ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 321 ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 322 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %Dx%D\n", Atype, AM, AN);CHKERRQ(ierr); 323 PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 324 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 325 for (t = 0; t < trial; ++t) { 326 PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 327 } 328 ierr = PetscLogStagePop();CHKERRQ(ierr); 329 ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 330 ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 331 } 332 333 if (check) { 334 ierr = MatMatMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); 335 if (!flg) { 336 MatType Dtype; 337 338 ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); 339 ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); 340 } 341 } 342 343 /* MKL implementation seems buggy for ABt */ 344 /* A*Bt */ 345 if (!mkl && trans && N[k] > 1) { 346 ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); 347 if (flg) { 348 ierr = MatTranspose(C, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); 349 ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); 350 if (!mkl) { 351 ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); 352 ierr = MatProductSetType(D, MATPRODUCT_ABt);CHKERRQ(ierr); 353 ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); 354 ierr = MatProductSymbolic(D);CHKERRQ(ierr); 355 ierr = MatProductNumeric(D);CHKERRQ(ierr); 356 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and Bt %s %Dx%D\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); 357 ierr = PetscLogStagePush(tstage);CHKERRQ(ierr); 358 for (t = 0; t < trial; ++t) { 359 ierr = MatProductNumeric(D);CHKERRQ(ierr); 360 } 361 ierr = PetscLogStagePop();CHKERRQ(ierr); 362 } else { 363 const PetscScalar *c_ptr; 364 PetscScalar *d_ptr; 365 366 PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); 367 PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 368 ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 369 ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 370 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); 371 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 372 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 373 for (t = 0; t < trial; ++t) { 374 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 375 } 376 ierr = PetscLogStagePop();CHKERRQ(ierr); 377 ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 378 ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 379 } 380 } 381 } 382 383 if (!mkl && trans && N[k] > 1 && flg && check) { 384 ierr = MatMatTransposeMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); 385 if (!flg) { 386 MatType Dtype; 387 ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); 388 ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); 389 } 390 } 391 ierr = MatDestroy(&C);CHKERRQ(ierr); 392 ierr = MatDestroy(&D);CHKERRQ(ierr); 393 } 394 if (mkl) { 395 PetscStackCallMKLSparse(mkl_sparse_destroy, (spr)); 396 ierr = PetscFree(ia_ptr);CHKERRQ(ierr); 397 ierr = PetscFree(ja_ptr);CHKERRQ(ierr); 398 } 399 if (cuda && i != ntype - 1) { 400 ierr = PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n");CHKERRQ(ierr); 401 break; 402 } 403 } 404 if (E != A) { 405 ierr = MatDestroy(&E);CHKERRQ(ierr); 406 } 407 ierr = MatDestroy(&A);CHKERRQ(ierr); 408 } 409 for (m = 0; m < ntype; ++m) { 410 ierr = PetscFree(type[m]);CHKERRQ(ierr); 411 } 412 ierr = PetscFinalize(); 413 return 0; 414 } 415 416 /*TEST 417 build: 418 requires: double !complex !defined(PETSC_USE_64BIT_INDICES) 419 420 testset: 421 nsize: 1 422 filter: sed "/Benchmarking/d" 423 args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output} 424 test: 425 suffix: basic 426 args: -type aij,sbaij,baij 427 output_file: output/ex237.out 428 test: 429 suffix: maij 430 args: -type aij,maij 431 output_file: output/ex237.out 432 test: 433 suffix: cuda 434 requires: cuda 435 args: -type aij,aijcusparse 436 output_file: output/ex237.out 437 test: 438 suffix: mkl 439 requires: mkl 440 args: -type aij,aijmkl,baijmkl,sbaijmkl 441 output_file: output/ex237.out 442 443 TEST*/ 444