1 static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; 2 3 /* 4 See the paper below for more information 5 6 "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", 7 P. Jolivet, J. E. Roman, and S. Zampini (2020). 8 */ 9 10 #include <petsc.h> 11 12 #if defined(PETSC_HAVE_MKL) 13 #include <mkl.h> 14 #define PetscStackCallMKLSparse(func, args) do { \ 15 sparse_status_t __ierr; \ 16 PetscStackPush(#func); \ 17 __ierr = func args; \ 18 PetscStackPop; \ 19 if (__ierr != SPARSE_STATUS_SUCCESS) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ 20 } while (0) 21 #else 22 #define PetscStackCallMKLSparse(func, args) do { \ 23 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ 24 } while (0) 25 #endif 26 27 int main(int argc, char** argv) { 28 Mat A, C, D, E; 29 PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; 30 PetscViewer viewer; 31 PetscInt bs[10], N[8]; 32 char *type[10]; 33 PetscMPIInt size; 34 PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl; 35 char file[PETSC_MAX_PATH_LEN]; 36 PetscErrorCode ierr; 37 38 ierr = PetscInitialize(&argc, &argv, NULL, help);if (ierr) return ierr; 39 if (ierr) return ierr; 40 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 41 if (size != 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); 42 ierr = PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg);CHKERRQ(ierr); 43 if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); 44 ierr = PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL);CHKERRQ(ierr); 45 ierr = PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg);CHKERRQ(ierr); 46 if (!flg) { 47 nbs = 1; 48 bs[0] = 1; 49 } 50 ierr = PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg);CHKERRQ(ierr); 51 if (!flg) { 52 nN = 8; 53 N[0] = 1; N[1] = 2; N[2] = 4; N[3] = 8; 54 N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128; 55 } 56 ierr = PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg);CHKERRQ(ierr); 57 if (!flg) { 58 ntype = 1; 59 ierr = PetscStrallocpy(MATSEQAIJ, &type[0]);CHKERRQ(ierr); 60 } 61 ierr = PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL);CHKERRQ(ierr); 62 ierr = PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL);CHKERRQ(ierr); 63 ierr = PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL);CHKERRQ(ierr); 64 for (PetscInt j = 0; j < nbs; ++j) { 65 ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer);CHKERRQ(ierr); 66 ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr); 67 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 68 ierr = MatLoad(A, viewer);CHKERRQ(ierr); 69 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 70 ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); 71 if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); 72 ierr = MatGetSize(A, &m, &M);CHKERRQ(ierr); 73 if (m == M) { 74 Mat oA; 75 ierr = MatTranspose(A, MAT_INITIAL_MATRIX, &oA);CHKERRQ(ierr); 76 ierr = MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 77 ierr = MatDestroy(&oA);CHKERRQ(ierr); 78 } 79 ierr = MatGetLocalSize(A, &m, NULL);CHKERRQ(ierr); 80 ierr = MatGetSize(A, &M, NULL);CHKERRQ(ierr); 81 if (bs[j] > 1) { 82 Mat T, Tt, B; 83 const PetscScalar *ptr; 84 PetscScalar *val, *Aa; 85 const PetscInt *Ai, *Aj; 86 PetscInt An, i, k; 87 PetscBool done; 88 89 ierr = MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T);CHKERRQ(ierr); 90 ierr = MatSetRandom(T, NULL);CHKERRQ(ierr); 91 ierr = MatTranspose(T, MAT_INITIAL_MATRIX, &Tt);CHKERRQ(ierr); 92 ierr = MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN);CHKERRQ(ierr); 93 ierr = MatDestroy(&Tt);CHKERRQ(ierr); 94 ierr = MatDenseGetArrayRead(T, &ptr);CHKERRQ(ierr); 95 ierr = MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 96 if (!done || An != m) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 97 ierr = MatSeqAIJGetArray(A, &Aa);CHKERRQ(ierr); 98 ierr = MatCreate(PETSC_COMM_WORLD, &B);CHKERRQ(ierr); 99 ierr = MatSetType(B, MATSEQBAIJ);CHKERRQ(ierr); 100 ierr = MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr); 101 ierr = PetscMalloc1(Ai[An] * bs[j] * bs[j],&val);CHKERRQ(ierr); 102 for (i = 0; i < Ai[An]; ++i) 103 for (k = 0; k < bs[j] * bs[j]; ++k) 104 val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; 105 ierr = MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE);CHKERRQ(ierr); 106 ierr = MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val);CHKERRQ(ierr); 107 ierr = PetscFree(val);CHKERRQ(ierr); 108 ierr = MatSeqAIJRestoreArray(A, &Aa);CHKERRQ(ierr); 109 ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 110 ierr = MatDenseRestoreArrayRead(T, &ptr);CHKERRQ(ierr); 111 ierr = MatDestroy(&T);CHKERRQ(ierr); 112 ierr = MatDestroy(&A);CHKERRQ(ierr); 113 A = B; 114 } 115 /* reconvert back to SeqAIJ before converting to the desired type later */ 116 if (!convert) E = A; 117 ierr = MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E);CHKERRQ(ierr); 118 ierr = MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE);CHKERRQ(ierr); 119 for (PetscInt i = 0; i < ntype; ++i) { 120 char *tmp; 121 PetscInt *ia_ptr, *ja_ptr, k; 122 PetscScalar *a_ptr; 123 #if defined(PETSC_HAVE_MKL) 124 struct matrix_descr descr; 125 sparse_matrix_t spr; 126 descr.type = SPARSE_MATRIX_TYPE_GENERAL; 127 descr.diag = SPARSE_DIAG_NON_UNIT; 128 #endif 129 if (convert) { 130 ierr = MatDestroy(&A);CHKERRQ(ierr); 131 } 132 ierr = PetscStrstr(type[i], "mkl", &tmp);CHKERRQ(ierr); 133 if (tmp) { 134 size_t mlen, tlen; 135 char base[256]; 136 137 mkl = PETSC_TRUE; 138 ierr = PetscStrlen(tmp, &mlen);CHKERRQ(ierr); 139 ierr = PetscStrlen(type[i], &tlen);CHKERRQ(ierr); 140 ierr = PetscStrncpy(base, type[i], tlen-mlen + 1);CHKERRQ(ierr); 141 ierr = MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 142 } else { 143 mkl = PETSC_FALSE; 144 ierr = PetscStrstr(type[i], "maij", &tmp);CHKERRQ(ierr); 145 if (!tmp) { 146 ierr = MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 147 } else { 148 ierr = MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); 149 maij = PETSC_TRUE; 150 } 151 } 152 ierr = PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");CHKERRQ(ierr); 153 if (mkl) { 154 const PetscInt *Ai, *Aj; 155 PetscInt An; 156 PetscBool done; 157 158 ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "");CHKERRQ(ierr); 159 if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); 160 ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); 161 ierr = MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 162 if (!done) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); 163 ierr = PetscMalloc1(An + 1,&ia_ptr);CHKERRQ(ierr); 164 ierr = PetscMalloc1(Ai[An],&ja_ptr);CHKERRQ(ierr); 165 if (flg) { /* SeqAIJ */ 166 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; 167 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; 168 ierr = MatSeqAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 169 PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 170 } else { 171 ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg);CHKERRQ(ierr); 172 if (flg) { 173 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 174 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 175 ierr = MatSeqBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 176 PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 177 } else { 178 ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg);CHKERRQ(ierr); 179 if (flg) { 180 for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 181 for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ 182 ierr = MatSeqSBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); 183 PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); 184 #if defined(PETSC_HAVE_MKL) 185 descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 186 descr.mode = SPARSE_FILL_MODE_UPPER; 187 descr.diag = SPARSE_DIAG_NON_UNIT; 188 #endif 189 } 190 } 191 } 192 ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); 193 ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); 194 } 195 196 ierr = MatViewFromOptions(A, NULL, "-A_view");CHKERRQ(ierr); 197 198 for (k = 0; k < nN; ++k) { 199 MatType Atype, Ctype; 200 PetscInt AM, AN, CM, CN, t; 201 PetscLogStage stage, tstage; 202 char stage_s[256]; 203 204 ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C);CHKERRQ(ierr); 205 ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D);CHKERRQ(ierr); 206 ierr = MatSetRandom(C, NULL);CHKERRQ(ierr); 207 if (cuda) { /* convert to GPU if needed */ 208 ierr = MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); 209 ierr = MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D);CHKERRQ(ierr); 210 } 211 if (mkl) { 212 if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); 213 else PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); 214 PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); 215 PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 216 } 217 ierr = MatGetType(A, &Atype);CHKERRQ(ierr); 218 ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); 219 ierr = MatGetSize(A, &AM, &AN);CHKERRQ(ierr); 220 ierr = MatGetSize(C, &CM, &CN);CHKERRQ(ierr); 221 222 if (!maij || N[k] > 1) { 223 ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); 224 ierr = PetscLogStageRegister(stage_s, &stage);CHKERRQ(ierr); 225 } 226 if (trans && N[k] > 1) { 227 ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); 228 ierr = PetscLogStageRegister(stage_s, &tstage);CHKERRQ(ierr); 229 } 230 231 /* A*B */ 232 if (N[k] > 1) { 233 if (!maij) { 234 ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); 235 ierr = MatProductSetType(D, MATPRODUCT_AB);CHKERRQ(ierr); 236 ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); 237 ierr = MatProductSymbolic(D);CHKERRQ(ierr); 238 } 239 240 if (!mkl) { 241 if (!maij) { 242 ierr = MatProductNumeric(D);CHKERRQ(ierr); 243 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and B %s %Dx%D\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN); 244 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 245 for (t = 0; t < trial; ++t) { 246 ierr = MatProductNumeric(D);CHKERRQ(ierr); 247 } 248 ierr = PetscLogStagePop();CHKERRQ(ierr); 249 } else { 250 Mat E, Ct, Dt; 251 Vec cC, cD; 252 const PetscScalar *c_ptr; 253 PetscScalar *d_ptr; 254 ierr = MatCreateMAIJ(A, N[k], &E);CHKERRQ(ierr); 255 ierr = MatDenseGetLocalMatrix(C, &Ct);CHKERRQ(ierr); 256 ierr = MatDenseGetLocalMatrix(D, &Dt);CHKERRQ(ierr); 257 ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); 258 ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); 259 ierr = MatDenseGetArrayRead(Ct, &c_ptr);CHKERRQ(ierr); 260 ierr = MatDenseGetArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); 261 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC);CHKERRQ(ierr); 262 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD);CHKERRQ(ierr); 263 ierr = MatMult(E, cC, cD);CHKERRQ(ierr); 264 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D and B %s %Dx%D\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1); 265 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 266 for (t = 0; t < trial; ++t) { 267 ierr = MatMult(E, cC, cD);CHKERRQ(ierr); 268 } 269 ierr = PetscLogStagePop();CHKERRQ(ierr); 270 ierr = VecDestroy(&cD);CHKERRQ(ierr); 271 ierr = VecDestroy(&cC);CHKERRQ(ierr); 272 ierr = MatDestroy(&E);CHKERRQ(ierr); 273 ierr = MatDenseRestoreArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); 274 ierr = MatDenseRestoreArrayRead(Ct, &c_ptr);CHKERRQ(ierr); 275 ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); 276 ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); 277 } 278 } else { 279 const PetscScalar *c_ptr; 280 PetscScalar *d_ptr; 281 282 ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 283 ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 284 PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 285 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN); 286 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 287 for (t = 0; t < trial; ++t) { 288 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 289 } 290 ierr = PetscLogStagePop();CHKERRQ(ierr); 291 ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 292 ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 293 } 294 } else if (maij) { 295 ierr = MatDestroy(&C);CHKERRQ(ierr); 296 ierr = MatDestroy(&D);CHKERRQ(ierr); 297 continue; 298 } else if (!mkl) { 299 Vec cC, cD; 300 301 ierr = MatDenseGetColumnVecRead(C, 0, &cC);CHKERRQ(ierr); 302 ierr = MatDenseGetColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); 303 ierr = MatMult(A, cC, cD);CHKERRQ(ierr); 304 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D\n", Atype, AM, AN); 305 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 306 for (t = 0; t < trial; ++t) { 307 ierr = MatMult(A, cC, cD);CHKERRQ(ierr); 308 } 309 ierr = PetscLogStagePop();CHKERRQ(ierr); 310 ierr = MatDenseRestoreColumnVecRead(C, 0, &cC);CHKERRQ(ierr); 311 ierr = MatDenseRestoreColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); 312 } else { 313 const PetscScalar *c_ptr; 314 PetscScalar *d_ptr; 315 316 ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 317 ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 318 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %Dx%D\n", Atype, AM, AN); 319 PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 320 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 321 for (t = 0; t < trial; ++t) { 322 PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); 323 } 324 ierr = PetscLogStagePop();CHKERRQ(ierr); 325 ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 326 ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 327 } 328 329 if (check) { 330 ierr = MatMatMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); 331 if (!flg) { 332 MatType Dtype; 333 334 ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); 335 ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); 336 } 337 } 338 339 /* MKL implementation seems buggy for ABt */ 340 /* A*Bt */ 341 if (!mkl && trans && N[k] > 1) { 342 ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); 343 if (flg) { 344 ierr = MatTranspose(C, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); 345 ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); 346 if (!mkl) { 347 ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); 348 ierr = MatProductSetType(D, MATPRODUCT_ABt);CHKERRQ(ierr); 349 ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); 350 ierr = MatProductSymbolic(D);CHKERRQ(ierr); 351 ierr = MatProductNumeric(D);CHKERRQ(ierr); 352 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and Bt %s %Dx%D\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN); 353 ierr = PetscLogStagePush(tstage);CHKERRQ(ierr); 354 for (t = 0; t < trial; ++t) { 355 ierr = MatProductNumeric(D);CHKERRQ(ierr); 356 } 357 ierr = PetscLogStagePop();CHKERRQ(ierr); 358 } else { 359 const PetscScalar *c_ptr; 360 PetscScalar *d_ptr; 361 362 PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); 363 PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); 364 ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); 365 ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); 366 ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN); 367 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 368 ierr = PetscLogStagePush(stage);CHKERRQ(ierr); 369 for (t = 0; t < trial; ++t) { 370 PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); 371 } 372 ierr = PetscLogStagePop();CHKERRQ(ierr); 373 ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); 374 ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); 375 } 376 } 377 } 378 379 if (!mkl && trans && N[k] > 1 && flg && check) { 380 ierr = MatMatTransposeMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); 381 if (!flg) { 382 MatType Dtype; 383 ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); 384 ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); 385 } 386 } 387 ierr = MatDestroy(&C);CHKERRQ(ierr); 388 ierr = MatDestroy(&D);CHKERRQ(ierr); 389 } 390 if (mkl) { 391 PetscStackCallMKLSparse(mkl_sparse_destroy, (spr)); 392 ierr = PetscFree(ia_ptr);CHKERRQ(ierr); 393 ierr = PetscFree(ja_ptr);CHKERRQ(ierr); 394 } 395 if (cuda && i != ntype - 1) { 396 ierr = PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n");CHKERRQ(ierr); 397 break; 398 } 399 } 400 if (E != A) { 401 ierr = MatDestroy(&E);CHKERRQ(ierr); 402 } 403 ierr = MatDestroy(&A);CHKERRQ(ierr); 404 } 405 for (m = 0; m < ntype; ++m) { 406 ierr = PetscFree(type[m]);CHKERRQ(ierr); 407 } 408 ierr = PetscFinalize(); 409 return 0; 410 } 411 412 /*TEST 413 414 testset: 415 nsize: 1 416 requires: double !complex !define(PETSC_USE_64BIT_INDICES) 417 filter: sed "/Benchmarking/d" 418 args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output} 419 test: 420 suffix: basic 421 args: -type aij,sbaij,baij 422 output_file: output/ex237.out 423 test: 424 suffix: maij 425 args: -type aij,maij 426 output_file: output/ex237.out 427 test: 428 suffix: cuda 429 requires: cuda 430 args: -type aij,aijcusparse 431 output_file: output/ex237.out 432 test: 433 suffix: mkl 434 requires: mkl 435 args: -type aij,aijmkl,baijmkl,sbaijmkl 436 output_file: output/ex237.out 437 438 TEST*/ 439