static char help[] = "Mini-app to benchmark matrix--matrix multiplication\n\n"; /* See the paper below for more information "KSPHPDDM and PCHPDDM: Extending PETSc with Robust Overlapping Schwarz Preconditioners and Advanced Krylov Methods", P. Jolivet, J. E. Roman, and S. Zampini (2020). */ #include #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) #include #define PetscStackCallMKLSparse(func, args) do { \ sparse_status_t __ierr; \ PetscStackPush(#func); \ __ierr = func args; \ PetscStackPop; \ if (__ierr != SPARSE_STATUS_SUCCESS) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in %s(): error code %d", #func, (int)__ierr); \ } while (0) #else #define PetscStackCallMKLSparse(func, args) do { \ SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No MKL support"); \ } while (0) #endif int main(int argc, char** argv) { Mat A, C, D, E; PetscInt nbs = 10, ntype = 10, nN = 8, m, M, trial = 5; PetscViewer viewer; PetscInt bs[10], N[8]; char *type[10]; PetscMPIInt size; PetscBool flg, cuda, maij = PETSC_FALSE, check = PETSC_FALSE, trans = PETSC_FALSE, convert = PETSC_FALSE, mkl; char file[PETSC_MAX_PATH_LEN]; PetscErrorCode ierr; ierr = PetscInitialize(&argc, &argv, NULL, help);if (ierr) return ierr; if (ierr) return ierr; ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRMPI(ierr); if (size != 1) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only"); ierr = PetscOptionsGetString(NULL, NULL, "-f", file, PETSC_MAX_PATH_LEN, &flg);CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f option"); ierr = PetscOptionsGetInt(NULL, NULL, "-trial", &trial, NULL);CHKERRQ(ierr); ierr = PetscOptionsGetIntArray(NULL, NULL, "-bs", bs, &nbs, &flg);CHKERRQ(ierr); if (!flg) { nbs = 1; bs[0] = 1; } ierr = PetscOptionsGetIntArray(NULL, NULL, "-N", N, &nN, &flg);CHKERRQ(ierr); if (!flg) { nN = 8; N[0] = 1; N[1] = 2; N[2] = 4; N[3] = 8; N[4] = 16; N[5] = 32; N[6] = 64; N[7] = 128; } ierr = PetscOptionsGetStringArray(NULL, NULL, "-type", type, &ntype, &flg);CHKERRQ(ierr); if (!flg) { ntype = 1; ierr = PetscStrallocpy(MATSEQAIJ, &type[0]);CHKERRQ(ierr); } ierr = PetscOptionsGetBool(NULL, NULL, "-check", &check, NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL, NULL, "-trans", &trans, NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL, NULL, "-convert_aij", &convert, NULL);CHKERRQ(ierr); for (PetscInt j = 0; j < nbs; ++j) { ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatLoad(A, viewer);CHKERRQ(ierr); ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate a MatAIJ input matrix"); ierr = MatGetSize(A, &m, &M);CHKERRQ(ierr); if (m == M) { Mat oA; ierr = MatTranspose(A, MAT_INITIAL_MATRIX, &oA);CHKERRQ(ierr); ierr = MatAXPY(A, 1.0, oA, DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatDestroy(&oA);CHKERRQ(ierr); } ierr = MatGetLocalSize(A, &m, NULL);CHKERRQ(ierr); ierr = MatGetSize(A, &M, NULL);CHKERRQ(ierr); if (bs[j] > 1) { Mat T, Tt, B; const PetscScalar *ptr; PetscScalar *val, *Aa; const PetscInt *Ai, *Aj; PetscInt An, i, k; PetscBool done; ierr = MatCreateDense(PETSC_COMM_SELF, bs[j], bs[j], bs[j], bs[j], NULL, &T);CHKERRQ(ierr); ierr = MatSetRandom(T, NULL);CHKERRQ(ierr); ierr = MatTranspose(T, MAT_INITIAL_MATRIX, &Tt);CHKERRQ(ierr); ierr = MatAXPY(T, 1.0, Tt, SAME_NONZERO_PATTERN);CHKERRQ(ierr); ierr = MatDestroy(&Tt);CHKERRQ(ierr); ierr = MatDenseGetArrayRead(T, &ptr);CHKERRQ(ierr); ierr = MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); if (!done || An != m) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); ierr = MatSeqAIJGetArray(A, &Aa);CHKERRQ(ierr); ierr = MatCreate(PETSC_COMM_WORLD, &B);CHKERRQ(ierr); ierr = MatSetType(B, MATSEQBAIJ);CHKERRQ(ierr); ierr = MatSetSizes(B, bs[j] * An, bs[j] * An, PETSC_DECIDE, PETSC_DECIDE);CHKERRQ(ierr); ierr = PetscMalloc1(Ai[An] * bs[j] * bs[j], &val);CHKERRQ(ierr); for (i = 0; i < Ai[An]; ++i) for (k = 0; k < bs[j] * bs[j]; ++k) val[i * bs[j] * bs[j] + k] = Aa[i] * ptr[k]; ierr = MatSetOption(B, MAT_ROW_ORIENTED, PETSC_FALSE);CHKERRQ(ierr); ierr = MatSeqBAIJSetPreallocationCSR(B, bs[j], Ai, Aj, val);CHKERRQ(ierr); ierr = PetscFree(val);CHKERRQ(ierr); ierr = MatSeqAIJRestoreArray(A, &Aa);CHKERRQ(ierr); ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); ierr = MatDenseRestoreArrayRead(T, &ptr);CHKERRQ(ierr); ierr = MatDestroy(&T);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); A = B; } /* reconvert back to SeqAIJ before converting to the desired type later */ if (!convert) E = A; ierr = MatConvert(A, MATSEQAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &E);CHKERRQ(ierr); ierr = MatSetOption(E, MAT_SYMMETRIC, PETSC_TRUE);CHKERRQ(ierr); for (PetscInt i = 0; i < ntype; ++i) { char *tmp; PetscInt *ia_ptr, *ja_ptr, k; PetscScalar *a_ptr; #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) struct matrix_descr descr; sparse_matrix_t spr; descr.type = SPARSE_MATRIX_TYPE_GENERAL; descr.diag = SPARSE_DIAG_NON_UNIT; #endif if (convert) { ierr = MatDestroy(&A);CHKERRQ(ierr); } ierr = PetscStrstr(type[i], "mkl", &tmp);CHKERRQ(ierr); if (tmp) { size_t mlen, tlen; char base[256]; mkl = PETSC_TRUE; ierr = PetscStrlen(tmp, &mlen);CHKERRQ(ierr); ierr = PetscStrlen(type[i], &tlen);CHKERRQ(ierr); ierr = PetscStrncpy(base, type[i], tlen-mlen + 1);CHKERRQ(ierr); ierr = MatConvert(E, base, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); } else { mkl = PETSC_FALSE; ierr = PetscStrstr(type[i], "maij", &tmp);CHKERRQ(ierr); if (!tmp) { ierr = MatConvert(E, type[i], convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); } else { ierr = MatConvert(E, MATAIJ, convert ? MAT_INITIAL_MATRIX : MAT_INPLACE_MATRIX, &A);CHKERRQ(ierr); maij = PETSC_TRUE; } } ierr = PetscObjectTypeCompareAny((PetscObject)A, &cuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "");CHKERRQ(ierr); if (mkl) { const PetscInt *Ai, *Aj; PetscInt An; PetscBool done; ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATSEQBAIJ, MATSEQSBAIJ, "");CHKERRQ(ierr); if (!flg) SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Not implemented"); ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); ierr = MatGetRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); if (!done) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Inconsistent sizes"); ierr = PetscMalloc1(An + 1, &ia_ptr);CHKERRQ(ierr); ierr = PetscMalloc1(Ai[An], &ja_ptr);CHKERRQ(ierr); if (flg) { /* SeqAIJ */ for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k]; for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k]; ierr = MatSeqAIJGetArray(A, &a_ptr);CHKERRQ(ierr); PetscStackCallMKLSparse(mkl_sparse_d_create_csr, (&spr, SPARSE_INDEX_BASE_ZERO, An, An, ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); } else { ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &flg);CHKERRQ(ierr); if (flg) { for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ ierr = MatSeqBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); } else { ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &flg);CHKERRQ(ierr); if (flg) { for (k = 0; k < An + 1; ++k) ia_ptr[k] = Ai[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ for (k = 0; k < Ai[An]; ++k) ja_ptr[k] = Aj[k] + 1; /* Fortran indexing to maximize cases covered by _mm routines */ ierr = MatSeqSBAIJGetArray(A, &a_ptr);CHKERRQ(ierr); PetscStackCallMKLSparse(mkl_sparse_d_create_bsr, (&spr, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, An, An, bs[j], ia_ptr, ia_ptr + 1, ja_ptr, a_ptr)); #if defined(PETSC_HAVE_MKL) && defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) descr.type = SPARSE_MATRIX_TYPE_SYMMETRIC; descr.mode = SPARSE_FILL_MODE_UPPER; descr.diag = SPARSE_DIAG_NON_UNIT; #endif } } } ierr = PetscObjectTypeCompare((PetscObject)A, MATSEQAIJ, &flg);CHKERRQ(ierr); ierr = MatRestoreRowIJ(A, 0, PETSC_FALSE, flg ? PETSC_FALSE : PETSC_TRUE, &An, &Ai, &Aj, &done);CHKERRQ(ierr); } ierr = MatViewFromOptions(A, NULL, "-A_view");CHKERRQ(ierr); for (k = 0; k < nN; ++k) { MatType Atype, Ctype; PetscInt AM, AN, CM, CN, t; #if defined(PETSC_USE_LOG) PetscLogStage stage, tstage; char stage_s[256]; #endif ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &C);CHKERRQ(ierr); ierr = MatCreateDense(PETSC_COMM_WORLD, bs[j] * m, PETSC_DECIDE, bs[j] * M, N[k], NULL, &D);CHKERRQ(ierr); ierr = MatSetRandom(C, NULL);CHKERRQ(ierr); if (cuda) { /* convert to GPU if needed */ ierr = MatConvert(C, MATDENSECUDA, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); ierr = MatConvert(D, MATDENSECUDA, MAT_INPLACE_MATRIX, &D);CHKERRQ(ierr); } if (mkl) { if (N[k] > 1) PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_COLUMN_MAJOR, N[k], 1 + trial)); else PetscStackCallMKLSparse(mkl_sparse_set_mv_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, 1 + trial)); PetscStackCallMKLSparse(mkl_sparse_set_memory_hint, (spr, SPARSE_MEMORY_AGGRESSIVE)); PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); } ierr = MatGetType(A, &Atype);CHKERRQ(ierr); ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); ierr = MatGetSize(A, &AM, &AN);CHKERRQ(ierr); ierr = MatGetSize(C, &CM, &CN);CHKERRQ(ierr); #if defined(PETSC_USE_LOG) if (!maij || N[k] > 1) { ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); ierr = PetscLogStageRegister(stage_s, &stage);CHKERRQ(ierr); } if (trans && N[k] > 1) { ierr = PetscSNPrintf(stage_s, sizeof(stage_s), "trans_type_%s-bs_%D-N_%02d", type[i], bs[j], (int)N[k]);CHKERRQ(ierr); ierr = PetscLogStageRegister(stage_s, &tstage);CHKERRQ(ierr); } #endif /* A*B */ if (N[k] > 1) { if (!maij) { ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); ierr = MatProductSetType(D, MATPRODUCT_AB);CHKERRQ(ierr); ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); ierr = MatProductSymbolic(D);CHKERRQ(ierr); } if (!mkl) { if (!maij) { ierr = MatProductNumeric(D);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and B %s %Dx%D\n", MatProductTypes[MATPRODUCT_AB], Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); ierr = PetscLogStagePush(stage);CHKERRQ(ierr); for (t = 0; t < trial; ++t) { ierr = MatProductNumeric(D);CHKERRQ(ierr); } ierr = PetscLogStagePop();CHKERRQ(ierr); } else { Mat E, Ct, Dt; Vec cC, cD; const PetscScalar *c_ptr; PetscScalar *d_ptr; ierr = MatCreateMAIJ(A, N[k], &E);CHKERRQ(ierr); ierr = MatDenseGetLocalMatrix(C, &Ct);CHKERRQ(ierr); ierr = MatDenseGetLocalMatrix(D, &Dt);CHKERRQ(ierr); ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); ierr = MatDenseGetArrayRead(Ct, &c_ptr);CHKERRQ(ierr); ierr = MatDenseGetArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, c_ptr, &cC);CHKERRQ(ierr); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, AM * N[k], PETSC_DECIDE, d_ptr, &cD);CHKERRQ(ierr); ierr = MatMult(E, cC, cD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D and B %s %Dx%D\n", MATMAIJ, AM, AN, VECMPI, AM * N[k], 1);CHKERRQ(ierr); ierr = PetscLogStagePush(stage);CHKERRQ(ierr); for (t = 0; t < trial; ++t) { ierr = MatMult(E, cC, cD);CHKERRQ(ierr); } ierr = PetscLogStagePop();CHKERRQ(ierr); ierr = VecDestroy(&cD);CHKERRQ(ierr); ierr = VecDestroy(&cC);CHKERRQ(ierr); ierr = MatDestroy(&E);CHKERRQ(ierr); ierr = MatDenseRestoreArrayWrite(Dt, &d_ptr);CHKERRQ(ierr); ierr = MatDenseRestoreArrayRead(Ct, &c_ptr);CHKERRQ(ierr); ierr = MatTranspose(Ct, MAT_INPLACE_MATRIX, &Ct);CHKERRQ(ierr); ierr = MatTranspose(Dt, MAT_INPLACE_MATRIX, &Dt);CHKERRQ(ierr); } } else { const PetscScalar *c_ptr; PetscScalar *d_ptr; ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); PetscStackCallMKLSparse(mkl_sparse_d_mm,(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (COLUMN_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); ierr = PetscLogStagePush(stage);CHKERRQ(ierr); for (t = 0; t < trial; ++t) { PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_COLUMN_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); } ierr = PetscLogStagePop();CHKERRQ(ierr); ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); } } else if (maij) { ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&D);CHKERRQ(ierr); continue; } else if (!mkl) { Vec cC, cD; ierr = MatDenseGetColumnVecRead(C, 0, &cC);CHKERRQ(ierr); ierr = MatDenseGetColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); ierr = MatMult(A, cC, cD);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatMult: with A %s %Dx%D\n", Atype, AM, AN);CHKERRQ(ierr); ierr = PetscLogStagePush(stage);CHKERRQ(ierr); for (t = 0; t < trial; ++t) { ierr = MatMult(A, cC, cD);CHKERRQ(ierr); } ierr = PetscLogStagePop();CHKERRQ(ierr); ierr = MatDenseRestoreColumnVecRead(C, 0, &cC);CHKERRQ(ierr); ierr = MatDenseRestoreColumnVecWrite(D, 0, &cD);CHKERRQ(ierr); } else { const PetscScalar *c_ptr; PetscScalar *d_ptr; ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mv: with A %s %Dx%D\n", Atype, AM, AN);CHKERRQ(ierr); PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); ierr = PetscLogStagePush(stage);CHKERRQ(ierr); for (t = 0; t < trial; ++t) { PetscStackCallMKLSparse(mkl_sparse_d_mv, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, c_ptr, 0.0, d_ptr)); } ierr = PetscLogStagePop();CHKERRQ(ierr); ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); } if (check) { ierr = MatMatMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); if (!flg) { MatType Dtype; ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); } } /* MKL implementation seems buggy for ABt */ /* A*Bt */ if (!mkl && trans && N[k] > 1) { ierr = PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJ, MATMPIAIJ, "");CHKERRQ(ierr); if (flg) { ierr = MatTranspose(C, MAT_INPLACE_MATRIX, &C);CHKERRQ(ierr); ierr = MatGetType(C, &Ctype);CHKERRQ(ierr); if (!mkl) { ierr = MatProductCreateWithMat(A, C, NULL, D);CHKERRQ(ierr); ierr = MatProductSetType(D, MATPRODUCT_ABt);CHKERRQ(ierr); ierr = MatProductSetFromOptions(D);CHKERRQ(ierr); ierr = MatProductSymbolic(D);CHKERRQ(ierr); ierr = MatProductNumeric(D);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking MatProduct %s: with A %s %Dx%D and Bt %s %Dx%D\n", MatProductTypes[MATPRODUCT_ABt], Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); ierr = PetscLogStagePush(tstage);CHKERRQ(ierr); for (t = 0; t < trial; ++t) { ierr = MatProductNumeric(D);CHKERRQ(ierr); } ierr = PetscLogStagePop();CHKERRQ(ierr); } else { const PetscScalar *c_ptr; PetscScalar *d_ptr; PetscStackCallMKLSparse(mkl_sparse_set_mm_hint, (spr, SPARSE_OPERATION_NON_TRANSPOSE, descr, SPARSE_LAYOUT_ROW_MAJOR, N[k], 1 + trial)); PetscStackCallMKLSparse(mkl_sparse_optimize, (spr)); ierr = MatDenseGetArrayRead(C, &c_ptr);CHKERRQ(ierr); ierr = MatDenseGetArrayWrite(D, &d_ptr);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Benchmarking mkl_sparse_d_mm (ROW_MAJOR): with A %s %Dx%D and B %s %Dx%D\n", Atype, AM, AN, Ctype, CM, CN);CHKERRQ(ierr); PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); ierr = PetscLogStagePush(stage);CHKERRQ(ierr); for (t = 0; t < trial; ++t) { PetscStackCallMKLSparse(mkl_sparse_d_mm, (SPARSE_OPERATION_NON_TRANSPOSE, 1.0, spr, descr, SPARSE_LAYOUT_ROW_MAJOR, c_ptr, CN, CM, 0.0, d_ptr, CM)); } ierr = PetscLogStagePop();CHKERRQ(ierr); ierr = MatDenseRestoreArrayWrite(D, &d_ptr);CHKERRQ(ierr); ierr = MatDenseRestoreArrayRead(C, &c_ptr);CHKERRQ(ierr); } } } if (!mkl && trans && N[k] > 1 && flg && check) { ierr = MatMatTransposeMultEqual(A, C, D, 10, &flg);CHKERRQ(ierr); if (!flg) { MatType Dtype; ierr = MatGetType(D, &Dtype);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD, "Error with A %s%s, C %s, D %s, Nk %D\n", Atype, mkl ? "mkl" : "", Ctype, Dtype, N[k]);CHKERRQ(ierr); } } ierr = MatDestroy(&C);CHKERRQ(ierr); ierr = MatDestroy(&D);CHKERRQ(ierr); } if (mkl) { PetscStackCallMKLSparse(mkl_sparse_destroy, (spr)); ierr = PetscFree(ia_ptr);CHKERRQ(ierr); ierr = PetscFree(ja_ptr);CHKERRQ(ierr); } if (cuda && i != ntype - 1) { ierr = PetscPrintf(PETSC_COMM_WORLD, "AIJCUSPARSE must be last, otherwise MatConvert() to another MatType is too slow\n");CHKERRQ(ierr); break; } } if (E != A) { ierr = MatDestroy(&E);CHKERRQ(ierr); } ierr = MatDestroy(&A);CHKERRQ(ierr); } for (m = 0; m < ntype; ++m) { ierr = PetscFree(type[m]);CHKERRQ(ierr); } ierr = PetscFinalize(); return 0; } /*TEST build: requires: double !complex !defined(PETSC_USE_64BIT_INDICES) testset: nsize: 1 filter: sed "/Benchmarking/d" args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/spd-real-int32-float64 -bs 1,2,3 -N 1,2,18 -check -trans -convert_aij {{false true}shared output} test: suffix: basic args: -type aij,sbaij,baij output_file: output/ex237.out test: suffix: maij args: -type aij,maij output_file: output/ex237.out test: suffix: cuda requires: cuda args: -type aij,aijcusparse output_file: output/ex237.out test: suffix: mkl requires: mkl args: -type aij,aijmkl,baijmkl,sbaijmkl output_file: output/ex237.out TEST*/