xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision c5e72636b2d9a321f94c0ae5541b656f747ad7ff) !
17072be85SIrina Sokolova /*
27072be85SIrina Sokolova   Defines basic operations for the MATSEQBAIJMKL matrix class.
37072be85SIrina Sokolova   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4da81f932SPierre Jolivet   wherever possible. If used MKL version is older than 11.3 PETSc default
57072be85SIrina Sokolova   code for sparse matrix operations is used.
67072be85SIrina Sokolova */
77072be85SIrina Sokolova 
87072be85SIrina Sokolova #include <../src/mat/impls/baij/seq/baij.h>
9*5d83a8b1SBarry Smith #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> /*I   "petscmat.h"   I*/
10bdfea6dbSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
11bdfea6dbSBarry Smith   #define MKL_ILP64
12bdfea6dbSBarry Smith #endif
13b9e7e5c1SBarry Smith #include <mkl_spblas.h>
147072be85SIrina Sokolova 
PetscSeqBAIJSupportsZeroBased(void)15d71ae5a4SJacob Faibussowitsch static PetscBool PetscSeqBAIJSupportsZeroBased(void)
16d71ae5a4SJacob Faibussowitsch {
17b9e7e5c1SBarry Smith   static PetscBool set = PETSC_FALSE, value;
18b9e7e5c1SBarry Smith   int              n   = 1, ia[1], ja[1];
19b9e7e5c1SBarry Smith   float            a[1];
20b9e7e5c1SBarry Smith   sparse_status_t  status;
21b9e7e5c1SBarry Smith   sparse_matrix_t  A;
227072be85SIrina Sokolova 
23b9e7e5c1SBarry Smith   if (!set) {
24bdfea6dbSBarry Smith     status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)n, (MKL_INT)n, (MKL_INT)n, (MKL_INT *)ia, (MKL_INT *)ia, (MKL_INT *)ja, a);
25b9e7e5c1SBarry Smith     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
26b9e7e5c1SBarry Smith     (void)mkl_sparse_destroy(A);
27b9e7e5c1SBarry Smith     set = PETSC_TRUE;
28b9e7e5c1SBarry Smith   }
29b9e7e5c1SBarry Smith   return value;
30b9e7e5c1SBarry Smith }
317072be85SIrina Sokolova 
327072be85SIrina Sokolova typedef struct {
337072be85SIrina Sokolova   PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
347072be85SIrina Sokolova   sparse_matrix_t     bsrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
357072be85SIrina Sokolova   struct matrix_descr descr;
367072be85SIrina Sokolova   PetscInt           *ai1;
377072be85SIrina Sokolova   PetscInt           *aj1;
387072be85SIrina Sokolova } Mat_SeqBAIJMKL;
397072be85SIrina Sokolova 
40b9e7e5c1SBarry Smith static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
417072be85SIrina Sokolova 
MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat * newmat)42d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
43d71ae5a4SJacob Faibussowitsch {
447072be85SIrina Sokolova   /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
457072be85SIrina Sokolova   /* so we will ignore 'MatType type'. */
467072be85SIrina Sokolova   Mat             B       = *newmat;
477072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
487072be85SIrina Sokolova 
497072be85SIrina Sokolova   PetscFunctionBegin;
509566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
517072be85SIrina Sokolova 
527072be85SIrina Sokolova   /* Reset the original function pointers. */
537072be85SIrina Sokolova   B->ops->duplicate        = MatDuplicate_SeqBAIJ;
547072be85SIrina Sokolova   B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
557072be85SIrina Sokolova   B->ops->destroy          = MatDestroy_SeqBAIJ;
567072be85SIrina Sokolova   B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
577072be85SIrina Sokolova   B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
587072be85SIrina Sokolova   B->ops->scale            = MatScale_SeqBAIJ;
597072be85SIrina Sokolova   B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
607072be85SIrina Sokolova   B->ops->axpy             = MatAXPY_SeqBAIJ;
617072be85SIrina Sokolova 
627072be85SIrina Sokolova   switch (A->rmap->bs) {
637072be85SIrina Sokolova   case 1:
647072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_1;
657072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_1;
667072be85SIrina Sokolova     break;
677072be85SIrina Sokolova   case 2:
687072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_2;
697072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_2;
707072be85SIrina Sokolova     break;
717072be85SIrina Sokolova   case 3:
727072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_3;
737072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_3;
747072be85SIrina Sokolova     break;
757072be85SIrina Sokolova   case 4:
767072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_4;
777072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_4;
787072be85SIrina Sokolova     break;
797072be85SIrina Sokolova   case 5:
807072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_5;
817072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_5;
827072be85SIrina Sokolova     break;
837072be85SIrina Sokolova   case 6:
847072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_6;
857072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_6;
867072be85SIrina Sokolova     break;
877072be85SIrina Sokolova   case 7:
887072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_7;
897072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_7;
907072be85SIrina Sokolova     break;
917072be85SIrina Sokolova   case 11:
927072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_11;
937072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_11;
947072be85SIrina Sokolova     break;
957072be85SIrina Sokolova   case 15:
967072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
977072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
987072be85SIrina Sokolova     break;
997072be85SIrina Sokolova   default:
1007072be85SIrina Sokolova     B->ops->mult    = MatMult_SeqBAIJ_N;
1017072be85SIrina Sokolova     B->ops->multadd = MatMultAdd_SeqBAIJ_N;
1027072be85SIrina Sokolova     break;
1037072be85SIrina Sokolova   }
1049566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL));
1057072be85SIrina Sokolova 
1067072be85SIrina Sokolova   /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
1077072be85SIrina Sokolova    * simply involves destroying the MKL sparse matrix handle and then freeing
1087072be85SIrina Sokolova    * the spptr pointer. */
1097072be85SIrina Sokolova   if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr;
1107072be85SIrina Sokolova 
111792fecdfSBarry Smith   if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
1129566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1139566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
1147072be85SIrina Sokolova 
1157072be85SIrina Sokolova   /* Change the type of B to MATSEQBAIJ. */
1169566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
1177072be85SIrina Sokolova 
1187072be85SIrina Sokolova   *newmat = B;
1193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1207072be85SIrina Sokolova }
1217072be85SIrina Sokolova 
MatDestroy_SeqBAIJMKL(Mat A)122d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
123d71ae5a4SJacob Faibussowitsch {
1247072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1257072be85SIrina Sokolova 
1267072be85SIrina Sokolova   PetscFunctionBegin;
1277072be85SIrina Sokolova   if (baijmkl) {
1287072be85SIrina Sokolova     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
129792fecdfSBarry Smith     if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
1309566063dSJacob Faibussowitsch     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1319566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
1327072be85SIrina Sokolova   }
1337072be85SIrina Sokolova 
1347072be85SIrina Sokolova   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
1357072be85SIrina Sokolova    * to destroy everything that remains. */
1369566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
1379566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqBAIJ(A));
1383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1397072be85SIrina Sokolova }
1407072be85SIrina Sokolova 
MatSeqBAIJMKL_create_mkl_handle(Mat A)141d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
142d71ae5a4SJacob Faibussowitsch {
1437072be85SIrina Sokolova   Mat_SeqBAIJ    *a       = (Mat_SeqBAIJ *)A->data;
1447072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
1450835cbf9SRichard Tran Mills   PetscInt        mbs, nbs, nz, bs;
1467072be85SIrina Sokolova   MatScalar      *aa;
1477072be85SIrina Sokolova   PetscInt       *aj, *ai;
14880278ffbSSatish Balay   PetscInt        i;
1497072be85SIrina Sokolova 
1507072be85SIrina Sokolova   PetscFunctionBegin;
1517072be85SIrina Sokolova   if (baijmkl->sparse_optimized) {
1527072be85SIrina Sokolova     /* Matrix has been previously assembled and optimized. Must destroy old
1537072be85SIrina Sokolova      * matrix handle before running the optimization step again. */
1549566063dSJacob Faibussowitsch     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
1559566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
1567072be85SIrina Sokolova   }
1577072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
1587072be85SIrina Sokolova 
1597072be85SIrina Sokolova   /* Now perform the SpMV2 setup and matrix optimization. */
1607072be85SIrina Sokolova   baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
1617072be85SIrina Sokolova   baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
1627072be85SIrina Sokolova   baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
1637072be85SIrina Sokolova   mbs                 = a->mbs;
1647072be85SIrina Sokolova   nbs                 = a->nbs;
1657072be85SIrina Sokolova   nz                  = a->nz;
1667072be85SIrina Sokolova   bs                  = A->rmap->bs;
1677072be85SIrina Sokolova   aa                  = a->a;
1687072be85SIrina Sokolova 
169f4f49eeaSPierre Jolivet   if ((nz != 0) & !A->structure_only) {
1707072be85SIrina Sokolova     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1717072be85SIrina Sokolova      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
172b9e7e5c1SBarry Smith     if (PetscSeqBAIJSupportsZeroBased()) {
1737072be85SIrina Sokolova       aj = a->j;
1747072be85SIrina Sokolova       ai = a->i;
175f4f49eeaSPierre Jolivet       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
176b9e7e5c1SBarry Smith     } else {
1779566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj));
178b9e7e5c1SBarry Smith       for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
179b9e7e5c1SBarry Smith       for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
1807072be85SIrina Sokolova       aa = a->a;
18145d28df7SStefano Zampini       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
1827072be85SIrina Sokolova       baijmkl->ai1 = ai;
1837072be85SIrina Sokolova       baijmkl->aj1 = aj;
184b9e7e5c1SBarry Smith     }
1859566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000));
1869566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE));
1879566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
1887072be85SIrina Sokolova     baijmkl->sparse_optimized = PETSC_TRUE;
1897072be85SIrina Sokolova   }
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1917072be85SIrina Sokolova }
1927072be85SIrina Sokolova 
MatDuplicate_SeqBAIJMKL(Mat A,MatDuplicateOption op,Mat * M)193d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
194d71ae5a4SJacob Faibussowitsch {
1957072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
1967072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl_dest;
1977072be85SIrina Sokolova 
1987072be85SIrina Sokolova   PetscFunctionBegin;
1999566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqBAIJ(A, op, M));
2007072be85SIrina Sokolova   baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2014dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&baijmkl_dest));
20271bc03e0SIrina Sokolova   (*M)->spptr = (void *)baijmkl_dest;
2039566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL)));
2047072be85SIrina Sokolova   baijmkl_dest->sparse_optimized = PETSC_FALSE;
2059566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2077072be85SIrina Sokolova }
2087072be85SIrina Sokolova 
MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)209d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
210d71ae5a4SJacob Faibussowitsch {
2117072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2127072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2137072be85SIrina Sokolova   const PetscScalar *x;
2147072be85SIrina Sokolova   PetscScalar       *y;
2157072be85SIrina Sokolova 
2167072be85SIrina Sokolova   PetscFunctionBegin;
2177072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2187072be85SIrina Sokolova   if (!a->nz) {
2199566063dSJacob Faibussowitsch     PetscCall(VecSet(yy, 0.0));
2203ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2217072be85SIrina Sokolova   }
2227072be85SIrina Sokolova 
2239566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2249566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2257072be85SIrina Sokolova 
2267072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2277072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2287072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
22948a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2307072be85SIrina Sokolova 
2317072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMult. */
2329566063dSJacob Faibussowitsch   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
2337072be85SIrina Sokolova 
2349566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
2359566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2387072be85SIrina Sokolova }
2397072be85SIrina Sokolova 
MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)240d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
241d71ae5a4SJacob Faibussowitsch {
2427072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2437072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2447072be85SIrina Sokolova   const PetscScalar *x;
2457072be85SIrina Sokolova   PetscScalar       *y;
2467072be85SIrina Sokolova 
2477072be85SIrina Sokolova   PetscFunctionBegin;
2487072be85SIrina Sokolova   /* If there are no nonzero entries, zero yy and return immediately. */
2497072be85SIrina Sokolova   if (!a->nz) {
2509566063dSJacob Faibussowitsch     PetscCall(VecSet(yy, 0.0));
2513ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2527072be85SIrina Sokolova   }
2537072be85SIrina Sokolova 
2549566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2559566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
2567072be85SIrina Sokolova 
2577072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2587072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2597072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
26048a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2617072be85SIrina Sokolova 
2627072be85SIrina Sokolova   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
2639566063dSJacob Faibussowitsch   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
2647072be85SIrina Sokolova 
2659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
2669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
2679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
2683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2697072be85SIrina Sokolova }
2707072be85SIrina Sokolova 
MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)271d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
272d71ae5a4SJacob Faibussowitsch {
2737072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
2747072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
2757072be85SIrina Sokolova   const PetscScalar *x;
2767072be85SIrina Sokolova   PetscScalar       *y, *z;
2777072be85SIrina Sokolova   PetscInt           m = a->mbs * A->rmap->bs;
2787072be85SIrina Sokolova   PetscInt           i;
2797072be85SIrina Sokolova 
2807072be85SIrina Sokolova   PetscFunctionBegin;
2817072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
2827072be85SIrina Sokolova   if (!a->nz) {
2839566063dSJacob Faibussowitsch     PetscCall(VecCopy(yy, zz));
2843ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2857072be85SIrina Sokolova   }
2867072be85SIrina Sokolova 
2879566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
2889566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
2897072be85SIrina Sokolova 
2907072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
2917072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
2927072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
29348a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
2947072be85SIrina Sokolova 
2957072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
2967072be85SIrina Sokolova   if (zz == yy) {
2977072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
2987072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
2999566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
3007072be85SIrina Sokolova   } else {
3017072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
3027072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
3039566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
304ad540459SPierre Jolivet     for (i = 0; i < m; i++) z[i] += y[i];
3057072be85SIrina Sokolova   }
3067072be85SIrina Sokolova 
3079566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
3089566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3099566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
3103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3117072be85SIrina Sokolova }
3127072be85SIrina Sokolova 
MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)313d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
314d71ae5a4SJacob Faibussowitsch {
3157072be85SIrina Sokolova   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
3167072be85SIrina Sokolova   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
3177072be85SIrina Sokolova   const PetscScalar *x;
3187072be85SIrina Sokolova   PetscScalar       *y, *z;
3197072be85SIrina Sokolova   PetscInt           n = a->nbs * A->rmap->bs;
3207072be85SIrina Sokolova   PetscInt           i;
3217072be85SIrina Sokolova   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
3227072be85SIrina Sokolova 
3237072be85SIrina Sokolova   PetscFunctionBegin;
3247072be85SIrina Sokolova   /* If there are no nonzero entries, set zz = yy and return immediately. */
3257072be85SIrina Sokolova   if (!a->nz) {
3269566063dSJacob Faibussowitsch     PetscCall(VecCopy(yy, zz));
3273ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
3287072be85SIrina Sokolova   }
3297072be85SIrina Sokolova 
3309566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3319566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
3327072be85SIrina Sokolova 
3337072be85SIrina Sokolova   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3347072be85SIrina Sokolova    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3357072be85SIrina Sokolova    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
33648a46eb9SPierre Jolivet   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
3377072be85SIrina Sokolova 
3387072be85SIrina Sokolova   /* Call MKL sparse BLAS routine to do the MatMult. */
3397072be85SIrina Sokolova   if (zz == yy) {
3407072be85SIrina Sokolova     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
3417072be85SIrina Sokolova      * with alpha and beta both set to 1.0. */
3429566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
3437072be85SIrina Sokolova   } else {
3447072be85SIrina Sokolova     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
3457072be85SIrina Sokolova      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
3469566063dSJacob Faibussowitsch     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
347ad540459SPierre Jolivet     for (i = 0; i < n; i++) z[i] += y[i];
3487072be85SIrina Sokolova   }
3497072be85SIrina Sokolova 
3509566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
3519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3529566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
3533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3547072be85SIrina Sokolova }
3557072be85SIrina Sokolova 
MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)356d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha)
357d71ae5a4SJacob Faibussowitsch {
3587072be85SIrina Sokolova   PetscFunctionBegin;
3599566063dSJacob Faibussowitsch   PetscCall(MatScale_SeqBAIJ(inA, alpha));
3609566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
3613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3627072be85SIrina Sokolova }
3637072be85SIrina Sokolova 
MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)364d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr)
365d71ae5a4SJacob Faibussowitsch {
3667072be85SIrina Sokolova   PetscFunctionBegin;
3679566063dSJacob Faibussowitsch   PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr));
3689566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
3693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3707072be85SIrina Sokolova }
3717072be85SIrina Sokolova 
MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)372d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str)
373d71ae5a4SJacob Faibussowitsch {
3747072be85SIrina Sokolova   PetscFunctionBegin;
3759566063dSJacob Faibussowitsch   PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str));
3767072be85SIrina Sokolova   if (str == SAME_NONZERO_PATTERN) {
3777072be85SIrina Sokolova     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
3789566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
3797072be85SIrina Sokolova   }
3803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3817072be85SIrina Sokolova }
3827072be85SIrina Sokolova /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
3837072be85SIrina Sokolova  * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqMKLBAIJ()
3847072be85SIrina Sokolova  * routine, but can also be used to convert an assembled SeqBAIJ matrix
3857072be85SIrina Sokolova  * into a SeqBAIJMKL one. */
MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat * newmat)386d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
387d71ae5a4SJacob Faibussowitsch {
3887072be85SIrina Sokolova   Mat             B = *newmat;
3897072be85SIrina Sokolova   Mat_SeqBAIJMKL *baijmkl;
3907072be85SIrina Sokolova   PetscBool       sametype;
3917072be85SIrina Sokolova 
3927072be85SIrina Sokolova   PetscFunctionBegin;
3939566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
3947072be85SIrina Sokolova 
3959566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
3963ba16761SJacob Faibussowitsch   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
3977072be85SIrina Sokolova 
3984dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&baijmkl));
3997072be85SIrina Sokolova   B->spptr = (void *)baijmkl;
4007072be85SIrina Sokolova 
4017072be85SIrina Sokolova   /* Set function pointers for methods that we inherit from BAIJ but override.
4027072be85SIrina Sokolova    * We also parse some command line options below, since those determine some of the methods we point to. */
4037072be85SIrina Sokolova   B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
4047072be85SIrina Sokolova 
4057072be85SIrina Sokolova   baijmkl->sparse_optimized = PETSC_FALSE;
4067072be85SIrina Sokolova 
40714e4dea2SJose E. Roman   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_C", MatScale_SeqBAIJMKL));
4089566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ));
4097072be85SIrina Sokolova 
4109566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL));
4117072be85SIrina Sokolova   *newmat = B;
4123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4137072be85SIrina Sokolova }
4149c46acdfSRichard Tran Mills 
MatAssemblyEnd_SeqBAIJMKL(Mat A,MatAssemblyType mode)415d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
416d71ae5a4SJacob Faibussowitsch {
4174d6dccb5SIrina Sokolova   PetscFunctionBegin;
4183ba16761SJacob Faibussowitsch   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
4199566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
4209566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
4214d6dccb5SIrina Sokolova   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
4224d6dccb5SIrina Sokolova   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
4234d6dccb5SIrina Sokolova   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
4244d6dccb5SIrina Sokolova   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
4254d6dccb5SIrina Sokolova   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
4264d6dccb5SIrina Sokolova   A->ops->scale            = MatScale_SeqBAIJMKL;
4274d6dccb5SIrina Sokolova   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
4284d6dccb5SIrina Sokolova   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
4294d6dccb5SIrina Sokolova   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
4303ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4314d6dccb5SIrina Sokolova }
4329c46acdfSRichard Tran Mills 
433*5d83a8b1SBarry Smith /*@
43411a5261eSBarry Smith   MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`.
43511a5261eSBarry Smith   This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS
4367072be85SIrina Sokolova   routines from Intel MKL whenever possible.
4377072be85SIrina Sokolova 
4387072be85SIrina Sokolova   Input Parameters:
43911a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
44020f4b53cSBarry Smith . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
44120f4b53cSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
4427072be85SIrina Sokolova . m    - number of rows
4437072be85SIrina Sokolova . n    - number of columns
4447072be85SIrina Sokolova . nz   - number of nonzero blocks  per block row (same for all rows)
4457072be85SIrina Sokolova - nnz  - array containing the number of nonzero blocks in the various block rows
44620f4b53cSBarry Smith          (possibly different for each block row) or `NULL`
4477072be85SIrina Sokolova 
4487072be85SIrina Sokolova   Output Parameter:
4497072be85SIrina Sokolova . A - the matrix
4507072be85SIrina Sokolova 
45111a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
452f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
45311a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
4547072be85SIrina Sokolova 
4557072be85SIrina Sokolova   Options Database Keys:
45611a5261eSBarry Smith + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
457a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
4587072be85SIrina Sokolova 
4597072be85SIrina Sokolova   Level: intermediate
4607072be85SIrina Sokolova 
4617072be85SIrina Sokolova   Notes:
4627072be85SIrina Sokolova   The number of rows and columns must be divisible by blocksize.
4637072be85SIrina Sokolova 
46420f4b53cSBarry Smith   If the `nnz` parameter is given then the `nz` parameter is ignored
4657072be85SIrina Sokolova 
4667072be85SIrina Sokolova   A nonzero block is any block that as 1 or more nonzeros in it
4677072be85SIrina Sokolova 
46820f4b53cSBarry Smith   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
46920f4b53cSBarry Smith   operations are currently supported.
47020f4b53cSBarry Smith   If the installed version of MKL supports the "SpMV2" sparse
47120f4b53cSBarry Smith   inspector-executor routines, then those are used by default.
47220f4b53cSBarry Smith   Default PETSc kernels are used otherwise.
47320f4b53cSBarry Smith 
4742ef1f0ffSBarry Smith   The `MATSEQBAIJ` format is fully compatible with standard Fortran
4757072be85SIrina Sokolova   storage.  That is, the stored row and column indices can begin at
4767072be85SIrina Sokolova   either one (as in Fortran) or zero.  See the users' manual for details.
4777072be85SIrina Sokolova 
4787072be85SIrina Sokolova   Specify the preallocated storage with either nz or nnz (not both).
47911a5261eSBarry Smith   Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
480651615e1SBarry Smith   allocation.  See [Sparse Matrices](sec_matsparse) for details.
4817072be85SIrina Sokolova   matrices.
4827072be85SIrina Sokolova 
483651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
4847072be85SIrina Sokolova @*/
MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)485d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
486d71ae5a4SJacob Faibussowitsch {
4877072be85SIrina Sokolova   PetscFunctionBegin;
4889566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
4899566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
4909566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJMKL));
4919566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz));
4923ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4937072be85SIrina Sokolova }
4949c46acdfSRichard Tran Mills 
MatCreate_SeqBAIJMKL(Mat A)495d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
496d71ae5a4SJacob Faibussowitsch {
4977072be85SIrina Sokolova   PetscFunctionBegin;
4989566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQBAIJ));
4999566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A));
5003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5017072be85SIrina Sokolova }
502