xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision ae1ee55146a7ad071171b861759b85940c7e5c67)
14a2a386eSRichard Tran Mills /*
24a2a386eSRichard Tran Mills   Defines basic operations for the MATSEQAIJMKL matrix class.
34a2a386eSRichard Tran Mills   This class is derived from the MATSEQAIJ class and retains the
44a2a386eSRichard Tran Mills   compressed row storage (aka Yale sparse matrix format) but uses
54a2a386eSRichard Tran Mills   sparse BLAS operations from the Intel Math Kernel Library (MKL)
64a2a386eSRichard Tran Mills   wherever possible.
74a2a386eSRichard Tran Mills */
84a2a386eSRichard Tran Mills 
94a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aij.h>
104a2a386eSRichard Tran Mills #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11bdfea6dbSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
12bdfea6dbSBarry Smith   #define MKL_ILP64
13bdfea6dbSBarry Smith #endif
14b9e7e5c1SBarry Smith #include <mkl_spblas.h>
154a2a386eSRichard Tran Mills 
164a2a386eSRichard Tran Mills typedef struct {
17c9d46305SRichard Tran Mills   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
185b49642aSRichard Tran Mills   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
194abfa3b3SRichard Tran Mills   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
20551aa5c8SRichard Tran Mills   PetscObjectState state;
21ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
22df555b71SRichard Tran Mills   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
23df555b71SRichard Tran Mills   struct matrix_descr descr;
24b8cbc1fbSRichard Tran Mills #endif
254a2a386eSRichard Tran Mills } Mat_SeqAIJMKL;
264a2a386eSRichard Tran Mills 
MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat * newmat)27d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
28d71ae5a4SJacob Faibussowitsch {
294a2a386eSRichard Tran Mills   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
304a2a386eSRichard Tran Mills   /* so we will ignore 'MatType type'. */
314a2a386eSRichard Tran Mills   Mat B = *newmat;
32ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
334a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
34c1d5218aSRichard Tran Mills #endif
354a2a386eSRichard Tran Mills 
364a2a386eSRichard Tran Mills   PetscFunctionBegin;
379566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
384a2a386eSRichard Tran Mills 
394a2a386eSRichard Tran Mills   /* Reset the original function pointers. */
4054871a98SRichard Tran Mills   B->ops->duplicate               = MatDuplicate_SeqAIJ;
414a2a386eSRichard Tran Mills   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
424a2a386eSRichard Tran Mills   B->ops->destroy                 = MatDestroy_SeqAIJ;
4354871a98SRichard Tran Mills   B->ops->mult                    = MatMult_SeqAIJ;
44ff03dc53SRichard Tran Mills   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
4554871a98SRichard Tran Mills   B->ops->multadd                 = MatMultAdd_SeqAIJ;
46ff03dc53SRichard Tran Mills   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
47190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
48431879ecSRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49e8be1fc7SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
524f53af40SRichard Tran Mills   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;
534a2a386eSRichard Tran Mills 
549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
554222ddf1SHong Zhang 
56ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
574abfa3b3SRichard Tran Mills   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58e9c94282SRichard Tran Mills    * simply involves destroying the MKL sparse matrix handle and then freeing
59e9c94282SRichard Tran Mills    * the spptr pointer. */
60a8327b06SKarl Rupp   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
61a8327b06SKarl Rupp 
62792fecdfSBarry Smith   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
63ddf6f99aSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
649566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->spptr));
654a2a386eSRichard Tran Mills 
664a2a386eSRichard Tran Mills   /* Change the type of B to MATSEQAIJ. */
679566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
684a2a386eSRichard Tran Mills 
694a2a386eSRichard Tran Mills   *newmat = B;
703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
714a2a386eSRichard Tran Mills }
724a2a386eSRichard Tran Mills 
MatDestroy_SeqAIJMKL(Mat A)7366976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
74d71ae5a4SJacob Faibussowitsch {
754a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
764a2a386eSRichard Tran Mills 
774a2a386eSRichard Tran Mills   PetscFunctionBegin;
78edc89de7SRichard Tran Mills   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
79e9c94282SRichard Tran Mills   if (aijmkl) {
804a2a386eSRichard Tran Mills     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
81ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
82792fecdfSBarry Smith     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
834abfa3b3SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
849566063dSJacob Faibussowitsch     PetscCall(PetscFree(A->spptr));
85e9c94282SRichard Tran Mills   }
864a2a386eSRichard Tran Mills 
874a2a386eSRichard Tran Mills   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
884a2a386eSRichard Tran Mills    * to destroy everything that remains. */
899566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
902fe279fdSBarry Smith   /* I don't call MatSetType().  I believe this is because that
914a2a386eSRichard Tran Mills    * is only to be called when *building* a matrix.  I could be wrong, but
924a2a386eSRichard Tran Mills    * that is how things work for the SuperLU matrix class. */
932e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
949566063dSJacob Faibussowitsch   PetscCall(MatDestroy_SeqAIJ(A));
953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
964a2a386eSRichard Tran Mills }
974a2a386eSRichard Tran Mills 
98190ae7a4SRichard Tran Mills /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
995b49642aSRichard Tran Mills  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
1005b49642aSRichard Tran Mills  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
1015b49642aSRichard Tran Mills  * handle, creates a new one, and then calls mkl_sparse_optimize().
1025b49642aSRichard Tran Mills  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
1035b49642aSRichard Tran Mills  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
1045b49642aSRichard Tran Mills  * an unoptimized matrix handle here. */
MatSeqAIJMKL_create_mkl_handle(Mat A)105d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
106d71ae5a4SJacob Faibussowitsch {
107ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1086e369cd5SRichard Tran Mills   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
1096e369cd5SRichard Tran Mills    * does nothing. We make it callable anyway in this case because it cuts
1106e369cd5SRichard Tran Mills    * down on littering the code with #ifdefs. */
11145fbe478SRichard Tran Mills   PetscFunctionBegin;
1123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1136e369cd5SRichard Tran Mills #else
114a8327b06SKarl Rupp   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
115a8327b06SKarl Rupp   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
116a8327b06SKarl Rupp   PetscInt       m, n;
117a8327b06SKarl Rupp   MatScalar     *aa;
118a8327b06SKarl Rupp   PetscInt      *aj, *ai;
1194a2a386eSRichard Tran Mills 
120a8327b06SKarl Rupp   PetscFunctionBegin;
121e626a176SRichard Tran Mills   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
122e626a176SRichard Tran Mills   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
123e626a176SRichard Tran Mills    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
124e626a176SRichard Tran Mills    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
125e626a176SRichard Tran Mills    * mkl_sparse_optimize() later. */
1263ba16761SJacob Faibussowitsch   if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
1274d51fa23SRichard Tran Mills   #endif
1286e369cd5SRichard Tran Mills 
1290632b357SRichard Tran Mills   if (aijmkl->sparse_optimized) {
1300632b357SRichard Tran Mills     /* Matrix has been previously assembled and optimized. Must destroy old
1310632b357SRichard Tran Mills      * matrix handle before running the optimization step again. */
132792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
1330632b357SRichard Tran Mills   }
1348d3fe1b0SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1356e369cd5SRichard Tran Mills 
136c9d46305SRichard Tran Mills   /* Now perform the SpMV2 setup and matrix optimization. */
137df555b71SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
138df555b71SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
139df555b71SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
14058678438SRichard Tran Mills   m                  = A->rmap->n;
14158678438SRichard Tran Mills   n                  = A->cmap->n;
142df555b71SRichard Tran Mills   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
143df555b71SRichard Tran Mills   aa                 = a->a; /* Nonzero elements stored row-by-row. */
144df555b71SRichard Tran Mills   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
1451495fedeSRichard Tran Mills   if (a->nz && aa && !A->structure_only) {
1468d3fe1b0SRichard Tran Mills     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
1478d3fe1b0SRichard Tran Mills      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
148bdfea6dbSBarry Smith     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
149792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
150792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
15148a46eb9SPierre Jolivet     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
1524abfa3b3SRichard Tran Mills     aijmkl->sparse_optimized = PETSC_TRUE;
153f4f49eeaSPierre Jolivet     PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
154190ae7a4SRichard Tran Mills   } else {
155f3fa974cSJacob Faibussowitsch     aijmkl->csrA = NULL;
156c9d46305SRichard Tran Mills   }
1573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
158d995685eSRichard Tran Mills #endif
1596e369cd5SRichard Tran Mills }
1606e369cd5SRichard Tran Mills 
161b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
162190ae7a4SRichard Tran Mills /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)163d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
164d71ae5a4SJacob Faibussowitsch {
16519afcda9SRichard Tran Mills   sparse_index_base_t indexing;
166190ae7a4SRichard Tran Mills   PetscInt            m, n;
1672a8381b2SBarry Smith   PetscInt           *aj, *ai, *unused;
16819afcda9SRichard Tran Mills   MatScalar          *aa;
16919afcda9SRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl;
17019afcda9SRichard Tran Mills 
171362febeeSStefano Zampini   PetscFunctionBegin;
172190ae7a4SRichard Tran Mills   if (csrA) {
1732a8381b2SBarry Smith     /* Note: Must pass in &unused below since MKL can't accept NULL for this output array we don't actually want. */
1742a8381b2SBarry Smith     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&unused, (MKL_INT **)&aj, &aa);
1755f80ce2aSJacob Faibussowitsch     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
176190ae7a4SRichard Tran Mills   } else {
177f3fa974cSJacob Faibussowitsch     aj = ai = NULL;
178f3fa974cSJacob Faibussowitsch     aa      = NULL;
179aab60f1bSRichard Tran Mills   }
180190ae7a4SRichard Tran Mills 
1819566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
1829566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
183aab60f1bSRichard Tran Mills   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
184aab60f1bSRichard Tran Mills    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
185aab60f1bSRichard Tran Mills    * they will be destroyed when the MKL handle is destroyed.
186aab60f1bSRichard Tran Mills    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
187190ae7a4SRichard Tran Mills   if (csrA) {
1889566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
189190ae7a4SRichard Tran Mills   } else {
190190ae7a4SRichard Tran Mills     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
1919566063dSJacob Faibussowitsch     PetscCall(MatSetUp(A));
1929566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1939566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
194190ae7a4SRichard Tran Mills   }
19519afcda9SRichard Tran Mills 
19619afcda9SRichard Tran Mills   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
19719afcda9SRichard Tran Mills    * Now turn it into a MATSEQAIJMKL. */
1989566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1996c87cf42SRichard Tran Mills 
20019afcda9SRichard Tran Mills   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
20119afcda9SRichard Tran Mills   aijmkl->csrA = csrA;
2026c87cf42SRichard Tran Mills 
20319afcda9SRichard Tran Mills   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
20419afcda9SRichard Tran Mills    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
20519afcda9SRichard Tran Mills    * and just need to be able to run the MKL optimization step. */
206f3fd1758SRichard Tran Mills   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
207f3fd1758SRichard Tran Mills   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
208f3fd1758SRichard Tran Mills   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
209190ae7a4SRichard Tran Mills   if (csrA) {
210792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
211792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
2121950a7e7SRichard Tran Mills   }
213f4f49eeaSPierre Jolivet   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
2143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
21519afcda9SRichard Tran Mills }
216b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
217190ae7a4SRichard Tran Mills 
218e8be1fc7SRichard Tran Mills /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
219e8be1fc7SRichard Tran Mills  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
220e8be1fc7SRichard Tran Mills  * MatMatMultNumeric(). */
221b50dddd8SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
MatSeqAIJMKL_update_from_mkl_handle(Mat A)222d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
223d71ae5a4SJacob Faibussowitsch {
224e8be1fc7SRichard Tran Mills   PetscInt            i;
225e8be1fc7SRichard Tran Mills   PetscInt            nrows, ncols;
226e8be1fc7SRichard Tran Mills   PetscInt            nz;
2272a8381b2SBarry Smith   PetscInt           *ai, *aj, *unused;
228e8be1fc7SRichard Tran Mills   PetscScalar        *aa;
2291495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
230e8be1fc7SRichard Tran Mills   sparse_index_base_t indexing;
231e8be1fc7SRichard Tran Mills 
232362febeeSStefano Zampini   PetscFunctionBegin;
233190ae7a4SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2343ba16761SJacob Faibussowitsch   if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);
235190ae7a4SRichard Tran Mills 
2362a8381b2SBarry Smith   /* Note: Must pass in &unused below since MKL can't accept NULL for this output array we don't actually want. */
2372a8381b2SBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&unused, (MKL_INT **)&aj, &aa);
238e8be1fc7SRichard Tran Mills 
239e8be1fc7SRichard Tran Mills   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
240e8be1fc7SRichard Tran Mills    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
241e8be1fc7SRichard Tran Mills   for (i = 0; i < nrows; i++) {
242e8be1fc7SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2439566063dSJacob Faibussowitsch     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
244e8be1fc7SRichard Tran Mills   }
245e8be1fc7SRichard Tran Mills 
2469566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2479566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
248e8be1fc7SRichard Tran Mills 
249f4f49eeaSPierre Jolivet   PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
250a7180b50SRichard Tran Mills   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
251a7180b50SRichard Tran Mills    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
252a7180b50SRichard Tran Mills    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
253a7180b50SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
2543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
255e8be1fc7SRichard Tran Mills }
256b50dddd8SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
257e8be1fc7SRichard Tran Mills 
2583849ddb2SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)259d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
260d71ae5a4SJacob Faibussowitsch {
2613849ddb2SRichard Tran Mills   PetscInt            i, j, k;
2623849ddb2SRichard Tran Mills   PetscInt            nrows, ncols;
2633849ddb2SRichard Tran Mills   PetscInt            nz;
2642a8381b2SBarry Smith   PetscInt           *ai, *aj, *unused;
2653849ddb2SRichard Tran Mills   PetscScalar        *aa;
2661495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
2673849ddb2SRichard Tran Mills   sparse_index_base_t indexing;
2683849ddb2SRichard Tran Mills 
269362febeeSStefano Zampini   PetscFunctionBegin;
2709566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
2713849ddb2SRichard Tran Mills 
2723849ddb2SRichard Tran Mills   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
2733849ddb2SRichard Tran Mills   if (!aijmkl->csrA) {
2749566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
2753ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2763849ddb2SRichard Tran Mills   }
2773849ddb2SRichard Tran Mills 
2782a8381b2SBarry Smith   /* Note: Must pass in &unused below since MKL can't accept NULL for this output array we don't actually want. */
2792a8381b2SBarry Smith   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&unused, (MKL_INT **)&aj, &aa);
2803849ddb2SRichard Tran Mills 
2813849ddb2SRichard Tran Mills   k = 0;
2823849ddb2SRichard Tran Mills   for (i = 0; i < nrows; i++) {
2839566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
2843849ddb2SRichard Tran Mills     nz = ai[i + 1] - ai[i];
2853849ddb2SRichard Tran Mills     for (j = 0; j < nz; j++) {
2863849ddb2SRichard Tran Mills       if (aa) {
2879566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
2883849ddb2SRichard Tran Mills       } else {
2899566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
2903849ddb2SRichard Tran Mills       }
2913849ddb2SRichard Tran Mills       k++;
2923849ddb2SRichard Tran Mills     }
2939566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
2943849ddb2SRichard Tran Mills   }
2953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2963849ddb2SRichard Tran Mills }
2973849ddb2SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
2983849ddb2SRichard Tran Mills 
MatDuplicate_SeqAIJMKL(Mat A,MatDuplicateOption op,Mat * M)29966976f2fSJacob Faibussowitsch static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
300d71ae5a4SJacob Faibussowitsch {
3011495fedeSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
3026e369cd5SRichard Tran Mills   Mat_SeqAIJMKL *aijmkl_dest;
3036e369cd5SRichard Tran Mills 
3046e369cd5SRichard Tran Mills   PetscFunctionBegin;
3059566063dSJacob Faibussowitsch   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
3066e369cd5SRichard Tran Mills   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
3079566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
3086e369cd5SRichard Tran Mills   aijmkl_dest->sparse_optimized = PETSC_FALSE;
3091baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3116e369cd5SRichard Tran Mills }
3126e369cd5SRichard Tran Mills 
MatAssemblyEnd_SeqAIJMKL(Mat A,MatAssemblyType mode)31366976f2fSJacob Faibussowitsch static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
314d71ae5a4SJacob Faibussowitsch {
3156e369cd5SRichard Tran Mills   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
3165b49642aSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
3176e369cd5SRichard Tran Mills 
3186e369cd5SRichard Tran Mills   PetscFunctionBegin;
3193ba16761SJacob Faibussowitsch   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
3206e369cd5SRichard Tran Mills 
3216e369cd5SRichard Tran Mills   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
3226e369cd5SRichard Tran Mills    * extra information and some different methods, call the AssemblyEnd
3236e369cd5SRichard Tran Mills    * routine for a MATSEQAIJ.
3246e369cd5SRichard Tran Mills    * I'm not sure if this is the best way to do this, but it avoids
325d96e85feSRichard Tran Mills    * a lot of code duplication. */
3266e369cd5SRichard Tran Mills   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
3279566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
3286e369cd5SRichard Tran Mills 
3295b49642aSRichard Tran Mills   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
3305b49642aSRichard Tran Mills    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
3315b49642aSRichard Tran Mills   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
3321baa6e33SBarry Smith   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3333ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3344a2a386eSRichard Tran Mills }
3354a2a386eSRichard Tran Mills 
336e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)33766976f2fSJacob Faibussowitsch static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
338d71ae5a4SJacob Faibussowitsch {
3394a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
3404a2a386eSRichard Tran Mills   const PetscScalar *x;
3414a2a386eSRichard Tran Mills   PetscScalar       *y;
3424a2a386eSRichard Tran Mills   const MatScalar   *aa;
3434a2a386eSRichard Tran Mills   PetscInt           m     = A->rmap->n;
344db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
345db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
346db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
3474a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
348db63039fSRichard Tran Mills   char               matdescra[6];
349db63039fSRichard Tran Mills 
3504a2a386eSRichard Tran Mills   /* Variables not in MatMult_SeqAIJ. */
351ff03dc53SRichard Tran Mills   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
352ff03dc53SRichard Tran Mills 
353ff03dc53SRichard Tran Mills   PetscFunctionBegin;
354db63039fSRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
355db63039fSRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
3569566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
358ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
359ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
360ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
361ff03dc53SRichard Tran Mills 
362ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
363db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
364ff03dc53SRichard Tran Mills 
3659566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
3669566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
3679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
3683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
369ff03dc53SRichard Tran Mills }
3701950a7e7SRichard Tran Mills #endif
371ff03dc53SRichard Tran Mills 
372ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)373d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
374d71ae5a4SJacob Faibussowitsch {
375df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
376df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
377df555b71SRichard Tran Mills   const PetscScalar *x;
378df555b71SRichard Tran Mills   PetscScalar       *y;
379551aa5c8SRichard Tran Mills   PetscObjectState   state;
380df555b71SRichard Tran Mills 
381df555b71SRichard Tran Mills   PetscFunctionBegin;
38238987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
38338987b35SRichard Tran Mills   if (!a->nz) {
3849566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
3859566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->rmap->n));
3869566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
3873ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
38838987b35SRichard Tran Mills   }
389f36dfe3fSRichard Tran Mills 
3909566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
3919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
392df555b71SRichard Tran Mills 
3933fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
3943fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
3953fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
3969566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
3979566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
3983fa15762SRichard Tran Mills 
399df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMult. */
400792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
401df555b71SRichard Tran Mills 
4029566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4039566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4049566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
406df555b71SRichard Tran Mills }
407d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
408df555b71SRichard Tran Mills 
409e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)41066976f2fSJacob Faibussowitsch static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
411d71ae5a4SJacob Faibussowitsch {
412ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
413ff03dc53SRichard Tran Mills   const PetscScalar *x;
414ff03dc53SRichard Tran Mills   PetscScalar       *y;
415ff03dc53SRichard Tran Mills   const MatScalar   *aa;
416ff03dc53SRichard Tran Mills   PetscInt           m     = A->rmap->n;
417db63039fSRichard Tran Mills   PetscInt           n     = A->cmap->n;
418db63039fSRichard Tran Mills   PetscScalar        alpha = 1.0;
419db63039fSRichard Tran Mills   PetscScalar        beta  = 0.0;
420ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
421db63039fSRichard Tran Mills   char               matdescra[6];
422ff03dc53SRichard Tran Mills 
423ff03dc53SRichard Tran Mills   /* Variables not in MatMultTranspose_SeqAIJ. */
424ff03dc53SRichard Tran Mills   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
4254a2a386eSRichard Tran Mills 
4264a2a386eSRichard Tran Mills   PetscFunctionBegin;
427969800c5SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
428969800c5SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
4299566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4309566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
4314a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
4324a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
4334a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
4344a2a386eSRichard Tran Mills 
4354a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
436db63039fSRichard Tran Mills   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
4374a2a386eSRichard Tran Mills 
4389566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4399566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4409566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4424a2a386eSRichard Tran Mills }
4431950a7e7SRichard Tran Mills #endif
4444a2a386eSRichard Tran Mills 
445ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)446d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
447d71ae5a4SJacob Faibussowitsch {
448df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
449df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
450df555b71SRichard Tran Mills   const PetscScalar *x;
451df555b71SRichard Tran Mills   PetscScalar       *y;
452551aa5c8SRichard Tran Mills   PetscObjectState   state;
453df555b71SRichard Tran Mills 
454df555b71SRichard Tran Mills   PetscFunctionBegin;
45538987b35SRichard Tran Mills   /* If there are no nonzero entries, zero yy and return immediately. */
45638987b35SRichard Tran Mills   if (!a->nz) {
4579566063dSJacob Faibussowitsch     PetscCall(VecGetArray(yy, &y));
4589566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(y, A->cmap->n));
4599566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(yy, &y));
4603ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
46138987b35SRichard Tran Mills   }
462f36dfe3fSRichard Tran Mills 
4639566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
4649566063dSJacob Faibussowitsch   PetscCall(VecGetArray(yy, &y));
465df555b71SRichard Tran Mills 
4663fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
4673fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
4683fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
4699566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
4709566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
4713fa15762SRichard Tran Mills 
472df555b71SRichard Tran Mills   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
473792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
474df555b71SRichard Tran Mills 
4759566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
4769566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
4779566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(yy, &y));
4783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
479df555b71SRichard Tran Mills }
480d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
481df555b71SRichard Tran Mills 
482e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)48366976f2fSJacob Faibussowitsch static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
484d71ae5a4SJacob Faibussowitsch {
4854a2a386eSRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
4864a2a386eSRichard Tran Mills   const PetscScalar *x;
4874a2a386eSRichard Tran Mills   PetscScalar       *y, *z;
4884a2a386eSRichard Tran Mills   const MatScalar   *aa;
4894a2a386eSRichard Tran Mills   PetscInt           m = A->rmap->n;
490db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
4914a2a386eSRichard Tran Mills   const PetscInt    *aj, *ai;
4924a2a386eSRichard Tran Mills   PetscInt           i;
4934a2a386eSRichard Tran Mills 
494ff03dc53SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
495ff03dc53SRichard Tran Mills   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
496a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
497db63039fSRichard Tran Mills   PetscScalar beta;
498a84739b8SRichard Tran Mills   char        matdescra[6];
499ff03dc53SRichard Tran Mills 
500ff03dc53SRichard Tran Mills   PetscFunctionBegin;
501a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
502a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
503a84739b8SRichard Tran Mills 
5049566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5059566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
506ff03dc53SRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
507ff03dc53SRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
508ff03dc53SRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
509ff03dc53SRichard Tran Mills 
510ff03dc53SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
511a84739b8SRichard Tran Mills   if (zz == yy) {
512a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
513db63039fSRichard Tran Mills     beta = 1.0;
514db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
515a84739b8SRichard Tran Mills   } else {
516db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
517db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
518db63039fSRichard Tran Mills     beta = 0.0;
519db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
520ad540459SPierre Jolivet     for (i = 0; i < m; i++) z[i] += y[i];
521a84739b8SRichard Tran Mills   }
522ff03dc53SRichard Tran Mills 
5239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5259566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
5263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
527ff03dc53SRichard Tran Mills }
5281950a7e7SRichard Tran Mills #endif
529ff03dc53SRichard Tran Mills 
530ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)531d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
532d71ae5a4SJacob Faibussowitsch {
533df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
534df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
535df555b71SRichard Tran Mills   const PetscScalar *x;
536df555b71SRichard Tran Mills   PetscScalar       *y, *z;
537df555b71SRichard Tran Mills   PetscInt           m = A->rmap->n;
538df555b71SRichard Tran Mills   PetscInt           i;
539df555b71SRichard Tran Mills 
540df555b71SRichard Tran Mills   /* Variables not in MatMultAdd_SeqAIJ. */
541551aa5c8SRichard Tran Mills   PetscObjectState state;
542df555b71SRichard Tran Mills 
543df555b71SRichard Tran Mills   PetscFunctionBegin;
54438987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
54538987b35SRichard Tran Mills   if (!a->nz) {
5469566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
5479566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, m));
5489566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
5493ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
55038987b35SRichard Tran Mills   }
551df555b71SRichard Tran Mills 
5529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
5539566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
554df555b71SRichard Tran Mills 
5553fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
5563fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
5573fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
5589566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
5599566063dSJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
5603fa15762SRichard Tran Mills 
561df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
562df555b71SRichard Tran Mills   if (zz == yy) {
563df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
564df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
565792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
566df555b71SRichard Tran Mills   } else {
567df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
568df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
569792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
5705f80ce2aSJacob Faibussowitsch     for (i = 0; i < m; i++) z[i] += y[i];
571df555b71SRichard Tran Mills   }
572df555b71SRichard Tran Mills 
5739566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
5749566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
5759566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
5763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
577df555b71SRichard Tran Mills }
578d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
579df555b71SRichard Tran Mills 
580e626a176SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)58166976f2fSJacob Faibussowitsch static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
582d71ae5a4SJacob Faibussowitsch {
583ff03dc53SRichard Tran Mills   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
584ff03dc53SRichard Tran Mills   const PetscScalar *x;
585ff03dc53SRichard Tran Mills   PetscScalar       *y, *z;
586ff03dc53SRichard Tran Mills   const MatScalar   *aa;
587ff03dc53SRichard Tran Mills   PetscInt           m = A->rmap->n;
588db63039fSRichard Tran Mills   PetscInt           n = A->cmap->n;
589ff03dc53SRichard Tran Mills   const PetscInt    *aj, *ai;
590ff03dc53SRichard Tran Mills   PetscInt           i;
591ff03dc53SRichard Tran Mills 
592ff03dc53SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
593ff03dc53SRichard Tran Mills   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
594a84739b8SRichard Tran Mills   PetscScalar alpha  = 1.0;
595db63039fSRichard Tran Mills   PetscScalar beta;
596a84739b8SRichard Tran Mills   char        matdescra[6];
5974a2a386eSRichard Tran Mills 
5984a2a386eSRichard Tran Mills   PetscFunctionBegin;
599a84739b8SRichard Tran Mills   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
600a84739b8SRichard Tran Mills   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
601a84739b8SRichard Tran Mills 
6029566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6039566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
6044a2a386eSRichard Tran Mills   aj = a->j; /* aj[k] gives column index for element aa[k]. */
6054a2a386eSRichard Tran Mills   aa = a->a; /* Nonzero elements stored row-by-row. */
6064a2a386eSRichard Tran Mills   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
6074a2a386eSRichard Tran Mills 
6084a2a386eSRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
609a84739b8SRichard Tran Mills   if (zz == yy) {
610a84739b8SRichard Tran Mills     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
611db63039fSRichard Tran Mills     beta = 1.0;
612969800c5SRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
613a84739b8SRichard Tran Mills   } else {
614db63039fSRichard Tran Mills     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
615db63039fSRichard Tran Mills      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
616db63039fSRichard Tran Mills     beta = 0.0;
617db63039fSRichard Tran Mills     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
618ad540459SPierre Jolivet     for (i = 0; i < n; i++) z[i] += y[i];
619a84739b8SRichard Tran Mills   }
6204a2a386eSRichard Tran Mills 
6219566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6229566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6239566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6254a2a386eSRichard Tran Mills }
6261950a7e7SRichard Tran Mills #endif
6274a2a386eSRichard Tran Mills 
628ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)629d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
630d71ae5a4SJacob Faibussowitsch {
631df555b71SRichard Tran Mills   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
632df555b71SRichard Tran Mills   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
633df555b71SRichard Tran Mills   const PetscScalar *x;
634df555b71SRichard Tran Mills   PetscScalar       *y, *z;
635969800c5SRichard Tran Mills   PetscInt           n = A->cmap->n;
636df555b71SRichard Tran Mills   PetscInt           i;
637551aa5c8SRichard Tran Mills   PetscObjectState   state;
638df555b71SRichard Tran Mills 
639df555b71SRichard Tran Mills   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
640df555b71SRichard Tran Mills 
641df555b71SRichard Tran Mills   PetscFunctionBegin;
64238987b35SRichard Tran Mills   /* If there are no nonzero entries, set zz = yy and return immediately. */
64338987b35SRichard Tran Mills   if (!a->nz) {
6449566063dSJacob Faibussowitsch     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
6459566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(z, y, n));
6469566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6473ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
64838987b35SRichard Tran Mills   }
649f36dfe3fSRichard Tran Mills 
6509566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
6519566063dSJacob Faibussowitsch   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
652df555b71SRichard Tran Mills 
6533fa15762SRichard Tran Mills   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
6543fa15762SRichard Tran Mills    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
6553fa15762SRichard Tran Mills    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
6569566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
6573ba16761SJacob Faibussowitsch   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
6583fa15762SRichard Tran Mills 
659df555b71SRichard Tran Mills   /* Call MKL sparse BLAS routine to do the MatMult. */
660df555b71SRichard Tran Mills   if (zz == yy) {
661df555b71SRichard Tran Mills     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
662df555b71SRichard Tran Mills      * with alpha and beta both set to 1.0. */
663792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
664df555b71SRichard Tran Mills   } else {
665df555b71SRichard Tran Mills     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
666df555b71SRichard Tran Mills      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
667792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
6685f80ce2aSJacob Faibussowitsch     for (i = 0; i < n; i++) z[i] += y[i];
669df555b71SRichard Tran Mills   }
670df555b71SRichard Tran Mills 
6719566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
6729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
6739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
6743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
675df555b71SRichard Tran Mills }
676d995685eSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
677df555b71SRichard Tran Mills 
6788a369200SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)679d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
680d71ae5a4SJacob Faibussowitsch {
6811495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
682431879ecSRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
683190ae7a4SRichard Tran Mills   PetscInt            nrows, ncols;
684431879ecSRichard Tran Mills   struct matrix_descr descr_type_gen;
685431879ecSRichard Tran Mills   PetscObjectState    state;
686431879ecSRichard Tran Mills 
687431879ecSRichard Tran Mills   PetscFunctionBegin;
688190ae7a4SRichard Tran Mills   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
689190ae7a4SRichard Tran Mills    * not handle sparse matrices with zero rows or columns. */
690190ae7a4SRichard Tran Mills   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
691190ae7a4SRichard Tran Mills   else nrows = A->cmap->N;
692190ae7a4SRichard Tran Mills   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
693190ae7a4SRichard Tran Mills   else ncols = B->rmap->N;
694190ae7a4SRichard Tran Mills 
6959566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
6969566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
6979566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
6989566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
699431879ecSRichard Tran Mills   csrA                = a->csrA;
700431879ecSRichard Tran Mills   csrB                = b->csrA;
701431879ecSRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
702431879ecSRichard Tran Mills 
703190ae7a4SRichard Tran Mills   if (csrA && csrB) {
704792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
705190ae7a4SRichard Tran Mills   } else {
706f3fa974cSJacob Faibussowitsch     csrC = NULL;
707190ae7a4SRichard Tran Mills   }
708190ae7a4SRichard Tran Mills 
7099566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
7103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
711431879ecSRichard Tran Mills }
712431879ecSRichard Tran Mills 
MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)713d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
714d71ae5a4SJacob Faibussowitsch {
7151495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
716e8be1fc7SRichard Tran Mills   sparse_matrix_t     csrA, csrB, csrC;
717e8be1fc7SRichard Tran Mills   struct matrix_descr descr_type_gen;
718e8be1fc7SRichard Tran Mills   PetscObjectState    state;
719e8be1fc7SRichard Tran Mills 
720e8be1fc7SRichard Tran Mills   PetscFunctionBegin;
7219566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
7229566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
7239566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
7249566063dSJacob Faibussowitsch   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
725e8be1fc7SRichard Tran Mills   csrA                = a->csrA;
726e8be1fc7SRichard Tran Mills   csrB                = b->csrA;
727e8be1fc7SRichard Tran Mills   csrC                = c->csrA;
728e8be1fc7SRichard Tran Mills   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
729e8be1fc7SRichard Tran Mills 
730190ae7a4SRichard Tran Mills   if (csrA && csrB) {
731792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
732190ae7a4SRichard Tran Mills   } else {
733f3fa974cSJacob Faibussowitsch     csrC = NULL;
734190ae7a4SRichard Tran Mills   }
735e8be1fc7SRichard Tran Mills 
736e8be1fc7SRichard Tran Mills   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
7379566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
7383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
739e8be1fc7SRichard Tran Mills }
740e8be1fc7SRichard Tran Mills 
MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)741d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
742d71ae5a4SJacob Faibussowitsch {
743190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7449566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7453ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
746190ae7a4SRichard Tran Mills }
747190ae7a4SRichard Tran Mills 
MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)748d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
749d71ae5a4SJacob Faibussowitsch {
750190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7519566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
753190ae7a4SRichard Tran Mills }
754190ae7a4SRichard Tran Mills 
MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)755d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
756d71ae5a4SJacob Faibussowitsch {
757190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7589566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
760190ae7a4SRichard Tran Mills }
761190ae7a4SRichard Tran Mills 
MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)762d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
763d71ae5a4SJacob Faibussowitsch {
764190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7659566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
7663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
767190ae7a4SRichard Tran Mills }
768190ae7a4SRichard Tran Mills 
MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)769d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
770d71ae5a4SJacob Faibussowitsch {
771190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7729566063dSJacob Faibussowitsch   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
7733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
774190ae7a4SRichard Tran Mills }
775190ae7a4SRichard Tran Mills 
MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)776d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
777d71ae5a4SJacob Faibussowitsch {
778190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7799566063dSJacob Faibussowitsch   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
7803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
781190ae7a4SRichard Tran Mills }
782190ae7a4SRichard Tran Mills 
MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)783d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
784d71ae5a4SJacob Faibussowitsch {
785190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
786190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
787190ae7a4SRichard Tran Mills 
788190ae7a4SRichard Tran Mills   PetscFunctionBegin;
7899566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
7903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
791190ae7a4SRichard Tran Mills }
792190ae7a4SRichard Tran Mills 
MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)793d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
794d71ae5a4SJacob Faibussowitsch {
795190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
796190ae7a4SRichard Tran Mills   Mat          A = product->A, B = product->B;
797190ae7a4SRichard Tran Mills   PetscReal    fill = product->fill;
798190ae7a4SRichard Tran Mills 
799190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8009566063dSJacob Faibussowitsch   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
801190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
8023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
803190ae7a4SRichard Tran Mills }
804190ae7a4SRichard Tran Mills 
MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)805d71ae5a4SJacob Faibussowitsch PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
806d71ae5a4SJacob Faibussowitsch {
807190ae7a4SRichard Tran Mills   Mat                 Ct;
808190ae7a4SRichard Tran Mills   Vec                 zeros;
8091495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
8104f53af40SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
8114f53af40SRichard Tran Mills   PetscBool           set, flag;
812b9e1dd46SRichard Tran Mills   struct matrix_descr descr_type_sym;
8134f53af40SRichard Tran Mills   PetscObjectState    state;
8144f53af40SRichard Tran Mills 
8154f53af40SRichard Tran Mills   PetscFunctionBegin;
8169566063dSJacob Faibussowitsch   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
817b94d7dedSBarry Smith   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
8184f53af40SRichard Tran Mills 
8199566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8209566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8219566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8229566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
8234f53af40SRichard Tran Mills   csrA                = a->csrA;
8244f53af40SRichard Tran Mills   csrP                = p->csrA;
8254f53af40SRichard Tran Mills   csrC                = c->csrA;
826b9e1dd46SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
827190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
828b9e1dd46SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
8294f53af40SRichard Tran Mills 
8302fe279fdSBarry Smith   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
831792fecdfSBarry Smith   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
8324f53af40SRichard Tran Mills 
833190ae7a4SRichard Tran Mills   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
834190ae7a4SRichard Tran Mills    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
835190ae7a4SRichard Tran Mills    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
8367301b172SPierre Jolivet    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
837190ae7a4SRichard Tran Mills    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
838190ae7a4SRichard Tran Mills    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
839190ae7a4SRichard Tran Mills    * the full matrix. */
8409566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
8419566063dSJacob Faibussowitsch   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
8429566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(C, &zeros, NULL));
8439566063dSJacob Faibussowitsch   PetscCall(VecSetFromOptions(zeros));
8449566063dSJacob Faibussowitsch   PetscCall(VecZeroEntries(zeros));
8459566063dSJacob Faibussowitsch   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
8469566063dSJacob Faibussowitsch   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
847190ae7a4SRichard Tran Mills   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
848f3fa974cSJacob Faibussowitsch   PetscCall(MatProductCreateWithMat(A, P, NULL, C));
8499566063dSJacob Faibussowitsch   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
8509566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
8519566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&zeros));
8529566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Ct));
8533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8544f53af40SRichard Tran Mills }
855190ae7a4SRichard Tran Mills 
MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)856d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
857d71ae5a4SJacob Faibussowitsch {
858190ae7a4SRichard Tran Mills   Mat_Product        *product = C->product;
859190ae7a4SRichard Tran Mills   Mat                 A = product->A, P = product->B;
8601495fedeSRichard Tran Mills   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
861190ae7a4SRichard Tran Mills   sparse_matrix_t     csrA, csrP, csrC;
862190ae7a4SRichard Tran Mills   struct matrix_descr descr_type_sym;
863190ae7a4SRichard Tran Mills   PetscObjectState    state;
864190ae7a4SRichard Tran Mills 
865190ae7a4SRichard Tran Mills   PetscFunctionBegin;
8669566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
8679566063dSJacob Faibussowitsch   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
8689566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
8699566063dSJacob Faibussowitsch   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
870190ae7a4SRichard Tran Mills   csrA                = a->csrA;
871190ae7a4SRichard Tran Mills   csrP                = p->csrA;
872190ae7a4SRichard Tran Mills   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
873190ae7a4SRichard Tran Mills   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
874190ae7a4SRichard Tran Mills   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
875190ae7a4SRichard Tran Mills 
8762fe279fdSBarry Smith   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
877190ae7a4SRichard Tran Mills   if (csrP && csrA) {
878792fecdfSBarry Smith     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
879190ae7a4SRichard Tran Mills   } else {
880f3fa974cSJacob Faibussowitsch     csrC = NULL;
881190ae7a4SRichard Tran Mills   }
882190ae7a4SRichard Tran Mills 
883190ae7a4SRichard Tran Mills   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
884190ae7a4SRichard Tran Mills    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
88549ba5396SRichard Tran Mills    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
88649ba5396SRichard Tran Mills    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
88749ba5396SRichard Tran Mills    * is guaranteed. */
8889566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
889190ae7a4SRichard Tran Mills 
890190ae7a4SRichard Tran Mills   C->ops->productnumeric = MatProductNumeric_PtAP;
8913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
892190ae7a4SRichard Tran Mills }
893190ae7a4SRichard Tran Mills 
MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)894d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
895d71ae5a4SJacob Faibussowitsch {
896190ae7a4SRichard Tran Mills   PetscFunctionBegin;
897190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AB;
898190ae7a4SRichard Tran Mills   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
8993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
900190ae7a4SRichard Tran Mills }
901190ae7a4SRichard Tran Mills 
MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)902d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
903d71ae5a4SJacob Faibussowitsch {
904190ae7a4SRichard Tran Mills   PetscFunctionBegin;
905190ae7a4SRichard Tran Mills   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
9063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
907190ae7a4SRichard Tran Mills }
908190ae7a4SRichard Tran Mills 
MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)909d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
910d71ae5a4SJacob Faibussowitsch {
911190ae7a4SRichard Tran Mills   PetscFunctionBegin;
912190ae7a4SRichard Tran Mills   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
913190ae7a4SRichard Tran Mills   C->ops->productsymbolic          = MatProductSymbolic_ABt;
9143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
915190ae7a4SRichard Tran Mills }
916190ae7a4SRichard Tran Mills 
MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)917d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
918d71ae5a4SJacob Faibussowitsch {
919190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
920190ae7a4SRichard Tran Mills   Mat          A       = product->A;
921190ae7a4SRichard Tran Mills   PetscBool    set, flag;
922190ae7a4SRichard Tran Mills 
923190ae7a4SRichard Tran Mills   PetscFunctionBegin;
924190ae7a4SRichard Tran Mills   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
9259566063dSJacob Faibussowitsch   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
926f497284bSJasper Hatton   PetscCheck(!PetscDefined(USE_COMPLEX) && set && flag, PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_PtAP not supported for type SeqAIJMKL");
927f497284bSJasper Hatton   /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal() */
928f497284bSJasper Hatton   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
9293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
930190ae7a4SRichard Tran Mills }
931190ae7a4SRichard Tran Mills 
MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)932d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
933d71ae5a4SJacob Faibussowitsch {
934190ae7a4SRichard Tran Mills   PetscFunctionBegin;
9352ab6f6a8SStefano Zampini   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
9363ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
937190ae7a4SRichard Tran Mills }
938190ae7a4SRichard Tran Mills 
MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)939d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
940d71ae5a4SJacob Faibussowitsch {
941190ae7a4SRichard Tran Mills   PetscFunctionBegin;
942f497284bSJasper Hatton   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_ABC not supported for type SeqAIJMKL");
943190ae7a4SRichard Tran Mills }
944190ae7a4SRichard Tran Mills 
MatProductSetFromOptions_SeqAIJMKL(Mat C)945d71ae5a4SJacob Faibussowitsch PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
946d71ae5a4SJacob Faibussowitsch {
947190ae7a4SRichard Tran Mills   Mat_Product *product = C->product;
948190ae7a4SRichard Tran Mills 
949190ae7a4SRichard Tran Mills   PetscFunctionBegin;
950190ae7a4SRichard Tran Mills   switch (product->type) {
951d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AB:
952d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
953d71ae5a4SJacob Faibussowitsch     break;
954d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_AtB:
955d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
956d71ae5a4SJacob Faibussowitsch     break;
957d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABt:
958d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
959d71ae5a4SJacob Faibussowitsch     break;
960d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_PtAP:
961d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
962d71ae5a4SJacob Faibussowitsch     break;
963d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_RARt:
964d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
965d71ae5a4SJacob Faibussowitsch     break;
966d71ae5a4SJacob Faibussowitsch   case MATPRODUCT_ABC:
967d71ae5a4SJacob Faibussowitsch     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
968d71ae5a4SJacob Faibussowitsch     break;
969d71ae5a4SJacob Faibussowitsch   default:
970d71ae5a4SJacob Faibussowitsch     break;
971190ae7a4SRichard Tran Mills   }
9723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
973190ae7a4SRichard Tran Mills }
974431879ecSRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
9754f53af40SRichard Tran Mills 
9764a2a386eSRichard Tran Mills /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
977510b72f4SRichard Tran Mills  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
9784a2a386eSRichard Tran Mills  * routine, but can also be used to convert an assembled SeqAIJ matrix
9794a2a386eSRichard Tran Mills  * into a SeqAIJMKL one. */
MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat * newmat)980d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
981d71ae5a4SJacob Faibussowitsch {
9824a2a386eSRichard Tran Mills   Mat            B = *newmat;
9834a2a386eSRichard Tran Mills   Mat_SeqAIJMKL *aijmkl;
984c9d46305SRichard Tran Mills   PetscBool      set;
985e9c94282SRichard Tran Mills   PetscBool      sametype;
9864a2a386eSRichard Tran Mills 
9874a2a386eSRichard Tran Mills   PetscFunctionBegin;
9889566063dSJacob Faibussowitsch   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
9894a2a386eSRichard Tran Mills 
9909566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
9913ba16761SJacob Faibussowitsch   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
992e9c94282SRichard Tran Mills 
9934dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&aijmkl));
9944a2a386eSRichard Tran Mills   B->spptr = (void *)aijmkl;
9954a2a386eSRichard Tran Mills 
996df555b71SRichard Tran Mills   /* Set function pointers for methods that we inherit from AIJ but override.
997969800c5SRichard Tran Mills    * We also parse some command line options below, since those determine some of the methods we point to. */
9984a2a386eSRichard Tran Mills   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
9994a2a386eSRichard Tran Mills   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
10004a2a386eSRichard Tran Mills   B->ops->destroy     = MatDestroy_SeqAIJMKL;
1001c9d46305SRichard Tran Mills 
10024abfa3b3SRichard Tran Mills   aijmkl->sparse_optimized = PETSC_FALSE;
1003*fc2fb351SPierre Jolivet   aijmkl->no_SpMV2         = PetscDefined(HAVE_MKL_SPARSE_OPTIMIZE) ? PETSC_FALSE : PETSC_TRUE; /* Default to using the SpMV2 routines if our MKL supports them. */
10045b49642aSRichard Tran Mills   aijmkl->eager_inspection = PETSC_FALSE;
10054abfa3b3SRichard Tran Mills 
10064abfa3b3SRichard Tran Mills   /* Parse command line options. */
1007d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
10089566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
10099566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1010d0609cedSBarry Smith   PetscOptionsEnd();
1011ffcab697SRichard Tran Mills #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1012d995685eSRichard Tran Mills   if (!aijmkl->no_SpMV2) {
10139566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1014d995685eSRichard Tran Mills     aijmkl->no_SpMV2 = PETSC_TRUE;
1015d995685eSRichard Tran Mills   }
1016d995685eSRichard Tran Mills #endif
1017c9d46305SRichard Tran Mills 
1018ffcab697SRichard Tran Mills #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1019df555b71SRichard Tran Mills   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1020969800c5SRichard Tran Mills   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1021df555b71SRichard Tran Mills   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1022969800c5SRichard Tran Mills   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
10238a369200SRichard Tran Mills   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1024190ae7a4SRichard Tran Mills   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1025190ae7a4SRichard Tran Mills   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1026190ae7a4SRichard Tran Mills   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1027190ae7a4SRichard Tran Mills   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1028190ae7a4SRichard Tran Mills   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1029ffcab697SRichard Tran Mills     #if !defined(PETSC_USE_COMPLEX)
103049ba5396SRichard Tran Mills   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1031190ae7a4SRichard Tran Mills     #else
1032190ae7a4SRichard Tran Mills   B->ops->ptapnumeric = NULL;
10334f53af40SRichard Tran Mills     #endif
1034e8be1fc7SRichard Tran Mills   #endif
10351950a7e7SRichard Tran Mills #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
10361950a7e7SRichard Tran Mills 
1037213898a2SRichard Tran Mills #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1038213898a2SRichard Tran Mills   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1039213898a2SRichard Tran Mills    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1040213898a2SRichard Tran Mills    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1041213898a2SRichard Tran Mills    * versions in which the older interface has not been deprecated, we use the old interface. */
10421950a7e7SRichard Tran Mills   if (aijmkl->no_SpMV2) {
10434a2a386eSRichard Tran Mills     B->ops->mult             = MatMult_SeqAIJMKL;
1044969800c5SRichard Tran Mills     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
10454a2a386eSRichard Tran Mills     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1046969800c5SRichard Tran Mills     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1047c9d46305SRichard Tran Mills   }
10481950a7e7SRichard Tran Mills #endif
10494a2a386eSRichard Tran Mills 
10509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
10514a2a386eSRichard Tran Mills 
10529566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
10534a2a386eSRichard Tran Mills   *newmat = B;
10543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
10554a2a386eSRichard Tran Mills }
10564a2a386eSRichard Tran Mills 
10574a2a386eSRichard Tran Mills /*@C
105811a5261eSBarry Smith   MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
105990147e49SRichard Tran Mills 
1060d083f849SBarry Smith   Collective
10614a2a386eSRichard Tran Mills 
10624a2a386eSRichard Tran Mills   Input Parameters:
106311a5261eSBarry Smith + comm - MPI communicator, set to `PETSC_COMM_SELF`
10644a2a386eSRichard Tran Mills . m    - number of rows
10654a2a386eSRichard Tran Mills . n    - number of columns
10664a2a386eSRichard Tran Mills . nz   - number of nonzeros per row (same for all rows)
10674a2a386eSRichard Tran Mills - nnz  - array containing the number of nonzeros in the various rows
10682ef1f0ffSBarry Smith          (possibly different for each row) or `NULL`
10694a2a386eSRichard Tran Mills 
10704a2a386eSRichard Tran Mills   Output Parameter:
10714a2a386eSRichard Tran Mills . A - the matrix
10724a2a386eSRichard Tran Mills 
107390147e49SRichard Tran Mills   Options Database Keys:
107466b7eeb6SRichard Tran Mills + -mat_aijmkl_no_spmv2         - disable use of the SpMV2 inspector-executor routines
10752ef1f0ffSBarry Smith - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
10762ef1f0ffSBarry Smith                                   performing this step the first time the matrix is applied
10774a2a386eSRichard Tran Mills 
10784a2a386eSRichard Tran Mills   Level: intermediate
10794a2a386eSRichard Tran Mills 
10802fe279fdSBarry Smith   Notes:
10812ef1f0ffSBarry Smith   If `nnz` is given then `nz` is ignored
10822ef1f0ffSBarry Smith 
10832fe279fdSBarry Smith   This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
10842fe279fdSBarry Smith   routines from Intel MKL whenever possible.
10852fe279fdSBarry Smith 
10862fe279fdSBarry Smith   If the installed version of MKL supports the "SpMV2" sparse
10872fe279fdSBarry Smith   inspector-executor routines, then those are used by default.
10882fe279fdSBarry Smith 
10892fe279fdSBarry Smith   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
10902fe279fdSBarry Smith   (for symmetric A) operations are currently supported.
10912fe279fdSBarry Smith   MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
10922fe279fdSBarry Smith 
10931cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
10944a2a386eSRichard Tran Mills @*/
MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)1095d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1096d71ae5a4SJacob Faibussowitsch {
10974a2a386eSRichard Tran Mills   PetscFunctionBegin;
10989566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
10999566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
11009566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQAIJMKL));
11019566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
11023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11034a2a386eSRichard Tran Mills }
11044a2a386eSRichard Tran Mills 
MatCreate_SeqAIJMKL(Mat A)1105d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1106d71ae5a4SJacob Faibussowitsch {
11074a2a386eSRichard Tran Mills   PetscFunctionBegin;
11089566063dSJacob Faibussowitsch   PetscCall(MatSetType(A, MATSEQAIJ));
11099566063dSJacob Faibussowitsch   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
11103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
11114a2a386eSRichard Tran Mills }
1112