xref: /petsc/src/mat/impls/aij/seq/aijmkl/aijmkl.c (revision ae1ee55146a7ad071171b861759b85940c7e5c67)
1 /*
2   Defines basic operations for the MATSEQAIJMKL matrix class.
3   This class is derived from the MATSEQAIJ class and retains the
4   compressed row storage (aka Yale sparse matrix format) but uses
5   sparse BLAS operations from the Intel Math Kernel Library (MKL)
6   wherever possible.
7 */
8 
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
12   #define MKL_ILP64
13 #endif
14 #include <mkl_spblas.h>
15 
/* Private data appended (via A->spptr) to a MATSEQAIJ matrix to make it a MATSEQAIJMKL. */
typedef struct {
  PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
  PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
  PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  PetscObjectState state;            /* Object state of A when the MKL handle was last built; used to detect a stale handle. */
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  sparse_matrix_t     csrA;  /* "Handle" used by SpMV2 inspector-executor routines. NULL for matrices with no nonzeros. */
  struct matrix_descr descr; /* Descriptor (matrix type, fill mode, diagonal kind) passed to the SpMV2 executor routines. */
#endif
} Mat_SeqAIJMKL;
26 
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  /* Convert a MATSEQAIJMKL matrix back to its base MATSEQAIJ type.
   * This routine is only called to convert a MATAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat B = *newmat;
#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
#endif

  PetscFunctionBegin;
  /* For MAT_INITIAL_MATRIX convert into a fresh duplicate; otherwise B (which aliases A
   * for MAT_INPLACE_MATRIX) is converted in place. */
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Reset the original function pointers. */
  B->ops->duplicate               = MatDuplicate_SeqAIJ;
  B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy                 = MatDestroy_SeqAIJ;
  B->ops->mult                    = MatMult_SeqAIJ;
  B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
  B->ops->multadd                 = MatMultAdd_SeqAIJ;
  B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
  B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

  /* Remove the MKL-specific converter previously composed on the object. */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr; /* the duplicate carries its own spptr copy */

  if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
  PetscCall(PetscFree(B->spptr));

  /* Change the type of B to MATSEQAIJ. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));

  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
72 
MatDestroy_SeqAIJMKL(Mat A)73 static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
74 {
75   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
76 
77   PetscFunctionBegin;
78   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
79   if (aijmkl) {
80     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
81 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
82     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
83 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
84     PetscCall(PetscFree(A->spptr));
85   }
86 
87   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
88    * to destroy everything that remains. */
89   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
90   /* I don't call MatSetType().  I believe this is because that
91    * is only to be called when *building* a matrix.  I could be wrong, but
92    * that is how things work for the SuperLU matrix class. */
93   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
94   PetscCall(MatDestroy_SeqAIJ(A));
95   PetscFunctionReturn(PETSC_SUCCESS);
96 }
97 
98 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
99  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
100  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
101  * handle, creates a new one, and then calls mkl_sparse_optimize().
102  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
103  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
104  * an unoptimized matrix handle here. */
PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
{
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* If the MKL library does not have mkl_sparse_optimize(), then this routine
   * does nothing. We make it callable anyway in this case because it cuts
   * down on littering the code with #ifdefs. */
  PetscFunctionBegin;
  PetscFunctionReturn(PETSC_SUCCESS);
#else
  Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  PetscInt       m, n;
  MatScalar     *aa;
  PetscInt      *aj, *ai;

  PetscFunctionBegin;
  #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
   * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
   * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
   * mkl_sparse_optimize() later. */
  if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
  #endif

  if (aijmkl->sparse_optimized) {
    /* Matrix has been previously assembled and optimized. Must destroy old
     * matrix handle before running the optimization step again. */
    PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
  }
  aijmkl->sparse_optimized = PETSC_FALSE;

  /* Now perform the SpMV2 setup and matrix optimization. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  m                  = A->rmap->n;
  n                  = A->cmap->n;
  aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
  aa                 = a->a; /* Nonzero elements stored row-by-row. */
  ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
  if (a->nz && aa && !A->structure_only) {
    /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
     * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
    PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
    if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
    aijmkl->sparse_optimized = PETSC_TRUE;
    /* Record the object state so the SpMV routines can detect when this handle becomes stale. */
    PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
  } else {
    /* Empty (or structure-only) matrix: no handle is created; callers must check for NULL csrA. */
    aijmkl->csrA = NULL;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
#endif
}
160 
161 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
162 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
/* Take an already created but empty matrix A and set up its nonzero structure (and type MATSEQAIJMKL)
 * from the given MKL sparse matrix handle, which becomes owned by A. */
static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
{
  sparse_index_base_t indexing;
  PetscInt            m, n;
  PetscInt           *aj, *ai, *unused;
  MatScalar          *aa;
  Mat_SeqAIJMKL      *aijmkl;

  PetscFunctionBegin;
  if (csrA) {
    /* Note: Must pass in &unused below since MKL can't accept NULL for this output array we don't actually want. */
    PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&unused, (MKL_INT **)&aj, &aa);
    PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
  } else {
    /* NULL handle (empty matrix, e.g. zero rows or columns): build A with an empty structure. */
    aj = ai = NULL;
    aa      = NULL;
  }

  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
  /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
   * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
   * they will be destroyed when the MKL handle is destroyed.
   * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
  if (csrA) {
    PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
  } else {
    /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
    PetscCall(MatSetUp(A));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  }

  /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
   * Now turn it into a MATSEQAIJMKL. */
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));

  /* Hand ownership of the passed-in handle to the newly converted matrix. */
  aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
  aijmkl->csrA = csrA;

  /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
   * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
   * and just need to be able to run the MKL optimization step. */
  aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
  aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
  aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
  if (csrA) {
    PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
    PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
  }
  /* Record the object state so the SpMV routines can detect when the handle becomes stale. */
  PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
  PetscFunctionReturn(PETSC_SUCCESS);
}
216 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
217 
218 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
219  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
220  * MatMatMultNumeric(). */
221 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
{
  PetscInt            i;
  PetscInt            nrows, ncols;
  PetscInt            nz;
  PetscInt           *ai, *aj, *unused;
  PetscScalar        *aa;
  Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  sparse_index_base_t indexing;

  PetscFunctionBegin;
  /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
  if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);

  /* Note: Must pass in &unused below since MKL can't accept NULL for this output array we don't actually want. */
  PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&unused, (MKL_INT **)&aj, &aa);

  /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
   * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
  for (i = 0; i < nrows; i++) {
    nz = ai[i + 1] - ai[i]; /* number of stored entries in row i of the exported CSR */
    PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* Record the object state matching the handle contents we just copied in. */
  PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
  /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
   * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
   * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
  aijmkl->sparse_optimized = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
256 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
257 
258 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)259 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
260 {
261   PetscInt            i, j, k;
262   PetscInt            nrows, ncols;
263   PetscInt            nz;
264   PetscInt           *ai, *aj, *unused;
265   PetscScalar        *aa;
266   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
267   sparse_index_base_t indexing;
268 
269   PetscFunctionBegin;
270   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
271 
272   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
273   if (!aijmkl->csrA) {
274     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
275     PetscFunctionReturn(PETSC_SUCCESS);
276   }
277 
278   /* Note: Must pass in &unused below since MKL can't accept NULL for this output array we don't actually want. */
279   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&unused, (MKL_INT **)&aj, &aa);
280 
281   k = 0;
282   for (i = 0; i < nrows; i++) {
283     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
284     nz = ai[i + 1] - ai[i];
285     for (j = 0; j < nz; j++) {
286       if (aa) {
287         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
288       } else {
289         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
290       }
291       k++;
292     }
293     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
294   }
295   PetscFunctionReturn(PETSC_SUCCESS);
296 }
297 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
298 
MatDuplicate_SeqAIJMKL(Mat A,MatDuplicateOption op,Mat * M)299 static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
300 {
301   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
302   Mat_SeqAIJMKL *aijmkl_dest;
303 
304   PetscFunctionBegin;
305   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
306   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
307   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
308   aijmkl_dest->sparse_optimized = PETSC_FALSE;
309   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
310   PetscFunctionReturn(PETSC_SUCCESS);
311 }
312 
MatAssemblyEnd_SeqAIJMKL(Mat A,MatAssemblyType mode)313 static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
314 {
315   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
316   Mat_SeqAIJMKL *aijmkl;
317 
318   PetscFunctionBegin;
319   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
320 
321   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
322    * extra information and some different methods, call the AssemblyEnd
323    * routine for a MATSEQAIJ.
324    * I'm not sure if this is the best way to do this, but it avoids
325    * a lot of code duplication. */
326   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
327   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
328 
329   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
330    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
331   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
332   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)337 static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
338 {
339   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
340   const PetscScalar *x;
341   PetscScalar       *y;
342   const MatScalar   *aa;
343   PetscInt           m     = A->rmap->n;
344   PetscInt           n     = A->cmap->n;
345   PetscScalar        alpha = 1.0;
346   PetscScalar        beta  = 0.0;
347   const PetscInt    *aj, *ai;
348   char               matdescra[6];
349 
350   /* Variables not in MatMult_SeqAIJ. */
351   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
352 
353   PetscFunctionBegin;
354   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
355   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
356   PetscCall(VecGetArrayRead(xx, &x));
357   PetscCall(VecGetArray(yy, &y));
358   aj = a->j; /* aj[k] gives column index for element aa[k]. */
359   aa = a->a; /* Nonzero elements stored row-by-row. */
360   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
361 
362   /* Call MKL sparse BLAS routine to do the MatMult. */
363   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
364 
365   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
366   PetscCall(VecRestoreArrayRead(xx, &x));
367   PetscCall(VecRestoreArray(yy, &y));
368   PetscFunctionReturn(PETSC_SUCCESS);
369 }
370 #endif
371 
372 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)373 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
374 {
375   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
376   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
377   const PetscScalar *x;
378   PetscScalar       *y;
379   PetscObjectState   state;
380 
381   PetscFunctionBegin;
382   /* If there are no nonzero entries, zero yy and return immediately. */
383   if (!a->nz) {
384     PetscCall(VecGetArray(yy, &y));
385     PetscCall(PetscArrayzero(y, A->rmap->n));
386     PetscCall(VecRestoreArray(yy, &y));
387     PetscFunctionReturn(PETSC_SUCCESS);
388   }
389 
390   PetscCall(VecGetArrayRead(xx, &x));
391   PetscCall(VecGetArray(yy, &y));
392 
393   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
394    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
395    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
396   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
397   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
398 
399   /* Call MKL SpMV2 executor routine to do the MatMult. */
400   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
401 
402   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
403   PetscCall(VecRestoreArrayRead(xx, &x));
404   PetscCall(VecRestoreArray(yy, &y));
405   PetscFunctionReturn(PETSC_SUCCESS);
406 }
407 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
408 
409 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)410 static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
411 {
412   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
413   const PetscScalar *x;
414   PetscScalar       *y;
415   const MatScalar   *aa;
416   PetscInt           m     = A->rmap->n;
417   PetscInt           n     = A->cmap->n;
418   PetscScalar        alpha = 1.0;
419   PetscScalar        beta  = 0.0;
420   const PetscInt    *aj, *ai;
421   char               matdescra[6];
422 
423   /* Variables not in MatMultTranspose_SeqAIJ. */
424   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
425 
426   PetscFunctionBegin;
427   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
428   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
429   PetscCall(VecGetArrayRead(xx, &x));
430   PetscCall(VecGetArray(yy, &y));
431   aj = a->j; /* aj[k] gives column index for element aa[k]. */
432   aa = a->a; /* Nonzero elements stored row-by-row. */
433   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
434 
435   /* Call MKL sparse BLAS routine to do the MatMult. */
436   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
437 
438   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
439   PetscCall(VecRestoreArrayRead(xx, &x));
440   PetscCall(VecRestoreArray(yy, &y));
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 #endif
444 
445 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)446 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
447 {
448   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
449   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
450   const PetscScalar *x;
451   PetscScalar       *y;
452   PetscObjectState   state;
453 
454   PetscFunctionBegin;
455   /* If there are no nonzero entries, zero yy and return immediately. */
456   if (!a->nz) {
457     PetscCall(VecGetArray(yy, &y));
458     PetscCall(PetscArrayzero(y, A->cmap->n));
459     PetscCall(VecRestoreArray(yy, &y));
460     PetscFunctionReturn(PETSC_SUCCESS);
461   }
462 
463   PetscCall(VecGetArrayRead(xx, &x));
464   PetscCall(VecGetArray(yy, &y));
465 
466   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
467    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
468    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
469   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
470   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
471 
472   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
473   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
474 
475   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
476   PetscCall(VecRestoreArrayRead(xx, &x));
477   PetscCall(VecRestoreArray(yy, &y));
478   PetscFunctionReturn(PETSC_SUCCESS);
479 }
480 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
481 
482 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)483 static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
484 {
485   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
486   const PetscScalar *x;
487   PetscScalar       *y, *z;
488   const MatScalar   *aa;
489   PetscInt           m = A->rmap->n;
490   PetscInt           n = A->cmap->n;
491   const PetscInt    *aj, *ai;
492   PetscInt           i;
493 
494   /* Variables not in MatMultAdd_SeqAIJ. */
495   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
496   PetscScalar alpha  = 1.0;
497   PetscScalar beta;
498   char        matdescra[6];
499 
500   PetscFunctionBegin;
501   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
502   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
503 
504   PetscCall(VecGetArrayRead(xx, &x));
505   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
506   aj = a->j; /* aj[k] gives column index for element aa[k]. */
507   aa = a->a; /* Nonzero elements stored row-by-row. */
508   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
509 
510   /* Call MKL sparse BLAS routine to do the MatMult. */
511   if (zz == yy) {
512     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
513     beta = 1.0;
514     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
515   } else {
516     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
517      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
518     beta = 0.0;
519     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
520     for (i = 0; i < m; i++) z[i] += y[i];
521   }
522 
523   PetscCall(PetscLogFlops(2.0 * a->nz));
524   PetscCall(VecRestoreArrayRead(xx, &x));
525   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 #endif
529 
530 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)531 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
532 {
533   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
534   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
535   const PetscScalar *x;
536   PetscScalar       *y, *z;
537   PetscInt           m = A->rmap->n;
538   PetscInt           i;
539 
540   /* Variables not in MatMultAdd_SeqAIJ. */
541   PetscObjectState state;
542 
543   PetscFunctionBegin;
544   /* If there are no nonzero entries, set zz = yy and return immediately. */
545   if (!a->nz) {
546     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
547     PetscCall(PetscArraycpy(z, y, m));
548     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
549     PetscFunctionReturn(PETSC_SUCCESS);
550   }
551 
552   PetscCall(VecGetArrayRead(xx, &x));
553   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
554 
555   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
556    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
557    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
558   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
559   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
560 
561   /* Call MKL sparse BLAS routine to do the MatMult. */
562   if (zz == yy) {
563     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
564      * with alpha and beta both set to 1.0. */
565     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
566   } else {
567     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
568      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
569     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
570     for (i = 0; i < m; i++) z[i] += y[i];
571   }
572 
573   PetscCall(PetscLogFlops(2.0 * a->nz));
574   PetscCall(VecRestoreArrayRead(xx, &x));
575   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
576   PetscFunctionReturn(PETSC_SUCCESS);
577 }
578 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
579 
580 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)581 static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
582 {
583   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
584   const PetscScalar *x;
585   PetscScalar       *y, *z;
586   const MatScalar   *aa;
587   PetscInt           m = A->rmap->n;
588   PetscInt           n = A->cmap->n;
589   const PetscInt    *aj, *ai;
590   PetscInt           i;
591 
592   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
593   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
594   PetscScalar alpha  = 1.0;
595   PetscScalar beta;
596   char        matdescra[6];
597 
598   PetscFunctionBegin;
599   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
600   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
601 
602   PetscCall(VecGetArrayRead(xx, &x));
603   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
604   aj = a->j; /* aj[k] gives column index for element aa[k]. */
605   aa = a->a; /* Nonzero elements stored row-by-row. */
606   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
607 
608   /* Call MKL sparse BLAS routine to do the MatMult. */
609   if (zz == yy) {
610     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
611     beta = 1.0;
612     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
613   } else {
614     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
615      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
616     beta = 0.0;
617     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
618     for (i = 0; i < n; i++) z[i] += y[i];
619   }
620 
621   PetscCall(PetscLogFlops(2.0 * a->nz));
622   PetscCall(VecRestoreArrayRead(xx, &x));
623   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 #endif
627 
628 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
/* Computes zz = A^T * xx + yy using the MKL SpMV2 inspector-executor routines. */
PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
  const PetscScalar *x;
  PetscScalar       *y, *z;
  PetscInt           n = A->cmap->n; /* A^T * x has as many entries as A has columns. */
  PetscInt           i;
  PetscObjectState   state;

  PetscFunctionBegin;
  /* If there are no nonzero entries, set zz = yy and return immediately. */
  if (!a->nz) {
    PetscCall(VecGetArrayPair(yy, zz, &y, &z));
    PetscCall(PetscArraycpy(z, y, n));
    PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArrayPair(yy, zz, &y, &z));

  /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
   * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
   * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

  /* Call MKL sparse BLAS routine to do the transposed MatMult. */
  if (zz == yy) {
    /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
     * with alpha and beta both set to 1.0. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
  } else {
    /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
     * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
    PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
    for (i = 0; i < n; i++) z[i] += y[i];
  }

  PetscCall(PetscLogFlops(2.0 * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
676 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
677 
678 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)679 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
680 {
681   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
682   sparse_matrix_t     csrA, csrB, csrC;
683   PetscInt            nrows, ncols;
684   struct matrix_descr descr_type_gen;
685   PetscObjectState    state;
686 
687   PetscFunctionBegin;
688   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
689    * not handle sparse matrices with zero rows or columns. */
690   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
691   else nrows = A->cmap->N;
692   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
693   else ncols = B->rmap->N;
694 
695   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
696   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
697   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
698   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
699   csrA                = a->csrA;
700   csrB                = b->csrA;
701   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
702 
703   if (csrA && csrB) {
704     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
705   } else {
706     csrC = NULL;
707   }
708 
709   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
710   PetscFunctionReturn(PETSC_SUCCESS);
711 }
712 
MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)713 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
714 {
715   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
716   sparse_matrix_t     csrA, csrB, csrC;
717   struct matrix_descr descr_type_gen;
718   PetscObjectState    state;
719 
720   PetscFunctionBegin;
721   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
722   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
723   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
724   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
725   csrA                = a->csrA;
726   csrB                = b->csrA;
727   csrC                = c->csrA;
728   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
729 
730   if (csrA && csrB) {
731     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
732   } else {
733     csrC = NULL;
734   }
735 
736   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
737   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
738   PetscFunctionReturn(PETSC_SUCCESS);
739 }
740 
/* Symbolic phase of C = A*B (no transposes); 'fill' is accepted for interface
 * compatibility but not used, since MKL determines the structure itself. */
PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
747 
/* Numeric phase of C = A*B (no transposes). */
PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
754 
/* Numeric phase of C = A^T*B. */
PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
761 
/* Symbolic phase of C = A^T*B; 'fill' is accepted for interface compatibility but unused. */
PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
768 
/* Symbolic phase of C = A*B^T; 'fill' is accepted for interface compatibility but unused. */
PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
775 
/* Numeric phase of C = A*B^T. */
PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
{
  PetscFunctionBegin;
  PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
782 
/* MatProduct numeric callback for MATPRODUCT_AtB: forwards to the A^T*B numeric kernel. */
static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
792 
MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)793 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
794 {
795   Mat_Product *product = C->product;
796   Mat          A = product->A, B = product->B;
797   PetscReal    fill = product->fill;
798 
799   PetscFunctionBegin;
800   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
801   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
/* Numeric phase of C = P^T*A*P for symmetric, real A, using MKL's mkl_sparse_sypr().
 * Because sypr only produces one triangle of the symmetric result, the full matrix is
 * reconstructed afterwards via a transpose-and-add step (see comments below). */
PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
{
  Mat                 Ct;    /* Temporary transpose of C, used to fill in the missing triangle. */
  Vec                 zeros; /* Zero vector used to blank the diagonal of Ct before the add. */
  Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
  sparse_matrix_t     csrA, csrP, csrC;
  PetscBool           set, flag;
  struct matrix_descr descr_type_sym;
  PetscObjectState    state;

  PetscFunctionBegin;
  /* This kernel is only valid when A has been marked symmetric; guard against misuse. */
  PetscCall(MatIsSymmetricKnown(A, &set, &flag));
  PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

  /* Rebuild the MKL handles for A and P if they are missing or out of date. */
  PetscCall(PetscObjectStateGet((PetscObject)A, &state));
  if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
  PetscCall(PetscObjectStateGet((PetscObject)P, &state));
  if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
  csrA                = a->csrA;
  csrP                = p->csrA;
  csrC                = c->csrA;
  descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
  descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
  descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

  /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
  PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);

  /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
   * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
   * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
   * We have to fill in the missing portion, which we currently do below by forming the transpose and performing a MatAXPY
   * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
   * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
   * the full matrix. */
  PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
  PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
  PetscCall(MatCreateVecs(C, &zeros, NULL));
  PetscCall(VecSetFromOptions(zeros));
  PetscCall(VecZeroEntries(zeros));
  /* Zero the diagonal of Ct so the diagonal is not counted twice in the MatAXPY below. */
  PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
  PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
  /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
  PetscCall(MatProductCreateWithMat(A, P, NULL, C));
  PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
  PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
  PetscCall(VecDestroy(&zeros));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}
855 
MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)856 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
857 {
858   Mat_Product        *product = C->product;
859   Mat                 A = product->A, P = product->B;
860   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
861   sparse_matrix_t     csrA, csrP, csrC;
862   struct matrix_descr descr_type_sym;
863   PetscObjectState    state;
864 
865   PetscFunctionBegin;
866   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
867   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
868   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
869   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
870   csrA                = a->csrA;
871   csrP                = p->csrA;
872   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
873   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
874   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
875 
876   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
877   if (csrP && csrA) {
878     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
879   } else {
880     csrC = NULL;
881   }
882 
883   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
884    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
885    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
886    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
887    * is guaranteed. */
888   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
889 
890   C->ops->productnumeric = MatProductNumeric_PtAP;
891   PetscFunctionReturn(PETSC_SUCCESS);
892 }
893 
/* Install the MKL-backed symbolic routines for a MATPRODUCT_AB product. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
901 
/* Install the MKL-backed symbolic routine for a MATPRODUCT_AtB product. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
908 
/* Install routines for a MATPRODUCT_ABt product.
 * NOTE(review): the symbolic op points at the plain SeqAIJ implementation here
 * (not an MKL-specific one) — presumably intentional, as only the numeric op
 * (set in MatConvert) is MKL-accelerated; confirm against the MKL numeric path. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(PETSC_SUCCESS);
}
916 
MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)917 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
918 {
919   Mat_Product *product = C->product;
920   Mat          A       = product->A;
921   PetscBool    set, flag;
922 
923   PetscFunctionBegin;
924   /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
925   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
926   PetscCheck(!PetscDefined(USE_COMPLEX) && set && flag, PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_PtAP not supported for type SeqAIJMKL");
927   /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal() */
928   C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
929   PetscFunctionReturn(PETSC_SUCCESS);
930 }
931 
/* MATPRODUCT_RARt has no MKL-specific implementation; clear the symbolic op so the
 * generic fallback is used. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
{
  PetscFunctionBegin;
  C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
938 
/* MATPRODUCT_ABC is not supported by this type; always raises an error. */
static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_ABC not supported for type SeqAIJMKL");
}
944 
MatProductSetFromOptions_SeqAIJMKL(Mat C)945 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
946 {
947   Mat_Product *product = C->product;
948 
949   PetscFunctionBegin;
950   switch (product->type) {
951   case MATPRODUCT_AB:
952     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
953     break;
954   case MATPRODUCT_AtB:
955     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
956     break;
957   case MATPRODUCT_ABt:
958     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
959     break;
960   case MATPRODUCT_PtAP:
961     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
962     break;
963   case MATPRODUCT_RARt:
964     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
965     break;
966   case MATPRODUCT_ABC:
967     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
968     break;
969   default:
970     break;
971   }
972   PetscFunctionReturn(PETSC_SUCCESS);
973 }
974 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
975 
/* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
 * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqAIJMKL one.  It allocates the Mat_SeqAIJMKL spptr, parses the
 * AIJMKL command-line options, and installs the MKL-backed operations that
 * the build configuration supports. */
PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqAIJMKL *aijmkl;
  PetscBool      set;      /* "was the option set" flag from PetscOptionsBool; not examined afterwards. */
  PetscBool      sametype;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* If A is already of the requested type there is nothing to convert. */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
  if (sametype) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscNew(&aijmkl));
  B->spptr = (void *)aijmkl;

  /* Set function pointers for methods that we inherit from AIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
  B->ops->destroy     = MatDestroy_SeqAIJMKL;

  aijmkl->sparse_optimized = PETSC_FALSE;
  aijmkl->no_SpMV2         = PetscDefined(HAVE_MKL_SPARSE_OPTIMIZE) ? PETSC_FALSE : PETSC_TRUE; /* Default to using the SpMV2 routines if our MKL supports them. */
  aijmkl->eager_inspection = PETSC_FALSE;

  /* Parse command line options. */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
  PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
  PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  /* SpMV2 cannot be honored without mkl_sparse_optimize(); warn and fall back. */
  if (!aijmkl->no_SpMV2) {
    PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
    aijmkl->no_SpMV2 = PETSC_TRUE;
  }
#endif

#if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
  B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
  B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
  B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
  #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
  B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
  B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
  B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
  B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
    /* The sypr-based PtAP kernel only works for real scalars. */
    #if !defined(PETSC_USE_COMPLEX)
  B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
    #else
  B->ops->ptapnumeric = NULL;
    #endif
  #endif
#endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

#if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
  /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
   * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
   * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
   * versions in which the older interface has not been deprecated, we use the old interface. */
  if (aijmkl->no_SpMV2) {
    B->ops->mult             = MatMult_SeqAIJMKL;
    B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
    B->ops->multadd          = MatMultAdd_SeqAIJMKL;
    B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
  }
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1056 
1057 /*@C
1058   MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1059 
1060   Collective
1061 
1062   Input Parameters:
1063 + comm - MPI communicator, set to `PETSC_COMM_SELF`
1064 . m    - number of rows
1065 . n    - number of columns
1066 . nz   - number of nonzeros per row (same for all rows)
1067 - nnz  - array containing the number of nonzeros in the various rows
1068          (possibly different for each row) or `NULL`
1069 
1070   Output Parameter:
1071 . A - the matrix
1072 
1073   Options Database Keys:
1074 + -mat_aijmkl_no_spmv2         - disable use of the SpMV2 inspector-executor routines
1075 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1076                                   performing this step the first time the matrix is applied
1077 
1078   Level: intermediate
1079 
1080   Notes:
1081   If `nnz` is given then `nz` is ignored
1082 
1083   This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1084   routines from Intel MKL whenever possible.
1085 
1086   If the installed version of MKL supports the "SpMV2" sparse
1087   inspector-executor routines, then those are used by default.
1088 
1089   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1090   (for symmetric A) operations are currently supported.
1091   MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1092 
1093 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1094 @*/
PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, m, n)); /* Sequential matrix: local and global sizes coincide. */
  PetscCall(MatSetType(*A, MATSEQAIJMKL));
  PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1104 
/* Type constructor for MATSEQAIJMKL: build a plain SEQAIJ matrix, then convert it
 * in place to the MKL subclass. */
PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1112