1 /*
2 Defines basic operations for the MATSEQAIJMKL matrix class.
3 This class is derived from the MATSEQAIJ class and retains the
4 compressed row storage (aka Yale sparse matrix format) but uses
5 sparse BLAS operations from the Intel Math Kernel Library (MKL)
6 wherever possible.
7 */
8
9 #include <../src/mat/impls/aij/seq/aij.h>
10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
11 #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
12 #define MKL_ILP64
13 #endif
14 #include <mkl_spblas.h>
15
16 typedef struct {
17 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
18 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
19 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
20 PetscObjectState state;
21 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
22 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
23 struct matrix_descr descr;
24 #endif
25 } Mat_SeqAIJMKL;
26
MatConvert_SeqAIJMKL_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat * newmat)27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
28 {
29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
30 /* so we will ignore 'MatType type'. */
31 Mat B = *newmat;
32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
33 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
34 #endif
35
36 PetscFunctionBegin;
37 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
38
39 /* Reset the original function pointers. */
40 B->ops->duplicate = MatDuplicate_SeqAIJ;
41 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
42 B->ops->destroy = MatDestroy_SeqAIJ;
43 B->ops->mult = MatMult_SeqAIJ;
44 B->ops->multtranspose = MatMultTranspose_SeqAIJ;
45 B->ops->multadd = MatMultAdd_SeqAIJ;
46 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
47 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ;
48 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
49 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
50 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
51 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
52 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
53
54 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
55
56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
57 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
58 * simply involves destroying the MKL sparse matrix handle and then freeing
59 * the spptr pointer. */
60 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
61
62 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
64 PetscCall(PetscFree(B->spptr));
65
66 /* Change the type of B to MATSEQAIJ. */
67 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
68
69 *newmat = B;
70 PetscFunctionReturn(PETSC_SUCCESS);
71 }
72
MatDestroy_SeqAIJMKL(Mat A)73 static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
74 {
75 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
76
77 PetscFunctionBegin;
78 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
79 if (aijmkl) {
80 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
81 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
82 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
83 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
84 PetscCall(PetscFree(A->spptr));
85 }
86
87 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
88 * to destroy everything that remains. */
89 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
90 /* I don't call MatSetType(). I believe this is because that
91 * is only to be called when *building* a matrix. I could be wrong, but
92 * that is how things work for the SuperLU matrix class. */
93 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
94 PetscCall(MatDestroy_SeqAIJ(A));
95 PetscFunctionReturn(PETSC_SUCCESS);
96 }
97
98 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
99 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
100 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
101 * handle, creates a new one, and then calls mkl_sparse_optimize().
102 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
103 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
104 * an unoptimized matrix handle here. */
MatSeqAIJMKL_create_mkl_handle(Mat A)105 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
106 {
107 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
108 /* If the MKL library does not have mkl_sparse_optimize(), then this routine
109 * does nothing. We make it callable anyway in this case because it cuts
110 * down on littering the code with #ifdefs. */
111 PetscFunctionBegin;
112 PetscFunctionReturn(PETSC_SUCCESS);
113 #else
114 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
115 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
116 PetscInt m, n;
117 MatScalar *aa;
118 PetscInt *aj, *ai;
119
120 PetscFunctionBegin;
121 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
122 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
123 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
124 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
125 * mkl_sparse_optimize() later. */
126 if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
127 #endif
128
129 if (aijmkl->sparse_optimized) {
130 /* Matrix has been previously assembled and optimized. Must destroy old
131 * matrix handle before running the optimization step again. */
132 PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
133 }
134 aijmkl->sparse_optimized = PETSC_FALSE;
135
136 /* Now perform the SpMV2 setup and matrix optimization. */
137 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
138 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
139 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
140 m = A->rmap->n;
141 n = A->cmap->n;
142 aj = a->j; /* aj[k] gives column index for element aa[k]. */
143 aa = a->a; /* Nonzero elements stored row-by-row. */
144 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
145 if (a->nz && aa && !A->structure_only) {
146 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
147 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
148 PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
149 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
150 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
151 if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
152 aijmkl->sparse_optimized = PETSC_TRUE;
153 PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
154 } else {
155 aijmkl->csrA = NULL;
156 }
157 PetscFunctionReturn(PETSC_SUCCESS);
158 #endif
159 }
160
161 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
162 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm,sparse_matrix_t csrA,PetscInt nrows,PetscInt ncols,Mat A)163 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
164 {
165 sparse_index_base_t indexing;
166 PetscInt m, n;
167 PetscInt *aj, *ai, *unused;
168 MatScalar *aa;
169 Mat_SeqAIJMKL *aijmkl;
170
171 PetscFunctionBegin;
172 if (csrA) {
173 /* Note: Must pass in &unused below since MKL can't accept NULL for this output array we don't actually want. */
174 PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&unused, (MKL_INT **)&aj, &aa);
175 PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
176 } else {
177 aj = ai = NULL;
178 aa = NULL;
179 }
180
181 PetscCall(MatSetType(A, MATSEQAIJ));
182 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
183 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
184 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
185 * they will be destroyed when the MKL handle is destroyed.
186 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
187 if (csrA) {
188 PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
189 } else {
190 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
191 PetscCall(MatSetUp(A));
192 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
193 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
194 }
195
196 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
197 * Now turn it into a MATSEQAIJMKL. */
198 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
199
200 aijmkl = (Mat_SeqAIJMKL *)A->spptr;
201 aijmkl->csrA = csrA;
202
203 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
204 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
205 * and just need to be able to run the MKL optimization step. */
206 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
207 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
208 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
209 if (csrA) {
210 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
211 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
212 }
213 PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
214 PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
217
218 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
219 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
220 * MatMatMultNumeric(). */
221 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
MatSeqAIJMKL_update_from_mkl_handle(Mat A)222 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
223 {
224 PetscInt i;
225 PetscInt nrows, ncols;
226 PetscInt nz;
227 PetscInt *ai, *aj, *unused;
228 PetscScalar *aa;
229 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
230 sparse_index_base_t indexing;
231
232 PetscFunctionBegin;
233 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
234 if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);
235
236 /* Note: Must pass in &unused below since MKL can't accept NULL for this output array we don't actually want. */
237 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&unused, (MKL_INT **)&aj, &aa);
238
239 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
240 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
241 for (i = 0; i < nrows; i++) {
242 nz = ai[i + 1] - ai[i];
243 PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
244 }
245
246 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
247 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
248
249 PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state));
250 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
251 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
252 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
253 aijmkl->sparse_optimized = PETSC_FALSE;
254 PetscFunctionReturn(PETSC_SUCCESS);
255 }
256 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
257
258 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatSeqAIJMKL_view_mkl_handle(Mat A,PetscViewer viewer)259 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
260 {
261 PetscInt i, j, k;
262 PetscInt nrows, ncols;
263 PetscInt nz;
264 PetscInt *ai, *aj, *unused;
265 PetscScalar *aa;
266 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
267 sparse_index_base_t indexing;
268
269 PetscFunctionBegin;
270 PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
271
272 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
273 if (!aijmkl->csrA) {
274 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
275 PetscFunctionReturn(PETSC_SUCCESS);
276 }
277
278 /* Note: Must pass in &unused below since MKL can't accept NULL for this output array we don't actually want. */
279 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&unused, (MKL_INT **)&aj, &aa);
280
281 k = 0;
282 for (i = 0; i < nrows; i++) {
283 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
284 nz = ai[i + 1] - ai[i];
285 for (j = 0; j < nz; j++) {
286 if (aa) {
287 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k])));
288 } else {
289 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
290 }
291 k++;
292 }
293 PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
294 }
295 PetscFunctionReturn(PETSC_SUCCESS);
296 }
297 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
298
MatDuplicate_SeqAIJMKL(Mat A,MatDuplicateOption op,Mat * M)299 static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
300 {
301 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
302 Mat_SeqAIJMKL *aijmkl_dest;
303
304 PetscFunctionBegin;
305 PetscCall(MatDuplicate_SeqAIJ(A, op, M));
306 aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
307 PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
308 aijmkl_dest->sparse_optimized = PETSC_FALSE;
309 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
310 PetscFunctionReturn(PETSC_SUCCESS);
311 }
312
MatAssemblyEnd_SeqAIJMKL(Mat A,MatAssemblyType mode)313 static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
314 {
315 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
316 Mat_SeqAIJMKL *aijmkl;
317
318 PetscFunctionBegin;
319 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
320
321 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
322 * extra information and some different methods, call the AssemblyEnd
323 * routine for a MATSEQAIJ.
324 * I'm not sure if this is the best way to do this, but it avoids
325 * a lot of code duplication. */
326 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
327 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
328
329 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
330 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
331 aijmkl = (Mat_SeqAIJMKL *)A->spptr;
332 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
333 PetscFunctionReturn(PETSC_SUCCESS);
334 }
335
336 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMult_SeqAIJMKL(Mat A,Vec xx,Vec yy)337 static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
338 {
339 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
340 const PetscScalar *x;
341 PetscScalar *y;
342 const MatScalar *aa;
343 PetscInt m = A->rmap->n;
344 PetscInt n = A->cmap->n;
345 PetscScalar alpha = 1.0;
346 PetscScalar beta = 0.0;
347 const PetscInt *aj, *ai;
348 char matdescra[6];
349
350 /* Variables not in MatMult_SeqAIJ. */
351 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
352
353 PetscFunctionBegin;
354 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
355 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
356 PetscCall(VecGetArrayRead(xx, &x));
357 PetscCall(VecGetArray(yy, &y));
358 aj = a->j; /* aj[k] gives column index for element aa[k]. */
359 aa = a->a; /* Nonzero elements stored row-by-row. */
360 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
361
362 /* Call MKL sparse BLAS routine to do the MatMult. */
363 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
364
365 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
366 PetscCall(VecRestoreArrayRead(xx, &x));
367 PetscCall(VecRestoreArray(yy, &y));
368 PetscFunctionReturn(PETSC_SUCCESS);
369 }
370 #endif
371
372 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMult_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)373 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
374 {
375 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
376 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
377 const PetscScalar *x;
378 PetscScalar *y;
379 PetscObjectState state;
380
381 PetscFunctionBegin;
382 /* If there are no nonzero entries, zero yy and return immediately. */
383 if (!a->nz) {
384 PetscCall(VecGetArray(yy, &y));
385 PetscCall(PetscArrayzero(y, A->rmap->n));
386 PetscCall(VecRestoreArray(yy, &y));
387 PetscFunctionReturn(PETSC_SUCCESS);
388 }
389
390 PetscCall(VecGetArrayRead(xx, &x));
391 PetscCall(VecGetArray(yy, &y));
392
393 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
394 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
395 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
396 PetscCall(PetscObjectStateGet((PetscObject)A, &state));
397 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
398
399 /* Call MKL SpMV2 executor routine to do the MatMult. */
400 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
401
402 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
403 PetscCall(VecRestoreArrayRead(xx, &x));
404 PetscCall(VecRestoreArray(yy, &y));
405 PetscFunctionReturn(PETSC_SUCCESS);
406 }
407 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
408
409 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMultTranspose_SeqAIJMKL(Mat A,Vec xx,Vec yy)410 static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
411 {
412 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
413 const PetscScalar *x;
414 PetscScalar *y;
415 const MatScalar *aa;
416 PetscInt m = A->rmap->n;
417 PetscInt n = A->cmap->n;
418 PetscScalar alpha = 1.0;
419 PetscScalar beta = 0.0;
420 const PetscInt *aj, *ai;
421 char matdescra[6];
422
423 /* Variables not in MatMultTranspose_SeqAIJ. */
424 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
425
426 PetscFunctionBegin;
427 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
428 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
429 PetscCall(VecGetArrayRead(xx, &x));
430 PetscCall(VecGetArray(yy, &y));
431 aj = a->j; /* aj[k] gives column index for element aa[k]. */
432 aa = a->a; /* Nonzero elements stored row-by-row. */
433 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
434
435 /* Call MKL sparse BLAS routine to do the MatMult. */
436 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
437
438 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
439 PetscCall(VecRestoreArrayRead(xx, &x));
440 PetscCall(VecRestoreArray(yy, &y));
441 PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 #endif
444
445 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMultTranspose_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)446 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
447 {
448 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
449 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
450 const PetscScalar *x;
451 PetscScalar *y;
452 PetscObjectState state;
453
454 PetscFunctionBegin;
455 /* If there are no nonzero entries, zero yy and return immediately. */
456 if (!a->nz) {
457 PetscCall(VecGetArray(yy, &y));
458 PetscCall(PetscArrayzero(y, A->cmap->n));
459 PetscCall(VecRestoreArray(yy, &y));
460 PetscFunctionReturn(PETSC_SUCCESS);
461 }
462
463 PetscCall(VecGetArrayRead(xx, &x));
464 PetscCall(VecGetArray(yy, &y));
465
466 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
467 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
468 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
469 PetscCall(PetscObjectStateGet((PetscObject)A, &state));
470 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
471
472 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
473 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
474
475 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
476 PetscCall(VecRestoreArrayRead(xx, &x));
477 PetscCall(VecRestoreArray(yy, &y));
478 PetscFunctionReturn(PETSC_SUCCESS);
479 }
480 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
481
482 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMultAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)483 static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
484 {
485 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
486 const PetscScalar *x;
487 PetscScalar *y, *z;
488 const MatScalar *aa;
489 PetscInt m = A->rmap->n;
490 PetscInt n = A->cmap->n;
491 const PetscInt *aj, *ai;
492 PetscInt i;
493
494 /* Variables not in MatMultAdd_SeqAIJ. */
495 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
496 PetscScalar alpha = 1.0;
497 PetscScalar beta;
498 char matdescra[6];
499
500 PetscFunctionBegin;
501 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
502 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
503
504 PetscCall(VecGetArrayRead(xx, &x));
505 PetscCall(VecGetArrayPair(yy, zz, &y, &z));
506 aj = a->j; /* aj[k] gives column index for element aa[k]. */
507 aa = a->a; /* Nonzero elements stored row-by-row. */
508 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
509
510 /* Call MKL sparse BLAS routine to do the MatMult. */
511 if (zz == yy) {
512 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
513 beta = 1.0;
514 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
515 } else {
516 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
517 * MKL sparse BLAS does not have a MatMultAdd equivalent. */
518 beta = 0.0;
519 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
520 for (i = 0; i < m; i++) z[i] += y[i];
521 }
522
523 PetscCall(PetscLogFlops(2.0 * a->nz));
524 PetscCall(VecRestoreArrayRead(xx, &x));
525 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
526 PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 #endif
529
530 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMultAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)531 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
532 {
533 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
534 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
535 const PetscScalar *x;
536 PetscScalar *y, *z;
537 PetscInt m = A->rmap->n;
538 PetscInt i;
539
540 /* Variables not in MatMultAdd_SeqAIJ. */
541 PetscObjectState state;
542
543 PetscFunctionBegin;
544 /* If there are no nonzero entries, set zz = yy and return immediately. */
545 if (!a->nz) {
546 PetscCall(VecGetArrayPair(yy, zz, &y, &z));
547 PetscCall(PetscArraycpy(z, y, m));
548 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
549 PetscFunctionReturn(PETSC_SUCCESS);
550 }
551
552 PetscCall(VecGetArrayRead(xx, &x));
553 PetscCall(VecGetArrayPair(yy, zz, &y, &z));
554
555 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
556 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
557 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
558 PetscCall(PetscObjectStateGet((PetscObject)A, &state));
559 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
560
561 /* Call MKL sparse BLAS routine to do the MatMult. */
562 if (zz == yy) {
563 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
564 * with alpha and beta both set to 1.0. */
565 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
566 } else {
567 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
568 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
569 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
570 for (i = 0; i < m; i++) z[i] += y[i];
571 }
572
573 PetscCall(PetscLogFlops(2.0 * a->nz));
574 PetscCall(VecRestoreArrayRead(xx, &x));
575 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
576 PetscFunctionReturn(PETSC_SUCCESS);
577 }
578 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
579
580 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
MatMultTransposeAdd_SeqAIJMKL(Mat A,Vec xx,Vec yy,Vec zz)581 static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
582 {
583 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
584 const PetscScalar *x;
585 PetscScalar *y, *z;
586 const MatScalar *aa;
587 PetscInt m = A->rmap->n;
588 PetscInt n = A->cmap->n;
589 const PetscInt *aj, *ai;
590 PetscInt i;
591
592 /* Variables not in MatMultTransposeAdd_SeqAIJ. */
593 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
594 PetscScalar alpha = 1.0;
595 PetscScalar beta;
596 char matdescra[6];
597
598 PetscFunctionBegin;
599 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
600 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
601
602 PetscCall(VecGetArrayRead(xx, &x));
603 PetscCall(VecGetArrayPair(yy, zz, &y, &z));
604 aj = a->j; /* aj[k] gives column index for element aa[k]. */
605 aa = a->a; /* Nonzero elements stored row-by-row. */
606 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
607
608 /* Call MKL sparse BLAS routine to do the MatMult. */
609 if (zz == yy) {
610 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
611 beta = 1.0;
612 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
613 } else {
614 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
615 * MKL sparse BLAS does not have a MatMultAdd equivalent. */
616 beta = 0.0;
617 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
618 for (i = 0; i < n; i++) z[i] += y[i];
619 }
620
621 PetscCall(PetscLogFlops(2.0 * a->nz));
622 PetscCall(VecRestoreArrayRead(xx, &x));
623 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
624 PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 #endif
627
628 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)629 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
630 {
631 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
632 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
633 const PetscScalar *x;
634 PetscScalar *y, *z;
635 PetscInt n = A->cmap->n;
636 PetscInt i;
637 PetscObjectState state;
638
639 /* Variables not in MatMultTransposeAdd_SeqAIJ. */
640
641 PetscFunctionBegin;
642 /* If there are no nonzero entries, set zz = yy and return immediately. */
643 if (!a->nz) {
644 PetscCall(VecGetArrayPair(yy, zz, &y, &z));
645 PetscCall(PetscArraycpy(z, y, n));
646 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
647 PetscFunctionReturn(PETSC_SUCCESS);
648 }
649
650 PetscCall(VecGetArrayRead(xx, &x));
651 PetscCall(VecGetArrayPair(yy, zz, &y, &z));
652
653 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
654 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
655 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
656 PetscCall(PetscObjectStateGet((PetscObject)A, &state));
657 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
658
659 /* Call MKL sparse BLAS routine to do the MatMult. */
660 if (zz == yy) {
661 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
662 * with alpha and beta both set to 1.0. */
663 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
664 } else {
665 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
666 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
667 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
668 for (i = 0; i < n; i++) z[i] += y[i];
669 }
670
671 PetscCall(PetscLogFlops(2.0 * a->nz));
672 PetscCall(VecRestoreArrayRead(xx, &x));
673 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
674 PetscFunctionReturn(PETSC_SUCCESS);
675 }
676 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
677
678 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)679 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
680 {
681 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
682 sparse_matrix_t csrA, csrB, csrC;
683 PetscInt nrows, ncols;
684 struct matrix_descr descr_type_gen;
685 PetscObjectState state;
686
687 PetscFunctionBegin;
688 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
689 * not handle sparse matrices with zero rows or columns. */
690 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
691 else nrows = A->cmap->N;
692 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
693 else ncols = B->rmap->N;
694
695 PetscCall(PetscObjectStateGet((PetscObject)A, &state));
696 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
697 PetscCall(PetscObjectStateGet((PetscObject)B, &state));
698 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
699 csrA = a->csrA;
700 csrB = b->csrA;
701 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
702
703 if (csrA && csrB) {
704 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
705 } else {
706 csrC = NULL;
707 }
708
709 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
710 PetscFunctionReturn(PETSC_SUCCESS);
711 }
712
MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A,const sparse_operation_t transA,Mat B,const sparse_operation_t transB,Mat C)713 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
714 {
715 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
716 sparse_matrix_t csrA, csrB, csrC;
717 struct matrix_descr descr_type_gen;
718 PetscObjectState state;
719
720 PetscFunctionBegin;
721 PetscCall(PetscObjectStateGet((PetscObject)A, &state));
722 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
723 PetscCall(PetscObjectStateGet((PetscObject)B, &state));
724 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
725 csrA = a->csrA;
726 csrB = b->csrA;
727 csrC = c->csrA;
728 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
729
730 if (csrA && csrB) {
731 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
732 } else {
733 csrC = NULL;
734 }
735
736 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
737 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
738 PetscFunctionReturn(PETSC_SUCCESS);
739 }
740
MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)741 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
742 {
743 PetscFunctionBegin;
744 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
745 PetscFunctionReturn(PETSC_SUCCESS);
746 }
747
MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)748 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
749 {
750 PetscFunctionBegin;
751 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
752 PetscFunctionReturn(PETSC_SUCCESS);
753 }
754
MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)755 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
756 {
757 PetscFunctionBegin;
758 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
759 PetscFunctionReturn(PETSC_SUCCESS);
760 }
761
MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)762 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
763 {
764 PetscFunctionBegin;
765 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
766 PetscFunctionReturn(PETSC_SUCCESS);
767 }
768
MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,PetscReal fill,Mat C)769 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
770 {
771 PetscFunctionBegin;
772 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
773 PetscFunctionReturn(PETSC_SUCCESS);
774 }
775
MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A,Mat B,Mat C)776 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
777 {
778 PetscFunctionBegin;
779 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
780 PetscFunctionReturn(PETSC_SUCCESS);
781 }
782
MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)783 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
784 {
785 Mat_Product *product = C->product;
786 Mat A = product->A, B = product->B;
787
788 PetscFunctionBegin;
789 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
790 PetscFunctionReturn(PETSC_SUCCESS);
791 }
792
MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)793 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
794 {
795 Mat_Product *product = C->product;
796 Mat A = product->A, B = product->B;
797 PetscReal fill = product->fill;
798
799 PetscFunctionBegin;
800 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
801 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
802 PetscFunctionReturn(PETSC_SUCCESS);
803 }
804
MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A,Mat P,Mat C)805 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
806 {
807 Mat Ct;
808 Vec zeros;
809 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
810 sparse_matrix_t csrA, csrP, csrC;
811 PetscBool set, flag;
812 struct matrix_descr descr_type_sym;
813 PetscObjectState state;
814
815 PetscFunctionBegin;
816 PetscCall(MatIsSymmetricKnown(A, &set, &flag));
817 PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
818
819 PetscCall(PetscObjectStateGet((PetscObject)A, &state));
820 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
821 PetscCall(PetscObjectStateGet((PetscObject)P, &state));
822 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
823 csrA = a->csrA;
824 csrP = p->csrA;
825 csrC = c->csrA;
826 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
827 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
828 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
829
830 /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
831 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
832
833 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
834 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
835 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
836 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
837 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
838 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
839 * the full matrix. */
840 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
841 PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
842 PetscCall(MatCreateVecs(C, &zeros, NULL));
843 PetscCall(VecSetFromOptions(zeros));
844 PetscCall(VecZeroEntries(zeros));
845 PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
846 PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
847 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
848 PetscCall(MatProductCreateWithMat(A, P, NULL, C));
849 PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
850 PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
851 PetscCall(VecDestroy(&zeros));
852 PetscCall(MatDestroy(&Ct));
853 PetscFunctionReturn(PETSC_SUCCESS);
854 }
855
MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)856 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
857 {
858 Mat_Product *product = C->product;
859 Mat A = product->A, P = product->B;
860 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
861 sparse_matrix_t csrA, csrP, csrC;
862 struct matrix_descr descr_type_sym;
863 PetscObjectState state;
864
865 PetscFunctionBegin;
866 PetscCall(PetscObjectStateGet((PetscObject)A, &state));
867 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
868 PetscCall(PetscObjectStateGet((PetscObject)P, &state));
869 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
870 csrA = a->csrA;
871 csrP = p->csrA;
872 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
873 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
874 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
875
876 /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
877 if (csrP && csrA) {
878 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
879 } else {
880 csrC = NULL;
881 }
882
883 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
884 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
885 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
886 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
887 * is guaranteed. */
888 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
889
890 C->ops->productnumeric = MatProductNumeric_PtAP;
891 PetscFunctionReturn(PETSC_SUCCESS);
892 }
893
MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)894 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
895 {
896 PetscFunctionBegin;
897 C->ops->productsymbolic = MatProductSymbolic_AB;
898 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
899 PetscFunctionReturn(PETSC_SUCCESS);
900 }
901
MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)902 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
903 {
904 PetscFunctionBegin;
905 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
906 PetscFunctionReturn(PETSC_SUCCESS);
907 }
908
MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)909 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
910 {
911 PetscFunctionBegin;
912 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
913 C->ops->productsymbolic = MatProductSymbolic_ABt;
914 PetscFunctionReturn(PETSC_SUCCESS);
915 }
916
MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)917 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
918 {
919 Mat_Product *product = C->product;
920 Mat A = product->A;
921 PetscBool set, flag;
922
923 PetscFunctionBegin;
924 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
925 PetscCall(MatIsSymmetricKnown(A, &set, &flag));
926 PetscCheck(!PetscDefined(USE_COMPLEX) && set && flag, PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_PtAP not supported for type SeqAIJMKL");
927 /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal() */
928 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
929 PetscFunctionReturn(PETSC_SUCCESS);
930 }
931
MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)932 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
933 {
934 PetscFunctionBegin;
935 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
936 PetscFunctionReturn(PETSC_SUCCESS);
937 }
938
MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)939 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
940 {
941 PetscFunctionBegin;
942 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_ABC not supported for type SeqAIJMKL");
943 }
944
MatProductSetFromOptions_SeqAIJMKL(Mat C)945 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
946 {
947 Mat_Product *product = C->product;
948
949 PetscFunctionBegin;
950 switch (product->type) {
951 case MATPRODUCT_AB:
952 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
953 break;
954 case MATPRODUCT_AtB:
955 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
956 break;
957 case MATPRODUCT_ABt:
958 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
959 break;
960 case MATPRODUCT_PtAP:
961 PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
962 break;
963 case MATPRODUCT_RARt:
964 PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
965 break;
966 case MATPRODUCT_ABC:
967 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
968 break;
969 default:
970 break;
971 }
972 PetscFunctionReturn(PETSC_SUCCESS);
973 }
974 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
975
976 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
977 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
978 * routine, but can also be used to convert an assembled SeqAIJ matrix
979 * into a SeqAIJMKL one. */
MatConvert_SeqAIJ_SeqAIJMKL(Mat A,MatType type,MatReuse reuse,Mat * newmat)980 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
981 {
982 Mat B = *newmat;
983 Mat_SeqAIJMKL *aijmkl;
984 PetscBool set;
985 PetscBool sametype;
986
987 PetscFunctionBegin;
988 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
989
990 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
991 if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
992
993 PetscCall(PetscNew(&aijmkl));
994 B->spptr = (void *)aijmkl;
995
996 /* Set function pointers for methods that we inherit from AIJ but override.
997 * We also parse some command line options below, since those determine some of the methods we point to. */
998 B->ops->duplicate = MatDuplicate_SeqAIJMKL;
999 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1000 B->ops->destroy = MatDestroy_SeqAIJMKL;
1001
1002 aijmkl->sparse_optimized = PETSC_FALSE;
1003 aijmkl->no_SpMV2 = PetscDefined(HAVE_MKL_SPARSE_OPTIMIZE) ? PETSC_FALSE : PETSC_TRUE; /* Default to using the SpMV2 routines if our MKL supports them. */
1004 aijmkl->eager_inspection = PETSC_FALSE;
1005
1006 /* Parse command line options. */
1007 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1008 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1009 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1010 PetscOptionsEnd();
1011 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1012 if (!aijmkl->no_SpMV2) {
1013 PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"));
1014 aijmkl->no_SpMV2 = PETSC_TRUE;
1015 }
1016 #endif
1017
1018 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1019 B->ops->mult = MatMult_SeqAIJMKL_SpMV2;
1020 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2;
1021 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2;
1022 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1023 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1024 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL;
1025 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1026 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1027 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1028 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1029 #if !defined(PETSC_USE_COMPLEX)
1030 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1031 #else
1032 B->ops->ptapnumeric = NULL;
1033 #endif
1034 #endif
1035 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1036
1037 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1038 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1039 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1040 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1041 * versions in which the older interface has not been deprecated, we use the old interface. */
1042 if (aijmkl->no_SpMV2) {
1043 B->ops->mult = MatMult_SeqAIJMKL;
1044 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL;
1045 B->ops->multadd = MatMultAdd_SeqAIJMKL;
1046 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1047 }
1048 #endif
1049
1050 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
1051
1052 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1053 *newmat = B;
1054 PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056
1057 /*@C
1058 MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1059
1060 Collective
1061
1062 Input Parameters:
1063 + comm - MPI communicator, set to `PETSC_COMM_SELF`
1064 . m - number of rows
1065 . n - number of columns
1066 . nz - number of nonzeros per row (same for all rows)
1067 - nnz - array containing the number of nonzeros in the various rows
1068 (possibly different for each row) or `NULL`
1069
1070 Output Parameter:
1071 . A - the matrix
1072
1073 Options Database Keys:
1074 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1075 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1076 performing this step the first time the matrix is applied
1077
1078 Level: intermediate
1079
1080 Notes:
1081 If `nnz` is given then `nz` is ignored
1082
1083 This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1084 routines from Intel MKL whenever possible.
1085
1086 If the installed version of MKL supports the "SpMV2" sparse
1087 inspector-executor routines, then those are used by default.
1088
1089 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1090 (for symmetric A) operations are currently supported.
1091 MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1092
1093 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1094 @*/
MatCreateSeqAIJMKL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)1095 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1096 {
1097 PetscFunctionBegin;
1098 PetscCall(MatCreate(comm, A));
1099 PetscCall(MatSetSizes(*A, m, n, m, n));
1100 PetscCall(MatSetType(*A, MATSEQAIJMKL));
1101 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1102 PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104
MatCreate_SeqAIJMKL(Mat A)1105 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1106 {
1107 PetscFunctionBegin;
1108 PetscCall(MatSetType(A, MATSEQAIJ));
1109 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1110 PetscFunctionReturn(PETSC_SUCCESS);
1111 }
1112