xref: /petsc/src/mat/impls/baij/seq/baijmkl/baijmkl.c (revision c5e72636b2d9a321f94c0ae5541b656f747ad7ff)
1 /*
2   Defines basic operations for the MATSEQBAIJMKL matrix class.
3   Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4   wherever possible. If used MKL version is older than 11.3 PETSc default
5   code for sparse matrix operations is used.
6 */
7 
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> /*I   "petscmat.h"   I*/
10 #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
11   #define MKL_ILP64
12 #endif
13 #include <mkl_spblas.h>
14 
PetscSeqBAIJSupportsZeroBased(void)15 static PetscBool PetscSeqBAIJSupportsZeroBased(void)
16 {
17   static PetscBool set = PETSC_FALSE, value;
18   int              n   = 1, ia[1], ja[1];
19   float            a[1];
20   sparse_status_t  status;
21   sparse_matrix_t  A;
22 
23   if (!set) {
24     status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)n, (MKL_INT)n, (MKL_INT)n, (MKL_INT *)ia, (MKL_INT *)ia, (MKL_INT *)ja, a);
25     value  = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
26     (void)mkl_sparse_destroy(A);
27     set = PETSC_TRUE;
28   }
29   return value;
30 }
31 
/* Per-matrix data hung off Mat.spptr for the MATSEQBAIJMKL type. */
typedef struct {
  PetscBool           sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
  sparse_matrix_t     bsrA;             /* "Handle" used by SpMV2 inspector-executor routines. */
  struct matrix_descr descr;            /* MKL matrix descriptor (type/mode/diag) passed to the executor routines. */
  PetscInt           *ai1;              /* One-based copy of the BAIJ row pointers; allocated only when MKL lacks zero-based BSR support. */
  PetscInt           *aj1;              /* One-based copy of the BAIJ column indices; paired with ai1 (PetscMalloc2/PetscFree2). */
} Mat_SeqBAIJMKL;
39 
40 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
41 
/* Convert a MATSEQBAIJMKL matrix back to its base MATSEQBAIJ type: restore the
 * default BAIJ function pointers, free the MKL-specific data, and rename the type. */
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat             B       = *newmat;
  Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Reset the original function pointers. */
  B->ops->duplicate        = MatDuplicate_SeqBAIJ;
  B->ops->assemblyend      = MatAssemblyEnd_SeqBAIJ;
  B->ops->destroy          = MatDestroy_SeqBAIJ;
  B->ops->multtranspose    = MatMultTranspose_SeqBAIJ;
  B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
  B->ops->scale            = MatScale_SeqBAIJ;
  B->ops->diagonalscale    = MatDiagonalScale_SeqBAIJ;
  B->ops->axpy             = MatAXPY_SeqBAIJ;

  /* Pick the block-size-specialized BAIJ MatMult/MatMultAdd kernels. */
  switch (A->rmap->bs) {
  case 1:
    B->ops->mult    = MatMult_SeqBAIJ_1;
    B->ops->multadd = MatMultAdd_SeqBAIJ_1;
    break;
  case 2:
    B->ops->mult    = MatMult_SeqBAIJ_2;
    B->ops->multadd = MatMultAdd_SeqBAIJ_2;
    break;
  case 3:
    B->ops->mult    = MatMult_SeqBAIJ_3;
    B->ops->multadd = MatMultAdd_SeqBAIJ_3;
    break;
  case 4:
    B->ops->mult    = MatMult_SeqBAIJ_4;
    B->ops->multadd = MatMultAdd_SeqBAIJ_4;
    break;
  case 5:
    B->ops->mult    = MatMult_SeqBAIJ_5;
    B->ops->multadd = MatMultAdd_SeqBAIJ_5;
    break;
  case 6:
    B->ops->mult    = MatMult_SeqBAIJ_6;
    B->ops->multadd = MatMultAdd_SeqBAIJ_6;
    break;
  case 7:
    B->ops->mult    = MatMult_SeqBAIJ_7;
    B->ops->multadd = MatMultAdd_SeqBAIJ_7;
    break;
  case 11:
    B->ops->mult    = MatMult_SeqBAIJ_11;
    B->ops->multadd = MatMultAdd_SeqBAIJ_11;
    break;
  case 15:
    B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
    B->ops->multadd = MatMultAdd_SeqBAIJ_N;
    break;
  default:
    B->ops->mult    = MatMult_SeqBAIJ_N;
    B->ops->multadd = MatMultAdd_SeqBAIJ_N;
    break;
  }
  /* Remove the composed converter so B no longer advertises itself as BAIJMKL. */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL));

  /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
   * simply involves destroying the MKL sparse matrix handle and then freeing
   * the spptr pointer. */
  if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr; /* B is a fresh duplicate; use its spptr, not A's */

  if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
  PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
  PetscCall(PetscFree(B->spptr));

  /* Change the type of B to MATSEQBAIJ. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));

  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
121 
MatDestroy_SeqBAIJMKL(Mat A)122 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
123 {
124   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
125 
126   PetscFunctionBegin;
127   if (baijmkl) {
128     /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
129     if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
130     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
131     PetscCall(PetscFree(A->spptr));
132   }
133 
134   /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
135    * to destroy everything that remains. */
136   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
137   PetscCall(MatDestroy_SeqBAIJ(A));
138   PetscFunctionReturn(PETSC_SUCCESS);
139 }
140 
MatSeqBAIJMKL_create_mkl_handle(Mat A)141 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
142 {
143   Mat_SeqBAIJ    *a       = (Mat_SeqBAIJ *)A->data;
144   Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
145   PetscInt        mbs, nbs, nz, bs;
146   MatScalar      *aa;
147   PetscInt       *aj, *ai;
148   PetscInt        i;
149 
150   PetscFunctionBegin;
151   if (baijmkl->sparse_optimized) {
152     /* Matrix has been previously assembled and optimized. Must destroy old
153      * matrix handle before running the optimization step again. */
154     PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
155     PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
156   }
157   baijmkl->sparse_optimized = PETSC_FALSE;
158 
159   /* Now perform the SpMV2 setup and matrix optimization. */
160   baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
161   baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
162   baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
163   mbs                 = a->mbs;
164   nbs                 = a->nbs;
165   nz                  = a->nz;
166   bs                  = A->rmap->bs;
167   aa                  = a->a;
168 
169   if ((nz != 0) & !A->structure_only) {
170     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
171      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
172     if (PetscSeqBAIJSupportsZeroBased()) {
173       aj = a->j;
174       ai = a->i;
175       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
176     } else {
177       PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj));
178       for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
179       for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
180       aa = a->a;
181       PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
182       baijmkl->ai1 = ai;
183       baijmkl->aj1 = aj;
184     }
185     PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000));
186     PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE));
187     PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
188     baijmkl->sparse_optimized = PETSC_TRUE;
189   }
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
MatDuplicate_SeqBAIJMKL(Mat A,MatDuplicateOption op,Mat * M)193 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
194 {
195   Mat_SeqBAIJMKL *baijmkl;
196   Mat_SeqBAIJMKL *baijmkl_dest;
197 
198   PetscFunctionBegin;
199   PetscCall(MatDuplicate_SeqBAIJ(A, op, M));
200   baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
201   PetscCall(PetscNew(&baijmkl_dest));
202   (*M)->spptr = (void *)baijmkl_dest;
203   PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL)));
204   baijmkl_dest->sparse_optimized = PETSC_FALSE;
205   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
206   PetscFunctionReturn(PETSC_SUCCESS);
207 }
208 
MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)209 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
210 {
211   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
212   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
213   const PetscScalar *x;
214   PetscScalar       *y;
215 
216   PetscFunctionBegin;
217   /* If there are no nonzero entries, zero yy and return immediately. */
218   if (!a->nz) {
219     PetscCall(VecSet(yy, 0.0));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222 
223   PetscCall(VecGetArrayRead(xx, &x));
224   PetscCall(VecGetArray(yy, &y));
225 
226   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
227    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
228    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
229   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
230 
231   /* Call MKL SpMV2 executor routine to do the MatMult. */
232   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
233 
234   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
235   PetscCall(VecRestoreArrayRead(xx, &x));
236   PetscCall(VecRestoreArray(yy, &y));
237   PetscFunctionReturn(PETSC_SUCCESS);
238 }
239 
MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)240 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
241 {
242   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
243   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
244   const PetscScalar *x;
245   PetscScalar       *y;
246 
247   PetscFunctionBegin;
248   /* If there are no nonzero entries, zero yy and return immediately. */
249   if (!a->nz) {
250     PetscCall(VecSet(yy, 0.0));
251     PetscFunctionReturn(PETSC_SUCCESS);
252   }
253 
254   PetscCall(VecGetArrayRead(xx, &x));
255   PetscCall(VecGetArray(yy, &y));
256 
257   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
258    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
259    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
260   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
261 
262   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
263   PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
264 
265   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
266   PetscCall(VecRestoreArrayRead(xx, &x));
267   PetscCall(VecRestoreArray(yy, &y));
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)271 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
272 {
273   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
274   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
275   const PetscScalar *x;
276   PetscScalar       *y, *z;
277   PetscInt           m = a->mbs * A->rmap->bs;
278   PetscInt           i;
279 
280   PetscFunctionBegin;
281   /* If there are no nonzero entries, set zz = yy and return immediately. */
282   if (!a->nz) {
283     PetscCall(VecCopy(yy, zz));
284     PetscFunctionReturn(PETSC_SUCCESS);
285   }
286 
287   PetscCall(VecGetArrayRead(xx, &x));
288   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
289 
290   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
291    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
292    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
293   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
294 
295   /* Call MKL sparse BLAS routine to do the MatMult. */
296   if (zz == yy) {
297     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
298      * with alpha and beta both set to 1.0. */
299     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
300   } else {
301     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
302      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
303     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
304     for (i = 0; i < m; i++) z[i] += y[i];
305   }
306 
307   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
308   PetscCall(VecRestoreArrayRead(xx, &x));
309   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
310   PetscFunctionReturn(PETSC_SUCCESS);
311 }
312 
MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)313 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
314 {
315   Mat_SeqBAIJ       *a       = (Mat_SeqBAIJ *)A->data;
316   Mat_SeqBAIJMKL    *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
317   const PetscScalar *x;
318   PetscScalar       *y, *z;
319   PetscInt           n = a->nbs * A->rmap->bs;
320   PetscInt           i;
321   /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
322 
323   PetscFunctionBegin;
324   /* If there are no nonzero entries, set zz = yy and return immediately. */
325   if (!a->nz) {
326     PetscCall(VecCopy(yy, zz));
327     PetscFunctionReturn(PETSC_SUCCESS);
328   }
329 
330   PetscCall(VecGetArrayRead(xx, &x));
331   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
332 
333   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
334    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
335    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
336   if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
337 
338   /* Call MKL sparse BLAS routine to do the MatMult. */
339   if (zz == yy) {
340     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
341      * with alpha and beta both set to 1.0. */
342     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
343   } else {
344     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
345      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
346     PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
347     for (i = 0; i < n; i++) z[i] += y[i];
348   }
349 
350   PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
351   PetscCall(VecRestoreArrayRead(xx, &x));
352   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)356 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha)
357 {
358   PetscFunctionBegin;
359   PetscCall(MatScale_SeqBAIJ(inA, alpha));
360   PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
361   PetscFunctionReturn(PETSC_SUCCESS);
362 }
363 
MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)364 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr)
365 {
366   PetscFunctionBegin;
367   PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr));
368   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
369   PetscFunctionReturn(PETSC_SUCCESS);
370 }
371 
MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)372 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str)
373 {
374   PetscFunctionBegin;
375   PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str));
376   if (str == SAME_NONZERO_PATTERN) {
377     /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
378     PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
379   }
380   PetscFunctionReturn(PETSC_SUCCESS);
381 }
/* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
 * SeqBAIJMKL matrix.  This routine is called by the MatCreate_SeqBAIJMKL()
 * routine, but can also be used to convert an assembled SeqBAIJ matrix
 * into a SeqBAIJMKL one. */
PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
{
  Mat             B = *newmat;
  Mat_SeqBAIJMKL *baijmkl;
  PetscBool       sametype;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

  /* Nothing to do if A already has the target type.
   * NOTE(review): on this early-return path with MAT_INITIAL_MATRIX, the
   * duplicate B created above is neither destroyed nor stored in *newmat —
   * presumably this path is unreachable in practice; verify against callers. */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
  if (sametype) PetscFunctionReturn(PETSC_SUCCESS);

  /* Attach the MKL-specific data; the MKL handle itself is built later
   * (lazily, or by MatAssemblyEnd_SeqBAIJMKL). */
  PetscCall(PetscNew(&baijmkl));
  B->spptr = (void *)baijmkl;

  /* Set function pointers for methods that we inherit from BAIJ but override.
   * We also parse some command line options below, since those determine some of the methods we point to. */
  B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;

  baijmkl->sparse_optimized = PETSC_FALSE;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_C", MatScale_SeqBAIJMKL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ));

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
414 
MatAssemblyEnd_SeqBAIJMKL(Mat A,MatAssemblyType mode)415 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
416 {
417   PetscFunctionBegin;
418   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
419   PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
420   PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
421   A->ops->destroy          = MatDestroy_SeqBAIJMKL;
422   A->ops->mult             = MatMult_SeqBAIJMKL_SpMV2;
423   A->ops->multtranspose    = MatMultTranspose_SeqBAIJMKL_SpMV2;
424   A->ops->multadd          = MatMultAdd_SeqBAIJMKL_SpMV2;
425   A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
426   A->ops->scale            = MatScale_SeqBAIJMKL;
427   A->ops->diagonalscale    = MatDiagonalScale_SeqBAIJMKL;
428   A->ops->axpy             = MatAXPY_SeqBAIJMKL;
429   A->ops->duplicate        = MatDuplicate_SeqBAIJMKL;
430   PetscFunctionReturn(PETSC_SUCCESS);
431 }
432 
433 /*@
434   MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`.
435   This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS
436   routines from Intel MKL whenever possible.
437 
438   Input Parameters:
439 + comm - MPI communicator, set to `PETSC_COMM_SELF`
440 . bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
441           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
442 . m    - number of rows
443 . n    - number of columns
444 . nz   - number of nonzero blocks  per block row (same for all rows)
445 - nnz  - array containing the number of nonzero blocks in the various block rows
446          (possibly different for each block row) or `NULL`
447 
448   Output Parameter:
449 . A - the matrix
450 
451    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
452    MatXXXXSetPreallocation() paradigm instead of this routine directly.
453    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
454 
455   Options Database Keys:
456 + -mat_no_unroll  - uses code that does not unroll the loops in the block calculations (much slower)
457 - -mat_block_size - size of the blocks to use
458 
459   Level: intermediate
460 
461   Notes:
462   The number of rows and columns must be divisible by blocksize.
463 
464   If the `nnz` parameter is given then the `nz` parameter is ignored
465 
  A nonzero block is any block that has 1 or more nonzeros in it
467 
468   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
469   operations are currently supported.
470   If the installed version of MKL supports the "SpMV2" sparse
471   inspector-executor routines, then those are used by default.
472   Default PETSc kernels are used otherwise.
473 
474   The `MATSEQBAIJ` format is fully compatible with standard Fortran
475   storage.  That is, the stored row and column indices can begin at
476   either one (as in Fortran) or zero.  See the users' manual for details.
477 
  Specify the preallocated storage with either nz or nnz (not both).
  Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
  allocation.  See [Sparse Matrices](sec_matsparse) for details.
482 
483 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
484 @*/
MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)485 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
486 {
487   PetscFunctionBegin;
488   PetscCall(MatCreate(comm, A));
489   PetscCall(MatSetSizes(*A, m, n, m, n));
490   PetscCall(MatSetType(*A, MATSEQBAIJMKL));
491   PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz));
492   PetscFunctionReturn(PETSC_SUCCESS);
493 }
494 
MatCreate_SeqBAIJMKL(Mat A)495 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
496 {
497   PetscFunctionBegin;
498   PetscCall(MatSetType(A, MATSEQBAIJ));
499   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A));
500   PetscFunctionReturn(PETSC_SUCCESS);
501 }
502