1 /*
2 Defines basic operations for the MATSEQBAIJMKL matrix class.
3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL)
4 wherever possible. If used MKL version is older than 11.3 PETSc default
5 code for sparse matrix operations is used.
6 */
7
8 #include <../src/mat/impls/baij/seq/baij.h>
9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> /*I "petscmat.h" I*/
10 #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
11 #define MKL_ILP64
12 #endif
13 #include <mkl_spblas.h>
14
PetscSeqBAIJSupportsZeroBased(void)15 static PetscBool PetscSeqBAIJSupportsZeroBased(void)
16 {
17 static PetscBool set = PETSC_FALSE, value;
18 int n = 1, ia[1], ja[1];
19 float a[1];
20 sparse_status_t status;
21 sparse_matrix_t A;
22
23 if (!set) {
24 status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)n, (MKL_INT)n, (MKL_INT)n, (MKL_INT *)ia, (MKL_INT *)ia, (MKL_INT *)ja, a);
25 value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE;
26 (void)mkl_sparse_destroy(A);
27 set = PETSC_TRUE;
28 }
29 return value;
30 }
31
32 typedef struct {
33 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
34 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */
35 struct matrix_descr descr;
36 PetscInt *ai1;
37 PetscInt *aj1;
38 } Mat_SeqBAIJMKL;
39
40 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode);
41
MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A,MatType type,MatReuse reuse,Mat * newmat)42 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
43 {
44 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */
45 /* so we will ignore 'MatType type'. */
46 Mat B = *newmat;
47 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
48
49 PetscFunctionBegin;
50 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
51
52 /* Reset the original function pointers. */
53 B->ops->duplicate = MatDuplicate_SeqBAIJ;
54 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ;
55 B->ops->destroy = MatDestroy_SeqBAIJ;
56 B->ops->multtranspose = MatMultTranspose_SeqBAIJ;
57 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ;
58 B->ops->scale = MatScale_SeqBAIJ;
59 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ;
60 B->ops->axpy = MatAXPY_SeqBAIJ;
61
62 switch (A->rmap->bs) {
63 case 1:
64 B->ops->mult = MatMult_SeqBAIJ_1;
65 B->ops->multadd = MatMultAdd_SeqBAIJ_1;
66 break;
67 case 2:
68 B->ops->mult = MatMult_SeqBAIJ_2;
69 B->ops->multadd = MatMultAdd_SeqBAIJ_2;
70 break;
71 case 3:
72 B->ops->mult = MatMult_SeqBAIJ_3;
73 B->ops->multadd = MatMultAdd_SeqBAIJ_3;
74 break;
75 case 4:
76 B->ops->mult = MatMult_SeqBAIJ_4;
77 B->ops->multadd = MatMultAdd_SeqBAIJ_4;
78 break;
79 case 5:
80 B->ops->mult = MatMult_SeqBAIJ_5;
81 B->ops->multadd = MatMultAdd_SeqBAIJ_5;
82 break;
83 case 6:
84 B->ops->mult = MatMult_SeqBAIJ_6;
85 B->ops->multadd = MatMultAdd_SeqBAIJ_6;
86 break;
87 case 7:
88 B->ops->mult = MatMult_SeqBAIJ_7;
89 B->ops->multadd = MatMultAdd_SeqBAIJ_7;
90 break;
91 case 11:
92 B->ops->mult = MatMult_SeqBAIJ_11;
93 B->ops->multadd = MatMultAdd_SeqBAIJ_11;
94 break;
95 case 15:
96 B->ops->mult = MatMult_SeqBAIJ_15_ver1;
97 B->ops->multadd = MatMultAdd_SeqBAIJ_N;
98 break;
99 default:
100 B->ops->mult = MatMult_SeqBAIJ_N;
101 B->ops->multadd = MatMultAdd_SeqBAIJ_N;
102 break;
103 }
104 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL));
105
106 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this
107 * simply involves destroying the MKL sparse matrix handle and then freeing
108 * the spptr pointer. */
109 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr;
110
111 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
112 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
113 PetscCall(PetscFree(B->spptr));
114
115 /* Change the type of B to MATSEQBAIJ. */
116 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
117
118 *newmat = B;
119 PetscFunctionReturn(PETSC_SUCCESS);
120 }
121
MatDestroy_SeqBAIJMKL(Mat A)122 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A)
123 {
124 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
125
126 PetscFunctionBegin;
127 if (baijmkl) {
128 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */
129 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA);
130 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
131 PetscCall(PetscFree(A->spptr));
132 }
133
134 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ()
135 * to destroy everything that remains. */
136 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ));
137 PetscCall(MatDestroy_SeqBAIJ(A));
138 PetscFunctionReturn(PETSC_SUCCESS);
139 }
140
MatSeqBAIJMKL_create_mkl_handle(Mat A)141 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A)
142 {
143 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
144 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
145 PetscInt mbs, nbs, nz, bs;
146 MatScalar *aa;
147 PetscInt *aj, *ai;
148 PetscInt i;
149
150 PetscFunctionBegin;
151 if (baijmkl->sparse_optimized) {
152 /* Matrix has been previously assembled and optimized. Must destroy old
153 * matrix handle before running the optimization step again. */
154 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1));
155 PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA));
156 }
157 baijmkl->sparse_optimized = PETSC_FALSE;
158
159 /* Now perform the SpMV2 setup and matrix optimization. */
160 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
161 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
162 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
163 mbs = a->mbs;
164 nbs = a->nbs;
165 nz = a->nz;
166 bs = A->rmap->bs;
167 aa = a->a;
168
169 if ((nz != 0) & !A->structure_only) {
170 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
171 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
172 if (PetscSeqBAIJSupportsZeroBased()) {
173 aj = a->j;
174 ai = a->i;
175 PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
176 } else {
177 PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj));
178 for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1;
179 for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1;
180 aa = a->a;
181 PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa));
182 baijmkl->ai1 = ai;
183 baijmkl->aj1 = aj;
184 }
185 PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000));
186 PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE));
187 PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA));
188 baijmkl->sparse_optimized = PETSC_TRUE;
189 }
190 PetscFunctionReturn(PETSC_SUCCESS);
191 }
192
MatDuplicate_SeqBAIJMKL(Mat A,MatDuplicateOption op,Mat * M)193 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
194 {
195 Mat_SeqBAIJMKL *baijmkl;
196 Mat_SeqBAIJMKL *baijmkl_dest;
197
198 PetscFunctionBegin;
199 PetscCall(MatDuplicate_SeqBAIJ(A, op, M));
200 baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
201 PetscCall(PetscNew(&baijmkl_dest));
202 (*M)->spptr = (void *)baijmkl_dest;
203 PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL)));
204 baijmkl_dest->sparse_optimized = PETSC_FALSE;
205 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
206 PetscFunctionReturn(PETSC_SUCCESS);
207 }
208
MatMult_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)209 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
210 {
211 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
212 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
213 const PetscScalar *x;
214 PetscScalar *y;
215
216 PetscFunctionBegin;
217 /* If there are no nonzero entries, zero yy and return immediately. */
218 if (!a->nz) {
219 PetscCall(VecSet(yy, 0.0));
220 PetscFunctionReturn(PETSC_SUCCESS);
221 }
222
223 PetscCall(VecGetArrayRead(xx, &x));
224 PetscCall(VecGetArray(yy, &y));
225
226 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
227 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
228 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
229 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
230
231 /* Call MKL SpMV2 executor routine to do the MatMult. */
232 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
233
234 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
235 PetscCall(VecRestoreArrayRead(xx, &x));
236 PetscCall(VecRestoreArray(yy, &y));
237 PetscFunctionReturn(PETSC_SUCCESS);
238 }
239
MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy)240 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
241 {
242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
243 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
244 const PetscScalar *x;
245 PetscScalar *y;
246
247 PetscFunctionBegin;
248 /* If there are no nonzero entries, zero yy and return immediately. */
249 if (!a->nz) {
250 PetscCall(VecSet(yy, 0.0));
251 PetscFunctionReturn(PETSC_SUCCESS);
252 }
253
254 PetscCall(VecGetArrayRead(xx, &x));
255 PetscCall(VecGetArray(yy, &y));
256
257 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
258 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
259 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
260 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
261
262 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
263 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y));
264
265 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs));
266 PetscCall(VecRestoreArrayRead(xx, &x));
267 PetscCall(VecRestoreArray(yy, &y));
268 PetscFunctionReturn(PETSC_SUCCESS);
269 }
270
MatMultAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)271 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
272 {
273 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
274 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
275 const PetscScalar *x;
276 PetscScalar *y, *z;
277 PetscInt m = a->mbs * A->rmap->bs;
278 PetscInt i;
279
280 PetscFunctionBegin;
281 /* If there are no nonzero entries, set zz = yy and return immediately. */
282 if (!a->nz) {
283 PetscCall(VecCopy(yy, zz));
284 PetscFunctionReturn(PETSC_SUCCESS);
285 }
286
287 PetscCall(VecGetArrayRead(xx, &x));
288 PetscCall(VecGetArrayPair(yy, zz, &y, &z));
289
290 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
291 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
292 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
293 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
294
295 /* Call MKL sparse BLAS routine to do the MatMult. */
296 if (zz == yy) {
297 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
298 * with alpha and beta both set to 1.0. */
299 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
300 } else {
301 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
302 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
303 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
304 for (i = 0; i < m; i++) z[i] += y[i];
305 }
306
307 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
308 PetscCall(VecRestoreArrayRead(xx, &x));
309 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
310 PetscFunctionReturn(PETSC_SUCCESS);
311 }
312
MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A,Vec xx,Vec yy,Vec zz)313 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
314 {
315 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
316 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr;
317 const PetscScalar *x;
318 PetscScalar *y, *z;
319 PetscInt n = a->nbs * A->rmap->bs;
320 PetscInt i;
321 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */
322
323 PetscFunctionBegin;
324 /* If there are no nonzero entries, set zz = yy and return immediately. */
325 if (!a->nz) {
326 PetscCall(VecCopy(yy, zz));
327 PetscFunctionReturn(PETSC_SUCCESS);
328 }
329
330 PetscCall(VecGetArrayRead(xx, &x));
331 PetscCall(VecGetArrayPair(yy, zz, &y, &z));
332
333 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
334 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
335 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
336 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
337
338 /* Call MKL sparse BLAS routine to do the MatMult. */
339 if (zz == yy) {
340 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
341 * with alpha and beta both set to 1.0. */
342 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z));
343 } else {
344 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
345 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
346 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z));
347 for (i = 0; i < n; i++) z[i] += y[i];
348 }
349
350 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz));
351 PetscCall(VecRestoreArrayRead(xx, &x));
352 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
353 PetscFunctionReturn(PETSC_SUCCESS);
354 }
355
MatScale_SeqBAIJMKL(Mat inA,PetscScalar alpha)356 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha)
357 {
358 PetscFunctionBegin;
359 PetscCall(MatScale_SeqBAIJ(inA, alpha));
360 PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA));
361 PetscFunctionReturn(PETSC_SUCCESS);
362 }
363
MatDiagonalScale_SeqBAIJMKL(Mat A,Vec ll,Vec rr)364 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr)
365 {
366 PetscFunctionBegin;
367 PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr));
368 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
369 PetscFunctionReturn(PETSC_SUCCESS);
370 }
371
MatAXPY_SeqBAIJMKL(Mat Y,PetscScalar a,Mat X,MatStructure str)372 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str)
373 {
374 PetscFunctionBegin;
375 PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str));
376 if (str == SAME_NONZERO_PATTERN) {
377 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */
378 PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y));
379 }
380 PetscFunctionReturn(PETSC_SUCCESS);
381 }
382 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a
383 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ()
384 * routine, but can also be used to convert an assembled SeqBAIJ matrix
385 * into a SeqBAIJMKL one. */
MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat * newmat)386 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
387 {
388 Mat B = *newmat;
389 Mat_SeqBAIJMKL *baijmkl;
390 PetscBool sametype;
391
392 PetscFunctionBegin;
393 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
394
395 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
396 if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
397
398 PetscCall(PetscNew(&baijmkl));
399 B->spptr = (void *)baijmkl;
400
401 /* Set function pointers for methods that we inherit from BAIJ but override.
402 * We also parse some command line options below, since those determine some of the methods we point to. */
403 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL;
404
405 baijmkl->sparse_optimized = PETSC_FALSE;
406
407 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_C", MatScale_SeqBAIJMKL));
408 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ));
409
410 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL));
411 *newmat = B;
412 PetscFunctionReturn(PETSC_SUCCESS);
413 }
414
MatAssemblyEnd_SeqBAIJMKL(Mat A,MatAssemblyType mode)415 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode)
416 {
417 PetscFunctionBegin;
418 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
419 PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode));
420 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A));
421 A->ops->destroy = MatDestroy_SeqBAIJMKL;
422 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2;
423 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2;
424 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2;
425 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2;
426 A->ops->scale = MatScale_SeqBAIJMKL;
427 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL;
428 A->ops->axpy = MatAXPY_SeqBAIJMKL;
429 A->ops->duplicate = MatDuplicate_SeqBAIJMKL;
430 PetscFunctionReturn(PETSC_SUCCESS);
431 }
432
433 /*@
434 MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`.
435 This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS
436 routines from Intel MKL whenever possible.
437
438 Input Parameters:
439 + comm - MPI communicator, set to `PETSC_COMM_SELF`
440 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
441 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
442 . m - number of rows
443 . n - number of columns
444 . nz - number of nonzero blocks per block row (same for all rows)
445 - nnz - array containing the number of nonzero blocks in the various block rows
446 (possibly different for each block row) or `NULL`
447
448 Output Parameter:
449 . A - the matrix
450
451 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
452 MatXXXXSetPreallocation() paradigm instead of this routine directly.
453 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
454
455 Options Database Keys:
456 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
457 - -mat_block_size - size of the blocks to use
458
459 Level: intermediate
460
461 Notes:
462 The number of rows and columns must be divisible by blocksize.
463
464 If the `nnz` parameter is given then the `nz` parameter is ignored
465
466 A nonzero block is any block that as 1 or more nonzeros in it
467
468 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
469 operations are currently supported.
470 If the installed version of MKL supports the "SpMV2" sparse
471 inspector-executor routines, then those are used by default.
472 Default PETSc kernels are used otherwise.
473
474 The `MATSEQBAIJ` format is fully compatible with standard Fortran
475 storage. That is, the stored row and column indices can begin at
476 either one (as in Fortran) or zero. See the users' manual for details.
477
478 Specify the preallocated storage with either nz or nnz (not both).
479 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
480 allocation. See [Sparse Matrices](sec_matsparse) for details.
481 matrices.
482
483 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
484 @*/
MatCreateSeqBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat * A)485 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
486 {
487 PetscFunctionBegin;
488 PetscCall(MatCreate(comm, A));
489 PetscCall(MatSetSizes(*A, m, n, m, n));
490 PetscCall(MatSetType(*A, MATSEQBAIJMKL));
491 PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz));
492 PetscFunctionReturn(PETSC_SUCCESS);
493 }
494
MatCreate_SeqBAIJMKL(Mat A)495 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A)
496 {
497 PetscFunctionBegin;
498 PetscCall(MatSetType(A, MATSEQBAIJ));
499 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A));
500 PetscFunctionReturn(PETSC_SUCCESS);
501 }
502