1 /* 2 Defines basic operations for the MATSEQBAIJMKL matrix class. 3 Uses sparse BLAS operations from the Intel Math Kernel Library (MKL) 4 wherever possible. If used MKL version is older than 11.3 PETSc default 5 code for sparse matrix operations is used. 6 */ 7 8 #include <../src/mat/impls/baij/seq/baij.h> 9 #include <../src/mat/impls/baij/seq/baijmkl/baijmkl.h> /*I "petscmat.h" I*/ 10 #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 11 #define MKL_ILP64 12 #endif 13 #include <mkl_spblas.h> 14 15 static PetscBool PetscSeqBAIJSupportsZeroBased(void) 16 { 17 static PetscBool set = PETSC_FALSE, value; 18 int n = 1, ia[1], ja[1]; 19 float a[1]; 20 sparse_status_t status; 21 sparse_matrix_t A; 22 23 if (!set) { 24 status = mkl_sparse_s_create_bsr(&A, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)n, (MKL_INT)n, (MKL_INT)n, (MKL_INT *)ia, (MKL_INT *)ia, (MKL_INT *)ja, a); 25 value = (status != SPARSE_STATUS_NOT_SUPPORTED) ? PETSC_TRUE : PETSC_FALSE; 26 (void)mkl_sparse_destroy(A); 27 set = PETSC_TRUE; 28 } 29 return value; 30 } 31 32 typedef struct { 33 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 34 sparse_matrix_t bsrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 35 struct matrix_descr descr; 36 PetscInt *ai1; 37 PetscInt *aj1; 38 } Mat_SeqBAIJMKL; 39 40 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode); 41 42 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJMKL_SeqBAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) 43 { 44 /* This routine is only called to convert a MATBAIJMKL to its base PETSc type, */ 45 /* so we will ignore 'MatType type'. */ 46 Mat B = *newmat; 47 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 48 49 PetscFunctionBegin; 50 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 51 52 /* Reset the original function pointers. */ 53 B->ops->duplicate = MatDuplicate_SeqBAIJ; 54 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJ; 55 B->ops->destroy = MatDestroy_SeqBAIJ; 56 B->ops->multtranspose = MatMultTranspose_SeqBAIJ; 57 B->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJ; 58 B->ops->scale = MatScale_SeqBAIJ; 59 B->ops->diagonalscale = MatDiagonalScale_SeqBAIJ; 60 B->ops->axpy = MatAXPY_SeqBAIJ; 61 62 switch (A->rmap->bs) { 63 case 1: 64 B->ops->mult = MatMult_SeqBAIJ_1; 65 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 66 break; 67 case 2: 68 B->ops->mult = MatMult_SeqBAIJ_2; 69 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 70 break; 71 case 3: 72 B->ops->mult = MatMult_SeqBAIJ_3; 73 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 74 break; 75 case 4: 76 B->ops->mult = MatMult_SeqBAIJ_4; 77 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 78 break; 79 case 5: 80 B->ops->mult = MatMult_SeqBAIJ_5; 81 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 82 break; 83 case 6: 84 B->ops->mult = MatMult_SeqBAIJ_6; 85 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 86 break; 87 case 7: 88 B->ops->mult = MatMult_SeqBAIJ_7; 89 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 90 break; 91 case 11: 92 B->ops->mult = MatMult_SeqBAIJ_11; 93 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 94 break; 95 case 15: 96 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 97 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 98 break; 99 default: 100 B->ops->mult = MatMult_SeqBAIJ_N; 101 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 102 break; 103 } 104 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", NULL)); 105 106 /* Free everything in the Mat_SeqBAIJMKL data structure. Currently, this 107 * simply involves destroying the MKL sparse matrix handle and then freeing 108 * the spptr pointer. */ 109 if (reuse == MAT_INITIAL_MATRIX) baijmkl = (Mat_SeqBAIJMKL *)B->spptr; 110 111 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 112 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 113 PetscCall(PetscFree(B->spptr)); 114 115 /* Change the type of B to MATSEQBAIJ. */ 116 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 117 118 *newmat = B; 119 PetscFunctionReturn(PETSC_SUCCESS); 120 } 121 122 static PetscErrorCode MatDestroy_SeqBAIJMKL(Mat A) 123 { 124 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 125 126 PetscFunctionBegin; 127 if (baijmkl) { 128 /* Clean up everything in the Mat_SeqBAIJMKL data structure, then free A->spptr. */ 129 if (baijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, baijmkl->bsrA); 130 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 131 PetscCall(PetscFree(A->spptr)); 132 } 133 134 /* Change the type of A back to SEQBAIJ and use MatDestroy_SeqBAIJ() 135 * to destroy everything that remains. */ 136 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQBAIJ)); 137 PetscCall(MatDestroy_SeqBAIJ(A)); 138 PetscFunctionReturn(PETSC_SUCCESS); 139 } 140 141 static PetscErrorCode MatSeqBAIJMKL_create_mkl_handle(Mat A) 142 { 143 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 144 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 145 PetscInt mbs, nbs, nz, bs; 146 MatScalar *aa; 147 PetscInt *aj, *ai; 148 PetscInt i; 149 150 PetscFunctionBegin; 151 if (baijmkl->sparse_optimized) { 152 /* Matrix has been previously assembled and optimized. Must destroy old 153 * matrix handle before running the optimization step again. */ 154 PetscCall(PetscFree2(baijmkl->ai1, baijmkl->aj1)); 155 PetscCallMKL(mkl_sparse_destroy(baijmkl->bsrA)); 156 } 157 baijmkl->sparse_optimized = PETSC_FALSE; 158 159 /* Now perform the SpMV2 setup and matrix optimization. */ 160 baijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 161 baijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 162 baijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 163 mbs = a->mbs; 164 nbs = a->nbs; 165 nz = a->nz; 166 bs = A->rmap->bs; 167 aa = a->a; 168 169 if ((nz != 0) & !A->structure_only) { 170 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 171 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 172 if (PetscSeqBAIJSupportsZeroBased()) { 173 aj = a->j; 174 ai = a->i; 175 PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ZERO, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa)); 176 } else { 177 PetscCall(PetscMalloc2(mbs + 1, &ai, nz, &aj)); 178 for (i = 0; i < mbs + 1; i++) ai[i] = a->i[i] + 1; 179 for (i = 0; i < nz; i++) aj[i] = a->j[i] + 1; 180 aa = a->a; 181 PetscCallMKL(mkl_sparse_x_create_bsr(&baijmkl->bsrA, SPARSE_INDEX_BASE_ONE, SPARSE_LAYOUT_COLUMN_MAJOR, (MKL_INT)mbs, (MKL_INT)nbs, (MKL_INT)bs, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa)); 182 baijmkl->ai1 = ai; 183 baijmkl->aj1 = aj; 184 } 185 PetscCallMKL(mkl_sparse_set_mv_hint(baijmkl->bsrA, SPARSE_OPERATION_NON_TRANSPOSE, baijmkl->descr, 1000)); 186 PetscCallMKL(mkl_sparse_set_memory_hint(baijmkl->bsrA, SPARSE_MEMORY_AGGRESSIVE)); 187 PetscCallMKL(mkl_sparse_optimize(baijmkl->bsrA)); 188 baijmkl->sparse_optimized = PETSC_TRUE; 189 } 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 static PetscErrorCode MatDuplicate_SeqBAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 194 { 195 Mat_SeqBAIJMKL *baijmkl; 196 Mat_SeqBAIJMKL *baijmkl_dest; 197 198 PetscFunctionBegin; 199 PetscCall(MatDuplicate_SeqBAIJ(A, op, M)); 200 baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 201 PetscCall(PetscNew(&baijmkl_dest)); 202 (*M)->spptr = (void *)baijmkl_dest; 203 PetscCall(PetscMemcpy(baijmkl_dest, baijmkl, sizeof(Mat_SeqBAIJMKL))); 204 baijmkl_dest->sparse_optimized = PETSC_FALSE; 205 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 206 PetscFunctionReturn(PETSC_SUCCESS); 207 } 208 209 static PetscErrorCode MatMult_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 210 { 211 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 212 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 213 const PetscScalar *x; 214 PetscScalar *y; 215 216 PetscFunctionBegin; 217 /* If there are no nonzero entries, zero yy and return immediately. */ 218 if (!a->nz) { 219 PetscCall(VecSet(yy, 0.0)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 223 PetscCall(VecGetArrayRead(xx, &x)); 224 PetscCall(VecGetArray(yy, &y)); 225 226 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 227 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 228 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 229 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 230 231 /* Call MKL SpMV2 executor routine to do the MatMult. */ 232 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 233 234 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 235 PetscCall(VecRestoreArrayRead(xx, &x)); 236 PetscCall(VecRestoreArray(yy, &y)); 237 PetscFunctionReturn(PETSC_SUCCESS); 238 } 239 240 static PetscErrorCode MatMultTranspose_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 241 { 242 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 243 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 244 const PetscScalar *x; 245 PetscScalar *y; 246 247 PetscFunctionBegin; 248 /* If there are no nonzero entries, zero yy and return immediately. */ 249 if (!a->nz) { 250 PetscCall(VecSet(yy, 0.0)); 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 PetscCall(VecGetArrayRead(xx, &x)); 255 PetscCall(VecGetArray(yy, &y)); 256 257 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 258 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 259 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 260 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 261 262 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 263 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, y)); 264 265 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz - a->nonzerorowcnt * A->rmap->bs)); 266 PetscCall(VecRestoreArrayRead(xx, &x)); 267 PetscCall(VecRestoreArray(yy, &y)); 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 static PetscErrorCode MatMultAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 272 { 273 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 274 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 275 const PetscScalar *x; 276 PetscScalar *y, *z; 277 PetscInt m = a->mbs * A->rmap->bs; 278 PetscInt i; 279 280 PetscFunctionBegin; 281 /* If there are no nonzero entries, set zz = yy and return immediately. */ 282 if (!a->nz) { 283 PetscCall(VecCopy(yy, zz)); 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 PetscCall(VecGetArrayRead(xx, &x)); 288 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 289 290 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 291 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 292 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 293 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 294 295 /* Call MKL sparse BLAS routine to do the MatMult. */ 296 if (zz == yy) { 297 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 298 * with alpha and beta both set to 1.0. */ 299 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 300 } else { 301 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 302 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 303 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_NON_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 304 for (i = 0; i < m; i++) z[i] += y[i]; 305 } 306 307 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 308 PetscCall(VecRestoreArrayRead(xx, &x)); 309 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 static PetscErrorCode MatMultTransposeAdd_SeqBAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 314 { 315 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 316 Mat_SeqBAIJMKL *baijmkl = (Mat_SeqBAIJMKL *)A->spptr; 317 const PetscScalar *x; 318 PetscScalar *y, *z; 319 PetscInt n = a->nbs * A->rmap->bs; 320 PetscInt i; 321 /* Variables not in MatMultTransposeAdd_SeqBAIJ. */ 322 323 PetscFunctionBegin; 324 /* If there are no nonzero entries, set zz = yy and return immediately. */ 325 if (!a->nz) { 326 PetscCall(VecCopy(yy, zz)); 327 PetscFunctionReturn(PETSC_SUCCESS); 328 } 329 330 PetscCall(VecGetArrayRead(xx, &x)); 331 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 332 333 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 334 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 335 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 336 if (!baijmkl->sparse_optimized) PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 337 338 /* Call MKL sparse BLAS routine to do the MatMult. */ 339 if (zz == yy) { 340 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 341 * with alpha and beta both set to 1.0. */ 342 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 1.0, z)); 343 } else { 344 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 345 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 346 PetscCallMKL(mkl_sparse_x_mv(SPARSE_OPERATION_TRANSPOSE, 1.0, baijmkl->bsrA, baijmkl->descr, x, 0.0, z)); 347 for (i = 0; i < n; i++) z[i] += y[i]; 348 } 349 350 PetscCall(PetscLogFlops(2.0 * a->bs2 * a->nz)); 351 PetscCall(VecRestoreArrayRead(xx, &x)); 352 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 static PetscErrorCode MatScale_SeqBAIJMKL(Mat inA, PetscScalar alpha) 357 { 358 PetscFunctionBegin; 359 PetscCall(MatScale_SeqBAIJ(inA, alpha)); 360 PetscCall(MatSeqBAIJMKL_create_mkl_handle(inA)); 361 PetscFunctionReturn(PETSC_SUCCESS); 362 } 363 364 static PetscErrorCode MatDiagonalScale_SeqBAIJMKL(Mat A, Vec ll, Vec rr) 365 { 366 PetscFunctionBegin; 367 PetscCall(MatDiagonalScale_SeqBAIJ(A, ll, rr)); 368 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 369 PetscFunctionReturn(PETSC_SUCCESS); 370 } 371 372 static PetscErrorCode MatAXPY_SeqBAIJMKL(Mat Y, PetscScalar a, Mat X, MatStructure str) 373 { 374 PetscFunctionBegin; 375 PetscCall(MatAXPY_SeqBAIJ(Y, a, X, str)); 376 if (str == SAME_NONZERO_PATTERN) { 377 /* MatAssemblyEnd() is not called if SAME_NONZERO_PATTERN, so we need to force update of the MKL matrix handle. */ 378 PetscCall(MatSeqBAIJMKL_create_mkl_handle(Y)); 379 } 380 PetscFunctionReturn(PETSC_SUCCESS); 381 } 382 /* MatConvert_SeqBAIJ_SeqBAIJMKL converts a SeqBAIJ matrix into a 383 * SeqBAIJMKL matrix. This routine is called by the MatCreate_SeqMKLBAIJ() 384 * routine, but can also be used to convert an assembled SeqBAIJ matrix 385 * into a SeqBAIJMKL one. */ 386 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 387 { 388 Mat B = *newmat; 389 Mat_SeqBAIJMKL *baijmkl; 390 PetscBool sametype; 391 392 PetscFunctionBegin; 393 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 394 395 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 396 if (sametype) PetscFunctionReturn(PETSC_SUCCESS); 397 398 PetscCall(PetscNew(&baijmkl)); 399 B->spptr = (void *)baijmkl; 400 401 /* Set function pointers for methods that we inherit from BAIJ but override. 402 * We also parse some command line options below, since those determine some of the methods we point to. */ 403 B->ops->assemblyend = MatAssemblyEnd_SeqBAIJMKL; 404 405 baijmkl->sparse_optimized = PETSC_FALSE; 406 407 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatScale_C", MatScale_SeqBAIJMKL)); 408 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaijmkl_seqbaij_C", MatConvert_SeqBAIJMKL_SeqBAIJ)); 409 410 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJMKL)); 411 *newmat = B; 412 PetscFunctionReturn(PETSC_SUCCESS); 413 } 414 415 static PetscErrorCode MatAssemblyEnd_SeqBAIJMKL(Mat A, MatAssemblyType mode) 416 { 417 PetscFunctionBegin; 418 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 419 PetscCall(MatAssemblyEnd_SeqBAIJ(A, mode)); 420 PetscCall(MatSeqBAIJMKL_create_mkl_handle(A)); 421 A->ops->destroy = MatDestroy_SeqBAIJMKL; 422 A->ops->mult = MatMult_SeqBAIJMKL_SpMV2; 423 A->ops->multtranspose = MatMultTranspose_SeqBAIJMKL_SpMV2; 424 A->ops->multadd = MatMultAdd_SeqBAIJMKL_SpMV2; 425 A->ops->multtransposeadd = MatMultTransposeAdd_SeqBAIJMKL_SpMV2; 426 A->ops->scale = MatScale_SeqBAIJMKL; 427 A->ops->diagonalscale = MatDiagonalScale_SeqBAIJMKL; 428 A->ops->axpy = MatAXPY_SeqBAIJMKL; 429 A->ops->duplicate = MatDuplicate_SeqBAIJMKL; 430 PetscFunctionReturn(PETSC_SUCCESS); 431 } 432 433 /*@ 434 MatCreateSeqBAIJMKL - Creates a sparse matrix of type `MATSEQBAIJMKL`. 435 This type inherits from `MATSEQBAIJ` and is largely identical, but uses sparse BLAS 436 routines from Intel MKL whenever possible. 437 438 Input Parameters: 439 + comm - MPI communicator, set to `PETSC_COMM_SELF` 440 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 441 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 442 . m - number of rows 443 . n - number of columns 444 . nz - number of nonzero blocks per block row (same for all rows) 445 - nnz - array containing the number of nonzero blocks in the various block rows 446 (possibly different for each block row) or `NULL` 447 448 Output Parameter: 449 . A - the matrix 450 451 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 452 MatXXXXSetPreallocation() paradigm instead of this routine directly. 453 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 454 455 Options Database Keys: 456 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 457 - -mat_block_size - size of the blocks to use 458 459 Level: intermediate 460 461 Notes: 462 The number of rows and columns must be divisible by blocksize. 463 464 If the `nnz` parameter is given then the `nz` parameter is ignored 465 466 A nonzero block is any block that as 1 or more nonzeros in it 467 468 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()` 469 operations are currently supported. 470 If the installed version of MKL supports the "SpMV2" sparse 471 inspector-executor routines, then those are used by default. 472 Default PETSc kernels are used otherwise. 473 474 The `MATSEQBAIJ` format is fully compatible with standard Fortran 475 storage. That is, the stored row and column indices can begin at 476 either one (as in Fortran) or zero. See the users' manual for details. 477 478 Specify the preallocated storage with either nz or nnz (not both). 479 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 480 allocation. See [Sparse Matrices](sec_matsparse) for details. 481 matrices. 482 483 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 484 @*/ 485 PetscErrorCode MatCreateSeqBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 486 { 487 PetscFunctionBegin; 488 PetscCall(MatCreate(comm, A)); 489 PetscCall(MatSetSizes(*A, m, n, m, n)); 490 PetscCall(MatSetType(*A, MATSEQBAIJMKL)); 491 PetscCall(MatSeqBAIJSetPreallocation_SeqBAIJ(*A, bs, nz, (PetscInt *)nnz)); 492 PetscFunctionReturn(PETSC_SUCCESS); 493 } 494 495 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJMKL(Mat A) 496 { 497 PetscFunctionBegin; 498 PetscCall(MatSetType(A, MATSEQBAIJ)); 499 PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &A)); 500 PetscFunctionReturn(PETSC_SUCCESS); 501 } 502