1 /* 2 Defines basic operations for the MATSEQAIJMKL matrix class. 3 This class is derived from the MATSEQAIJ class and retains the 4 compressed row storage (aka Yale sparse matrix format) but uses 5 sparse BLAS operations from the Intel Math Kernel Library (MKL) 6 wherever possible. 7 */ 8 9 #include <../src/mat/impls/aij/seq/aij.h> 10 #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h> 11 #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 12 #define MKL_ILP64 13 #endif 14 #include <mkl_spblas.h> 15 16 typedef struct { 17 PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */ 18 PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */ 19 PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */ 20 PetscObjectState state; 21 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 22 sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */ 23 struct matrix_descr descr; 24 #endif 25 } Mat_SeqAIJMKL; 26 27 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat) 28 { 29 /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */ 30 /* so we will ignore 'MatType type'. */ 31 Mat B = *newmat; 32 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 33 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 34 #endif 35 36 PetscFunctionBegin; 37 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 38 39 /* Reset the original function pointers. */ 40 B->ops->duplicate = MatDuplicate_SeqAIJ; 41 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ; 42 B->ops->destroy = MatDestroy_SeqAIJ; 43 B->ops->mult = MatMult_SeqAIJ; 44 B->ops->multtranspose = MatMultTranspose_SeqAIJ; 45 B->ops->multadd = MatMultAdd_SeqAIJ; 46 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ; 47 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ; 48 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ; 49 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ; 50 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ; 51 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ; 52 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ; 53 54 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL)); 55 56 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 57 /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this 58 * simply involves destroying the MKL sparse matrix handle and then freeing 59 * the spptr pointer. */ 60 if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr; 61 62 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 63 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 64 PetscCall(PetscFree(B->spptr)); 65 66 /* Change the type of B to MATSEQAIJ. */ 67 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ)); 68 69 *newmat = B; 70 PetscFunctionReturn(PETSC_SUCCESS); 71 } 72 73 static PetscErrorCode MatDestroy_SeqAIJMKL(Mat A) 74 { 75 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 76 77 PetscFunctionBegin; 78 /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */ 79 if (aijmkl) { 80 /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */ 81 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 82 if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 83 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 84 PetscCall(PetscFree(A->spptr)); 85 } 86 87 /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 88 * to destroy everything that remains. */ 89 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ)); 90 /* I don't call MatSetType(). I believe this is because that 91 * is only to be called when *building* a matrix. I could be wrong, but 92 * that is how things work for the SuperLU matrix class. */ 93 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL)); 94 PetscCall(MatDestroy_SeqAIJ(A)); 95 PetscFunctionReturn(PETSC_SUCCESS); 96 } 97 98 /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it, 99 * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize(). 100 * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix 101 * handle, creates a new one, and then calls mkl_sparse_optimize(). 102 * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been 103 * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of 104 * an unoptimized matrix handle here. */ 105 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A) 106 { 107 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 108 /* If the MKL library does not have mkl_sparse_optimize(), then this routine 109 * does nothing. We make it callable anyway in this case because it cuts 110 * down on littering the code with #ifdefs. */ 111 PetscFunctionBegin; 112 PetscFunctionReturn(PETSC_SUCCESS); 113 #else 114 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 115 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 116 PetscInt m, n; 117 MatScalar *aa; 118 PetscInt *aj, *ai; 119 120 PetscFunctionBegin; 121 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 122 /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2 123 * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must 124 * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling 125 * mkl_sparse_optimize() later. */ 126 if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS); 127 #endif 128 129 if (aijmkl->sparse_optimized) { 130 /* Matrix has been previously assembled and optimized. Must destroy old 131 * matrix handle before running the optimization step again. */ 132 PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA); 133 } 134 aijmkl->sparse_optimized = PETSC_FALSE; 135 136 /* Now perform the SpMV2 setup and matrix optimization. */ 137 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 138 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 139 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 140 m = A->rmap->n; 141 n = A->cmap->n; 142 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 143 aa = a->a; /* Nonzero elements stored row-by-row. */ 144 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 145 if (a->nz && aa && !A->structure_only) { 146 /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries. 147 * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */ 148 PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa); 149 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 150 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 151 if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA); 152 aijmkl->sparse_optimized = PETSC_TRUE; 153 PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state)); 154 } else { 155 aijmkl->csrA = NULL; 156 } 157 PetscFunctionReturn(PETSC_SUCCESS); 158 #endif 159 } 160 161 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 162 /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */ 163 static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A) 164 { 165 sparse_index_base_t indexing; 166 PetscInt m, n; 167 PetscInt *aj, *ai, *dummy; 168 MatScalar *aa; 169 Mat_SeqAIJMKL *aijmkl; 170 171 PetscFunctionBegin; 172 if (csrA) { 173 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 174 PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa); 175 PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()"); 176 } else { 177 aj = ai = NULL; 178 aa = NULL; 179 } 180 181 PetscCall(MatSetType(A, MATSEQAIJ)); 182 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols)); 183 /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported 184 * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and 185 * they will be destroyed when the MKL handle is destroyed. 186 * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */ 187 if (csrA) { 188 PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL)); 189 } else { 190 /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */ 191 PetscCall(MatSetUp(A)); 192 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 193 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 194 } 195 196 /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle. 197 * Now turn it into a MATSEQAIJMKL. */ 198 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 199 200 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 201 aijmkl->csrA = csrA; 202 203 /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but 204 * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one, 205 * and just need to be able to run the MKL optimization step. */ 206 aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL; 207 aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER; 208 aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT; 209 if (csrA) { 210 PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000); 211 PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE); 212 } 213 PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state)); 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 217 218 /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle. 219 * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in 220 * MatMatMultNumeric(). */ 221 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 222 static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A) 223 { 224 PetscInt i; 225 PetscInt nrows, ncols; 226 PetscInt nz; 227 PetscInt *ai, *aj, *dummy; 228 PetscScalar *aa; 229 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 230 sparse_index_base_t indexing; 231 232 PetscFunctionBegin; 233 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 234 if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS); 235 236 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 237 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa); 238 239 /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc 240 * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */ 241 for (i = 0; i < nrows; i++) { 242 nz = ai[i + 1] - ai[i]; 243 PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES)); 244 } 245 246 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 247 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 248 249 PetscCall(PetscObjectStateGet((PetscObject)A, &aijmkl->state)); 250 /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation. 251 * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed 252 * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */ 253 aijmkl->sparse_optimized = PETSC_FALSE; 254 PetscFunctionReturn(PETSC_SUCCESS); 255 } 256 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 257 258 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 259 PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer) 260 { 261 PetscInt i, j, k; 262 PetscInt nrows, ncols; 263 PetscInt nz; 264 PetscInt *ai, *aj, *dummy; 265 PetscScalar *aa; 266 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 267 sparse_index_base_t indexing; 268 269 PetscFunctionBegin; 270 PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n")); 271 272 /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */ 273 if (!aijmkl->csrA) { 274 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n")); 275 PetscFunctionReturn(PETSC_SUCCESS); 276 } 277 278 /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */ 279 PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa); 280 281 k = 0; 282 for (i = 0; i < nrows; i++) { 283 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i)); 284 nz = ai[i + 1] - ai[i]; 285 for (j = 0; j < nz; j++) { 286 if (aa) { 287 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k]))); 288 } else { 289 PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k])); 290 } 291 k++; 292 } 293 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 294 } 295 PetscFunctionReturn(PETSC_SUCCESS); 296 } 297 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 298 299 static PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M) 300 { 301 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 302 Mat_SeqAIJMKL *aijmkl_dest; 303 304 PetscFunctionBegin; 305 PetscCall(MatDuplicate_SeqAIJ(A, op, M)); 306 aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr; 307 PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1)); 308 aijmkl_dest->sparse_optimized = PETSC_FALSE; 309 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 310 PetscFunctionReturn(PETSC_SUCCESS); 311 } 312 313 static PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode) 314 { 315 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 316 Mat_SeqAIJMKL *aijmkl; 317 318 PetscFunctionBegin; 319 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 320 321 /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some 322 * extra information and some different methods, call the AssemblyEnd 323 * routine for a MATSEQAIJ. 324 * I'm not sure if this is the best way to do this, but it avoids 325 * a lot of code duplication. */ 326 a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */ 327 PetscCall(MatAssemblyEnd_SeqAIJ(A, mode)); 328 329 /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks). 330 * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */ 331 aijmkl = (Mat_SeqAIJMKL *)A->spptr; 332 if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 333 PetscFunctionReturn(PETSC_SUCCESS); 334 } 335 336 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 337 static PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy) 338 { 339 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 340 const PetscScalar *x; 341 PetscScalar *y; 342 const MatScalar *aa; 343 PetscInt m = A->rmap->n; 344 PetscInt n = A->cmap->n; 345 PetscScalar alpha = 1.0; 346 PetscScalar beta = 0.0; 347 const PetscInt *aj, *ai; 348 char matdescra[6]; 349 350 /* Variables not in MatMult_SeqAIJ. */ 351 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 352 353 PetscFunctionBegin; 354 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 355 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 356 PetscCall(VecGetArrayRead(xx, &x)); 357 PetscCall(VecGetArray(yy, &y)); 358 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 359 aa = a->a; /* Nonzero elements stored row-by-row. */ 360 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 361 362 /* Call MKL sparse BLAS routine to do the MatMult. */ 363 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 364 365 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 366 PetscCall(VecRestoreArrayRead(xx, &x)); 367 PetscCall(VecRestoreArray(yy, &y)); 368 PetscFunctionReturn(PETSC_SUCCESS); 369 } 370 #endif 371 372 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 373 PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 374 { 375 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 376 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 377 const PetscScalar *x; 378 PetscScalar *y; 379 PetscObjectState state; 380 381 PetscFunctionBegin; 382 /* If there are no nonzero entries, zero yy and return immediately. */ 383 if (!a->nz) { 384 PetscCall(VecGetArray(yy, &y)); 385 PetscCall(PetscArrayzero(y, A->rmap->n)); 386 PetscCall(VecRestoreArray(yy, &y)); 387 PetscFunctionReturn(PETSC_SUCCESS); 388 } 389 390 PetscCall(VecGetArrayRead(xx, &x)); 391 PetscCall(VecGetArray(yy, &y)); 392 393 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 394 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 395 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 396 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 397 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 398 399 /* Call MKL SpMV2 executor routine to do the MatMult. */ 400 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 401 402 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 403 PetscCall(VecRestoreArrayRead(xx, &x)); 404 PetscCall(VecRestoreArray(yy, &y)); 405 PetscFunctionReturn(PETSC_SUCCESS); 406 } 407 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 408 409 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 410 static PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy) 411 { 412 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 413 const PetscScalar *x; 414 PetscScalar *y; 415 const MatScalar *aa; 416 PetscInt m = A->rmap->n; 417 PetscInt n = A->cmap->n; 418 PetscScalar alpha = 1.0; 419 PetscScalar beta = 0.0; 420 const PetscInt *aj, *ai; 421 char matdescra[6]; 422 423 /* Variables not in MatMultTranspose_SeqAIJ. */ 424 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 425 426 PetscFunctionBegin; 427 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 428 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 429 PetscCall(VecGetArrayRead(xx, &x)); 430 PetscCall(VecGetArray(yy, &y)); 431 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 432 aa = a->a; /* Nonzero elements stored row-by-row. */ 433 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 434 435 /* Call MKL sparse BLAS routine to do the MatMult. */ 436 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y); 437 438 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 439 PetscCall(VecRestoreArrayRead(xx, &x)); 440 PetscCall(VecRestoreArray(yy, &y)); 441 PetscFunctionReturn(PETSC_SUCCESS); 442 } 443 #endif 444 445 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 446 PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy) 447 { 448 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 449 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 450 const PetscScalar *x; 451 PetscScalar *y; 452 PetscObjectState state; 453 454 PetscFunctionBegin; 455 /* If there are no nonzero entries, zero yy and return immediately. */ 456 if (!a->nz) { 457 PetscCall(VecGetArray(yy, &y)); 458 PetscCall(PetscArrayzero(y, A->cmap->n)); 459 PetscCall(VecRestoreArray(yy, &y)); 460 PetscFunctionReturn(PETSC_SUCCESS); 461 } 462 463 PetscCall(VecGetArrayRead(xx, &x)); 464 PetscCall(VecGetArray(yy, &y)); 465 466 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 467 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 468 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 469 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 470 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 471 472 /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */ 473 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y); 474 475 PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt)); 476 PetscCall(VecRestoreArrayRead(xx, &x)); 477 PetscCall(VecRestoreArray(yy, &y)); 478 PetscFunctionReturn(PETSC_SUCCESS); 479 } 480 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 481 482 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 483 static PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 484 { 485 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 486 const PetscScalar *x; 487 PetscScalar *y, *z; 488 const MatScalar *aa; 489 PetscInt m = A->rmap->n; 490 PetscInt n = A->cmap->n; 491 const PetscInt *aj, *ai; 492 PetscInt i; 493 494 /* Variables not in MatMultAdd_SeqAIJ. */ 495 char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */ 496 PetscScalar alpha = 1.0; 497 PetscScalar beta; 498 char matdescra[6]; 499 500 PetscFunctionBegin; 501 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 502 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 503 504 PetscCall(VecGetArrayRead(xx, &x)); 505 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 506 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 507 aa = a->a; /* Nonzero elements stored row-by-row. */ 508 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 509 510 /* Call MKL sparse BLAS routine to do the MatMult. */ 511 if (zz == yy) { 512 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 513 beta = 1.0; 514 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 515 } else { 516 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 517 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 518 beta = 0.0; 519 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 520 for (i = 0; i < m; i++) z[i] += y[i]; 521 } 522 523 PetscCall(PetscLogFlops(2.0 * a->nz)); 524 PetscCall(VecRestoreArrayRead(xx, &x)); 525 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 #endif 529 530 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 531 PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 532 { 533 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 534 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 535 const PetscScalar *x; 536 PetscScalar *y, *z; 537 PetscInt m = A->rmap->n; 538 PetscInt i; 539 540 /* Variables not in MatMultAdd_SeqAIJ. */ 541 PetscObjectState state; 542 543 PetscFunctionBegin; 544 /* If there are no nonzero entries, set zz = yy and return immediately. */ 545 if (!a->nz) { 546 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 547 PetscCall(PetscArraycpy(z, y, m)); 548 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 549 PetscFunctionReturn(PETSC_SUCCESS); 550 } 551 552 PetscCall(VecGetArrayRead(xx, &x)); 553 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 554 555 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 556 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 557 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 558 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 559 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 560 561 /* Call MKL sparse BLAS routine to do the MatMult. */ 562 if (zz == yy) { 563 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 564 * with alpha and beta both set to 1.0. */ 565 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 566 } else { 567 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 568 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 569 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 570 for (i = 0; i < m; i++) z[i] += y[i]; 571 } 572 573 PetscCall(PetscLogFlops(2.0 * a->nz)); 574 PetscCall(VecRestoreArrayRead(xx, &x)); 575 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 576 PetscFunctionReturn(PETSC_SUCCESS); 577 } 578 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 579 580 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 581 static PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz) 582 { 583 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 584 const PetscScalar *x; 585 PetscScalar *y, *z; 586 const MatScalar *aa; 587 PetscInt m = A->rmap->n; 588 PetscInt n = A->cmap->n; 589 const PetscInt *aj, *ai; 590 PetscInt i; 591 592 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 593 char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */ 594 PetscScalar alpha = 1.0; 595 PetscScalar beta; 596 char matdescra[6]; 597 598 PetscFunctionBegin; 599 matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */ 600 matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */ 601 602 PetscCall(VecGetArrayRead(xx, &x)); 603 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 604 aj = a->j; /* aj[k] gives column index for element aa[k]. */ 605 aa = a->a; /* Nonzero elements stored row-by-row. */ 606 ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */ 607 608 /* Call MKL sparse BLAS routine to do the MatMult. */ 609 if (zz == yy) { 610 /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */ 611 beta = 1.0; 612 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 613 } else { 614 /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z. 615 * MKL sparse BLAS does not have a MatMultAdd equivalent. */ 616 beta = 0.0; 617 mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z); 618 for (i = 0; i < n; i++) z[i] += y[i]; 619 } 620 621 PetscCall(PetscLogFlops(2.0 * a->nz)); 622 PetscCall(VecRestoreArrayRead(xx, &x)); 623 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 624 PetscFunctionReturn(PETSC_SUCCESS); 625 } 626 #endif 627 628 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 629 PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz) 630 { 631 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 632 Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr; 633 const PetscScalar *x; 634 PetscScalar *y, *z; 635 PetscInt n = A->cmap->n; 636 PetscInt i; 637 PetscObjectState state; 638 639 /* Variables not in MatMultTransposeAdd_SeqAIJ. */ 640 641 PetscFunctionBegin; 642 /* If there are no nonzero entries, set zz = yy and return immediately. */ 643 if (!a->nz) { 644 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 645 PetscCall(PetscArraycpy(z, y, n)); 646 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 PetscCall(VecGetArrayRead(xx, &x)); 651 PetscCall(VecGetArrayPair(yy, zz, &y, &z)); 652 653 /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call 654 * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably 655 * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */ 656 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 657 if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 658 659 /* Call MKL sparse BLAS routine to do the MatMult. */ 660 if (zz == yy) { 661 /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y, 662 * with alpha and beta both set to 1.0. */ 663 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z); 664 } else { 665 /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then 666 * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */ 667 PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z); 668 for (i = 0; i < n; i++) z[i] += y[i]; 669 } 670 671 PetscCall(PetscLogFlops(2.0 * a->nz)); 672 PetscCall(VecRestoreArrayRead(xx, &x)); 673 PetscCall(VecRestoreArrayPair(yy, zz, &y, &z)); 674 PetscFunctionReturn(PETSC_SUCCESS); 675 } 676 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 677 678 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 679 static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 680 { 681 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr; 682 sparse_matrix_t csrA, csrB, csrC; 683 PetscInt nrows, ncols; 684 struct matrix_descr descr_type_gen; 685 PetscObjectState state; 686 687 PetscFunctionBegin; 688 /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does 689 * not handle sparse matrices with zero rows or columns. */ 690 if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N; 691 else nrows = A->cmap->N; 692 if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N; 693 else ncols = B->rmap->N; 694 695 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 696 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 697 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 698 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 699 csrA = a->csrA; 700 csrB = b->csrA; 701 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 702 703 if (csrA && csrB) { 704 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC); 705 } else { 706 csrC = NULL; 707 } 708 709 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C)); 710 PetscFunctionReturn(PETSC_SUCCESS); 711 } 712 713 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C) 714 { 715 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 716 sparse_matrix_t csrA, csrB, csrC; 717 struct matrix_descr descr_type_gen; 718 PetscObjectState state; 719 720 PetscFunctionBegin; 721 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 722 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 723 PetscCall(PetscObjectStateGet((PetscObject)B, &state)); 724 if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B)); 725 csrA = a->csrA; 726 csrB = b->csrA; 727 csrC = c->csrA; 728 descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL; 729 730 if (csrA && csrB) { 731 PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC); 732 } else { 733 csrC = NULL; 734 } 735 736 /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */ 737 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 738 PetscFunctionReturn(PETSC_SUCCESS); 739 } 740 741 PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 742 { 743 PetscFunctionBegin; 744 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 745 PetscFunctionReturn(PETSC_SUCCESS); 746 } 747 748 PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 749 { 750 PetscFunctionBegin; 751 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 752 PetscFunctionReturn(PETSC_SUCCESS); 753 } 754 755 PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 756 { 757 PetscFunctionBegin; 758 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 759 PetscFunctionReturn(PETSC_SUCCESS); 760 } 761 762 PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 763 { 764 PetscFunctionBegin; 765 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C)); 766 PetscFunctionReturn(PETSC_SUCCESS); 767 } 768 769 PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C) 770 { 771 PetscFunctionBegin; 772 PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 773 PetscFunctionReturn(PETSC_SUCCESS); 774 } 775 776 PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C) 777 { 778 PetscFunctionBegin; 779 PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C)); 780 PetscFunctionReturn(PETSC_SUCCESS); 781 } 782 783 static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 784 { 785 Mat_Product *product = C->product; 786 Mat A = product->A, B = product->B; 787 788 PetscFunctionBegin; 789 PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C)); 790 PetscFunctionReturn(PETSC_SUCCESS); 791 } 792 793 static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C) 794 { 795 Mat_Product *product = C->product; 796 Mat A = product->A, B = product->B; 797 PetscReal fill = product->fill; 798 799 PetscFunctionBegin; 800 PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C)); 801 C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL; 802 PetscFunctionReturn(PETSC_SUCCESS); 803 } 804 805 PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C) 806 { 807 Mat Ct; 808 Vec zeros; 809 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr; 810 sparse_matrix_t csrA, csrP, csrC; 811 PetscBool set, flag; 812 struct matrix_descr descr_type_sym; 813 PetscObjectState state; 814 815 PetscFunctionBegin; 816 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 817 PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric"); 818 819 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 820 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 821 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 822 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 823 csrA = a->csrA; 824 csrP = p->csrA; 825 csrC = c->csrA; 826 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 827 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 828 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 829 830 /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 831 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT); 832 833 /* Update the PETSc AIJ representation for matrix C from contents of MKL handle. 834 * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix, 835 * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix. 836 * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY 837 * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might 838 * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output 839 * the full matrix. */ 840 PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C)); 841 PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct)); 842 PetscCall(MatCreateVecs(C, &zeros, NULL)); 843 PetscCall(VecSetFromOptions(zeros)); 844 PetscCall(VecZeroEntries(zeros)); 845 PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES)); 846 PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN)); 847 /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */ 848 PetscCall(MatProductCreateWithMat(A, P, NULL, C)); 849 PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); 850 PetscCall(MatSeqAIJMKL_create_mkl_handle(C)); 851 PetscCall(VecDestroy(&zeros)); 852 PetscCall(MatDestroy(&Ct)); 853 PetscFunctionReturn(PETSC_SUCCESS); 854 } 855 856 PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C) 857 { 858 Mat_Product *product = C->product; 859 Mat A = product->A, P = product->B; 860 Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr; 861 sparse_matrix_t csrA, csrP, csrC; 862 struct matrix_descr descr_type_sym; 863 PetscObjectState state; 864 865 PetscFunctionBegin; 866 PetscCall(PetscObjectStateGet((PetscObject)A, &state)); 867 if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A)); 868 PetscCall(PetscObjectStateGet((PetscObject)P, &state)); 869 if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P)); 870 csrA = a->csrA; 871 csrP = p->csrA; 872 descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC; 873 descr_type_sym.mode = SPARSE_FILL_MODE_UPPER; 874 descr_type_sym.diag = SPARSE_DIAG_NON_UNIT; 875 876 /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */ 877 if (csrP && csrA) { 878 PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL); 879 } else { 880 csrC = NULL; 881 } 882 883 /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle. 884 * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain 885 * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that 886 * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this 887 * is guaranteed. */ 888 PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C)); 889 890 C->ops->productnumeric = MatProductNumeric_PtAP; 891 PetscFunctionReturn(PETSC_SUCCESS); 892 } 893 894 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C) 895 { 896 PetscFunctionBegin; 897 C->ops->productsymbolic = MatProductSymbolic_AB; 898 C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 899 PetscFunctionReturn(PETSC_SUCCESS); 900 } 901 902 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C) 903 { 904 PetscFunctionBegin; 905 C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL; 906 PetscFunctionReturn(PETSC_SUCCESS); 907 } 908 909 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C) 910 { 911 PetscFunctionBegin; 912 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ; 913 C->ops->productsymbolic = MatProductSymbolic_ABt; 914 PetscFunctionReturn(PETSC_SUCCESS); 915 } 916 917 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C) 918 { 919 Mat_Product *product = C->product; 920 Mat A = product->A; 921 PetscBool set, flag; 922 923 PetscFunctionBegin; 924 /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */ 925 PetscCall(MatIsSymmetricKnown(A, &set, &flag)); 926 PetscCheck(!PetscDefined(USE_COMPLEX) && set && flag, PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_PtAP not supported for type SeqAIJMKL"); 927 /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal() */ 928 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 929 PetscFunctionReturn(PETSC_SUCCESS); 930 } 931 932 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C) 933 { 934 PetscFunctionBegin; 935 C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */ 936 PetscFunctionReturn(PETSC_SUCCESS); 937 } 938 939 static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C) 940 { 941 PetscFunctionBegin; 942 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MATPRODUCT_ABC not supported for type SeqAIJMKL"); 943 } 944 945 PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C) 946 { 947 Mat_Product *product = C->product; 948 949 PetscFunctionBegin; 950 switch (product->type) { 951 case MATPRODUCT_AB: 952 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C)); 953 break; 954 case MATPRODUCT_AtB: 955 PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C)); 956 break; 957 case MATPRODUCT_ABt: 958 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C)); 959 break; 960 case MATPRODUCT_PtAP: 961 PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C)); 962 break; 963 case MATPRODUCT_RARt: 964 PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C)); 965 break; 966 case MATPRODUCT_ABC: 967 PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C)); 968 break; 969 default: 970 break; 971 } 972 PetscFunctionReturn(PETSC_SUCCESS); 973 } 974 #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */ 975 976 /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a 977 * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL() 978 * routine, but can also be used to convert an assembled SeqAIJ matrix 979 * into a SeqAIJMKL one. */ 980 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat) 981 { 982 Mat B = *newmat; 983 Mat_SeqAIJMKL *aijmkl; 984 PetscBool set; 985 PetscBool sametype; 986 987 PetscFunctionBegin; 988 if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); 989 990 PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype)); 991 if (sametype) PetscFunctionReturn(PETSC_SUCCESS); 992 993 PetscCall(PetscNew(&aijmkl)); 994 B->spptr = (void *)aijmkl; 995 996 /* Set function pointers for methods that we inherit from AIJ but override. 997 * We also parse some command line options below, since those determine some of the methods we point to. */ 998 B->ops->duplicate = MatDuplicate_SeqAIJMKL; 999 B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL; 1000 B->ops->destroy = MatDestroy_SeqAIJMKL; 1001 1002 aijmkl->sparse_optimized = PETSC_FALSE; 1003 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1004 aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */ 1005 #else 1006 aijmkl->no_SpMV2 = PETSC_TRUE; 1007 #endif 1008 aijmkl->eager_inspection = PETSC_FALSE; 1009 1010 /* Parse command line options. */ 1011 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat"); 1012 PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set)); 1013 PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set)); 1014 PetscOptionsEnd(); 1015 #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1016 if (!aijmkl->no_SpMV2) { 1017 PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n")); 1018 aijmkl->no_SpMV2 = PETSC_TRUE; 1019 } 1020 #endif 1021 1022 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 1023 B->ops->mult = MatMult_SeqAIJMKL_SpMV2; 1024 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2; 1025 B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2; 1026 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2; 1027 #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE) 1028 B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL; 1029 B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL; 1030 B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1031 B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL; 1032 B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL; 1033 #if !defined(PETSC_USE_COMPLEX) 1034 B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal; 1035 #else 1036 B->ops->ptapnumeric = NULL; 1037 #endif 1038 #endif 1039 #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */ 1040 1041 #if !defined(PETSC_MKL_SPBLAS_DEPRECATED) 1042 /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the 1043 * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not 1044 * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For 1045 * versions in which the older interface has not been deprecated, we use the old interface. */ 1046 if (aijmkl->no_SpMV2) { 1047 B->ops->mult = MatMult_SeqAIJMKL; 1048 B->ops->multtranspose = MatMultTranspose_SeqAIJMKL; 1049 B->ops->multadd = MatMultAdd_SeqAIJMKL; 1050 B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL; 1051 } 1052 #endif 1053 1054 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ)); 1055 1056 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL)); 1057 *newmat = B; 1058 PetscFunctionReturn(PETSC_SUCCESS); 1059 } 1060 1061 /*@C 1062 MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`. 1063 1064 Collective 1065 1066 Input Parameters: 1067 + comm - MPI communicator, set to `PETSC_COMM_SELF` 1068 . m - number of rows 1069 . n - number of columns 1070 . nz - number of nonzeros per row (same for all rows) 1071 - nnz - array containing the number of nonzeros in the various rows 1072 (possibly different for each row) or `NULL` 1073 1074 Output Parameter: 1075 . A - the matrix 1076 1077 Options Database Keys: 1078 + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines 1079 - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection, 1080 performing this step the first time the matrix is applied 1081 1082 Level: intermediate 1083 1084 Notes: 1085 If `nnz` is given then `nz` is ignored 1086 1087 This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS 1088 routines from Intel MKL whenever possible. 1089 1090 If the installed version of MKL supports the "SpMV2" sparse 1091 inspector-executor routines, then those are used by default. 1092 1093 `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()` 1094 (for symmetric A) operations are currently supported. 1095 MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`. 1096 1097 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()` 1098 @*/ 1099 PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 1100 { 1101 PetscFunctionBegin; 1102 PetscCall(MatCreate(comm, A)); 1103 PetscCall(MatSetSizes(*A, m, n, m, n)); 1104 PetscCall(MatSetType(*A, MATSEQAIJMKL)); 1105 PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz)); 1106 PetscFunctionReturn(PETSC_SUCCESS); 1107 } 1108 1109 PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A) 1110 { 1111 PetscFunctionBegin; 1112 PetscCall(MatSetType(A, MATSEQAIJ)); 1113 PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A)); 1114 PetscFunctionReturn(PETSC_SUCCESS); 1115 } 1116