xref: /petsc/src/mat/impls/baij/mpi/baijmkl/mpibaijmkl.c (revision a623e290c7eaa252b385564179837fe27521fbac)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I   "petscmat.h"   I*/
2 
3 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
4 
MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt * d_nnz,PetscInt o_nz,const PetscInt * o_nnz)5 static PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJMKL(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
6 {
7   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
8 
9   PetscFunctionBegin;
10   PetscCall(MatMPIBAIJSetPreallocation_MPIBAIJ(B, bs, d_nz, d_nnz, o_nz, o_nnz));
11   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->A, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->A));
12   PetscCall(MatConvert_SeqBAIJ_SeqBAIJMKL(b->B, MATSEQBAIJMKL, MAT_INPLACE_MATRIX, &b->B));
13   PetscFunctionReturn(PETSC_SUCCESS);
14 }
15 
MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A,MatType type,MatReuse reuse,Mat * newmat)16 static PetscErrorCode MatConvert_MPIBAIJ_MPIBAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
17 {
18   Mat B = *newmat;
19 
20   PetscFunctionBegin;
21   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
22 
23   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJMKL));
24   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJMKL));
25   *newmat = B;
26   PetscFunctionReturn(PETSC_SUCCESS);
27 }
28 
29 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
30 /*@
31   MatCreateBAIJMKL - Creates a sparse parallel matrix in `MATBAIJMKL` format (block compressed row).
32 
33   Collective
34 
35   Input Parameters:
36 + comm  - MPI communicator
37 . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
38           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
39 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
40           This value should be the same as the local size used in creating the
41           y vector for the matrix-vector product y = Ax.
42 . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
43           This value should be the same as the local size used in creating the
44           x vector for the matrix-vector product y = Ax.
45 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
46 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
47 . d_nz  - number of nonzero blocks per block row in diagonal portion of local
48           submatrix  (same for all local rows)
49 . d_nnz - array containing the number of nonzero blocks in the various block rows
50           of the in diagonal portion of the local (possibly different for each block
51           row) or `NULL`.  If you plan to factor the matrix you must leave room for the diagonal entry
52           and set it even if it is zero.
53 . o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
54           submatrix (same for all local rows).
55 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
56           off-diagonal portion of the local submatrix (possibly different for
57           each block row) or `NULL`.
58 
59   Output Parameter:
60 . A - the matrix
61 
62   Options Database Keys:
63 + -mat_block_size            - size of the blocks to use
64 - -mat_use_hash_table <fact> - set hash table factor
65 
66   Level: intermediate
67 
68   Notes:
69   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
70   MatXXXXSetPreallocation() paradigm instead of this routine directly.
71   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
72 
73   This type inherits from `MATBAIJ` and is largely identical, but uses sparse BLAS
74   routines from Intel MKL whenever possible.
75   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, and `MatMultTransposeAdd()`
76   operations are currently supported.
77   If the installed version of MKL supports the "SpMV2" sparse
78   inspector-executor routines, then those are used by default.
79   Default PETSc kernels are used otherwise.
80   For good matrix assembly performance the user should preallocate the matrix
81   storage by setting the parameters `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
82   By setting these parameters accurately, performance can be increased by more
83   than a factor of 50.
84 
85   If the *_nnz parameter is given then the *_nz parameter is ignored
86 
87   A nonzero block is any block that as 1 or more nonzeros in it
88 
89   The user MUST specify either the local or global matrix dimensions
90   (possibly both).
91 
92   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
93   than it must be used on all processors that share the object for that argument.
94 
95   Storage Information:
96   For a square global matrix we define each processor's diagonal portion
97   to be its local rows and the corresponding columns (a square submatrix);
98   each processor's off-diagonal portion encompasses the remainder of the
99   local matrix (a rectangular submatrix).
100 
101   The user can specify preallocated storage for the diagonal part of
102   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
103   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
104   memory allocation.  Likewise, specify preallocated storage for the
105   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
106 
107   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
108   the figure below we depict these three local rows and all columns (0-11).
109 
110 .vb
111            0 1 2 3 4 5 6 7 8 9 10 11
112           --------------------------
113    row 3  |o o o d d d o o o o  o  o
114    row 4  |o o o d d d o o o o  o  o
115    row 5  |o o o d d d o o o o  o  o
116           --------------------------
117 .ve
118 
119   Thus, any entries in the d locations are stored in the d (diagonal)
120   submatrix, and any entries in the o locations are stored in the
121   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
122   stored simply in the `MATSEQBAIJMKL` format for compressed row storage.
123 
124   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
125   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
126   In general, for PDE problems in which most nonzeros are near the diagonal,
127   one expects `d_nz` >> `o_nz`.
128 
129 .seealso: [](ch_matrices), `Mat`, `MATBAIJMKL`, `MATBAIJ`, `MatCreate()`, `MatCreateSeqBAIJMKL()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
130 @*/
MatCreateBAIJMKL(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat * A)131 PetscErrorCode MatCreateBAIJMKL(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
132 {
133   PetscMPIInt size;
134 
135   PetscFunctionBegin;
136   PetscCall(MatCreate(comm, A));
137   PetscCall(MatSetSizes(*A, m, n, M, N));
138   PetscCallMPI(MPI_Comm_size(comm, &size));
139   if (size > 1) {
140     PetscCall(MatSetType(*A, MATMPIBAIJMKL));
141     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
142   } else {
143     PetscCall(MatSetType(*A, MATSEQBAIJMKL));
144     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
145   }
146   PetscFunctionReturn(PETSC_SUCCESS);
147 }
148 
MatCreate_MPIBAIJMKL(Mat A)149 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJMKL(Mat A)
150 {
151   PetscFunctionBegin;
152   PetscCall(MatSetType(A, MATMPIBAIJ));
153   PetscCall(MatConvert_MPIBAIJ_MPIBAIJMKL(A, MATMPIBAIJMKL, MAT_INPLACE_MATRIX, &A));
154   PetscFunctionReturn(PETSC_SUCCESS);
155 }
156 
157 /*MC
158    MATBAIJMKL - MATBAIJMKL = "BAIJMKL" - A matrix type to be used for sparse matrices.
159 
160    This matrix type is identical to `MATSEQBAIJMKL` when constructed with a single process communicator,
161    and `MATMPIBAIJMKL` otherwise.  As a result, for single process communicators,
162   `MatSeqBAIJSetPreallocation()` is supported, and similarly `MatMPIBAIJSetPreallocation()` is supported
163   for communicators controlling multiple processes.  It is recommended that you call both of
164   the above preallocation routines for simplicity.
165 
166    Options Database Key:
167 . -mat_type baijmkl - sets the matrix type to `MATBAIJMKL` during a call to `MatSetFromOptions()`
168 
169   Level: beginner
170 
171 .seealso: [](ch_matrices), `Mat`, `MatCreateBAIJMKL()`, `MATSEQBAIJMKL`, `MATMPIBAIJMKL`
172 M*/
173