xref: /petsc/src/mat/impls/aij/seq/mkl_pardiso/mkl_pardiso.c (revision e8c0849ab8fe171bed529bea27238c9b402db591)
1d37c1e25SSatish Balay #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
2de1ba746SStefano Zampini #include <../src/mat/impls/sbaij/seq/sbaij.h>
3d37c1e25SSatish Balay #include <../src/mat/impls/dense/seq/dense.h>
4d37c1e25SSatish Balay 
52475b7caSBarry Smith #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
669ab9685SBarry Smith   #define MKL_ILP64
769ab9685SBarry Smith #endif
8f84b7162SStefano Zampini #include <mkl_pardiso.h>
9d37c1e25SSatish Balay 
1061ce1fc3SStefano Zampini PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int);
1161ce1fc3SStefano Zampini 
12d37c1e25SSatish Balay /*
13d615f992SSatish Balay  *  Possible mkl_pardiso phases that controls the execution of the solver.
14d615f992SSatish Balay  *  For more information check mkl_pardiso manual.
15d37c1e25SSatish Balay  */
16d37c1e25SSatish Balay #define JOB_ANALYSIS                                                    11
17d37c1e25SSatish Balay #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION                            12
18d37c1e25SSatish Balay #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13
19d37c1e25SSatish Balay #define JOB_NUMERICAL_FACTORIZATION                                     22
20d37c1e25SSatish Balay #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT          23
21d37c1e25SSatish Balay #define JOB_SOLVE_ITERATIVE_REFINEMENT                                  33
22d37c1e25SSatish Balay #define JOB_SOLVE_FORWARD_SUBSTITUTION                                  331
23d37c1e25SSatish Balay #define JOB_SOLVE_DIAGONAL_SUBSTITUTION                                 332
24d37c1e25SSatish Balay #define JOB_SOLVE_BACKWARD_SUBSTITUTION                                 333
25d37c1e25SSatish Balay #define JOB_RELEASE_OF_LU_MEMORY                                        0
26d37c1e25SSatish Balay #define JOB_RELEASE_OF_ALL_MEMORY                                       -1
27d37c1e25SSatish Balay 
28d37c1e25SSatish Balay #define IPARM_SIZE 64
29d305a81bSVasiliy Kozyrev 
30d37c1e25SSatish Balay #if defined(PETSC_USE_64BIT_INDICES)
312475b7caSBarry Smith   #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
32aec6e1dcSBarry Smith     #define INT_TYPE         long long int
33d305a81bSVasiliy Kozyrev     #define MKL_PARDISO      pardiso
34d305a81bSVasiliy Kozyrev     #define MKL_PARDISO_INIT pardisoinit
35d305a81bSVasiliy Kozyrev   #else
367de69702SBarry Smith     /* this is the case where the MKL BLAS/LAPACK are 32-bit integers but the 64-bit integer version of
371d27aa22SBarry Smith      of PARDISO code is used; hence the need for the 64 below*/
38d37c1e25SSatish Balay     #define INT_TYPE         long long int
39d37c1e25SSatish Balay     #define MKL_PARDISO      pardiso_64
40d615f992SSatish Balay     #define MKL_PARDISO_INIT pardiso_64init
pardiso_64init(void * pt,INT_TYPE * mtype,INT_TYPE iparm[])41d71ae5a4SJacob Faibussowitsch void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm[])
42d71ae5a4SJacob Faibussowitsch {
435f7675d9SJose E. Roman   PetscBLASInt iparm_copy[IPARM_SIZE], mtype_copy;
4469ab9685SBarry Smith 
455f7675d9SJose E. Roman   PetscCallVoid(PetscBLASIntCast(*mtype, &mtype_copy));
4669ab9685SBarry Smith   pardisoinit(pt, &mtype_copy, iparm_copy);
475f7675d9SJose E. Roman   for (PetscInt i = 0; i < IPARM_SIZE; i++) iparm[i] = iparm_copy[i];
4869ab9685SBarry Smith }
49d305a81bSVasiliy Kozyrev   #endif
50d37c1e25SSatish Balay #else
51aec6e1dcSBarry Smith   #define INT_TYPE         int
52d37c1e25SSatish Balay   #define MKL_PARDISO      pardiso
53d615f992SSatish Balay   #define MKL_PARDISO_INIT pardisoinit
54d37c1e25SSatish Balay #endif
55d37c1e25SSatish Balay 
56aaaa354bSBarry Smith #define PetscCallPardiso(f) PetscStackCallExternalVoid("MKL_PARDISO", f);
57aaaa354bSBarry Smith 
58d37c1e25SSatish Balay /*
591d27aa22SBarry Smith    Internal data structure.
60d37c1e25SSatish Balay  */
61d37c1e25SSatish Balay typedef struct {
62d37c1e25SSatish Balay   /* Configuration vector*/
63d37c1e25SSatish Balay   INT_TYPE iparm[IPARM_SIZE];
64d37c1e25SSatish Balay 
65d37c1e25SSatish Balay   /*
661d27aa22SBarry Smith      Internal MKL PARDISO memory location.
671d27aa22SBarry Smith      After the first call to MKL PARDISO do not modify pt, as that could cause a serious memory leak.
68d37c1e25SSatish Balay    */
69d37c1e25SSatish Balay   void *pt[IPARM_SIZE];
70d37c1e25SSatish Balay 
711d27aa22SBarry Smith   /* Basic MKL PARDISO info */
72d37c1e25SSatish Balay   INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err;
73d37c1e25SSatish Balay 
740b4b7b1cSBarry Smith   /* Matrix nonzero structure and values */
75d37c1e25SSatish Balay   void     *a;
76d37c1e25SSatish Balay   INT_TYPE *ia, *ja;
77d37c1e25SSatish Balay 
78d37c1e25SSatish Balay   /* Number of non-zero elements */
79d37c1e25SSatish Balay   INT_TYPE nz;
80d37c1e25SSatish Balay 
81d37c1e25SSatish Balay   /* Row permutaton vector */
82d37c1e25SSatish Balay   INT_TYPE *perm;
83d37c1e25SSatish Balay 
842b26979fSBarry Smith   /* Define if matrix preserves sparse structure. */
85d37c1e25SSatish Balay   MatStructure matstruc;
86d37c1e25SSatish Balay 
87de1ba746SStefano Zampini   PetscBool needsym;
88de1ba746SStefano Zampini   PetscBool freeaij;
89de1ba746SStefano Zampini 
902254c79cSStefano Zampini   /* Schur complement */
912254c79cSStefano Zampini   PetscScalar *schur;
922254c79cSStefano Zampini   PetscInt     schur_size;
932254c79cSStefano Zampini   PetscInt    *schur_idxs;
942254c79cSStefano Zampini   PetscScalar *schur_work;
952254c79cSStefano Zampini   PetscBLASInt schur_work_size;
96ad9a362dSStefano Zampini   PetscBool    solve_interior;
972254c79cSStefano Zampini 
981d27aa22SBarry Smith   /* True if MKL PARDISO function have been used. */
99d615f992SSatish Balay   PetscBool CleanUp;
100de1ba746SStefano Zampini 
101de1ba746SStefano Zampini   /* Conversion to a format suitable for MKL */
1022ee68a28SStefano Zampini   PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool *, INT_TYPE *, INT_TYPE **, INT_TYPE **, PetscScalar **);
103d615f992SSatish Balay } Mat_MKL_PARDISO;
104d37c1e25SSatish Balay 
MatMKLPardiso_Convert_seqsbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool * free,INT_TYPE * nnz,INT_TYPE ** r,INT_TYPE ** c,PetscScalar ** v)10566976f2fSJacob Faibussowitsch static PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A, PetscBool sym, MatReuse reuse, PetscBool *free, INT_TYPE *nnz, INT_TYPE **r, INT_TYPE **c, PetscScalar **v)
106d71ae5a4SJacob Faibussowitsch {
107de1ba746SStefano Zampini   Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data;
10851d8ba46SPierre Jolivet   PetscInt      bs = A->rmap->bs, i;
109a578bbc7SStefano Zampini 
110de1ba746SStefano Zampini   PetscFunctionBegin;
11128b400f6SJacob Faibussowitsch   PetscCheck(sym, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "This should not happen");
112a578bbc7SStefano Zampini   *v = aa->a;
11351d8ba46SPierre Jolivet   if (bs == 1) { /* already in the correct format */
11457f04983SBarry Smith     /* though PetscInt and INT_TYPE are of the same size since they are defined differently the Intel compiler requires a cast */
11557f04983SBarry Smith     *r    = (INT_TYPE *)aa->i;
11657f04983SBarry Smith     *c    = (INT_TYPE *)aa->j;
11757f04983SBarry Smith     *nnz  = (INT_TYPE)aa->nz;
118a578bbc7SStefano Zampini     *free = PETSC_FALSE;
11951d8ba46SPierre Jolivet   } else if (reuse == MAT_INITIAL_MATRIX) {
12051d8ba46SPierre Jolivet     PetscInt  m = A->rmap->n, nz = aa->nz;
12151d8ba46SPierre Jolivet     PetscInt *row, *col;
1229566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(m + 1, &row, nz, &col));
123ad540459SPierre Jolivet     for (i = 0; i < m + 1; i++) row[i] = aa->i[i] + 1;
124ad540459SPierre Jolivet     for (i = 0; i < nz; i++) col[i] = aa->j[i] + 1;
12551d8ba46SPierre Jolivet     *r    = (INT_TYPE *)row;
12651d8ba46SPierre Jolivet     *c    = (INT_TYPE *)col;
12751d8ba46SPierre Jolivet     *nnz  = (INT_TYPE)nz;
12851d8ba46SPierre Jolivet     *free = PETSC_TRUE;
129de1ba746SStefano Zampini   }
1303ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
131de1ba746SStefano Zampini }
132de1ba746SStefano Zampini 
MatMKLPardiso_Convert_seqbaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool * free,INT_TYPE * nnz,INT_TYPE ** r,INT_TYPE ** c,PetscScalar ** v)13366976f2fSJacob Faibussowitsch static PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A, PetscBool sym, MatReuse reuse, PetscBool *free, INT_TYPE *nnz, INT_TYPE **r, INT_TYPE **c, PetscScalar **v)
134d71ae5a4SJacob Faibussowitsch {
13551d8ba46SPierre Jolivet   Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data;
13651d8ba46SPierre Jolivet   PetscInt     bs = A->rmap->bs, i;
13751d8ba46SPierre Jolivet 
138de1ba746SStefano Zampini   PetscFunctionBegin;
13951d8ba46SPierre Jolivet   if (!sym) {
14051d8ba46SPierre Jolivet     *v = aa->a;
14151d8ba46SPierre Jolivet     if (bs == 1) { /* already in the correct format */
14251d8ba46SPierre Jolivet       /* though PetscInt and INT_TYPE are of the same size since they are defined differently the Intel compiler requires a cast */
14351d8ba46SPierre Jolivet       *r    = (INT_TYPE *)aa->i;
14451d8ba46SPierre Jolivet       *c    = (INT_TYPE *)aa->j;
14551d8ba46SPierre Jolivet       *nnz  = (INT_TYPE)aa->nz;
14651d8ba46SPierre Jolivet       *free = PETSC_FALSE;
1473ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
14851d8ba46SPierre Jolivet     } else if (reuse == MAT_INITIAL_MATRIX) {
14951d8ba46SPierre Jolivet       PetscInt  m = A->rmap->n, nz = aa->nz;
15051d8ba46SPierre Jolivet       PetscInt *row, *col;
1519566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(m + 1, &row, nz, &col));
152ad540459SPierre Jolivet       for (i = 0; i < m + 1; i++) row[i] = aa->i[i] + 1;
153ad540459SPierre Jolivet       for (i = 0; i < nz; i++) col[i] = aa->j[i] + 1;
15451d8ba46SPierre Jolivet       *r   = (INT_TYPE *)row;
15551d8ba46SPierre Jolivet       *c   = (INT_TYPE *)col;
15651d8ba46SPierre Jolivet       *nnz = (INT_TYPE)nz;
15751d8ba46SPierre Jolivet     }
15851d8ba46SPierre Jolivet     *free = PETSC_TRUE;
15951d8ba46SPierre Jolivet   } else {
16051d8ba46SPierre Jolivet     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "This should not happen");
16151d8ba46SPierre Jolivet   }
1623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
163de1ba746SStefano Zampini }
164de1ba746SStefano Zampini 
MatMKLPardiso_Convert_seqaij(Mat A,PetscBool sym,MatReuse reuse,PetscBool * free,INT_TYPE * nnz,INT_TYPE ** r,INT_TYPE ** c,PetscScalar ** v)16566976f2fSJacob Faibussowitsch static PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A, PetscBool sym, MatReuse reuse, PetscBool *free, INT_TYPE *nnz, INT_TYPE **r, INT_TYPE **c, PetscScalar **v)
166d71ae5a4SJacob Faibussowitsch {
167de1ba746SStefano Zampini   Mat_SeqAIJ  *aa = (Mat_SeqAIJ *)A->data;
1687c15a11cSStefano Zampini   PetscScalar *aav;
169de1ba746SStefano Zampini 
170de1ba746SStefano Zampini   PetscFunctionBegin;
1719566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJGetArrayRead(A, (const PetscScalar **)&aav));
172de1ba746SStefano Zampini   if (!sym) { /* already in the correct format */
1737c15a11cSStefano Zampini     *v    = aav;
17457f04983SBarry Smith     *r    = (INT_TYPE *)aa->i;
17557f04983SBarry Smith     *c    = (INT_TYPE *)aa->j;
17657f04983SBarry Smith     *nnz  = (INT_TYPE)aa->nz;
177a578bbc7SStefano Zampini     *free = PETSC_FALSE;
1787c15a11cSStefano Zampini   } else if (reuse == MAT_INITIAL_MATRIX) { /* need to get the triangular part */
179de1ba746SStefano Zampini     PetscScalar    *vals, *vv;
180de1ba746SStefano Zampini     PetscInt       *row, *col, *jj;
181de1ba746SStefano Zampini     PetscInt        m = A->rmap->n, nz, i;
182*421480d9SBarry Smith     const PetscInt *adiag;
183de1ba746SStefano Zampini 
184*421480d9SBarry Smith     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
185de1ba746SStefano Zampini     nz = 0;
186*421480d9SBarry Smith     for (i = 0; i < m; i++) nz += aa->i[i + 1] - adiag[i];
1879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(m + 1, &row, nz, &col));
1889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(nz, &vals));
189de1ba746SStefano Zampini     jj = col;
190de1ba746SStefano Zampini     vv = vals;
191de1ba746SStefano Zampini 
192de1ba746SStefano Zampini     row[0] = 0;
193de1ba746SStefano Zampini     for (i = 0; i < m; i++) {
194*421480d9SBarry Smith       PetscInt    *aj = aa->j + adiag[i];
195*421480d9SBarry Smith       PetscScalar *av = aav + adiag[i];
196*421480d9SBarry Smith       PetscInt     rl = aa->i[i + 1] - adiag[i], j;
1977c15a11cSStefano Zampini 
198de1ba746SStefano Zampini       for (j = 0; j < rl; j++) {
1999371c9d4SSatish Balay         *jj = *aj;
2009371c9d4SSatish Balay         jj++;
2019371c9d4SSatish Balay         aj++;
2029371c9d4SSatish Balay         *vv = *av;
2039371c9d4SSatish Balay         vv++;
2049371c9d4SSatish Balay         av++;
205de1ba746SStefano Zampini       }
206de1ba746SStefano Zampini       row[i + 1] = row[i] + rl;
207de1ba746SStefano Zampini     }
208de1ba746SStefano Zampini     *v    = vals;
20957f04983SBarry Smith     *r    = (INT_TYPE *)row;
21057f04983SBarry Smith     *c    = (INT_TYPE *)col;
21157f04983SBarry Smith     *nnz  = (INT_TYPE)nz;
2127c15a11cSStefano Zampini     *free = PETSC_TRUE;
213de1ba746SStefano Zampini   } else {
214de1ba746SStefano Zampini     PetscScalar    *vv;
215de1ba746SStefano Zampini     PetscInt        m = A->rmap->n, i;
216*421480d9SBarry Smith     const PetscInt *adiag;
217de1ba746SStefano Zampini 
218*421480d9SBarry Smith     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
219de1ba746SStefano Zampini     vv = *v;
220de1ba746SStefano Zampini     for (i = 0; i < m; i++) {
221*421480d9SBarry Smith       PetscScalar *av = aav + adiag[i];
222*421480d9SBarry Smith       PetscInt     rl = aa->i[i + 1] - adiag[i], j;
223de1ba746SStefano Zampini       for (j = 0; j < rl; j++) {
2249371c9d4SSatish Balay         *vv = *av;
2259371c9d4SSatish Balay         vv++;
2269371c9d4SSatish Balay         av++;
227de1ba746SStefano Zampini       }
228de1ba746SStefano Zampini     }
22951d8ba46SPierre Jolivet     *free = PETSC_TRUE;
2307c15a11cSStefano Zampini   }
2319566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJRestoreArrayRead(A, (const PetscScalar **)&aav));
2323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
233de1ba746SStefano Zampini }
234d37c1e25SSatish Balay 
MatMKLPardisoSolveSchur_Private(Mat F,PetscScalar * B,PetscScalar * X)235d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat F, PetscScalar *B, PetscScalar *X)
236d71ae5a4SJacob Faibussowitsch {
237b3cb21ddSStefano Zampini   Mat_MKL_PARDISO     *mpardiso = (Mat_MKL_PARDISO *)F->data;
238b3cb21ddSStefano Zampini   Mat                  S, Xmat, Bmat;
239b3cb21ddSStefano Zampini   MatFactorSchurStatus schurstatus;
240d37c1e25SSatish Balay 
2412254c79cSStefano Zampini   PetscFunctionBegin;
2429566063dSJacob Faibussowitsch   PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
243aed4548fSBarry Smith   PetscCheck(X != B || schurstatus != MAT_FACTOR_SCHUR_INVERTED, PETSC_COMM_SELF, PETSC_ERR_SUP, "X and B cannot point to the same address");
2449566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mpardiso->schur_size, mpardiso->nrhs, B, &Bmat));
2459566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mpardiso->schur_size, mpardiso->nrhs, X, &Xmat));
2469566063dSJacob Faibussowitsch   PetscCall(MatSetType(Bmat, ((PetscObject)S)->type_name));
2479566063dSJacob Faibussowitsch   PetscCall(MatSetType(Xmat, ((PetscObject)S)->type_name));
2487c15a11cSStefano Zampini #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
2499566063dSJacob Faibussowitsch   PetscCall(MatBindToCPU(Xmat, S->boundtocpu));
2509566063dSJacob Faibussowitsch   PetscCall(MatBindToCPU(Bmat, S->boundtocpu));
2517c15a11cSStefano Zampini #endif
2522254c79cSStefano Zampini 
253b3cb21ddSStefano Zampini #if defined(PETSC_USE_COMPLEX)
25408401ef6SPierre Jolivet   PetscCheck(mpardiso->iparm[12 - 1] != 1, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Hermitian solve not implemented yet");
255b3cb21ddSStefano Zampini #endif
2562254c79cSStefano Zampini 
257b3cb21ddSStefano Zampini   switch (schurstatus) {
258b3cb21ddSStefano Zampini   case MAT_FACTOR_SCHUR_FACTORED:
259b3cb21ddSStefano Zampini     if (!mpardiso->iparm[12 - 1]) {
2609566063dSJacob Faibussowitsch       PetscCall(MatMatSolve(S, Bmat, Xmat));
261b3cb21ddSStefano Zampini     } else { /* transpose solve */
2629566063dSJacob Faibussowitsch       PetscCall(MatMatSolveTranspose(S, Bmat, Xmat));
2632254c79cSStefano Zampini     }
2642254c79cSStefano Zampini     break;
265b3cb21ddSStefano Zampini   case MAT_FACTOR_SCHUR_INVERTED:
2669566063dSJacob Faibussowitsch     PetscCall(MatProductCreateWithMat(S, Bmat, NULL, Xmat));
267b3cb21ddSStefano Zampini     if (!mpardiso->iparm[12 - 1]) {
2689566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(Xmat, MATPRODUCT_AB));
269b3cb21ddSStefano Zampini     } else { /* transpose solve */
2709566063dSJacob Faibussowitsch       PetscCall(MatProductSetType(Xmat, MATPRODUCT_AtB));
27113aee51cSStefano Zampini     }
2729566063dSJacob Faibussowitsch     PetscCall(MatProductSetFromOptions(Xmat));
2739566063dSJacob Faibussowitsch     PetscCall(MatProductSymbolic(Xmat));
2749566063dSJacob Faibussowitsch     PetscCall(MatProductNumeric(Xmat));
2759566063dSJacob Faibussowitsch     PetscCall(MatProductClear(Xmat));
27613aee51cSStefano Zampini     break;
277d71ae5a4SJacob Faibussowitsch   default:
278e54da482SJose E. Roman     SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", (int)F->schur_status);
279d71ae5a4SJacob Faibussowitsch     break;
2802254c79cSStefano Zampini   }
2819566063dSJacob Faibussowitsch   PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
2829566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Bmat));
2839566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Xmat));
2843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2852254c79cSStefano Zampini }
2862254c79cSStefano Zampini 
MatFactorSetSchurIS_MKL_PARDISO(Mat F,IS is)28766976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is)
288d71ae5a4SJacob Faibussowitsch {
289dfa13051SBarry Smith   Mat_MKL_PARDISO   *mpardiso = (Mat_MKL_PARDISO *)F->data;
2907c15a11cSStefano Zampini   const PetscScalar *arr;
2912254c79cSStefano Zampini   const PetscInt    *idxs;
2922254c79cSStefano Zampini   PetscInt           size, i;
2932254c79cSStefano Zampini   PetscMPIInt        csize;
2947c48b089SStefano Zampini   PetscBool          sorted;
2952254c79cSStefano Zampini 
2962254c79cSStefano Zampini   PetscFunctionBegin;
2979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &csize));
2981d27aa22SBarry Smith   PetscCheck(csize <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "MKL PARDISO parallel Schur complements not yet supported from PETSc");
2999566063dSJacob Faibussowitsch   PetscCall(ISSorted(is, &sorted));
3001d27aa22SBarry Smith   PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS for MKL PARDISO Schur complements needs to be sorted");
3019566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(is, &size));
3029566063dSJacob Faibussowitsch   PetscCall(PetscFree(mpardiso->schur_work));
3039566063dSJacob Faibussowitsch   PetscCall(PetscBLASIntCast(PetscMax(mpardiso->n, 2 * size), &mpardiso->schur_work_size));
3049566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mpardiso->schur_work_size, &mpardiso->schur_work));
3059566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&F->schur));
3069566063dSJacob Faibussowitsch   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
3079566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(F->schur, &arr));
3087c15a11cSStefano Zampini   mpardiso->schur      = (PetscScalar *)arr;
3097c15a11cSStefano Zampini   mpardiso->schur_size = size;
3109566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
31148a46eb9SPierre Jolivet   if (mpardiso->mtype == 2) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));
312b3cb21ddSStefano Zampini 
3139566063dSJacob Faibussowitsch   PetscCall(PetscFree(mpardiso->schur_idxs));
3149566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(size, &mpardiso->schur_idxs));
3159566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(mpardiso->perm, mpardiso->n));
3169566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(is, &idxs));
3179566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(mpardiso->schur_idxs, idxs, size));
3182254c79cSStefano Zampini   for (i = 0; i < size; i++) mpardiso->perm[idxs[i]] = 1;
3199566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(is, &idxs));
3207c2f51b8SStefano Zampini   if (size) { /* turn on Schur switch if the set of indices is not empty */
3212254c79cSStefano Zampini     mpardiso->iparm[36 - 1] = 2;
3222254c79cSStefano Zampini   }
3233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
324e8ade678SStefano Zampini }
325e8ade678SStefano Zampini 
MatDestroy_MKL_PARDISO(Mat A)32666976f2fSJacob Faibussowitsch static PetscErrorCode MatDestroy_MKL_PARDISO(Mat A)
327d71ae5a4SJacob Faibussowitsch {
328dfa13051SBarry Smith   Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
329d37c1e25SSatish Balay 
330d37c1e25SSatish Balay   PetscFunctionBegin;
331d615f992SSatish Balay   if (mat_mkl_pardiso->CleanUp) {
332d615f992SSatish Balay     mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY;
333d37c1e25SSatish Balay 
334aaaa354bSBarry Smith     PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, NULL, NULL, NULL, NULL, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm,
335aaaa354bSBarry Smith                                  &mat_mkl_pardiso->msglvl, NULL, NULL, &mat_mkl_pardiso->err));
336d37c1e25SSatish Balay   }
3379566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat_mkl_pardiso->perm));
3389566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat_mkl_pardiso->schur_work));
3399566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat_mkl_pardiso->schur_idxs));
340de1ba746SStefano Zampini   if (mat_mkl_pardiso->freeaij) {
3419566063dSJacob Faibussowitsch     PetscCall(PetscFree2(mat_mkl_pardiso->ia, mat_mkl_pardiso->ja));
34248a46eb9SPierre Jolivet     if (mat_mkl_pardiso->iparm[34] == 1) PetscCall(PetscFree(mat_mkl_pardiso->a));
34351d8ba46SPierre Jolivet   }
3449566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
345d37c1e25SSatish Balay 
346d37c1e25SSatish Balay   /* clear composed functions */
3479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
3489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
3499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMkl_PardisoSetCntl_C", NULL));
3503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
351d37c1e25SSatish Balay }
352d37c1e25SSatish Balay 
MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO * mpardiso,PetscScalar * whole,PetscScalar * schur,PetscBool reduce)353d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce)
354d71ae5a4SJacob Faibussowitsch {
3552254c79cSStefano Zampini   PetscFunctionBegin;
3562254c79cSStefano Zampini   if (reduce) { /* data given for the whole matrix */
3572254c79cSStefano Zampini     PetscInt i, m = 0, p = 0;
3582254c79cSStefano Zampini     for (i = 0; i < mpardiso->nrhs; i++) {
3592254c79cSStefano Zampini       PetscInt j;
360ad540459SPierre Jolivet       for (j = 0; j < mpardiso->schur_size; j++) schur[p + j] = whole[m + mpardiso->schur_idxs[j]];
3612254c79cSStefano Zampini       m += mpardiso->n;
3622254c79cSStefano Zampini       p += mpardiso->schur_size;
3632254c79cSStefano Zampini     }
3642254c79cSStefano Zampini   } else { /* from Schur to whole */
3652254c79cSStefano Zampini     PetscInt i, m = 0, p = 0;
3662254c79cSStefano Zampini     for (i = 0; i < mpardiso->nrhs; i++) {
3672254c79cSStefano Zampini       PetscInt j;
368ad540459SPierre Jolivet       for (j = 0; j < mpardiso->schur_size; j++) whole[m + mpardiso->schur_idxs[j]] = schur[p + j];
3692254c79cSStefano Zampini       m += mpardiso->n;
3702254c79cSStefano Zampini       p += mpardiso->schur_size;
3712254c79cSStefano Zampini     }
3722254c79cSStefano Zampini   }
3733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3742254c79cSStefano Zampini }
3752254c79cSStefano Zampini 
MatSolve_MKL_PARDISO(Mat A,Vec b,Vec x)37666976f2fSJacob Faibussowitsch static PetscErrorCode MatSolve_MKL_PARDISO(Mat A, Vec b, Vec x)
377d71ae5a4SJacob Faibussowitsch {
378b3cb21ddSStefano Zampini   Mat_MKL_PARDISO   *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
379d9ca1df4SBarry Smith   PetscScalar       *xarray;
380d9ca1df4SBarry Smith   const PetscScalar *barray;
381d37c1e25SSatish Balay 
382d37c1e25SSatish Balay   PetscFunctionBegin;
383d615f992SSatish Balay   mat_mkl_pardiso->nrhs = 1;
3849566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(x, &xarray));
3859566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(b, &barray));
386d37c1e25SSatish Balay 
387dfa13051SBarry Smith   if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
388dfa13051SBarry Smith   else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;
389ad9a362dSStefano Zampini 
390d5c7275dSStefano Zampini   if (barray == xarray) { /* if the two vectors share the same memory */
391d5c7275dSStefano Zampini     PetscScalar *work;
392d5c7275dSStefano Zampini     if (!mat_mkl_pardiso->schur_work) {
3939566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mat_mkl_pardiso->n, &work));
394d5c7275dSStefano Zampini     } else {
395d5c7275dSStefano Zampini       work = mat_mkl_pardiso->schur_work;
396d5c7275dSStefano Zampini     }
397d5c7275dSStefano Zampini     mat_mkl_pardiso->iparm[6 - 1] = 1;
398aaaa354bSBarry Smith     PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, NULL,
399aaaa354bSBarry Smith                                  &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)xarray, (void *)work, &mat_mkl_pardiso->err));
40048a46eb9SPierre Jolivet     if (!mat_mkl_pardiso->schur_work) PetscCall(PetscFree(work));
401d5c7275dSStefano Zampini   } else {
402d5c7275dSStefano Zampini     mat_mkl_pardiso->iparm[6 - 1] = 0;
403aaaa354bSBarry Smith     PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja,
404aaaa354bSBarry Smith                                  mat_mkl_pardiso->perm, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err));
405d5c7275dSStefano Zampini   }
4069566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(b, &barray));
407d37c1e25SSatish Balay 
408e54da482SJose E. Roman   PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err);
4092254c79cSStefano Zampini 
41059f888c2SStefano Zampini   if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */
411ee70c6cfSStefano Zampini     if (!mat_mkl_pardiso->solve_interior) {
4122254c79cSStefano Zampini       PetscInt shift = mat_mkl_pardiso->schur_size;
4132254c79cSStefano Zampini 
4149566063dSJacob Faibussowitsch       PetscCall(MatFactorFactorizeSchurComplement(A));
4152254c79cSStefano Zampini       /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
4167c15a11cSStefano Zampini       if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) shift = 0;
4172254c79cSStefano Zampini 
4182254c79cSStefano Zampini       /* solve Schur complement */
4199566063dSJacob Faibussowitsch       PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work, PETSC_TRUE));
4209566063dSJacob Faibussowitsch       PetscCall(MatMKLPardisoSolveSchur_Private(A, mat_mkl_pardiso->schur_work, mat_mkl_pardiso->schur_work + shift));
4219566063dSJacob Faibussowitsch       PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work + shift, PETSC_FALSE));
422b3cb21ddSStefano Zampini     } else { /* if we are solving for the interior problem, any value in barray[schur] forward-substituted to xarray[schur] will be neglected */
423ad9a362dSStefano Zampini       PetscInt i;
424ad540459SPierre Jolivet       for (i = 0; i < mat_mkl_pardiso->schur_size; i++) xarray[mat_mkl_pardiso->schur_idxs[i]] = 0.;
425ad9a362dSStefano Zampini     }
426d5c7275dSStefano Zampini 
4272254c79cSStefano Zampini     /* expansion phase */
4282254c79cSStefano Zampini     mat_mkl_pardiso->iparm[6 - 1] = 1;
4292254c79cSStefano Zampini     mat_mkl_pardiso->phase        = JOB_SOLVE_BACKWARD_SUBSTITUTION;
430aaaa354bSBarry Smith     PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja,
431aaaa354bSBarry Smith                                  mat_mkl_pardiso->perm, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)xarray, (void *)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
432aaaa354bSBarry Smith                                  &mat_mkl_pardiso->err));
433e54da482SJose E. Roman     PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err);
4342254c79cSStefano Zampini     mat_mkl_pardiso->iparm[6 - 1] = 0;
4352254c79cSStefano Zampini   }
4369566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(x, &xarray));
437d615f992SSatish Balay   mat_mkl_pardiso->CleanUp = PETSC_TRUE;
4383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
439d37c1e25SSatish Balay }
440d37c1e25SSatish Balay 
MatForwardSolve_MKL_PARDISO(Mat A,Vec b,Vec x)441bd5dbebeSNils Friess static PetscErrorCode MatForwardSolve_MKL_PARDISO(Mat A, Vec b, Vec x)
442bd5dbebeSNils Friess {
443bd5dbebeSNils Friess   Mat_MKL_PARDISO   *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
444bd5dbebeSNils Friess   PetscScalar       *xarray;
445bd5dbebeSNils Friess   const PetscScalar *barray;
446bd5dbebeSNils Friess 
447bd5dbebeSNils Friess   PetscFunctionBegin;
448bd5dbebeSNils Friess   PetscCheck(!mat_mkl_pardiso->schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Forward substitution not supported with Schur complement");
449bd5dbebeSNils Friess 
450bd5dbebeSNils Friess   mat_mkl_pardiso->nrhs = 1;
451bd5dbebeSNils Friess   PetscCall(VecGetArrayWrite(x, &xarray));
452bd5dbebeSNils Friess   PetscCall(VecGetArrayRead(b, &barray));
453bd5dbebeSNils Friess 
454bd5dbebeSNils Friess   mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;
455bd5dbebeSNils Friess 
456aaaa354bSBarry Smith   PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
457aaaa354bSBarry Smith                                &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err));
458e54da482SJose E. Roman   PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err);
459bd5dbebeSNils Friess 
460bd5dbebeSNils Friess   PetscCall(VecRestoreArrayRead(b, &barray));
461bd5dbebeSNils Friess   PetscCall(VecRestoreArrayWrite(x, &xarray));
462bd5dbebeSNils Friess   mat_mkl_pardiso->CleanUp = PETSC_TRUE;
463bd5dbebeSNils Friess   PetscFunctionReturn(PETSC_SUCCESS);
464bd5dbebeSNils Friess }
465bd5dbebeSNils Friess 
MatBackwardSolve_MKL_PARDISO(Mat A,Vec b,Vec x)466bd5dbebeSNils Friess static PetscErrorCode MatBackwardSolve_MKL_PARDISO(Mat A, Vec b, Vec x)
467bd5dbebeSNils Friess {
468bd5dbebeSNils Friess   Mat_MKL_PARDISO   *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
469bd5dbebeSNils Friess   PetscScalar       *xarray;
470bd5dbebeSNils Friess   const PetscScalar *barray;
471bd5dbebeSNils Friess 
472bd5dbebeSNils Friess   PetscFunctionBegin;
473bd5dbebeSNils Friess   PetscCheck(!mat_mkl_pardiso->schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Backward substitution not supported with Schur complement");
474bd5dbebeSNils Friess 
475bd5dbebeSNils Friess   mat_mkl_pardiso->nrhs = 1;
476bd5dbebeSNils Friess   PetscCall(VecGetArrayWrite(x, &xarray));
477bd5dbebeSNils Friess   PetscCall(VecGetArrayRead(b, &barray));
478bd5dbebeSNils Friess 
479bd5dbebeSNils Friess   mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION;
480bd5dbebeSNils Friess 
481aaaa354bSBarry Smith   PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
482aaaa354bSBarry Smith                                &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err));
483e54da482SJose E. Roman   PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err);
484bd5dbebeSNils Friess 
485bd5dbebeSNils Friess   PetscCall(VecRestoreArrayRead(b, &barray));
486bd5dbebeSNils Friess   PetscCall(VecRestoreArrayWrite(x, &xarray));
487bd5dbebeSNils Friess   mat_mkl_pardiso->CleanUp = PETSC_TRUE;
488bd5dbebeSNils Friess   PetscFunctionReturn(PETSC_SUCCESS);
489bd5dbebeSNils Friess }
490bd5dbebeSNils Friess 
MatSolveTranspose_MKL_PARDISO(Mat A,Vec b,Vec x)49166976f2fSJacob Faibussowitsch static PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A, Vec b, Vec x)
492d71ae5a4SJacob Faibussowitsch {
493dfa13051SBarry Smith   Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
4942ee68a28SStefano Zampini   PetscInt         oiparm12;
495d37c1e25SSatish Balay 
496d37c1e25SSatish Balay   PetscFunctionBegin;
4972ee68a28SStefano Zampini   oiparm12                       = mat_mkl_pardiso->iparm[12 - 1];
498d615f992SSatish Balay   mat_mkl_pardiso->iparm[12 - 1] = 2;
4999566063dSJacob Faibussowitsch   PetscCall(MatSolve_MKL_PARDISO(A, b, x));
5002ee68a28SStefano Zampini   mat_mkl_pardiso->iparm[12 - 1] = oiparm12;
5013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
502d37c1e25SSatish Balay }
503d37c1e25SSatish Balay 
MatMatSolve_MKL_PARDISO(Mat A,Mat B,Mat X)50466976f2fSJacob Faibussowitsch static PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A, Mat B, Mat X)
505d71ae5a4SJacob Faibussowitsch {
506bd5dbebeSNils Friess   Mat_MKL_PARDISO   *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
5071683a169SBarry Smith   const PetscScalar *barray;
5081683a169SBarry Smith   PetscScalar       *xarray;
509d37c1e25SSatish Balay   PetscBool          flg;
510d37c1e25SSatish Balay 
511d37c1e25SSatish Balay   PetscFunctionBegin;
5129566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)B, MATSEQDENSE, &flg));
51328b400f6SJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATSEQDENSE matrix");
514f9fb9879SHong Zhang   if (X != B) {
5159566063dSJacob Faibussowitsch     PetscCall(PetscObjectBaseTypeCompare((PetscObject)X, MATSEQDENSE, &flg));
51628b400f6SJacob Faibussowitsch     PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATSEQDENSE matrix");
517f9fb9879SHong Zhang   }
518d37c1e25SSatish Balay 
5199566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, NULL, (PetscInt *)&mat_mkl_pardiso->nrhs));
520d37c1e25SSatish Balay 
521d615f992SSatish Balay   if (mat_mkl_pardiso->nrhs > 0) {
5229566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArrayRead(B, &barray));
5239566063dSJacob Faibussowitsch     PetscCall(MatDenseGetArrayWrite(X, &xarray));
524d37c1e25SSatish Balay 
52508401ef6SPierre Jolivet     PetscCheck(barray != xarray, PETSC_COMM_SELF, PETSC_ERR_SUP, "B and X cannot share the same memory location");
526dfa13051SBarry Smith     if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT;
527dfa13051SBarry Smith     else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION;
528ad9a362dSStefano Zampini 
529aaaa354bSBarry Smith     PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja,
530aaaa354bSBarry Smith                                  mat_mkl_pardiso->perm, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err));
531e54da482SJose E. Roman     PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err);
5322254c79cSStefano Zampini 
5339566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArrayRead(B, &barray));
5342254c79cSStefano Zampini     if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */
5352254c79cSStefano Zampini       PetscScalar *o_schur_work = NULL;
536ee70c6cfSStefano Zampini 
537ee70c6cfSStefano Zampini       /* solve Schur complement */
538ee70c6cfSStefano Zampini       if (!mat_mkl_pardiso->solve_interior) {
5392254c79cSStefano Zampini         PetscInt shift = mat_mkl_pardiso->schur_size * mat_mkl_pardiso->nrhs, scale;
5402254c79cSStefano Zampini         PetscInt mem   = mat_mkl_pardiso->n * mat_mkl_pardiso->nrhs;
5412254c79cSStefano Zampini 
5429566063dSJacob Faibussowitsch         PetscCall(MatFactorFactorizeSchurComplement(A));
5432254c79cSStefano Zampini         /* allocate extra memory if it is needed */
5442254c79cSStefano Zampini         scale = 1;
545b3cb21ddSStefano Zampini         if (A->schur_status == MAT_FACTOR_SCHUR_INVERTED) scale = 2;
5462254c79cSStefano Zampini         mem *= scale;
5472254c79cSStefano Zampini         if (mem > mat_mkl_pardiso->schur_work_size) {
5482254c79cSStefano Zampini           o_schur_work = mat_mkl_pardiso->schur_work;
5499566063dSJacob Faibussowitsch           PetscCall(PetscMalloc1(mem, &mat_mkl_pardiso->schur_work));
5502254c79cSStefano Zampini         }
5512254c79cSStefano Zampini         /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */
552b3cb21ddSStefano Zampini         if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) shift = 0;
5539566063dSJacob Faibussowitsch         PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work, PETSC_TRUE));
5549566063dSJacob Faibussowitsch         PetscCall(MatMKLPardisoSolveSchur_Private(A, mat_mkl_pardiso->schur_work, mat_mkl_pardiso->schur_work + shift));
5559566063dSJacob Faibussowitsch         PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work + shift, PETSC_FALSE));
556b3cb21ddSStefano Zampini       } else { /* if we are solving for the interior problem, any value in barray[schur,n] forward-substituted to xarray[schur,n] will be neglected */
557ad9a362dSStefano Zampini         PetscInt i, n, m = 0;
558ad9a362dSStefano Zampini         for (n = 0; n < mat_mkl_pardiso->nrhs; n++) {
559ad540459SPierre Jolivet           for (i = 0; i < mat_mkl_pardiso->schur_size; i++) xarray[mat_mkl_pardiso->schur_idxs[i] + m] = 0.;
560ad9a362dSStefano Zampini           m += mat_mkl_pardiso->n;
561ad9a362dSStefano Zampini         }
562ad9a362dSStefano Zampini       }
563d5c7275dSStefano Zampini 
5642254c79cSStefano Zampini       /* expansion phase */
5652254c79cSStefano Zampini       mat_mkl_pardiso->iparm[6 - 1] = 1;
5662254c79cSStefano Zampini       mat_mkl_pardiso->phase        = JOB_SOLVE_BACKWARD_SUBSTITUTION;
567aaaa354bSBarry Smith       PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja,
568aaaa354bSBarry Smith                                    mat_mkl_pardiso->perm, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)xarray, (void *)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */
569aaaa354bSBarry Smith                                    &mat_mkl_pardiso->err));
5701d27aa22SBarry Smith       if (o_schur_work) { /* restore original Schur_work (minimal size) */
5719566063dSJacob Faibussowitsch         PetscCall(PetscFree(mat_mkl_pardiso->schur_work));
5722254c79cSStefano Zampini         mat_mkl_pardiso->schur_work = o_schur_work;
5732254c79cSStefano Zampini       }
574e54da482SJose E. Roman       PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err);
5752254c79cSStefano Zampini       mat_mkl_pardiso->iparm[6 - 1] = 0;
5762254c79cSStefano Zampini     }
5779566063dSJacob Faibussowitsch     PetscCall(MatDenseRestoreArrayWrite(X, &xarray));
578d37c1e25SSatish Balay   }
579d615f992SSatish Balay   mat_mkl_pardiso->CleanUp = PETSC_TRUE;
5803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
581d37c1e25SSatish Balay }
582d37c1e25SSatish Balay 
MatFactorNumeric_MKL_PARDISO(Mat F,Mat A,const MatFactorInfo * info)58366976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F, Mat A, const MatFactorInfo *info)
584d71ae5a4SJacob Faibussowitsch {
585bd5dbebeSNils Friess   Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;
586d37c1e25SSatish Balay 
587d37c1e25SSatish Balay   PetscFunctionBegin;
588d615f992SSatish Balay   mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
5899566063dSJacob Faibussowitsch   PetscCall((*mat_mkl_pardiso->Convert)(A, mat_mkl_pardiso->needsym, MAT_REUSE_MATRIX, &mat_mkl_pardiso->freeaij, &mat_mkl_pardiso->nz, &mat_mkl_pardiso->ia, &mat_mkl_pardiso->ja, (PetscScalar **)&mat_mkl_pardiso->a));
590d37c1e25SSatish Balay 
591d615f992SSatish Balay   mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION;
592aaaa354bSBarry Smith   PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
593aaaa354bSBarry Smith                                &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, NULL, (void *)mat_mkl_pardiso->schur, &mat_mkl_pardiso->err));
594e54da482SJose E. Roman   PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err);
595d37c1e25SSatish Balay 
5967c15a11cSStefano Zampini   /* report flops */
59748a46eb9SPierre Jolivet   if (mat_mkl_pardiso->iparm[18] > 0) PetscCall(PetscLogFlops(PetscPowRealInt(10., 6) * mat_mkl_pardiso->iparm[18]));
5987c15a11cSStefano Zampini 
599b3cb21ddSStefano Zampini   if (F->schur) { /* schur output from pardiso is in row major format */
6007c15a11cSStefano Zampini #if defined(PETSC_HAVE_CUDA)
601c70f7ee4SJunchao Zhang     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
6027c15a11cSStefano Zampini #endif
6039566063dSJacob Faibussowitsch     PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
6049566063dSJacob Faibussowitsch     PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
6052254c79cSStefano Zampini   }
606d615f992SSatish Balay   mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN;
607d615f992SSatish Balay   mat_mkl_pardiso->CleanUp  = PETSC_TRUE;
6083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
609d37c1e25SSatish Balay }
610d37c1e25SSatish Balay 
MatSetFromOptions_MKL_PARDISO(Mat F,Mat A)61166976f2fSJacob Faibussowitsch static PetscErrorCode MatSetFromOptions_MKL_PARDISO(Mat F, Mat A)
612d71ae5a4SJacob Faibussowitsch {
613dfa13051SBarry Smith   Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;
614560a203cSprj-   PetscInt         icntl, bs, threads = 1;
615d37c1e25SSatish Balay   PetscBool        flg;
616d37c1e25SSatish Balay 
617d37c1e25SSatish Balay   PetscFunctionBegin;
61826cc229bSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MKL_PARDISO Options", "Mat");
619d37c1e25SSatish Balay 
6201d27aa22SBarry Smith   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_65", "Suggested number of threads to use within MKL PARDISO", "None", threads, &threads, &flg));
62161ce1fc3SStefano Zampini   if (flg) PetscSetMKL_PARDISOThreads((int)threads);
62261ce1fc3SStefano Zampini 
6239566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_66", "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time", "None", mat_mkl_pardiso->maxfct, &icntl, &flg));
624d615f992SSatish Balay   if (flg) mat_mkl_pardiso->maxfct = icntl;
625d37c1e25SSatish Balay 
6269566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_67", "Indicates the actual matrix for the solution phase", "None", mat_mkl_pardiso->mnum, &icntl, &flg));
627d615f992SSatish Balay   if (flg) mat_mkl_pardiso->mnum = icntl;
628d37c1e25SSatish Balay 
6299566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_68", "Message level information", "None", mat_mkl_pardiso->msglvl, &icntl, &flg));
630d615f992SSatish Balay   if (flg) mat_mkl_pardiso->msglvl = icntl;
631d37c1e25SSatish Balay 
6329566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_69", "Defines the matrix type", "None", mat_mkl_pardiso->mtype, &icntl, &flg));
633d37c1e25SSatish Balay   if (flg) {
6347b37fee5SLisandro Dalcin     void *pt[IPARM_SIZE];
635d615f992SSatish Balay     mat_mkl_pardiso->mtype = icntl;
636560a203cSprj-     icntl                  = mat_mkl_pardiso->iparm[34];
637560a203cSprj-     bs                     = mat_mkl_pardiso->iparm[36];
6387b37fee5SLisandro Dalcin     MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
639d37c1e25SSatish Balay #if defined(PETSC_USE_REAL_SINGLE)
640d615f992SSatish Balay     mat_mkl_pardiso->iparm[27] = 1;
641d37c1e25SSatish Balay #else
642d615f992SSatish Balay     mat_mkl_pardiso->iparm[27] = 0;
643d37c1e25SSatish Balay #endif
644560a203cSprj-     mat_mkl_pardiso->iparm[34] = icntl;
645560a203cSprj-     mat_mkl_pardiso->iparm[36] = bs;
646d37c1e25SSatish Balay   }
647d37c1e25SSatish Balay 
6489566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_1", "Use default values (if 0)", "None", mat_mkl_pardiso->iparm[0], &icntl, &flg));
649ee70c6cfSStefano Zampini   if (flg) mat_mkl_pardiso->iparm[0] = icntl;
650ee70c6cfSStefano Zampini 
6519566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_2", "Fill-in reducing ordering for the input matrix", "None", mat_mkl_pardiso->iparm[1], &icntl, &flg));
652d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[1] = icntl;
653d37c1e25SSatish Balay 
6549566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_4", "Preconditioned CGS/CG", "None", mat_mkl_pardiso->iparm[3], &icntl, &flg));
655d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[3] = icntl;
656d37c1e25SSatish Balay 
6579566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_5", "User permutation", "None", mat_mkl_pardiso->iparm[4], &icntl, &flg));
658d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[4] = icntl;
659d37c1e25SSatish Balay 
6609566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_6", "Write solution on x", "None", mat_mkl_pardiso->iparm[5], &icntl, &flg));
661d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[5] = icntl;
662d37c1e25SSatish Balay 
6639566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_8", "Iterative refinement step", "None", mat_mkl_pardiso->iparm[7], &icntl, &flg));
664d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[7] = icntl;
665d37c1e25SSatish Balay 
6669566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_10", "Pivoting perturbation", "None", mat_mkl_pardiso->iparm[9], &icntl, &flg));
667d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[9] = icntl;
668d37c1e25SSatish Balay 
6699566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_11", "Scaling vectors", "None", mat_mkl_pardiso->iparm[10], &icntl, &flg));
670d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[10] = icntl;
671d37c1e25SSatish Balay 
6729566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_12", "Solve with transposed or conjugate transposed matrix A", "None", mat_mkl_pardiso->iparm[11], &icntl, &flg));
673d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[11] = icntl;
674d37c1e25SSatish Balay 
6759566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_13", "Improved accuracy using (non-) symmetric weighted matching", "None", mat_mkl_pardiso->iparm[12], &icntl, &flg));
676d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[12] = icntl;
677d37c1e25SSatish Balay 
6789566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_18", "Numbers of non-zero elements", "None", mat_mkl_pardiso->iparm[17], &icntl, &flg));
679d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[17] = icntl;
680d37c1e25SSatish Balay 
6819566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_19", "Report number of floating point operations (0 to disable)", "None", mat_mkl_pardiso->iparm[18], &icntl, &flg));
682d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[18] = icntl;
683d37c1e25SSatish Balay 
6849566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_21", "Pivoting for symmetric indefinite matrices", "None", mat_mkl_pardiso->iparm[20], &icntl, &flg));
685d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[20] = icntl;
686d37c1e25SSatish Balay 
6879566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_24", "Parallel factorization control", "None", mat_mkl_pardiso->iparm[23], &icntl, &flg));
688d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[23] = icntl;
689d37c1e25SSatish Balay 
6909566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_25", "Parallel forward/backward solve control", "None", mat_mkl_pardiso->iparm[24], &icntl, &flg));
691d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[24] = icntl;
692d37c1e25SSatish Balay 
6939566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_27", "Matrix checker", "None", mat_mkl_pardiso->iparm[26], &icntl, &flg));
694d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[26] = icntl;
695d37c1e25SSatish Balay 
6969566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_31", "Partial solve and computing selected components of the solution vectors", "None", mat_mkl_pardiso->iparm[30], &icntl, &flg));
697d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[30] = icntl;
698d37c1e25SSatish Balay 
6999566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_34", "Optimal number of threads for conditional numerical reproducibility (CNR) mode", "None", mat_mkl_pardiso->iparm[33], &icntl, &flg));
700d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[33] = icntl;
701d37c1e25SSatish Balay 
7021d27aa22SBarry Smith   PetscCall(PetscOptionsInt("-mat_mkl_pardiso_60", "Intel MKL PARDISO mode", "None", mat_mkl_pardiso->iparm[59], &icntl, &flg));
703d615f992SSatish Balay   if (flg) mat_mkl_pardiso->iparm[59] = icntl;
704d37c1e25SSatish Balay   PetscOptionsEnd();
7053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
706d37c1e25SSatish Balay }
707d37c1e25SSatish Balay 
MatFactorMKL_PARDISOInitialize_Private(Mat A,MatFactorType ftype,Mat_MKL_PARDISO * mat_mkl_pardiso)70866976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso)
709d71ae5a4SJacob Faibussowitsch {
71051d8ba46SPierre Jolivet   PetscInt  i, bs;
71151d8ba46SPierre Jolivet   PetscBool match;
712d37c1e25SSatish Balay 
713d37c1e25SSatish Balay   PetscFunctionBegin;
71451d8ba46SPierre Jolivet   for (i = 0; i < IPARM_SIZE; i++) mat_mkl_pardiso->iparm[i] = 0;
71551d8ba46SPierre Jolivet   for (i = 0; i < IPARM_SIZE; i++) mat_mkl_pardiso->pt[i] = 0;
716560a203cSprj- #if defined(PETSC_USE_REAL_SINGLE)
717560a203cSprj-   mat_mkl_pardiso->iparm[27] = 1;
718560a203cSprj- #else
719560a203cSprj-   mat_mkl_pardiso->iparm[27] = 0;
720560a203cSprj- #endif
72139bb8a47SPascal Tremblay   /* Default options for both sym and unsym */
722da81f932SPierre Jolivet   mat_mkl_pardiso->iparm[0]  = 1;  /* Solver default parameters overridden with provided by iparm */
72339bb8a47SPascal Tremblay   mat_mkl_pardiso->iparm[1]  = 2;  /* Metis reordering */
72439bb8a47SPascal Tremblay   mat_mkl_pardiso->iparm[5]  = 0;  /* Write solution into x */
725de1ba746SStefano Zampini   mat_mkl_pardiso->iparm[7]  = 0;  /* Max number of iterative refinement steps */
72639bb8a47SPascal Tremblay   mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */
72739bb8a47SPascal Tremblay   mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */
72839bb8a47SPascal Tremblay #if 0
72939bb8a47SPascal Tremblay   mat_mkl_pardiso->iparm[23] =  1; /* Parallel factorization control*/
73039bb8a47SPascal Tremblay #endif
7319566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &match, MATSEQBAIJ, MATSEQSBAIJ, ""));
7329566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(A, &bs));
73351d8ba46SPierre Jolivet   if (!match || bs == 1) {
73439bb8a47SPascal Tremblay     mat_mkl_pardiso->iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */
73551d8ba46SPierre Jolivet     mat_mkl_pardiso->n         = A->rmap->N;
73651d8ba46SPierre Jolivet   } else {
73751d8ba46SPierre Jolivet     mat_mkl_pardiso->iparm[34] = 0; /* Cluster Sparse Solver use Fortran-style indexing for ia and ja arrays */
73851d8ba46SPierre Jolivet     mat_mkl_pardiso->iparm[36] = bs;
73951d8ba46SPierre Jolivet     mat_mkl_pardiso->n         = A->rmap->N / bs;
74051d8ba46SPierre Jolivet   }
7419dddd249SSatish Balay   mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on rank-0 */
74239bb8a47SPascal Tremblay 
743d615f992SSatish Balay   mat_mkl_pardiso->CleanUp = PETSC_FALSE;
74439bb8a47SPascal Tremblay   mat_mkl_pardiso->maxfct  = 1; /* Maximum number of numerical factorizations. */
74539bb8a47SPascal Tremblay   mat_mkl_pardiso->mnum    = 1; /* Which factorization to use. */
74639bb8a47SPascal Tremblay   mat_mkl_pardiso->msglvl  = 0; /* 0: do not print 1: Print statistical information in file */
74739bb8a47SPascal Tremblay   mat_mkl_pardiso->phase   = -1;
74839bb8a47SPascal Tremblay   mat_mkl_pardiso->err     = 0;
74939bb8a47SPascal Tremblay 
750d615f992SSatish Balay   mat_mkl_pardiso->nrhs  = 1;
751d615f992SSatish Balay   mat_mkl_pardiso->err   = 0;
752d615f992SSatish Balay   mat_mkl_pardiso->phase = -1;
7530f21f8ebSPascal Tremblay 
7540f21f8ebSPascal Tremblay   if (ftype == MAT_FACTOR_LU) {
75539bb8a47SPascal Tremblay     mat_mkl_pardiso->iparm[9]  = 13; /* Perturb the pivot elements with 1E-13 */
75639bb8a47SPascal Tremblay     mat_mkl_pardiso->iparm[10] = 1;  /* Use nonsymmetric permutation and scaling MPS */
75739bb8a47SPascal Tremblay     mat_mkl_pardiso->iparm[12] = 1;  /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
7580f21f8ebSPascal Tremblay   } else {
7597c15a11cSStefano Zampini     mat_mkl_pardiso->iparm[9]  = 8; /* Perturb the pivot elements with 1E-8 */
76039bb8a47SPascal Tremblay     mat_mkl_pardiso->iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */
76139bb8a47SPascal Tremblay     mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */
76239bb8a47SPascal Tremblay #if defined(PETSC_USE_DEBUG)
76339bb8a47SPascal Tremblay     mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */
76439bb8a47SPascal Tremblay #endif
76539bb8a47SPascal Tremblay   }
7669566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->N * sizeof(INT_TYPE), &mat_mkl_pardiso->perm));
7672254c79cSStefano Zampini   mat_mkl_pardiso->schur_size = 0;
7683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
769d37c1e25SSatish Balay }
770d37c1e25SSatish Balay 
MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F,Mat A,const MatFactorInfo * info)77166976f2fSJacob Faibussowitsch static PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F, Mat A, const MatFactorInfo *info)
772d71ae5a4SJacob Faibussowitsch {
773dfa13051SBarry Smith   Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;
774d37c1e25SSatish Balay 
775d37c1e25SSatish Balay   PetscFunctionBegin;
776d615f992SSatish Balay   mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN;
77726cc229bSBarry Smith   PetscCall(MatSetFromOptions_MKL_PARDISO(F, A));
778a578bbc7SStefano Zampini   /* throw away any previously computed structure */
779de1ba746SStefano Zampini   if (mat_mkl_pardiso->freeaij) {
7809566063dSJacob Faibussowitsch     PetscCall(PetscFree2(mat_mkl_pardiso->ia, mat_mkl_pardiso->ja));
78148a46eb9SPierre Jolivet     if (mat_mkl_pardiso->iparm[34] == 1) PetscCall(PetscFree(mat_mkl_pardiso->a));
78251d8ba46SPierre Jolivet   }
7839566063dSJacob Faibussowitsch   PetscCall((*mat_mkl_pardiso->Convert)(A, mat_mkl_pardiso->needsym, MAT_INITIAL_MATRIX, &mat_mkl_pardiso->freeaij, &mat_mkl_pardiso->nz, &mat_mkl_pardiso->ia, &mat_mkl_pardiso->ja, (PetscScalar **)&mat_mkl_pardiso->a));
78451d8ba46SPierre Jolivet   if (mat_mkl_pardiso->iparm[34] == 1) mat_mkl_pardiso->n = A->rmap->N;
78551d8ba46SPierre Jolivet   else mat_mkl_pardiso->n = A->rmap->N / A->rmap->bs;
786d37c1e25SSatish Balay 
787d615f992SSatish Balay   mat_mkl_pardiso->phase = JOB_ANALYSIS;
788d37c1e25SSatish Balay 
7897c15a11cSStefano Zampini   /* reset flops counting if requested */
7907c15a11cSStefano Zampini   if (mat_mkl_pardiso->iparm[18]) mat_mkl_pardiso->iparm[18] = -1;
7917c15a11cSStefano Zampini 
792aaaa354bSBarry Smith   PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm,
793aaaa354bSBarry Smith                                &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, NULL, NULL, &mat_mkl_pardiso->err));
794e54da482SJose E. Roman   PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err);
795d37c1e25SSatish Balay 
796d615f992SSatish Balay   mat_mkl_pardiso->CleanUp = PETSC_TRUE;
79739bb8a47SPascal Tremblay 
798dfa13051SBarry Smith   if (F->factortype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO;
799dfa13051SBarry Smith   else F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO;
800dfa13051SBarry Smith 
801d615f992SSatish Balay   F->ops->solve          = MatSolve_MKL_PARDISO;
802d615f992SSatish Balay   F->ops->solvetranspose = MatSolveTranspose_MKL_PARDISO;
803d615f992SSatish Balay   F->ops->matsolve       = MatMatSolve_MKL_PARDISO;
804bd5dbebeSNils Friess   if (F->factortype == MAT_FACTOR_LU || (!PetscDefined(USE_COMPLEX) && F->factortype == MAT_FACTOR_CHOLESKY && A->spd == PETSC_BOOL3_TRUE)) {
805bd5dbebeSNils Friess     F->ops->backwardsolve = MatBackwardSolve_MKL_PARDISO;
806bd5dbebeSNils Friess     F->ops->forwardsolve  = MatForwardSolve_MKL_PARDISO;
807bd5dbebeSNils Friess   }
8083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
809d37c1e25SSatish Balay }
810d37c1e25SSatish Balay 
MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)81166976f2fSJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
812d71ae5a4SJacob Faibussowitsch {
81339bb8a47SPascal Tremblay   PetscFunctionBegin;
8149566063dSJacob Faibussowitsch   PetscCall(MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info));
8153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
81639bb8a47SPascal Tremblay }
81739bb8a47SPascal Tremblay 
81819d49a3bSHong Zhang #if !defined(PETSC_USE_COMPLEX)
MatGetInertia_MKL_PARDISO(Mat F,PetscInt * nneg,PetscInt * nzero,PetscInt * npos)81966976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInertia_MKL_PARDISO(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
820d71ae5a4SJacob Faibussowitsch {
821dfa13051SBarry Smith   Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;
82219d49a3bSHong Zhang 
82319d49a3bSHong Zhang   PetscFunctionBegin;
82419d49a3bSHong Zhang   if (nneg) *nneg = mat_mkl_pardiso->iparm[22];
82519d49a3bSHong Zhang   if (npos) *npos = mat_mkl_pardiso->iparm[21];
82619d49a3bSHong Zhang   if (nzero) *nzero = F->rmap->N - (mat_mkl_pardiso->iparm[22] + mat_mkl_pardiso->iparm[21]);
8273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
82819d49a3bSHong Zhang }
82919d49a3bSHong Zhang #endif
83019d49a3bSHong Zhang 
MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F,Mat A,IS r,const MatFactorInfo * info)83166976f2fSJacob Faibussowitsch static PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F, Mat A, IS r, const MatFactorInfo *info)
832d71ae5a4SJacob Faibussowitsch {
83339bb8a47SPascal Tremblay   PetscFunctionBegin;
8349566063dSJacob Faibussowitsch   PetscCall(MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info));
83519d49a3bSHong Zhang   F->ops->getinertia = NULL;
8368ca48ce9SPierre Jolivet #if !defined(PETSC_USE_COMPLEX)
83719d49a3bSHong Zhang   F->ops->getinertia = MatGetInertia_MKL_PARDISO;
83819d49a3bSHong Zhang #endif
8393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
84039bb8a47SPascal Tremblay }
841d37c1e25SSatish Balay 
MatView_MKL_PARDISO(Mat A,PetscViewer viewer)84266976f2fSJacob Faibussowitsch static PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer)
843d71ae5a4SJacob Faibussowitsch {
8449f196a02SMartin Diehl   PetscBool         isascii;
845d37c1e25SSatish Balay   PetscViewerFormat format;
846dfa13051SBarry Smith   Mat_MKL_PARDISO  *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
847d37c1e25SSatish Balay   PetscInt          i;
848d37c1e25SSatish Balay 
849d37c1e25SSatish Balay   PetscFunctionBegin;
8503ba16761SJacob Faibussowitsch   if (A->ops->solve != MatSolve_MKL_PARDISO) PetscFunctionReturn(PETSC_SUCCESS);
851d37c1e25SSatish Balay 
8529f196a02SMartin Diehl   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
8539f196a02SMartin Diehl   if (isascii) {
8549566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
855d37c1e25SSatish Balay     if (format == PETSC_VIEWER_ASCII_INFO) {
8561d27aa22SBarry Smith       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO run parameters:\n"));
857e54da482SJose E. Roman       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO phase:             %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->phase));
858e54da482SJose E. Roman       for (i = 1; i <= 64; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO iparm[%" PetscInt_FMT "]:     %" PetscInt_FMT "\n", i, (PetscInt)mat_mkl_pardiso->iparm[i - 1]));
859e54da482SJose E. Roman       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO maxfct:     %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->maxfct));
860e54da482SJose E. Roman       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO mnum:     %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->mnum));
861e54da482SJose E. Roman       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO mtype:     %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->mtype));
862e54da482SJose E. Roman       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO n:     %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->n));
863e54da482SJose E. Roman       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO nrhs:     %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->nrhs));
864e54da482SJose E. Roman       PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO msglvl:     %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->msglvl));
865d37c1e25SSatish Balay     }
866d37c1e25SSatish Balay   }
8673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
868d37c1e25SSatish Balay }
869d37c1e25SSatish Balay 
MatGetInfo_MKL_PARDISO(Mat A,MatInfoType flag,MatInfo * info)87066976f2fSJacob Faibussowitsch static PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info)
871d71ae5a4SJacob Faibussowitsch {
872dfa13051SBarry Smith   Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data;
873d37c1e25SSatish Balay 
874d37c1e25SSatish Balay   PetscFunctionBegin;
875d37c1e25SSatish Balay   info->block_size        = 1.0;
8767c15a11cSStefano Zampini   info->nz_used           = mat_mkl_pardiso->iparm[17];
8777c15a11cSStefano Zampini   info->nz_allocated      = mat_mkl_pardiso->iparm[17];
878d37c1e25SSatish Balay   info->nz_unneeded       = 0.0;
879d37c1e25SSatish Balay   info->assemblies        = 0.0;
880d37c1e25SSatish Balay   info->mallocs           = 0.0;
881d37c1e25SSatish Balay   info->memory            = 0.0;
882d37c1e25SSatish Balay   info->fill_ratio_given  = 0;
883d37c1e25SSatish Balay   info->fill_ratio_needed = 0;
884d37c1e25SSatish Balay   info->factor_mallocs    = 0;
8853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
886d37c1e25SSatish Balay }
887d37c1e25SSatish Balay 
MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F,PetscInt icntl,PetscInt ival)88866976f2fSJacob Faibussowitsch static PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F, PetscInt icntl, PetscInt ival)
889d71ae5a4SJacob Faibussowitsch {
890560a203cSprj-   PetscInt         backup, bs;
891dfa13051SBarry Smith   Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data;
8920f21f8ebSPascal Tremblay 
893d37c1e25SSatish Balay   PetscFunctionBegin;
894d37c1e25SSatish Balay   if (icntl <= 64) {
895d615f992SSatish Balay     mat_mkl_pardiso->iparm[icntl - 1] = ival;
896d37c1e25SSatish Balay   } else {
89730404b96SJose E. Roman     if (icntl == 65) PetscSetMKL_PARDISOThreads((int)ival);
898dfa13051SBarry Smith     else if (icntl == 66) mat_mkl_pardiso->maxfct = ival;
899dfa13051SBarry Smith     else if (icntl == 67) mat_mkl_pardiso->mnum = ival;
900dfa13051SBarry Smith     else if (icntl == 68) mat_mkl_pardiso->msglvl = ival;
901d37c1e25SSatish Balay     else if (icntl == 69) {
9027b37fee5SLisandro Dalcin       void *pt[IPARM_SIZE];
903560a203cSprj-       backup                 = mat_mkl_pardiso->iparm[34];
904560a203cSprj-       bs                     = mat_mkl_pardiso->iparm[36];
905d615f992SSatish Balay       mat_mkl_pardiso->mtype = ival;
9067b37fee5SLisandro Dalcin       MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm);
907d37c1e25SSatish Balay #if defined(PETSC_USE_REAL_SINGLE)
908d615f992SSatish Balay       mat_mkl_pardiso->iparm[27] = 1;
909d37c1e25SSatish Balay #else
910d615f992SSatish Balay       mat_mkl_pardiso->iparm[27] = 0;
911d37c1e25SSatish Balay #endif
912560a203cSprj-       mat_mkl_pardiso->iparm[34] = backup;
913560a203cSprj-       mat_mkl_pardiso->iparm[36] = bs;
914dfa13051SBarry Smith     } else if (icntl == 70) mat_mkl_pardiso->solve_interior = (PetscBool)!!ival;
915d37c1e25SSatish Balay   }
9163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
917d37c1e25SSatish Balay }
918d37c1e25SSatish Balay 
919d37c1e25SSatish Balay /*@
9201d27aa22SBarry Smith   MatMkl_PardisoSetCntl - Set MKL PARDISO <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2023-2/onemkl-pardiso-parallel-direct-sparse-solver-iface.html> parameters
921d37c1e25SSatish Balay 
922c3339decSBarry Smith   Logically Collective
923d37c1e25SSatish Balay 
924d37c1e25SSatish Balay   Input Parameters:
92511a5261eSBarry Smith + F     - the factored matrix obtained by calling `MatGetFactor()`
9261d27aa22SBarry Smith . icntl - index of MKL PARDISO parameter
9271d27aa22SBarry Smith - ival  - value of MKL PARDISO parameter
928d37c1e25SSatish Balay 
9293c7db156SBarry Smith   Options Database Key:
930147403d9SBarry Smith . -mat_mkl_pardiso_<icntl> <ival> - change the option numbered icntl to the value ival
931d37c1e25SSatish Balay 
932d37c1e25SSatish Balay   Level: beginner
933d37c1e25SSatish Balay 
9341cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSOLVERMKL_PARDISO`, `MatGetFactor()`
935d37c1e25SSatish Balay @*/
MatMkl_PardisoSetCntl(Mat F,PetscInt icntl,PetscInt ival)936d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMkl_PardisoSetCntl(Mat F, PetscInt icntl, PetscInt ival)
937d71ae5a4SJacob Faibussowitsch {
938d37c1e25SSatish Balay   PetscFunctionBegin;
939cac4c232SBarry Smith   PetscTryMethod(F, "MatMkl_PardisoSetCntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
9403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
941d37c1e25SSatish Balay }
942d37c1e25SSatish Balay 
943d37c1e25SSatish Balay /*MC
94411a5261eSBarry Smith   MATSOLVERMKL_PARDISO -  A matrix type providing direct solvers, LU, for
9451d27aa22SBarry Smith   `MATSEQAIJ` matrices via the external package MKL PARDISO
9461d27aa22SBarry Smith   <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2024-0/onemkl-pardiso-parallel-direct-sparse-solver-iface.html>.
947d37c1e25SSatish Balay 
9482ef1f0ffSBarry Smith   Use `-pc_type lu` `-pc_factor_mat_solver_type mkl_pardiso` to use this direct solver
949c2b89b5dSBarry Smith 
950d37c1e25SSatish Balay   Options Database Keys:
9511d27aa22SBarry Smith + -mat_mkl_pardiso_65 - Suggested number of threads to use within MKL PARDISO
952d615f992SSatish Balay . -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time
953d615f992SSatish Balay . -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase
9549b4359b8SBarry Smith . -mat_mkl_pardiso_68 - Message level information, use 1 to get detailed information on the solver options
955d615f992SSatish Balay . -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type
956d615f992SSatish Balay . -mat_mkl_pardiso_1  - Use default values
957d615f992SSatish Balay . -mat_mkl_pardiso_2  - Fill-in reducing ordering for the input matrix
958d615f992SSatish Balay . -mat_mkl_pardiso_4  - Preconditioned CGS/CG
959d615f992SSatish Balay . -mat_mkl_pardiso_5  - User permutation
960d615f992SSatish Balay . -mat_mkl_pardiso_6  - Write solution on x
961d615f992SSatish Balay . -mat_mkl_pardiso_8  - Iterative refinement step
962d615f992SSatish Balay . -mat_mkl_pardiso_10 - Pivoting perturbation
963d615f992SSatish Balay . -mat_mkl_pardiso_11 - Scaling vectors
964d615f992SSatish Balay . -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A
965d615f992SSatish Balay . -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching
966d615f992SSatish Balay . -mat_mkl_pardiso_18 - Numbers of non-zero elements
967d615f992SSatish Balay . -mat_mkl_pardiso_19 - Report number of floating point operations
968d615f992SSatish Balay . -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices
969d615f992SSatish Balay . -mat_mkl_pardiso_24 - Parallel factorization control
970d615f992SSatish Balay . -mat_mkl_pardiso_25 - Parallel forward/backward solve control
971d615f992SSatish Balay . -mat_mkl_pardiso_27 - Matrix checker
972d615f992SSatish Balay . -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors
973d615f992SSatish Balay . -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode
9741d27aa22SBarry Smith - -mat_mkl_pardiso_60 - Intel MKL PARDISO mode
975d37c1e25SSatish Balay 
976d37c1e25SSatish Balay   Level: beginner
977d37c1e25SSatish Balay 
9789b4359b8SBarry Smith   Notes:
9792ef1f0ffSBarry Smith   Use `-mat_mkl_pardiso_68 1` to display the number of threads the solver is using. MKL does not provide a way to directly access this
9809b4359b8SBarry Smith   information.
9819b4359b8SBarry Smith 
9821d27aa22SBarry Smith   For more information on the options check the MKL PARDISO manual
983d37c1e25SSatish Balay 
9841d27aa22SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMkl_PardisoSetCntl()`, `MATSOLVERMKL_CPARDISO`
985d37c1e25SSatish Balay M*/
MatFactorGetSolverType_mkl_pardiso(Mat A,MatSolverType * type)986d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_mkl_pardiso(Mat A, MatSolverType *type)
987d71ae5a4SJacob Faibussowitsch {
988d37c1e25SSatish Balay   PetscFunctionBegin;
989d615f992SSatish Balay   *type = MATSOLVERMKL_PARDISO;
9903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
991d37c1e25SSatish Balay }
992d37c1e25SSatish Balay 
MatGetFactor_aij_mkl_pardiso(Mat A,MatFactorType ftype,Mat * F)993d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A, MatFactorType ftype, Mat *F)
994d71ae5a4SJacob Faibussowitsch {
99539bb8a47SPascal Tremblay   Mat              B;
99639bb8a47SPascal Tremblay   Mat_MKL_PARDISO *mat_mkl_pardiso;
997de1ba746SStefano Zampini   PetscBool        isSeqAIJ, isSeqBAIJ, isSeqSBAIJ;
99839bb8a47SPascal Tremblay 
99939bb8a47SPascal Tremblay   PetscFunctionBegin;
10009566063dSJacob Faibussowitsch   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ));
10019566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ));
10029566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ));
10039566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
10049566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
10059566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("mkl_pardiso", &((PetscObject)B)->type_name));
10069566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
100739bb8a47SPascal Tremblay 
10084dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mat_mkl_pardiso));
1009dfa13051SBarry Smith   B->data = mat_mkl_pardiso;
1010de1ba746SStefano Zampini 
10119566063dSJacob Faibussowitsch   PetscCall(MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso));
10122254c79cSStefano Zampini   if (ftype == MAT_FACTOR_LU) {
101339bb8a47SPascal Tremblay     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO;
101439bb8a47SPascal Tremblay     B->factortype            = MAT_FACTOR_LU;
1015a578bbc7SStefano Zampini     mat_mkl_pardiso->needsym = PETSC_FALSE;
1016dfa13051SBarry Smith     if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
1017dfa13051SBarry Smith     else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
1018f7d195e4SLawrence Mitchell     else {
10191d27aa22SBarry Smith       PetscCheck(!isSeqSBAIJ, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for MKL PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead");
10201d27aa22SBarry Smith       SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for MKL PARDISO LU with %s format", ((PetscObject)A)->type_name);
1021f7d195e4SLawrence Mitchell     }
1022de1ba746SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1023de1ba746SStefano Zampini     mat_mkl_pardiso->mtype = 13;
1024de1ba746SStefano Zampini #else
1025a6d73b37SStefano Zampini     mat_mkl_pardiso->mtype = 11;
1026de1ba746SStefano Zampini #endif
1027de1ba746SStefano Zampini   } else {
10282254c79cSStefano Zampini     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO;
10292254c79cSStefano Zampini     B->factortype                  = MAT_FACTOR_CHOLESKY;
1030dfa13051SBarry Smith     if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij;
1031dfa13051SBarry Smith     else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij;
1032dfa13051SBarry Smith     else if (isSeqSBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij;
103398921bdaSJacob Faibussowitsch     else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for PARDISO CHOLESKY with %s format", ((PetscObject)A)->type_name);
1034dfa13051SBarry Smith 
1035de1ba746SStefano Zampini     mat_mkl_pardiso->needsym = PETSC_TRUE;
103653888129SPierre Jolivet #if !defined(PETSC_USE_COMPLEX)
1037b94d7dedSBarry Smith     if (A->spd == PETSC_BOOL3_TRUE) mat_mkl_pardiso->mtype = 2;
1038b3cb21ddSStefano Zampini     else mat_mkl_pardiso->mtype = -2;
103953888129SPierre Jolivet #else
104053888129SPierre Jolivet     mat_mkl_pardiso->mtype = 6;
10411d27aa22SBarry Smith     PetscCheck(A->hermitian != PETSC_BOOL3_TRUE, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for MKL PARDISO CHOLESKY with Hermitian matrices! Use MAT_FACTOR_LU instead");
104253888129SPierre Jolivet #endif
1043de1ba746SStefano Zampini   }
104439bb8a47SPascal Tremblay   B->ops->destroy = MatDestroy_MKL_PARDISO;
104539bb8a47SPascal Tremblay   B->ops->view    = MatView_MKL_PARDISO;
104639bb8a47SPascal Tremblay   B->ops->getinfo = MatGetInfo_MKL_PARDISO;
10477c15a11cSStefano Zampini   B->factortype   = ftype;
1048de1ba746SStefano Zampini   B->assembled    = PETSC_TRUE;
104939bb8a47SPascal Tremblay 
10509566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
10519566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERMKL_PARDISO, &B->solvertype));
105200c67f3bSHong Zhang 
10539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mkl_pardiso));
10549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MKL_PARDISO));
10559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMkl_PardisoSetCntl_C", MatMkl_PardisoSetCntl_MKL_PARDISO));
1056d37c1e25SSatish Balay 
1057d37c1e25SSatish Balay   *F = B;
10583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1059d37c1e25SSatish Balay }
1060d37c1e25SSatish Balay 
MatSolverTypeRegister_MKL_Pardiso(void)1061d1f0640dSPierre Jolivet PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MKL_Pardiso(void)
1062d71ae5a4SJacob Faibussowitsch {
106342c9c57cSBarry Smith   PetscFunctionBegin;
10649566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mkl_pardiso));
10659566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mkl_pardiso));
10669566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mkl_pardiso));
10679566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mkl_pardiso));
10683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
106942c9c57cSBarry Smith }
1070