1 #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <../src/mat/impls/dense/seq/dense.h> 4 5 #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 6 #define MKL_ILP64 7 #endif 8 #include <mkl_pardiso.h> 9 10 PETSC_EXTERN void PetscSetMKL_PARDISOThreads(int); 11 12 /* 13 * Possible mkl_pardiso phases that controls the execution of the solver. 14 * For more information check mkl_pardiso manual. 15 */ 16 #define JOB_ANALYSIS 11 17 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION 12 18 #define JOB_ANALYSIS_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 13 19 #define JOB_NUMERICAL_FACTORIZATION 22 20 #define JOB_NUMERICAL_FACTORIZATION_SOLVE_ITERATIVE_REFINEMENT 23 21 #define JOB_SOLVE_ITERATIVE_REFINEMENT 33 22 #define JOB_SOLVE_FORWARD_SUBSTITUTION 331 23 #define JOB_SOLVE_DIAGONAL_SUBSTITUTION 332 24 #define JOB_SOLVE_BACKWARD_SUBSTITUTION 333 25 #define JOB_RELEASE_OF_LU_MEMORY 0 26 #define JOB_RELEASE_OF_ALL_MEMORY -1 27 28 #define IPARM_SIZE 64 29 30 #if defined(PETSC_USE_64BIT_INDICES) 31 #if defined(PETSC_HAVE_MKL_INTEL_ILP64) 32 #define INT_TYPE long long int 33 #define MKL_PARDISO pardiso 34 #define MKL_PARDISO_INIT pardisoinit 35 #else 36 /* this is the case where the MKL BLAS/LAPACK are 32-bit integers but the 64-bit integer version of 37 of PARDISO code is used; hence the need for the 64 below*/ 38 #define INT_TYPE long long int 39 #define MKL_PARDISO pardiso_64 40 #define MKL_PARDISO_INIT pardiso_64init 41 void pardiso_64init(void *pt, INT_TYPE *mtype, INT_TYPE iparm[]) 42 { 43 PetscBLASInt iparm_copy[IPARM_SIZE], mtype_copy; 44 45 PetscCallVoid(PetscBLASIntCast(*mtype, &mtype_copy)); 46 pardisoinit(pt, &mtype_copy, iparm_copy); 47 for (PetscInt i = 0; i < IPARM_SIZE; i++) iparm[i] = iparm_copy[i]; 48 } 49 #endif 50 #else 51 #define INT_TYPE int 52 #define MKL_PARDISO pardiso 53 #define MKL_PARDISO_INIT pardisoinit 54 #endif 55 56 #define PetscCallPardiso(f) PetscStackCallExternalVoid("MKL_PARDISO", f); 57 58 /* 59 Internal data structure. 60 */ 61 typedef struct { 62 /* Configuration vector*/ 63 INT_TYPE iparm[IPARM_SIZE]; 64 65 /* 66 Internal MKL PARDISO memory location. 67 After the first call to MKL PARDISO do not modify pt, as that could cause a serious memory leak. 68 */ 69 void *pt[IPARM_SIZE]; 70 71 /* Basic MKL PARDISO info */ 72 INT_TYPE phase, maxfct, mnum, mtype, n, nrhs, msglvl, err; 73 74 /* Matrix nonzero structure and values */ 75 void *a; 76 INT_TYPE *ia, *ja; 77 78 /* Number of non-zero elements */ 79 INT_TYPE nz; 80 81 /* Row permutaton vector */ 82 INT_TYPE *perm; 83 84 /* Define if matrix preserves sparse structure. */ 85 MatStructure matstruc; 86 87 PetscBool needsym; 88 PetscBool freeaij; 89 90 /* Schur complement */ 91 PetscScalar *schur; 92 PetscInt schur_size; 93 PetscInt *schur_idxs; 94 PetscScalar *schur_work; 95 PetscBLASInt schur_work_size; 96 PetscBool solve_interior; 97 98 /* True if MKL PARDISO function have been used. */ 99 PetscBool CleanUp; 100 101 /* Conversion to a format suitable for MKL */ 102 PetscErrorCode (*Convert)(Mat, PetscBool, MatReuse, PetscBool *, INT_TYPE *, INT_TYPE **, INT_TYPE **, PetscScalar **); 103 } Mat_MKL_PARDISO; 104 105 static PetscErrorCode MatMKLPardiso_Convert_seqsbaij(Mat A, PetscBool sym, MatReuse reuse, PetscBool *free, INT_TYPE *nnz, INT_TYPE **r, INT_TYPE **c, PetscScalar **v) 106 { 107 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 108 PetscInt bs = A->rmap->bs, i; 109 110 PetscFunctionBegin; 111 PetscCheck(sym, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "This should not happen"); 112 *v = aa->a; 113 if (bs == 1) { /* already in the correct format */ 114 /* though PetscInt and INT_TYPE are of the same size since they are defined differently the Intel compiler requires a cast */ 115 *r = (INT_TYPE *)aa->i; 116 *c = (INT_TYPE *)aa->j; 117 *nnz = (INT_TYPE)aa->nz; 118 *free = PETSC_FALSE; 119 } else if (reuse == MAT_INITIAL_MATRIX) { 120 PetscInt m = A->rmap->n, nz = aa->nz; 121 PetscInt *row, *col; 122 PetscCall(PetscMalloc2(m + 1, &row, nz, &col)); 123 for (i = 0; i < m + 1; i++) row[i] = aa->i[i] + 1; 124 for (i = 0; i < nz; i++) col[i] = aa->j[i] + 1; 125 *r = (INT_TYPE *)row; 126 *c = (INT_TYPE *)col; 127 *nnz = (INT_TYPE)nz; 128 *free = PETSC_TRUE; 129 } 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 static PetscErrorCode MatMKLPardiso_Convert_seqbaij(Mat A, PetscBool sym, MatReuse reuse, PetscBool *free, INT_TYPE *nnz, INT_TYPE **r, INT_TYPE **c, PetscScalar **v) 134 { 135 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 136 PetscInt bs = A->rmap->bs, i; 137 138 PetscFunctionBegin; 139 if (!sym) { 140 *v = aa->a; 141 if (bs == 1) { /* already in the correct format */ 142 /* though PetscInt and INT_TYPE are of the same size since they are defined differently the Intel compiler requires a cast */ 143 *r = (INT_TYPE *)aa->i; 144 *c = (INT_TYPE *)aa->j; 145 *nnz = (INT_TYPE)aa->nz; 146 *free = PETSC_FALSE; 147 PetscFunctionReturn(PETSC_SUCCESS); 148 } else if (reuse == MAT_INITIAL_MATRIX) { 149 PetscInt m = A->rmap->n, nz = aa->nz; 150 PetscInt *row, *col; 151 PetscCall(PetscMalloc2(m + 1, &row, nz, &col)); 152 for (i = 0; i < m + 1; i++) row[i] = aa->i[i] + 1; 153 for (i = 0; i < nz; i++) col[i] = aa->j[i] + 1; 154 *r = (INT_TYPE *)row; 155 *c = (INT_TYPE *)col; 156 *nnz = (INT_TYPE)nz; 157 } 158 *free = PETSC_TRUE; 159 } else { 160 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "This should not happen"); 161 } 162 PetscFunctionReturn(PETSC_SUCCESS); 163 } 164 165 static PetscErrorCode MatMKLPardiso_Convert_seqaij(Mat A, PetscBool sym, MatReuse reuse, PetscBool *free, INT_TYPE *nnz, INT_TYPE **r, INT_TYPE **c, PetscScalar **v) 166 { 167 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 168 PetscScalar *aav; 169 170 PetscFunctionBegin; 171 PetscCall(MatSeqAIJGetArrayRead(A, (const PetscScalar **)&aav)); 172 if (!sym) { /* already in the correct format */ 173 *v = aav; 174 *r = (INT_TYPE *)aa->i; 175 *c = (INT_TYPE *)aa->j; 176 *nnz = (INT_TYPE)aa->nz; 177 *free = PETSC_FALSE; 178 } else if (reuse == MAT_INITIAL_MATRIX) { /* need to get the triangular part */ 179 PetscScalar *vals, *vv; 180 PetscInt *row, *col, *jj; 181 PetscInt m = A->rmap->n, nz, i; 182 const PetscInt *adiag; 183 184 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 185 nz = 0; 186 for (i = 0; i < m; i++) nz += aa->i[i + 1] - adiag[i]; 187 PetscCall(PetscMalloc2(m + 1, &row, nz, &col)); 188 PetscCall(PetscMalloc1(nz, &vals)); 189 jj = col; 190 vv = vals; 191 192 row[0] = 0; 193 for (i = 0; i < m; i++) { 194 PetscInt *aj = aa->j + adiag[i]; 195 PetscScalar *av = aav + adiag[i]; 196 PetscInt rl = aa->i[i + 1] - adiag[i], j; 197 198 for (j = 0; j < rl; j++) { 199 *jj = *aj; 200 jj++; 201 aj++; 202 *vv = *av; 203 vv++; 204 av++; 205 } 206 row[i + 1] = row[i] + rl; 207 } 208 *v = vals; 209 *r = (INT_TYPE *)row; 210 *c = (INT_TYPE *)col; 211 *nnz = (INT_TYPE)nz; 212 *free = PETSC_TRUE; 213 } else { 214 PetscScalar *vv; 215 PetscInt m = A->rmap->n, i; 216 const PetscInt *adiag; 217 218 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL)); 219 vv = *v; 220 for (i = 0; i < m; i++) { 221 PetscScalar *av = aav + adiag[i]; 222 PetscInt rl = aa->i[i + 1] - adiag[i], j; 223 for (j = 0; j < rl; j++) { 224 *vv = *av; 225 vv++; 226 av++; 227 } 228 } 229 *free = PETSC_TRUE; 230 } 231 PetscCall(MatSeqAIJRestoreArrayRead(A, (const PetscScalar **)&aav)); 232 PetscFunctionReturn(PETSC_SUCCESS); 233 } 234 235 static PetscErrorCode MatMKLPardisoSolveSchur_Private(Mat F, PetscScalar *B, PetscScalar *X) 236 { 237 Mat_MKL_PARDISO *mpardiso = (Mat_MKL_PARDISO *)F->data; 238 Mat S, Xmat, Bmat; 239 MatFactorSchurStatus schurstatus; 240 241 PetscFunctionBegin; 242 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 243 PetscCheck(X != B || schurstatus != MAT_FACTOR_SCHUR_INVERTED, PETSC_COMM_SELF, PETSC_ERR_SUP, "X and B cannot point to the same address"); 244 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mpardiso->schur_size, mpardiso->nrhs, B, &Bmat)); 245 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mpardiso->schur_size, mpardiso->nrhs, X, &Xmat)); 246 PetscCall(MatSetType(Bmat, ((PetscObject)S)->type_name)); 247 PetscCall(MatSetType(Xmat, ((PetscObject)S)->type_name)); 248 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 249 PetscCall(MatBindToCPU(Xmat, S->boundtocpu)); 250 PetscCall(MatBindToCPU(Bmat, S->boundtocpu)); 251 #endif 252 253 #if defined(PETSC_USE_COMPLEX) 254 PetscCheck(mpardiso->iparm[12 - 1] != 1, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Hermitian solve not implemented yet"); 255 #endif 256 257 switch (schurstatus) { 258 case MAT_FACTOR_SCHUR_FACTORED: 259 if (!mpardiso->iparm[12 - 1]) { 260 PetscCall(MatMatSolve(S, Bmat, Xmat)); 261 } else { /* transpose solve */ 262 PetscCall(MatMatSolveTranspose(S, Bmat, Xmat)); 263 } 264 break; 265 case MAT_FACTOR_SCHUR_INVERTED: 266 PetscCall(MatProductCreateWithMat(S, Bmat, NULL, Xmat)); 267 if (!mpardiso->iparm[12 - 1]) { 268 PetscCall(MatProductSetType(Xmat, MATPRODUCT_AB)); 269 } else { /* transpose solve */ 270 PetscCall(MatProductSetType(Xmat, MATPRODUCT_AtB)); 271 } 272 PetscCall(MatProductSetFromOptions(Xmat)); 273 PetscCall(MatProductSymbolic(Xmat)); 274 PetscCall(MatProductNumeric(Xmat)); 275 PetscCall(MatProductClear(Xmat)); 276 break; 277 default: 278 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", (int)F->schur_status); 279 break; 280 } 281 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 282 PetscCall(MatDestroy(&Bmat)); 283 PetscCall(MatDestroy(&Xmat)); 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode MatFactorSetSchurIS_MKL_PARDISO(Mat F, IS is) 288 { 289 Mat_MKL_PARDISO *mpardiso = (Mat_MKL_PARDISO *)F->data; 290 const PetscScalar *arr; 291 const PetscInt *idxs; 292 PetscInt size, i; 293 PetscMPIInt csize; 294 PetscBool sorted; 295 296 PetscFunctionBegin; 297 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &csize)); 298 PetscCheck(csize <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "MKL PARDISO parallel Schur complements not yet supported from PETSc"); 299 PetscCall(ISSorted(is, &sorted)); 300 PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS for MKL PARDISO Schur complements needs to be sorted"); 301 PetscCall(ISGetLocalSize(is, &size)); 302 PetscCall(PetscFree(mpardiso->schur_work)); 303 PetscCall(PetscBLASIntCast(PetscMax(mpardiso->n, 2 * size), &mpardiso->schur_work_size)); 304 PetscCall(PetscMalloc1(mpardiso->schur_work_size, &mpardiso->schur_work)); 305 PetscCall(MatDestroy(&F->schur)); 306 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 307 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 308 mpardiso->schur = (PetscScalar *)arr; 309 mpardiso->schur_size = size; 310 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 311 if (mpardiso->mtype == 2) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 312 313 PetscCall(PetscFree(mpardiso->schur_idxs)); 314 PetscCall(PetscMalloc1(size, &mpardiso->schur_idxs)); 315 PetscCall(PetscArrayzero(mpardiso->perm, mpardiso->n)); 316 PetscCall(ISGetIndices(is, &idxs)); 317 PetscCall(PetscArraycpy(mpardiso->schur_idxs, idxs, size)); 318 for (i = 0; i < size; i++) mpardiso->perm[idxs[i]] = 1; 319 PetscCall(ISRestoreIndices(is, &idxs)); 320 if (size) { /* turn on Schur switch if the set of indices is not empty */ 321 mpardiso->iparm[36 - 1] = 2; 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static PetscErrorCode MatDestroy_MKL_PARDISO(Mat A) 327 { 328 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data; 329 330 PetscFunctionBegin; 331 if (mat_mkl_pardiso->CleanUp) { 332 mat_mkl_pardiso->phase = JOB_RELEASE_OF_ALL_MEMORY; 333 334 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, NULL, NULL, NULL, NULL, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, 335 &mat_mkl_pardiso->msglvl, NULL, NULL, &mat_mkl_pardiso->err)); 336 } 337 PetscCall(PetscFree(mat_mkl_pardiso->perm)); 338 PetscCall(PetscFree(mat_mkl_pardiso->schur_work)); 339 PetscCall(PetscFree(mat_mkl_pardiso->schur_idxs)); 340 if (mat_mkl_pardiso->freeaij) { 341 PetscCall(PetscFree2(mat_mkl_pardiso->ia, mat_mkl_pardiso->ja)); 342 if (mat_mkl_pardiso->iparm[34] == 1) PetscCall(PetscFree(mat_mkl_pardiso->a)); 343 } 344 PetscCall(PetscFree(A->data)); 345 346 /* clear composed functions */ 347 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 348 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 349 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMkl_PardisoSetCntl_C", NULL)); 350 PetscFunctionReturn(PETSC_SUCCESS); 351 } 352 353 static PetscErrorCode MatMKLPardisoScatterSchur_Private(Mat_MKL_PARDISO *mpardiso, PetscScalar *whole, PetscScalar *schur, PetscBool reduce) 354 { 355 PetscFunctionBegin; 356 if (reduce) { /* data given for the whole matrix */ 357 PetscInt i, m = 0, p = 0; 358 for (i = 0; i < mpardiso->nrhs; i++) { 359 PetscInt j; 360 for (j = 0; j < mpardiso->schur_size; j++) schur[p + j] = whole[m + mpardiso->schur_idxs[j]]; 361 m += mpardiso->n; 362 p += mpardiso->schur_size; 363 } 364 } else { /* from Schur to whole */ 365 PetscInt i, m = 0, p = 0; 366 for (i = 0; i < mpardiso->nrhs; i++) { 367 PetscInt j; 368 for (j = 0; j < mpardiso->schur_size; j++) whole[m + mpardiso->schur_idxs[j]] = schur[p + j]; 369 m += mpardiso->n; 370 p += mpardiso->schur_size; 371 } 372 } 373 PetscFunctionReturn(PETSC_SUCCESS); 374 } 375 376 static PetscErrorCode MatSolve_MKL_PARDISO(Mat A, Vec b, Vec x) 377 { 378 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data; 379 PetscScalar *xarray; 380 const PetscScalar *barray; 381 382 PetscFunctionBegin; 383 mat_mkl_pardiso->nrhs = 1; 384 PetscCall(VecGetArrayWrite(x, &xarray)); 385 PetscCall(VecGetArrayRead(b, &barray)); 386 387 if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 388 else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 389 390 if (barray == xarray) { /* if the two vectors share the same memory */ 391 PetscScalar *work; 392 if (!mat_mkl_pardiso->schur_work) { 393 PetscCall(PetscMalloc1(mat_mkl_pardiso->n, &work)); 394 } else { 395 work = mat_mkl_pardiso->schur_work; 396 } 397 mat_mkl_pardiso->iparm[6 - 1] = 1; 398 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, NULL, 399 &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)xarray, (void *)work, &mat_mkl_pardiso->err)); 400 if (!mat_mkl_pardiso->schur_work) PetscCall(PetscFree(work)); 401 } else { 402 mat_mkl_pardiso->iparm[6 - 1] = 0; 403 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, 404 mat_mkl_pardiso->perm, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err)); 405 } 406 PetscCall(VecRestoreArrayRead(b, &barray)); 407 408 PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err); 409 410 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 411 if (!mat_mkl_pardiso->solve_interior) { 412 PetscInt shift = mat_mkl_pardiso->schur_size; 413 414 PetscCall(MatFactorFactorizeSchurComplement(A)); 415 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 416 if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) shift = 0; 417 418 /* solve Schur complement */ 419 PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work, PETSC_TRUE)); 420 PetscCall(MatMKLPardisoSolveSchur_Private(A, mat_mkl_pardiso->schur_work, mat_mkl_pardiso->schur_work + shift)); 421 PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work + shift, PETSC_FALSE)); 422 } else { /* if we are solving for the interior problem, any value in barray[schur] forward-substituted to xarray[schur] will be neglected */ 423 PetscInt i; 424 for (i = 0; i < mat_mkl_pardiso->schur_size; i++) xarray[mat_mkl_pardiso->schur_idxs[i]] = 0.; 425 } 426 427 /* expansion phase */ 428 mat_mkl_pardiso->iparm[6 - 1] = 1; 429 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 430 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, 431 mat_mkl_pardiso->perm, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)xarray, (void *)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 432 &mat_mkl_pardiso->err)); 433 PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err); 434 mat_mkl_pardiso->iparm[6 - 1] = 0; 435 } 436 PetscCall(VecRestoreArrayWrite(x, &xarray)); 437 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 438 PetscFunctionReturn(PETSC_SUCCESS); 439 } 440 441 static PetscErrorCode MatForwardSolve_MKL_PARDISO(Mat A, Vec b, Vec x) 442 { 443 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data; 444 PetscScalar *xarray; 445 const PetscScalar *barray; 446 447 PetscFunctionBegin; 448 PetscCheck(!mat_mkl_pardiso->schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Forward substitution not supported with Schur complement"); 449 450 mat_mkl_pardiso->nrhs = 1; 451 PetscCall(VecGetArrayWrite(x, &xarray)); 452 PetscCall(VecGetArrayRead(b, &barray)); 453 454 mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 455 456 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm, 457 &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err)); 458 PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err); 459 460 PetscCall(VecRestoreArrayRead(b, &barray)); 461 PetscCall(VecRestoreArrayWrite(x, &xarray)); 462 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 463 PetscFunctionReturn(PETSC_SUCCESS); 464 } 465 466 static PetscErrorCode MatBackwardSolve_MKL_PARDISO(Mat A, Vec b, Vec x) 467 { 468 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data; 469 PetscScalar *xarray; 470 const PetscScalar *barray; 471 472 PetscFunctionBegin; 473 PetscCheck(!mat_mkl_pardiso->schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Backward substitution not supported with Schur complement"); 474 475 mat_mkl_pardiso->nrhs = 1; 476 PetscCall(VecGetArrayWrite(x, &xarray)); 477 PetscCall(VecGetArrayRead(b, &barray)); 478 479 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 480 481 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm, 482 &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err)); 483 PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err); 484 485 PetscCall(VecRestoreArrayRead(b, &barray)); 486 PetscCall(VecRestoreArrayWrite(x, &xarray)); 487 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } 490 491 static PetscErrorCode MatSolveTranspose_MKL_PARDISO(Mat A, Vec b, Vec x) 492 { 493 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data; 494 PetscInt oiparm12; 495 496 PetscFunctionBegin; 497 oiparm12 = mat_mkl_pardiso->iparm[12 - 1]; 498 mat_mkl_pardiso->iparm[12 - 1] = 2; 499 PetscCall(MatSolve_MKL_PARDISO(A, b, x)); 500 mat_mkl_pardiso->iparm[12 - 1] = oiparm12; 501 PetscFunctionReturn(PETSC_SUCCESS); 502 } 503 504 static PetscErrorCode MatMatSolve_MKL_PARDISO(Mat A, Mat B, Mat X) 505 { 506 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data; 507 const PetscScalar *barray; 508 PetscScalar *xarray; 509 PetscBool flg; 510 511 PetscFunctionBegin; 512 PetscCall(PetscObjectBaseTypeCompare((PetscObject)B, MATSEQDENSE, &flg)); 513 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATSEQDENSE matrix"); 514 if (X != B) { 515 PetscCall(PetscObjectBaseTypeCompare((PetscObject)X, MATSEQDENSE, &flg)); 516 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATSEQDENSE matrix"); 517 } 518 519 PetscCall(MatGetSize(B, NULL, (PetscInt *)&mat_mkl_pardiso->nrhs)); 520 521 if (mat_mkl_pardiso->nrhs > 0) { 522 PetscCall(MatDenseGetArrayRead(B, &barray)); 523 PetscCall(MatDenseGetArrayWrite(X, &xarray)); 524 525 PetscCheck(barray != xarray, PETSC_COMM_SELF, PETSC_ERR_SUP, "B and X cannot share the same memory location"); 526 if (!mat_mkl_pardiso->schur) mat_mkl_pardiso->phase = JOB_SOLVE_ITERATIVE_REFINEMENT; 527 else mat_mkl_pardiso->phase = JOB_SOLVE_FORWARD_SUBSTITUTION; 528 529 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, 530 mat_mkl_pardiso->perm, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)barray, (void *)xarray, &mat_mkl_pardiso->err)); 531 PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err); 532 533 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 534 if (mat_mkl_pardiso->schur) { /* solve Schur complement and expand solution */ 535 PetscScalar *o_schur_work = NULL; 536 537 /* solve Schur complement */ 538 if (!mat_mkl_pardiso->solve_interior) { 539 PetscInt shift = mat_mkl_pardiso->schur_size * mat_mkl_pardiso->nrhs, scale; 540 PetscInt mem = mat_mkl_pardiso->n * mat_mkl_pardiso->nrhs; 541 542 PetscCall(MatFactorFactorizeSchurComplement(A)); 543 /* allocate extra memory if it is needed */ 544 scale = 1; 545 if (A->schur_status == MAT_FACTOR_SCHUR_INVERTED) scale = 2; 546 mem *= scale; 547 if (mem > mat_mkl_pardiso->schur_work_size) { 548 o_schur_work = mat_mkl_pardiso->schur_work; 549 PetscCall(PetscMalloc1(mem, &mat_mkl_pardiso->schur_work)); 550 } 551 /* if inverted, uses BLAS *MM subroutines, otherwise LAPACK *TRS */ 552 if (A->schur_status != MAT_FACTOR_SCHUR_INVERTED) shift = 0; 553 PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work, PETSC_TRUE)); 554 PetscCall(MatMKLPardisoSolveSchur_Private(A, mat_mkl_pardiso->schur_work, mat_mkl_pardiso->schur_work + shift)); 555 PetscCall(MatMKLPardisoScatterSchur_Private(mat_mkl_pardiso, xarray, mat_mkl_pardiso->schur_work + shift, PETSC_FALSE)); 556 } else { /* if we are solving for the interior problem, any value in barray[schur,n] forward-substituted to xarray[schur,n] will be neglected */ 557 PetscInt i, n, m = 0; 558 for (n = 0; n < mat_mkl_pardiso->nrhs; n++) { 559 for (i = 0; i < mat_mkl_pardiso->schur_size; i++) xarray[mat_mkl_pardiso->schur_idxs[i] + m] = 0.; 560 m += mat_mkl_pardiso->n; 561 } 562 } 563 564 /* expansion phase */ 565 mat_mkl_pardiso->iparm[6 - 1] = 1; 566 mat_mkl_pardiso->phase = JOB_SOLVE_BACKWARD_SUBSTITUTION; 567 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, 568 mat_mkl_pardiso->perm, &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, (void *)xarray, (void *)mat_mkl_pardiso->schur_work, /* according to the specs, the solution vector is always used */ 569 &mat_mkl_pardiso->err)); 570 if (o_schur_work) { /* restore original Schur_work (minimal size) */ 571 PetscCall(PetscFree(mat_mkl_pardiso->schur_work)); 572 mat_mkl_pardiso->schur_work = o_schur_work; 573 } 574 PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err); 575 mat_mkl_pardiso->iparm[6 - 1] = 0; 576 } 577 PetscCall(MatDenseRestoreArrayWrite(X, &xarray)); 578 } 579 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 580 PetscFunctionReturn(PETSC_SUCCESS); 581 } 582 583 static PetscErrorCode MatFactorNumeric_MKL_PARDISO(Mat F, Mat A, const MatFactorInfo *info) 584 { 585 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data; 586 587 PetscFunctionBegin; 588 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 589 PetscCall((*mat_mkl_pardiso->Convert)(A, mat_mkl_pardiso->needsym, MAT_REUSE_MATRIX, &mat_mkl_pardiso->freeaij, &mat_mkl_pardiso->nz, &mat_mkl_pardiso->ia, &mat_mkl_pardiso->ja, (PetscScalar **)&mat_mkl_pardiso->a)); 590 591 mat_mkl_pardiso->phase = JOB_NUMERICAL_FACTORIZATION; 592 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm, 593 &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, NULL, (void *)mat_mkl_pardiso->schur, &mat_mkl_pardiso->err)); 594 PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err); 595 596 /* report flops */ 597 if (mat_mkl_pardiso->iparm[18] > 0) PetscCall(PetscLogFlops(PetscPowRealInt(10., 6) * mat_mkl_pardiso->iparm[18])); 598 599 if (F->schur) { /* schur output from pardiso is in row major format */ 600 #if defined(PETSC_HAVE_CUDA) 601 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 602 #endif 603 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 604 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 605 } 606 mat_mkl_pardiso->matstruc = SAME_NONZERO_PATTERN; 607 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 608 PetscFunctionReturn(PETSC_SUCCESS); 609 } 610 611 static PetscErrorCode MatSetFromOptions_MKL_PARDISO(Mat F, Mat A) 612 { 613 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data; 614 PetscInt icntl, bs, threads = 1; 615 PetscBool flg; 616 617 PetscFunctionBegin; 618 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MKL_PARDISO Options", "Mat"); 619 620 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_65", "Suggested number of threads to use within MKL PARDISO", "None", threads, &threads, &flg)); 621 if (flg) PetscSetMKL_PARDISOThreads((int)threads); 622 623 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_66", "Maximum number of factors with identical sparsity structure that must be kept in memory at the same time", "None", mat_mkl_pardiso->maxfct, &icntl, &flg)); 624 if (flg) mat_mkl_pardiso->maxfct = icntl; 625 626 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_67", "Indicates the actual matrix for the solution phase", "None", mat_mkl_pardiso->mnum, &icntl, &flg)); 627 if (flg) mat_mkl_pardiso->mnum = icntl; 628 629 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_68", "Message level information", "None", mat_mkl_pardiso->msglvl, &icntl, &flg)); 630 if (flg) mat_mkl_pardiso->msglvl = icntl; 631 632 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_69", "Defines the matrix type", "None", mat_mkl_pardiso->mtype, &icntl, &flg)); 633 if (flg) { 634 void *pt[IPARM_SIZE]; 635 mat_mkl_pardiso->mtype = icntl; 636 icntl = mat_mkl_pardiso->iparm[34]; 637 bs = mat_mkl_pardiso->iparm[36]; 638 MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 639 #if defined(PETSC_USE_REAL_SINGLE) 640 mat_mkl_pardiso->iparm[27] = 1; 641 #else 642 mat_mkl_pardiso->iparm[27] = 0; 643 #endif 644 mat_mkl_pardiso->iparm[34] = icntl; 645 mat_mkl_pardiso->iparm[36] = bs; 646 } 647 648 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_1", "Use default values (if 0)", "None", mat_mkl_pardiso->iparm[0], &icntl, &flg)); 649 if (flg) mat_mkl_pardiso->iparm[0] = icntl; 650 651 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_2", "Fill-in reducing ordering for the input matrix", "None", mat_mkl_pardiso->iparm[1], &icntl, &flg)); 652 if (flg) mat_mkl_pardiso->iparm[1] = icntl; 653 654 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_4", "Preconditioned CGS/CG", "None", mat_mkl_pardiso->iparm[3], &icntl, &flg)); 655 if (flg) mat_mkl_pardiso->iparm[3] = icntl; 656 657 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_5", "User permutation", "None", mat_mkl_pardiso->iparm[4], &icntl, &flg)); 658 if (flg) mat_mkl_pardiso->iparm[4] = icntl; 659 660 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_6", "Write solution on x", "None", mat_mkl_pardiso->iparm[5], &icntl, &flg)); 661 if (flg) mat_mkl_pardiso->iparm[5] = icntl; 662 663 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_8", "Iterative refinement step", "None", mat_mkl_pardiso->iparm[7], &icntl, &flg)); 664 if (flg) mat_mkl_pardiso->iparm[7] = icntl; 665 666 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_10", "Pivoting perturbation", "None", mat_mkl_pardiso->iparm[9], &icntl, &flg)); 667 if (flg) mat_mkl_pardiso->iparm[9] = icntl; 668 669 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_11", "Scaling vectors", "None", mat_mkl_pardiso->iparm[10], &icntl, &flg)); 670 if (flg) mat_mkl_pardiso->iparm[10] = icntl; 671 672 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_12", "Solve with transposed or conjugate transposed matrix A", "None", mat_mkl_pardiso->iparm[11], &icntl, &flg)); 673 if (flg) mat_mkl_pardiso->iparm[11] = icntl; 674 675 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_13", "Improved accuracy using (non-) symmetric weighted matching", "None", mat_mkl_pardiso->iparm[12], &icntl, &flg)); 676 if (flg) mat_mkl_pardiso->iparm[12] = icntl; 677 678 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_18", "Numbers of non-zero elements", "None", mat_mkl_pardiso->iparm[17], &icntl, &flg)); 679 if (flg) mat_mkl_pardiso->iparm[17] = icntl; 680 681 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_19", "Report number of floating point operations (0 to disable)", "None", mat_mkl_pardiso->iparm[18], &icntl, &flg)); 682 if (flg) mat_mkl_pardiso->iparm[18] = icntl; 683 684 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_21", "Pivoting for symmetric indefinite matrices", "None", mat_mkl_pardiso->iparm[20], &icntl, &flg)); 685 if (flg) mat_mkl_pardiso->iparm[20] = icntl; 686 687 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_24", "Parallel factorization control", "None", mat_mkl_pardiso->iparm[23], &icntl, &flg)); 688 if (flg) mat_mkl_pardiso->iparm[23] = icntl; 689 690 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_25", "Parallel forward/backward solve control", "None", mat_mkl_pardiso->iparm[24], &icntl, &flg)); 691 if (flg) mat_mkl_pardiso->iparm[24] = icntl; 692 693 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_27", "Matrix checker", "None", mat_mkl_pardiso->iparm[26], &icntl, &flg)); 694 if (flg) mat_mkl_pardiso->iparm[26] = icntl; 695 696 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_31", "Partial solve and computing selected components of the solution vectors", "None", mat_mkl_pardiso->iparm[30], &icntl, &flg)); 697 if (flg) mat_mkl_pardiso->iparm[30] = icntl; 698 699 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_34", "Optimal number of threads for conditional numerical reproducibility (CNR) mode", "None", mat_mkl_pardiso->iparm[33], &icntl, &flg)); 700 if (flg) mat_mkl_pardiso->iparm[33] = icntl; 701 702 PetscCall(PetscOptionsInt("-mat_mkl_pardiso_60", "Intel MKL PARDISO mode", "None", mat_mkl_pardiso->iparm[59], &icntl, &flg)); 703 if (flg) mat_mkl_pardiso->iparm[59] = icntl; 704 PetscOptionsEnd(); 705 PetscFunctionReturn(PETSC_SUCCESS); 706 } 707 708 static PetscErrorCode MatFactorMKL_PARDISOInitialize_Private(Mat A, MatFactorType ftype, Mat_MKL_PARDISO *mat_mkl_pardiso) 709 { 710 PetscInt i, bs; 711 PetscBool match; 712 713 PetscFunctionBegin; 714 for (i = 0; i < IPARM_SIZE; i++) mat_mkl_pardiso->iparm[i] = 0; 715 for (i = 0; i < IPARM_SIZE; i++) mat_mkl_pardiso->pt[i] = 0; 716 #if defined(PETSC_USE_REAL_SINGLE) 717 mat_mkl_pardiso->iparm[27] = 1; 718 #else 719 mat_mkl_pardiso->iparm[27] = 0; 720 #endif 721 /* Default options for both sym and unsym */ 722 mat_mkl_pardiso->iparm[0] = 1; /* Solver default parameters overridden with provided by iparm */ 723 mat_mkl_pardiso->iparm[1] = 2; /* Metis reordering */ 724 mat_mkl_pardiso->iparm[5] = 0; /* Write solution into x */ 725 mat_mkl_pardiso->iparm[7] = 0; /* Max number of iterative refinement steps */ 726 mat_mkl_pardiso->iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ 727 mat_mkl_pardiso->iparm[18] = -1; /* Output: Mflops for LU factorization */ 728 #if 0 729 mat_mkl_pardiso->iparm[23] = 1; /* Parallel factorization control*/ 730 #endif 731 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &match, MATSEQBAIJ, MATSEQSBAIJ, "")); 732 PetscCall(MatGetBlockSize(A, &bs)); 733 if (!match || bs == 1) { 734 mat_mkl_pardiso->iparm[34] = 1; /* Cluster Sparse Solver use C-style indexing for ia and ja arrays */ 735 mat_mkl_pardiso->n = A->rmap->N; 736 } else { 737 mat_mkl_pardiso->iparm[34] = 0; /* Cluster Sparse Solver use Fortran-style indexing for ia and ja arrays */ 738 mat_mkl_pardiso->iparm[36] = bs; 739 mat_mkl_pardiso->n = A->rmap->N / bs; 740 } 741 mat_mkl_pardiso->iparm[39] = 0; /* Input: matrix/rhs/solution stored on rank-0 */ 742 743 mat_mkl_pardiso->CleanUp = PETSC_FALSE; 744 mat_mkl_pardiso->maxfct = 1; /* Maximum number of numerical factorizations. */ 745 mat_mkl_pardiso->mnum = 1; /* Which factorization to use. */ 746 mat_mkl_pardiso->msglvl = 0; /* 0: do not print 1: Print statistical information in file */ 747 mat_mkl_pardiso->phase = -1; 748 mat_mkl_pardiso->err = 0; 749 750 mat_mkl_pardiso->nrhs = 1; 751 mat_mkl_pardiso->err = 0; 752 mat_mkl_pardiso->phase = -1; 753 754 if (ftype == MAT_FACTOR_LU) { 755 mat_mkl_pardiso->iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */ 756 mat_mkl_pardiso->iparm[10] = 1; /* Use nonsymmetric permutation and scaling MPS */ 757 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 758 } else { 759 mat_mkl_pardiso->iparm[9] = 8; /* Perturb the pivot elements with 1E-8 */ 760 mat_mkl_pardiso->iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */ 761 mat_mkl_pardiso->iparm[12] = 1; /* Switch on Maximum Weighted Matching algorithm (default for non-symmetric) */ 762 #if defined(PETSC_USE_DEBUG) 763 mat_mkl_pardiso->iparm[26] = 1; /* Matrix checker */ 764 #endif 765 } 766 PetscCall(PetscCalloc1(A->rmap->N * sizeof(INT_TYPE), &mat_mkl_pardiso->perm)); 767 mat_mkl_pardiso->schur_size = 0; 768 PetscFunctionReturn(PETSC_SUCCESS); 769 } 770 771 static PetscErrorCode MatFactorSymbolic_AIJMKL_PARDISO_Private(Mat F, Mat A, const MatFactorInfo *info) 772 { 773 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data; 774 775 PetscFunctionBegin; 776 mat_mkl_pardiso->matstruc = DIFFERENT_NONZERO_PATTERN; 777 PetscCall(MatSetFromOptions_MKL_PARDISO(F, A)); 778 /* throw away any previously computed structure */ 779 if (mat_mkl_pardiso->freeaij) { 780 PetscCall(PetscFree2(mat_mkl_pardiso->ia, mat_mkl_pardiso->ja)); 781 if (mat_mkl_pardiso->iparm[34] == 1) PetscCall(PetscFree(mat_mkl_pardiso->a)); 782 } 783 PetscCall((*mat_mkl_pardiso->Convert)(A, mat_mkl_pardiso->needsym, MAT_INITIAL_MATRIX, &mat_mkl_pardiso->freeaij, &mat_mkl_pardiso->nz, &mat_mkl_pardiso->ia, &mat_mkl_pardiso->ja, (PetscScalar **)&mat_mkl_pardiso->a)); 784 if (mat_mkl_pardiso->iparm[34] == 1) mat_mkl_pardiso->n = A->rmap->N; 785 else mat_mkl_pardiso->n = A->rmap->N / A->rmap->bs; 786 787 mat_mkl_pardiso->phase = JOB_ANALYSIS; 788 789 /* reset flops counting if requested */ 790 if (mat_mkl_pardiso->iparm[18]) mat_mkl_pardiso->iparm[18] = -1; 791 792 PetscCallPardiso(MKL_PARDISO(mat_mkl_pardiso->pt, &mat_mkl_pardiso->maxfct, &mat_mkl_pardiso->mnum, &mat_mkl_pardiso->mtype, &mat_mkl_pardiso->phase, &mat_mkl_pardiso->n, mat_mkl_pardiso->a, mat_mkl_pardiso->ia, mat_mkl_pardiso->ja, mat_mkl_pardiso->perm, 793 &mat_mkl_pardiso->nrhs, mat_mkl_pardiso->iparm, &mat_mkl_pardiso->msglvl, NULL, NULL, &mat_mkl_pardiso->err)); 794 PetscCheck(mat_mkl_pardiso->err >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error reported by MKL PARDISO: err=%" PetscInt_FMT ". Please check manual", (PetscInt)mat_mkl_pardiso->err); 795 796 mat_mkl_pardiso->CleanUp = PETSC_TRUE; 797 798 if (F->factortype == MAT_FACTOR_LU) F->ops->lufactornumeric = MatFactorNumeric_MKL_PARDISO; 799 else F->ops->choleskyfactornumeric = MatFactorNumeric_MKL_PARDISO; 800 801 F->ops->solve = MatSolve_MKL_PARDISO; 802 F->ops->solvetranspose = MatSolveTranspose_MKL_PARDISO; 803 F->ops->matsolve = MatMatSolve_MKL_PARDISO; 804 if (F->factortype == MAT_FACTOR_LU || (!PetscDefined(USE_COMPLEX) && F->factortype == MAT_FACTOR_CHOLESKY && A->spd == PETSC_BOOL3_TRUE)) { 805 F->ops->backwardsolve = MatBackwardSolve_MKL_PARDISO; 806 F->ops->forwardsolve = MatForwardSolve_MKL_PARDISO; 807 } 808 PetscFunctionReturn(PETSC_SUCCESS); 809 } 810 811 static PetscErrorCode MatLUFactorSymbolic_AIJMKL_PARDISO(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 812 { 813 PetscFunctionBegin; 814 PetscCall(MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info)); 815 PetscFunctionReturn(PETSC_SUCCESS); 816 } 817 818 #if !defined(PETSC_USE_COMPLEX) 819 static PetscErrorCode MatGetInertia_MKL_PARDISO(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 820 { 821 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data; 822 823 PetscFunctionBegin; 824 if (nneg) *nneg = mat_mkl_pardiso->iparm[22]; 825 if (npos) *npos = mat_mkl_pardiso->iparm[21]; 826 if (nzero) *nzero = F->rmap->N - (mat_mkl_pardiso->iparm[22] + mat_mkl_pardiso->iparm[21]); 827 PetscFunctionReturn(PETSC_SUCCESS); 828 } 829 #endif 830 831 static PetscErrorCode MatCholeskyFactorSymbolic_AIJMKL_PARDISO(Mat F, Mat A, IS r, const MatFactorInfo *info) 832 { 833 PetscFunctionBegin; 834 PetscCall(MatFactorSymbolic_AIJMKL_PARDISO_Private(F, A, info)); 835 F->ops->getinertia = NULL; 836 #if !defined(PETSC_USE_COMPLEX) 837 F->ops->getinertia = MatGetInertia_MKL_PARDISO; 838 #endif 839 PetscFunctionReturn(PETSC_SUCCESS); 840 } 841 842 static PetscErrorCode MatView_MKL_PARDISO(Mat A, PetscViewer viewer) 843 { 844 PetscBool isascii; 845 PetscViewerFormat format; 846 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data; 847 PetscInt i; 848 849 PetscFunctionBegin; 850 if (A->ops->solve != MatSolve_MKL_PARDISO) PetscFunctionReturn(PETSC_SUCCESS); 851 852 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 853 if (isascii) { 854 PetscCall(PetscViewerGetFormat(viewer, &format)); 855 if (format == PETSC_VIEWER_ASCII_INFO) { 856 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO run parameters:\n")); 857 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO phase: %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->phase)); 858 for (i = 1; i <= 64; i++) PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO iparm[%" PetscInt_FMT "]: %" PetscInt_FMT "\n", i, (PetscInt)mat_mkl_pardiso->iparm[i - 1])); 859 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO maxfct: %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->maxfct)); 860 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO mnum: %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->mnum)); 861 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO mtype: %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->mtype)); 862 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO n: %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->n)); 863 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO nrhs: %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->nrhs)); 864 PetscCall(PetscViewerASCIIPrintf(viewer, "MKL PARDISO msglvl: %" PetscInt_FMT "\n", (PetscInt)mat_mkl_pardiso->msglvl)); 865 } 866 } 867 PetscFunctionReturn(PETSC_SUCCESS); 868 } 869 870 static PetscErrorCode MatGetInfo_MKL_PARDISO(Mat A, MatInfoType flag, MatInfo *info) 871 { 872 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)A->data; 873 874 PetscFunctionBegin; 875 info->block_size = 1.0; 876 info->nz_used = mat_mkl_pardiso->iparm[17]; 877 info->nz_allocated = mat_mkl_pardiso->iparm[17]; 878 info->nz_unneeded = 0.0; 879 info->assemblies = 0.0; 880 info->mallocs = 0.0; 881 info->memory = 0.0; 882 info->fill_ratio_given = 0; 883 info->fill_ratio_needed = 0; 884 info->factor_mallocs = 0; 885 PetscFunctionReturn(PETSC_SUCCESS); 886 } 887 888 static PetscErrorCode MatMkl_PardisoSetCntl_MKL_PARDISO(Mat F, PetscInt icntl, PetscInt ival) 889 { 890 PetscInt backup, bs; 891 Mat_MKL_PARDISO *mat_mkl_pardiso = (Mat_MKL_PARDISO *)F->data; 892 893 PetscFunctionBegin; 894 if (icntl <= 64) { 895 mat_mkl_pardiso->iparm[icntl - 1] = ival; 896 } else { 897 if (icntl == 65) PetscSetMKL_PARDISOThreads((int)ival); 898 else if (icntl == 66) mat_mkl_pardiso->maxfct = ival; 899 else if (icntl == 67) mat_mkl_pardiso->mnum = ival; 900 else if (icntl == 68) mat_mkl_pardiso->msglvl = ival; 901 else if (icntl == 69) { 902 void *pt[IPARM_SIZE]; 903 backup = mat_mkl_pardiso->iparm[34]; 904 bs = mat_mkl_pardiso->iparm[36]; 905 mat_mkl_pardiso->mtype = ival; 906 MKL_PARDISO_INIT(pt, &mat_mkl_pardiso->mtype, mat_mkl_pardiso->iparm); 907 #if defined(PETSC_USE_REAL_SINGLE) 908 mat_mkl_pardiso->iparm[27] = 1; 909 #else 910 mat_mkl_pardiso->iparm[27] = 0; 911 #endif 912 mat_mkl_pardiso->iparm[34] = backup; 913 mat_mkl_pardiso->iparm[36] = bs; 914 } else if (icntl == 70) mat_mkl_pardiso->solve_interior = (PetscBool)!!ival; 915 } 916 PetscFunctionReturn(PETSC_SUCCESS); 917 } 918 919 /*@ 920 MatMkl_PardisoSetCntl - Set MKL PARDISO <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2023-2/onemkl-pardiso-parallel-direct-sparse-solver-iface.html> parameters 921 922 Logically Collective 923 924 Input Parameters: 925 + F - the factored matrix obtained by calling `MatGetFactor()` 926 . icntl - index of MKL PARDISO parameter 927 - ival - value of MKL PARDISO parameter 928 929 Options Database Key: 930 . -mat_mkl_pardiso_<icntl> <ival> - change the option numbered icntl to the value ival 931 932 Level: beginner 933 934 .seealso: [](ch_matrices), `Mat`, `MATSOLVERMKL_PARDISO`, `MatGetFactor()` 935 @*/ 936 PetscErrorCode MatMkl_PardisoSetCntl(Mat F, PetscInt icntl, PetscInt ival) 937 { 938 PetscFunctionBegin; 939 PetscTryMethod(F, "MatMkl_PardisoSetCntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 940 PetscFunctionReturn(PETSC_SUCCESS); 941 } 942 943 /*MC 944 MATSOLVERMKL_PARDISO - A matrix type providing direct solvers, LU, for 945 `MATSEQAIJ` matrices via the external package MKL PARDISO 946 <https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2024-0/onemkl-pardiso-parallel-direct-sparse-solver-iface.html>. 947 948 Use `-pc_type lu` `-pc_factor_mat_solver_type mkl_pardiso` to use this direct solver 949 950 Options Database Keys: 951 + -mat_mkl_pardiso_65 - Suggested number of threads to use within MKL PARDISO 952 . -mat_mkl_pardiso_66 - Maximum number of factors with identical sparsity structure that must be kept in memory at the same time 953 . -mat_mkl_pardiso_67 - Indicates the actual matrix for the solution phase 954 . -mat_mkl_pardiso_68 - Message level information, use 1 to get detailed information on the solver options 955 . -mat_mkl_pardiso_69 - Defines the matrix type. IMPORTANT: When you set this flag, iparm parameters are going to be set to the default ones for the matrix type 956 . -mat_mkl_pardiso_1 - Use default values 957 . -mat_mkl_pardiso_2 - Fill-in reducing ordering for the input matrix 958 . -mat_mkl_pardiso_4 - Preconditioned CGS/CG 959 . -mat_mkl_pardiso_5 - User permutation 960 . -mat_mkl_pardiso_6 - Write solution on x 961 . -mat_mkl_pardiso_8 - Iterative refinement step 962 . -mat_mkl_pardiso_10 - Pivoting perturbation 963 . -mat_mkl_pardiso_11 - Scaling vectors 964 . -mat_mkl_pardiso_12 - Solve with transposed or conjugate transposed matrix A 965 . -mat_mkl_pardiso_13 - Improved accuracy using (non-) symmetric weighted matching 966 . -mat_mkl_pardiso_18 - Numbers of non-zero elements 967 . -mat_mkl_pardiso_19 - Report number of floating point operations 968 . -mat_mkl_pardiso_21 - Pivoting for symmetric indefinite matrices 969 . -mat_mkl_pardiso_24 - Parallel factorization control 970 . -mat_mkl_pardiso_25 - Parallel forward/backward solve control 971 . -mat_mkl_pardiso_27 - Matrix checker 972 . -mat_mkl_pardiso_31 - Partial solve and computing selected components of the solution vectors 973 . -mat_mkl_pardiso_34 - Optimal number of threads for conditional numerical reproducibility (CNR) mode 974 - -mat_mkl_pardiso_60 - Intel MKL PARDISO mode 975 976 Level: beginner 977 978 Notes: 979 Use `-mat_mkl_pardiso_68 1` to display the number of threads the solver is using. MKL does not provide a way to directly access this 980 information. 981 982 For more information on the options check the MKL PARDISO manual 983 984 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMkl_PardisoSetCntl()`, `MATSOLVERMKL_CPARDISO` 985 M*/ 986 static PetscErrorCode MatFactorGetSolverType_mkl_pardiso(Mat A, MatSolverType *type) 987 { 988 PetscFunctionBegin; 989 *type = MATSOLVERMKL_PARDISO; 990 PetscFunctionReturn(PETSC_SUCCESS); 991 } 992 993 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mkl_pardiso(Mat A, MatFactorType ftype, Mat *F) 994 { 995 Mat B; 996 Mat_MKL_PARDISO *mat_mkl_pardiso; 997 PetscBool isSeqAIJ, isSeqBAIJ, isSeqSBAIJ; 998 999 PetscFunctionBegin; 1000 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 1001 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 1002 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 1003 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1004 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 1005 PetscCall(PetscStrallocpy("mkl_pardiso", &((PetscObject)B)->type_name)); 1006 PetscCall(MatSetUp(B)); 1007 1008 PetscCall(PetscNew(&mat_mkl_pardiso)); 1009 B->data = mat_mkl_pardiso; 1010 1011 PetscCall(MatFactorMKL_PARDISOInitialize_Private(A, ftype, mat_mkl_pardiso)); 1012 if (ftype == MAT_FACTOR_LU) { 1013 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMKL_PARDISO; 1014 B->factortype = MAT_FACTOR_LU; 1015 mat_mkl_pardiso->needsym = PETSC_FALSE; 1016 if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1017 else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1018 else { 1019 PetscCheck(!isSeqSBAIJ, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for MKL PARDISO LU factor with SEQSBAIJ format! Use MAT_FACTOR_CHOLESKY instead"); 1020 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for MKL PARDISO LU with %s format", ((PetscObject)A)->type_name); 1021 } 1022 #if defined(PETSC_USE_COMPLEX) 1023 mat_mkl_pardiso->mtype = 13; 1024 #else 1025 mat_mkl_pardiso->mtype = 11; 1026 #endif 1027 } else { 1028 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_AIJMKL_PARDISO; 1029 B->factortype = MAT_FACTOR_CHOLESKY; 1030 if (isSeqAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqaij; 1031 else if (isSeqBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqbaij; 1032 else if (isSeqSBAIJ) mat_mkl_pardiso->Convert = MatMKLPardiso_Convert_seqsbaij; 1033 else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for PARDISO CHOLESKY with %s format", ((PetscObject)A)->type_name); 1034 1035 mat_mkl_pardiso->needsym = PETSC_TRUE; 1036 #if !defined(PETSC_USE_COMPLEX) 1037 if (A->spd == PETSC_BOOL3_TRUE) mat_mkl_pardiso->mtype = 2; 1038 else mat_mkl_pardiso->mtype = -2; 1039 #else 1040 mat_mkl_pardiso->mtype = 6; 1041 PetscCheck(A->hermitian != PETSC_BOOL3_TRUE, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for MKL PARDISO CHOLESKY with Hermitian matrices! Use MAT_FACTOR_LU instead"); 1042 #endif 1043 } 1044 B->ops->destroy = MatDestroy_MKL_PARDISO; 1045 B->ops->view = MatView_MKL_PARDISO; 1046 B->ops->getinfo = MatGetInfo_MKL_PARDISO; 1047 B->factortype = ftype; 1048 B->assembled = PETSC_TRUE; 1049 1050 PetscCall(PetscFree(B->solvertype)); 1051 PetscCall(PetscStrallocpy(MATSOLVERMKL_PARDISO, &B->solvertype)); 1052 1053 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mkl_pardiso)); 1054 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MKL_PARDISO)); 1055 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMkl_PardisoSetCntl_C", MatMkl_PardisoSetCntl_MKL_PARDISO)); 1056 1057 *F = B; 1058 PetscFunctionReturn(PETSC_SUCCESS); 1059 } 1060 1061 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MKL_Pardiso(void) 1062 { 1063 PetscFunctionBegin; 1064 PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mkl_pardiso)); 1065 PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mkl_pardiso)); 1066 PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mkl_pardiso)); 1067 PetscCall(MatSolverTypeRegister(MATSOLVERMKL_PARDISO, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mkl_pardiso)); 1068 PetscFunctionReturn(PETSC_SUCCESS); 1069 } 1070