1 /* 2 Factorization code for BAIJ format. 3 */ 4 #include <../src/mat/impls/baij/seq/baij.h> 5 #include <petsc/private/kernels/blockinvert.h> 6 7 /* 8 This is used to set the numeric factorization for both LU and ILU symbolic factorization 9 */ 10 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural) 11 { 12 PetscFunctionBegin; 13 if (natural) { 14 switch (fact->rmap->bs) { 15 case 1: 16 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 17 break; 18 case 2: 19 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 20 break; 21 case 3: 22 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 23 break; 24 case 4: 25 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 26 break; 27 case 5: 28 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 29 break; 30 case 6: 31 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 32 break; 33 case 7: 34 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 35 break; 36 case 9: 37 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 38 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 39 #else 40 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 41 #endif 42 break; 43 case 15: 44 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 45 break; 46 default: 47 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 48 break; 49 } 50 } else { 51 switch (fact->rmap->bs) { 52 case 1: 53 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 54 break; 55 case 2: 56 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 57 break; 58 case 3: 59 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 60 break; 61 case 4: 62 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 63 break; 64 case 5: 65 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 66 break; 67 case 6: 68 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 69 break; 70 case 7: 71 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 72 break; 73 default: 74 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 75 break; 76 } 77 } 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural) 82 { 83 PetscFunctionBegin; 84 if (natural) { 85 switch (inA->rmap->bs) { 86 case 1: 87 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 88 break; 89 case 2: 90 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 91 break; 92 case 3: 93 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 94 break; 95 case 4: 96 #if defined(PETSC_USE_REAL_MAT_SINGLE) 97 { 98 PetscBool sse_enabled_local; 99 PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL)); 100 if (sse_enabled_local) { 101 #if defined(PETSC_HAVE_SSE) 102 int i, *AJ = a->j, nz = a->nz, n = a->mbs; 103 if (n == (unsigned short)n) { 104 unsigned short *aj = (unsigned short *)AJ; 105 for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i]; 106 107 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 108 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 109 110 PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n")); 111 } else { 112 /* Scale the column indices for easier indexing in MatSolve. */ 113 /* for (i=0;i<nz;i++) { */ 114 /* AJ[i] = AJ[i]*4; */ 115 /* } */ 116 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 117 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 118 119 PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n")); 120 } 121 #else 122 /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 123 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable"); 124 #endif 125 } else { 126 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 127 } 128 } 129 #else 130 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 131 #endif 132 break; 133 case 5: 134 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 135 break; 136 case 6: 137 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 138 break; 139 case 7: 140 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 141 break; 142 default: 143 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 144 break; 145 } 146 } else { 147 switch (inA->rmap->bs) { 148 case 1: 149 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 150 break; 151 case 2: 152 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 153 break; 154 case 3: 155 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 156 break; 157 case 4: 158 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 159 break; 160 case 5: 161 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 162 break; 163 case 6: 164 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 165 break; 166 case 7: 167 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 168 break; 169 default: 170 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 171 break; 172 } 173 } 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 /* 178 The symbolic factorization code is identical to that for AIJ format, 179 except for very small changes since this is now a SeqBAIJ datastructure. 180 NOT good code reuse. 181 */ 182 #include <petscbt.h> 183 #include <../src/mat/utils/freespace.h> 184 185 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 186 { 187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 188 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 189 PetscBool row_identity, col_identity, both_identity; 190 IS isicol; 191 const PetscInt *r, *ic; 192 PetscInt i, *ai = a->i, *aj = a->j; 193 PetscInt *bi, *bj, *ajtmp; 194 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 195 PetscReal f; 196 PetscInt nlnk, *lnk, k, **bi_ptr; 197 PetscFreeSpaceList free_space = NULL, current_space = NULL; 198 PetscBT lnkbt; 199 PetscBool missing; 200 201 PetscFunctionBegin; 202 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 203 PetscCall(MatMissingDiagonal(A, &missing, &i)); 204 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 205 206 if (bs > 1) { /* check shifttype */ 207 PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 208 } 209 210 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 211 PetscCall(ISGetIndices(isrow, &r)); 212 PetscCall(ISGetIndices(isicol, &ic)); 213 214 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 215 PetscCall(PetscMalloc1(n + 1, &bi)); 216 PetscCall(PetscMalloc1(n + 1, &bdiag)); 217 bi[0] = bdiag[0] = 0; 218 219 /* linked list for storing column indices of the active row */ 220 nlnk = n + 1; 221 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 222 223 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 224 225 /* initial FreeSpace size is f*(ai[n]+1) */ 226 f = info->fill; 227 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 228 229 current_space = free_space; 230 231 for (i = 0; i < n; i++) { 232 /* copy previous fill into linked list */ 233 nzi = 0; 234 nnz = ai[r[i] + 1] - ai[r[i]]; 235 ajtmp = aj + ai[r[i]]; 236 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 237 nzi += nlnk; 238 239 /* add pivot rows into linked list */ 240 row = lnk[n]; 241 while (row < i) { 242 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 243 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 244 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 245 nzi += nlnk; 246 row = lnk[row]; 247 } 248 bi[i + 1] = bi[i] + nzi; 249 im[i] = nzi; 250 251 /* mark bdiag */ 252 nzbd = 0; 253 nnz = nzi; 254 k = lnk[n]; 255 while (nnz-- && k < i) { 256 nzbd++; 257 k = lnk[k]; 258 } 259 bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 260 261 /* if free space is not available, make more free space */ 262 if (current_space->local_remaining < nzi) { 263 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */ 264 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 265 reallocs++; 266 } 267 268 /* copy data into free space, then initialize lnk */ 269 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 270 271 bi_ptr[i] = current_space->array; 272 current_space->array += nzi; 273 current_space->local_used += nzi; 274 current_space->local_remaining -= nzi; 275 } 276 277 PetscCall(ISRestoreIndices(isrow, &r)); 278 PetscCall(ISRestoreIndices(isicol, &ic)); 279 280 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 281 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 282 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 283 PetscCall(PetscLLDestroy(lnk, lnkbt)); 284 PetscCall(PetscFree2(bi_ptr, im)); 285 286 /* put together the new matrix */ 287 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 288 b = (Mat_SeqBAIJ *)(B)->data; 289 290 b->free_a = PETSC_TRUE; 291 b->free_ij = PETSC_TRUE; 292 b->singlemalloc = PETSC_FALSE; 293 294 PetscCall(PetscMalloc1((bdiag[0] + 1) * bs2, &b->a)); 295 b->j = bj; 296 b->i = bi; 297 b->diag = bdiag; 298 b->free_diag = PETSC_TRUE; 299 b->ilen = NULL; 300 b->imax = NULL; 301 b->row = isrow; 302 b->col = iscol; 303 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 304 305 PetscCall(PetscObjectReference((PetscObject)isrow)); 306 PetscCall(PetscObjectReference((PetscObject)iscol)); 307 b->icol = isicol; 308 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 309 310 b->maxnz = b->nz = bdiag[0] + 1; 311 312 B->factortype = MAT_FACTOR_LU; 313 B->info.factor_mallocs = reallocs; 314 B->info.fill_ratio_given = f; 315 316 if (ai[n] != 0) { 317 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 318 } else { 319 B->info.fill_ratio_needed = 0.0; 320 } 321 #if defined(PETSC_USE_INFO) 322 if (ai[n] != 0) { 323 PetscReal af = B->info.fill_ratio_needed; 324 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 325 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 326 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 327 PetscCall(PetscInfo(A, "for best performance.\n")); 328 } else { 329 PetscCall(PetscInfo(A, "Empty matrix\n")); 330 } 331 #endif 332 333 PetscCall(ISIdentity(isrow, &row_identity)); 334 PetscCall(ISIdentity(iscol, &col_identity)); 335 336 both_identity = (PetscBool)(row_identity && col_identity); 337 338 PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity)); 339 PetscFunctionReturn(PETSC_SUCCESS); 340 } 341 342 #if 0 343 // unused 344 static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 345 { 346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 347 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 348 PetscBool row_identity, col_identity, both_identity; 349 IS isicol; 350 const PetscInt *r, *ic; 351 PetscInt i, *ai = a->i, *aj = a->j; 352 PetscInt *bi, *bj, *ajtmp; 353 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 354 PetscReal f; 355 PetscInt nlnk, *lnk, k, **bi_ptr; 356 PetscFreeSpaceList free_space = NULL, current_space = NULL; 357 PetscBT lnkbt; 358 PetscBool missing; 359 360 PetscFunctionBegin; 361 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 362 PetscCall(MatMissingDiagonal(A, &missing, &i)); 363 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 364 365 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 366 PetscCall(ISGetIndices(isrow, &r)); 367 PetscCall(ISGetIndices(isicol, &ic)); 368 369 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 370 PetscCall(PetscMalloc1(n + 1, &bi)); 371 PetscCall(PetscMalloc1(n + 1, &bdiag)); 372 373 bi[0] = bdiag[0] = 0; 374 375 /* linked list for storing column indices of the active row */ 376 nlnk = n + 1; 377 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 378 379 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 380 381 /* initial FreeSpace size is f*(ai[n]+1) */ 382 f = info->fill; 383 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 384 current_space = free_space; 385 386 for (i = 0; i < n; i++) { 387 /* copy previous fill into linked list */ 388 nzi = 0; 389 nnz = ai[r[i] + 1] - ai[r[i]]; 390 ajtmp = aj + ai[r[i]]; 391 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 392 nzi += nlnk; 393 394 /* add pivot rows into linked list */ 395 row = lnk[n]; 396 while (row < i) { 397 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 398 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 399 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 400 nzi += nlnk; 401 row = lnk[row]; 402 } 403 bi[i + 1] = bi[i] + nzi; 404 im[i] = nzi; 405 406 /* mark bdiag */ 407 nzbd = 0; 408 nnz = nzi; 409 k = lnk[n]; 410 while (nnz-- && k < i) { 411 nzbd++; 412 k = lnk[k]; 413 } 414 bdiag[i] = bi[i] + nzbd; 415 416 /* if free space is not available, make more free space */ 417 if (current_space->local_remaining < nzi) { 418 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 419 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 420 reallocs++; 421 } 422 423 /* copy data into free space, then initialize lnk */ 424 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 425 426 bi_ptr[i] = current_space->array; 427 current_space->array += nzi; 428 current_space->local_used += nzi; 429 current_space->local_remaining -= nzi; 430 } 431 #if defined(PETSC_USE_INFO) 432 if (ai[n] != 0) { 433 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 434 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 435 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 436 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 437 PetscCall(PetscInfo(A, "for best performance.\n")); 438 } else { 439 PetscCall(PetscInfo(A, "Empty matrix\n")); 440 } 441 #endif 442 443 PetscCall(ISRestoreIndices(isrow, &r)); 444 PetscCall(ISRestoreIndices(isicol, &ic)); 445 446 /* destroy list of free space and other temporary array(s) */ 447 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 448 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 449 PetscCall(PetscLLDestroy(lnk, lnkbt)); 450 PetscCall(PetscFree2(bi_ptr, im)); 451 452 /* put together the new matrix */ 453 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 454 b = (Mat_SeqBAIJ *)(B)->data; 455 456 b->free_a = PETSC_TRUE; 457 b->free_ij = PETSC_TRUE; 458 b->singlemalloc = PETSC_FALSE; 459 460 PetscCall(PetscMalloc1((bi[n] + 1) * bs2, &b->a)); 461 b->j = bj; 462 b->i = bi; 463 b->diag = bdiag; 464 b->free_diag = PETSC_TRUE; 465 b->ilen = NULL; 466 b->imax = NULL; 467 b->row = isrow; 468 b->col = iscol; 469 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 470 471 PetscCall(PetscObjectReference((PetscObject)isrow)); 472 PetscCall(PetscObjectReference((PetscObject)iscol)); 473 b->icol = isicol; 474 475 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 476 477 b->maxnz = b->nz = bi[n]; 478 479 (B)->factortype = MAT_FACTOR_LU; 480 (B)->info.factor_mallocs = reallocs; 481 (B)->info.fill_ratio_given = f; 482 483 if (ai[n] != 0) { 484 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 485 } else { 486 (B)->info.fill_ratio_needed = 0.0; 487 } 488 489 PetscCall(ISIdentity(isrow, &row_identity)); 490 PetscCall(ISIdentity(iscol, &col_identity)); 491 492 both_identity = (PetscBool)(row_identity && col_identity); 493 494 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity)); 495 PetscFunctionReturn(PETSC_SUCCESS); 496 } 497 #endif 498