1 2 /* 3 Factorization code for BAIJ format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> 6 #include <petsc/private/kernels/blockinvert.h> 7 8 /* 9 This is used to set the numeric factorization for both LU and ILU symbolic factorization 10 */ 11 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural) 12 { 13 PetscFunctionBegin; 14 if (natural) { 15 switch (fact->rmap->bs) { 16 case 1: 17 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 18 break; 19 case 2: 20 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 21 break; 22 case 3: 23 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 24 break; 25 case 4: 26 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 27 break; 28 case 5: 29 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 30 break; 31 case 6: 32 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 33 break; 34 case 7: 35 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 36 break; 37 case 9: 38 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 39 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 40 #else 41 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 42 #endif 43 break; 44 case 15: 45 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 46 break; 47 default: 48 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 49 break; 50 } 51 } else { 52 switch (fact->rmap->bs) { 53 case 1: 54 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 55 break; 56 case 2: 57 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 58 break; 59 case 3: 60 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 61 break; 62 case 4: 63 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 64 break; 65 case 5: 66 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 67 break; 68 case 6: 69 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 70 break; 71 case 7: 72 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 73 break; 74 default: 75 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 76 break; 77 } 78 } 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural) 83 { 84 PetscFunctionBegin; 85 if (natural) { 86 switch (inA->rmap->bs) { 87 case 1: 88 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 89 break; 90 case 2: 91 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 92 break; 93 case 3: 94 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 95 break; 96 case 4: 97 #if defined(PETSC_USE_REAL_MAT_SINGLE) 98 { 99 PetscBool sse_enabled_local; 100 PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL)); 101 if (sse_enabled_local) { 102 #if defined(PETSC_HAVE_SSE) 103 int i, *AJ = a->j, nz = a->nz, n = a->mbs; 104 if (n == (unsigned short)n) { 105 unsigned short *aj = (unsigned short *)AJ; 106 for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i]; 107 108 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 109 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 110 111 PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n")); 112 } else { 113 /* Scale the column indices for easier indexing in MatSolve. */ 114 /* for (i=0;i<nz;i++) { */ 115 /* AJ[i] = AJ[i]*4; */ 116 /* } */ 117 inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 118 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 119 120 PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n")); 121 } 122 #else 123 /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 124 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable"); 125 #endif 126 } else { 127 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 128 } 129 } 130 #else 131 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 132 #endif 133 break; 134 case 5: 135 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 136 break; 137 case 6: 138 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 139 break; 140 case 7: 141 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 142 break; 143 default: 144 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 145 break; 146 } 147 } else { 148 switch (inA->rmap->bs) { 149 case 1: 150 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 151 break; 152 case 2: 153 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 154 break; 155 case 3: 156 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 157 break; 158 case 4: 159 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 160 break; 161 case 5: 162 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 163 break; 164 case 6: 165 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 166 break; 167 case 7: 168 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 169 break; 170 default: 171 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 172 break; 173 } 174 } 175 PetscFunctionReturn(PETSC_SUCCESS); 176 } 177 178 /* 179 The symbolic factorization code is identical to that for AIJ format, 180 except for very small changes since this is now a SeqBAIJ datastructure. 181 NOT good code reuse. 182 */ 183 #include <petscbt.h> 184 #include <../src/mat/utils/freespace.h> 185 186 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 187 { 188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 189 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 190 PetscBool row_identity, col_identity, both_identity; 191 IS isicol; 192 const PetscInt *r, *ic; 193 PetscInt i, *ai = a->i, *aj = a->j; 194 PetscInt *bi, *bj, *ajtmp; 195 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 196 PetscReal f; 197 PetscInt nlnk, *lnk, k, **bi_ptr; 198 PetscFreeSpaceList free_space = NULL, current_space = NULL; 199 PetscBT lnkbt; 200 PetscBool missing; 201 202 PetscFunctionBegin; 203 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 204 PetscCall(MatMissingDiagonal(A, &missing, &i)); 205 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 206 207 if (bs > 1) { /* check shifttype */ 208 PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 209 } 210 211 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 212 PetscCall(ISGetIndices(isrow, &r)); 213 PetscCall(ISGetIndices(isicol, &ic)); 214 215 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 216 PetscCall(PetscMalloc1(n + 1, &bi)); 217 PetscCall(PetscMalloc1(n + 1, &bdiag)); 218 bi[0] = bdiag[0] = 0; 219 220 /* linked list for storing column indices of the active row */ 221 nlnk = n + 1; 222 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 223 224 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 225 226 /* initial FreeSpace size is f*(ai[n]+1) */ 227 f = info->fill; 228 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 229 230 current_space = free_space; 231 232 for (i = 0; i < n; i++) { 233 /* copy previous fill into linked list */ 234 nzi = 0; 235 nnz = ai[r[i] + 1] - ai[r[i]]; 236 ajtmp = aj + ai[r[i]]; 237 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 238 nzi += nlnk; 239 240 /* add pivot rows into linked list */ 241 row = lnk[n]; 242 while (row < i) { 243 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 244 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 245 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 246 nzi += nlnk; 247 row = lnk[row]; 248 } 249 bi[i + 1] = bi[i] + nzi; 250 im[i] = nzi; 251 252 /* mark bdiag */ 253 nzbd = 0; 254 nnz = nzi; 255 k = lnk[n]; 256 while (nnz-- && k < i) { 257 nzbd++; 258 k = lnk[k]; 259 } 260 bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 261 262 /* if free space is not available, make more free space */ 263 if (current_space->local_remaining < nzi) { 264 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */ 265 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 266 reallocs++; 267 } 268 269 /* copy data into free space, then initialize lnk */ 270 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 271 272 bi_ptr[i] = current_space->array; 273 current_space->array += nzi; 274 current_space->local_used += nzi; 275 current_space->local_remaining -= nzi; 276 } 277 278 PetscCall(ISRestoreIndices(isrow, &r)); 279 PetscCall(ISRestoreIndices(isicol, &ic)); 280 281 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 282 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 283 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 284 PetscCall(PetscLLDestroy(lnk, lnkbt)); 285 PetscCall(PetscFree2(bi_ptr, im)); 286 287 /* put together the new matrix */ 288 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 289 b = (Mat_SeqBAIJ *)(B)->data; 290 291 b->free_a = PETSC_TRUE; 292 b->free_ij = PETSC_TRUE; 293 b->singlemalloc = PETSC_FALSE; 294 295 PetscCall(PetscMalloc1((bdiag[0] + 1) * bs2, &b->a)); 296 b->j = bj; 297 b->i = bi; 298 b->diag = bdiag; 299 b->free_diag = PETSC_TRUE; 300 b->ilen = NULL; 301 b->imax = NULL; 302 b->row = isrow; 303 b->col = iscol; 304 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 305 306 PetscCall(PetscObjectReference((PetscObject)isrow)); 307 PetscCall(PetscObjectReference((PetscObject)iscol)); 308 b->icol = isicol; 309 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 310 311 b->maxnz = b->nz = bdiag[0] + 1; 312 313 B->factortype = MAT_FACTOR_LU; 314 B->info.factor_mallocs = reallocs; 315 B->info.fill_ratio_given = f; 316 317 if (ai[n] != 0) { 318 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 319 } else { 320 B->info.fill_ratio_needed = 0.0; 321 } 322 #if defined(PETSC_USE_INFO) 323 if (ai[n] != 0) { 324 PetscReal af = B->info.fill_ratio_needed; 325 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 326 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 327 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 328 PetscCall(PetscInfo(A, "for best performance.\n")); 329 } else { 330 PetscCall(PetscInfo(A, "Empty matrix\n")); 331 } 332 #endif 333 334 PetscCall(ISIdentity(isrow, &row_identity)); 335 PetscCall(ISIdentity(iscol, &col_identity)); 336 337 both_identity = (PetscBool)(row_identity && col_identity); 338 339 PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity)); 340 PetscFunctionReturn(PETSC_SUCCESS); 341 } 342 343 #if 0 344 // unused 345 static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 346 { 347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 348 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 349 PetscBool row_identity, col_identity, both_identity; 350 IS isicol; 351 const PetscInt *r, *ic; 352 PetscInt i, *ai = a->i, *aj = a->j; 353 PetscInt *bi, *bj, *ajtmp; 354 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 355 PetscReal f; 356 PetscInt nlnk, *lnk, k, **bi_ptr; 357 PetscFreeSpaceList free_space = NULL, current_space = NULL; 358 PetscBT lnkbt; 359 PetscBool missing; 360 361 PetscFunctionBegin; 362 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 363 PetscCall(MatMissingDiagonal(A, &missing, &i)); 364 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 365 366 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 367 PetscCall(ISGetIndices(isrow, &r)); 368 PetscCall(ISGetIndices(isicol, &ic)); 369 370 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 371 PetscCall(PetscMalloc1(n + 1, &bi)); 372 PetscCall(PetscMalloc1(n + 1, &bdiag)); 373 374 bi[0] = bdiag[0] = 0; 375 376 /* linked list for storing column indices of the active row */ 377 nlnk = n + 1; 378 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 379 380 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 381 382 /* initial FreeSpace size is f*(ai[n]+1) */ 383 f = info->fill; 384 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 385 current_space = free_space; 386 387 for (i = 0; i < n; i++) { 388 /* copy previous fill into linked list */ 389 nzi = 0; 390 nnz = ai[r[i] + 1] - ai[r[i]]; 391 ajtmp = aj + ai[r[i]]; 392 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 393 nzi += nlnk; 394 395 /* add pivot rows into linked list */ 396 row = lnk[n]; 397 while (row < i) { 398 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 399 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 400 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 401 nzi += nlnk; 402 row = lnk[row]; 403 } 404 bi[i + 1] = bi[i] + nzi; 405 im[i] = nzi; 406 407 /* mark bdiag */ 408 nzbd = 0; 409 nnz = nzi; 410 k = lnk[n]; 411 while (nnz-- && k < i) { 412 nzbd++; 413 k = lnk[k]; 414 } 415 bdiag[i] = bi[i] + nzbd; 416 417 /* if free space is not available, make more free space */ 418 if (current_space->local_remaining < nzi) { 419 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 420 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 421 reallocs++; 422 } 423 424 /* copy data into free space, then initialize lnk */ 425 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 426 427 bi_ptr[i] = current_space->array; 428 current_space->array += nzi; 429 current_space->local_used += nzi; 430 current_space->local_remaining -= nzi; 431 } 432 #if defined(PETSC_USE_INFO) 433 if (ai[n] != 0) { 434 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 435 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 436 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 437 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 438 PetscCall(PetscInfo(A, "for best performance.\n")); 439 } else { 440 PetscCall(PetscInfo(A, "Empty matrix\n")); 441 } 442 #endif 443 444 PetscCall(ISRestoreIndices(isrow, &r)); 445 PetscCall(ISRestoreIndices(isicol, &ic)); 446 447 /* destroy list of free space and other temporary array(s) */ 448 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 449 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 450 PetscCall(PetscLLDestroy(lnk, lnkbt)); 451 PetscCall(PetscFree2(bi_ptr, im)); 452 453 /* put together the new matrix */ 454 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 455 b = (Mat_SeqBAIJ *)(B)->data; 456 457 b->free_a = PETSC_TRUE; 458 b->free_ij = PETSC_TRUE; 459 b->singlemalloc = PETSC_FALSE; 460 461 PetscCall(PetscMalloc1((bi[n] + 1) * bs2, &b->a)); 462 b->j = bj; 463 b->i = bi; 464 b->diag = bdiag; 465 b->free_diag = PETSC_TRUE; 466 b->ilen = NULL; 467 b->imax = NULL; 468 b->row = isrow; 469 b->col = iscol; 470 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 471 472 PetscCall(PetscObjectReference((PetscObject)isrow)); 473 PetscCall(PetscObjectReference((PetscObject)iscol)); 474 b->icol = isicol; 475 476 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 477 478 b->maxnz = b->nz = bi[n]; 479 480 (B)->factortype = MAT_FACTOR_LU; 481 (B)->info.factor_mallocs = reallocs; 482 (B)->info.fill_ratio_given = f; 483 484 if (ai[n] != 0) { 485 (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 486 } else { 487 (B)->info.fill_ratio_needed = 0.0; 488 } 489 490 PetscCall(ISIdentity(isrow, &row_identity)); 491 PetscCall(ISIdentity(iscol, &col_identity)); 492 493 both_identity = (PetscBool)(row_identity && col_identity); 494 495 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity)); 496 PetscFunctionReturn(PETSC_SUCCESS); 497 } 498 #endif 499