1 /* 2 Factorization code for BAIJ format. 3 */ 4 #include <../src/mat/impls/baij/seq/baij.h> 5 #include <petsc/private/kernels/blockinvert.h> 6 7 /* 8 This is used to set the numeric factorization for both LU and ILU symbolic factorization 9 */ 10 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural) 11 { 12 PetscFunctionBegin; 13 if (natural) { 14 switch (fact->rmap->bs) { 15 case 1: 16 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 17 break; 18 case 2: 19 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 20 break; 21 case 3: 22 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 23 break; 24 case 4: 25 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 26 break; 27 case 5: 28 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 29 break; 30 case 6: 31 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 32 break; 33 case 7: 34 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 35 break; 36 case 9: 37 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 38 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 39 #else 40 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 41 #endif 42 break; 43 case 15: 44 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 45 break; 46 default: 47 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 48 break; 49 } 50 } else { 51 switch (fact->rmap->bs) { 52 case 1: 53 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 54 break; 55 case 2: 56 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 57 break; 58 case 3: 59 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 60 break; 61 case 4: 62 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 63 break; 64 case 5: 65 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 66 break; 67 case 6: 68 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 69 break; 70 case 7: 71 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 72 break; 73 default: 74 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 75 break; 76 } 77 } 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural) 82 { 83 PetscFunctionBegin; 84 if (natural) { 85 switch (inA->rmap->bs) { 86 case 1: 87 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 88 break; 89 case 2: 90 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 91 break; 92 case 3: 93 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 94 break; 95 case 4: 96 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 97 break; 98 case 5: 99 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 100 break; 101 case 6: 102 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 103 break; 104 case 7: 105 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 106 break; 107 default: 108 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 109 break; 110 } 111 } else { 112 switch (inA->rmap->bs) { 113 case 1: 114 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 115 break; 116 case 2: 117 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 118 break; 119 case 3: 120 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 121 break; 122 case 4: 123 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 124 break; 125 case 5: 126 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 127 break; 128 case 6: 129 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 130 break; 131 case 7: 132 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 133 break; 134 default: 135 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 136 break; 137 } 138 } 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 /* 143 The symbolic factorization code is identical to that for AIJ format, 144 except for very small changes since this is now a SeqBAIJ datastructure. 145 NOT good code reuse. 146 */ 147 #include <petscbt.h> 148 #include <../src/mat/utils/freespace.h> 149 150 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 151 { 152 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 153 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 154 PetscBool row_identity, col_identity, both_identity; 155 IS isicol; 156 const PetscInt *r, *ic; 157 PetscInt i, *ai = a->i, *aj = a->j; 158 PetscInt *bi, *bj, *ajtmp; 159 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 160 PetscReal f; 161 PetscInt nlnk, *lnk, k, **bi_ptr; 162 PetscFreeSpaceList free_space = NULL, current_space = NULL; 163 PetscBT lnkbt; 164 PetscBool missing; 165 166 PetscFunctionBegin; 167 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 168 PetscCall(MatMissingDiagonal(A, &missing, &i)); 169 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 170 171 if (bs > 1) { /* check shifttype */ 172 PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 173 } 174 175 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 176 PetscCall(ISGetIndices(isrow, &r)); 177 PetscCall(ISGetIndices(isicol, &ic)); 178 179 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 180 PetscCall(PetscMalloc1(n + 1, &bi)); 181 PetscCall(PetscMalloc1(n + 1, &bdiag)); 182 bi[0] = bdiag[0] = 0; 183 184 /* linked list for storing column indices of the active row */ 185 nlnk = n + 1; 186 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 187 188 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 189 190 /* initial FreeSpace size is f*(ai[n]+1) */ 191 f = info->fill; 192 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 193 194 current_space = free_space; 195 196 for (i = 0; i < n; i++) { 197 /* copy previous fill into linked list */ 198 nzi = 0; 199 nnz = ai[r[i] + 1] - ai[r[i]]; 200 ajtmp = aj + ai[r[i]]; 201 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 202 nzi += nlnk; 203 204 /* add pivot rows into linked list */ 205 row = lnk[n]; 206 while (row < i) { 207 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 208 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 209 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 210 nzi += nlnk; 211 row = lnk[row]; 212 } 213 bi[i + 1] = bi[i] + nzi; 214 im[i] = nzi; 215 216 /* mark bdiag */ 217 nzbd = 0; 218 nnz = nzi; 219 k = lnk[n]; 220 while (nnz-- && k < i) { 221 nzbd++; 222 k = lnk[k]; 223 } 224 bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 225 226 /* if free space is not available, make more free space */ 227 if (current_space->local_remaining < nzi) { 228 nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */ 229 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 230 reallocs++; 231 } 232 233 /* copy data into free space, then initialize lnk */ 234 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 235 236 bi_ptr[i] = current_space->array; 237 current_space->array += nzi; 238 current_space->local_used += nzi; 239 current_space->local_remaining -= nzi; 240 } 241 242 PetscCall(ISRestoreIndices(isrow, &r)); 243 PetscCall(ISRestoreIndices(isicol, &ic)); 244 245 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 246 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 247 PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 248 PetscCall(PetscLLDestroy(lnk, lnkbt)); 249 PetscCall(PetscFree2(bi_ptr, im)); 250 251 /* put together the new matrix */ 252 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 253 b = (Mat_SeqBAIJ *)B->data; 254 255 b->free_ij = PETSC_TRUE; 256 PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a)); 257 b->free_a = PETSC_TRUE; 258 b->j = bj; 259 b->i = bi; 260 b->diag = bdiag; 261 b->free_diag = PETSC_TRUE; 262 b->ilen = NULL; 263 b->imax = NULL; 264 b->row = isrow; 265 b->col = iscol; 266 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 267 268 PetscCall(PetscObjectReference((PetscObject)isrow)); 269 PetscCall(PetscObjectReference((PetscObject)iscol)); 270 b->icol = isicol; 271 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 272 273 b->maxnz = b->nz = bdiag[0] + 1; 274 275 B->factortype = MAT_FACTOR_LU; 276 B->info.factor_mallocs = reallocs; 277 B->info.fill_ratio_given = f; 278 279 if (ai[n] != 0) { 280 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 281 } else { 282 B->info.fill_ratio_needed = 0.0; 283 } 284 #if defined(PETSC_USE_INFO) 285 if (ai[n] != 0) { 286 PetscReal af = B->info.fill_ratio_needed; 287 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 288 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 289 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 290 PetscCall(PetscInfo(A, "for best performance.\n")); 291 } else { 292 PetscCall(PetscInfo(A, "Empty matrix\n")); 293 } 294 #endif 295 296 PetscCall(ISIdentity(isrow, &row_identity)); 297 PetscCall(ISIdentity(iscol, &col_identity)); 298 299 both_identity = (PetscBool)(row_identity && col_identity); 300 301 PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity)); 302 PetscFunctionReturn(PETSC_SUCCESS); 303 } 304 305 #if 0 306 // unused 307 static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 308 { 309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 310 PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 311 PetscBool row_identity, col_identity, both_identity; 312 IS isicol; 313 const PetscInt *r, *ic; 314 PetscInt i, *ai = a->i, *aj = a->j; 315 PetscInt *bi, *bj, *ajtmp; 316 PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 317 PetscReal f; 318 PetscInt nlnk, *lnk, k, **bi_ptr; 319 PetscFreeSpaceList free_space = NULL, current_space = NULL; 320 PetscBT lnkbt; 321 PetscBool missing; 322 323 PetscFunctionBegin; 324 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 325 PetscCall(MatMissingDiagonal(A, &missing, &i)); 326 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 327 328 PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 329 PetscCall(ISGetIndices(isrow, &r)); 330 PetscCall(ISGetIndices(isicol, &ic)); 331 332 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 333 PetscCall(PetscMalloc1(n + 1, &bi)); 334 PetscCall(PetscMalloc1(n + 1, &bdiag)); 335 336 bi[0] = bdiag[0] = 0; 337 338 /* linked list for storing column indices of the active row */ 339 nlnk = n + 1; 340 PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 341 342 PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 343 344 /* initial FreeSpace size is f*(ai[n]+1) */ 345 f = info->fill; 346 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 347 current_space = free_space; 348 349 for (i = 0; i < n; i++) { 350 /* copy previous fill into linked list */ 351 nzi = 0; 352 nnz = ai[r[i] + 1] - ai[r[i]]; 353 ajtmp = aj + ai[r[i]]; 354 PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 355 nzi += nlnk; 356 357 /* add pivot rows into linked list */ 358 row = lnk[n]; 359 while (row < i) { 360 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 361 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 362 PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 363 nzi += nlnk; 364 row = lnk[row]; 365 } 366 bi[i + 1] = bi[i] + nzi; 367 im[i] = nzi; 368 369 /* mark bdiag */ 370 nzbd = 0; 371 nnz = nzi; 372 k = lnk[n]; 373 while (nnz-- && k < i) { 374 nzbd++; 375 k = lnk[k]; 376 } 377 bdiag[i] = bi[i] + nzbd; 378 379 /* if free space is not available, make more free space */ 380 if (current_space->local_remaining < nzi) { 381 nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 382 PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 383 reallocs++; 384 } 385 386 /* copy data into free space, then initialize lnk */ 387 PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 388 389 bi_ptr[i] = current_space->array; 390 current_space->array += nzi; 391 current_space->local_used += nzi; 392 current_space->local_remaining -= nzi; 393 } 394 #if defined(PETSC_USE_INFO) 395 if (ai[n] != 0) { 396 PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 397 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 398 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 399 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 400 PetscCall(PetscInfo(A, "for best performance.\n")); 401 } else { 402 PetscCall(PetscInfo(A, "Empty matrix\n")); 403 } 404 #endif 405 406 PetscCall(ISRestoreIndices(isrow, &r)); 407 PetscCall(ISRestoreIndices(isicol, &ic)); 408 409 /* destroy list of free space and other temporary array(s) */ 410 PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 411 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 412 PetscCall(PetscLLDestroy(lnk, lnkbt)); 413 PetscCall(PetscFree2(bi_ptr, im)); 414 415 /* put together the new matrix */ 416 PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 417 b = (Mat_SeqBAIJ *)B->data; 418 b->free_ij = PETSC_TRUE; 419 PetscCall(PetscShmgetAllocateArray((bi[n] + 1) * bs2,,sizeof(PetscScalar),(void **)&b->a)); 420 b->free_a = PETSC_TRUE; 421 b->j = bj; 422 b->i = bi; 423 b->diag = bdiag; 424 b->free_diag = PETSC_TRUE; 425 b->ilen = NULL; 426 b->imax = NULL; 427 b->row = isrow; 428 b->col = iscol; 429 b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 430 431 PetscCall(PetscObjectReference((PetscObject)isrow)); 432 PetscCall(PetscObjectReference((PetscObject)iscol)); 433 b->icol = isicol; 434 435 PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 436 437 b->maxnz = b->nz = bi[n]; 438 439 B->factortype = MAT_FACTOR_LU; 440 B->info.factor_mallocs = reallocs; 441 B->info.fill_ratio_given = f; 442 443 if (ai[n] != 0) { 444 B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 445 } else { 446 B->info.fill_ratio_needed = 0.0; 447 } 448 449 PetscCall(ISIdentity(isrow, &row_identity)); 450 PetscCall(ISIdentity(iscol, &col_identity)); 451 452 both_identity = (PetscBool)(row_identity && col_identity); 453 454 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity)); 455 PetscFunctionReturn(PETSC_SUCCESS); 456 } 457 #endif 458