1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petsc/private/kernels/blockinvert.h> 4 #include <petscis.h> 5 6 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 7 { 8 Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data; 9 MatScalar *dd = fact->a; 10 PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag; 11 12 PetscFunctionBegin; 13 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs); 14 15 nneg_tmp = 0; 16 npos_tmp = 0; 17 if (fi) { 18 for (i = 0; i < mbs; i++) { 19 if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++; 20 else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++; 21 fi++; 22 } 23 } else { 24 for (i = 0; i < mbs; i++) { 25 if (PetscRealPart(dd[fact->i[i]]) > 0.0) npos_tmp++; 26 else if (PetscRealPart(dd[fact->i[i]]) < 0.0) nneg_tmp++; 27 } 28 } 29 if (nneg) *nneg = nneg_tmp; 30 if (npos) *npos = npos_tmp; 31 if (nzero) *nzero = mbs - nneg_tmp - npos_tmp; 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 /* 36 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 37 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 38 */ 39 static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info) 40 { 41 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b; 42 const PetscInt *rip, *ai, *aj; 43 PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2; 44 PetscInt m, reallocs = 0, prow; 45 PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd; 46 PetscReal f = info->fill; 47 PetscBool perm_identity; 48 49 PetscFunctionBegin; 50 /* check whether perm is the identity mapping */ 51 PetscCall(ISIdentity(perm, &perm_identity)); 52 PetscCall(ISGetIndices(perm, &rip)); 53 54 if (perm_identity) { /* without permutation */ 55 a->permute = PETSC_FALSE; 56 57 ai = a->i; 58 aj = a->j; 59 } else { /* non-trivial permutation */ 60 a->permute = PETSC_TRUE; 61 62 PetscCall(MatReorderingSeqSBAIJ(A, perm)); 63 64 ai = a->inew; 65 aj = a->jnew; 66 } 67 68 /* initialization */ 69 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&iu)); 70 umax = (PetscInt)(f * ai[mbs] + 1); 71 umax += mbs + 1; 72 PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&ju)); 73 iu[0] = mbs + 1; 74 juidx = mbs + 1; /* index for ju */ 75 /* jl linked list for pivot row -- linked list for col index */ 76 PetscCall(PetscMalloc2(mbs, &jl, mbs, &q)); 77 for (i = 0; i < mbs; i++) { 78 jl[i] = mbs; 79 q[i] = 0; 80 } 81 82 /* for each row k */ 83 for (k = 0; k < mbs; k++) { 84 for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */ 85 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 86 q[k] = mbs; 87 /* initialize nonzero structure of k-th row to row rip[k] of A */ 88 jmin = ai[rip[k]] + 1; /* exclude diag[k] */ 89 jmax = ai[rip[k] + 1]; 90 for (j = jmin; j < jmax; j++) { 91 vj = rip[aj[j]]; /* col. value */ 92 if (vj > k) { 93 qm = k; 94 do { 95 m = qm; 96 qm = q[m]; 97 } while (qm < vj); 98 PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A"); 99 nzk++; 100 q[m] = vj; 101 q[vj] = qm; 102 } /* if (vj > k) */ 103 } /* for (j=jmin; j<jmax; j++) */ 104 105 /* modify nonzero structure of k-th row by computing fill-in 106 for each row i to be merged in */ 107 prow = k; 108 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 109 110 while (prow < k) { 111 /* merge row prow into k-th row */ 112 jmin = iu[prow] + 1; 113 jmax = iu[prow + 1]; 114 qm = k; 115 for (j = jmin; j < jmax; j++) { 116 vj = ju[j]; 117 do { 118 m = qm; 119 qm = q[m]; 120 } while (qm < vj); 121 if (qm != vj) { 122 nzk++; 123 q[m] = vj; 124 q[vj] = qm; 125 qm = vj; 126 } 127 } 128 prow = jl[prow]; /* next pivot row */ 129 } 130 131 /* add k to row list for first nonzero element in k-th row */ 132 if (nzk > 0) { 133 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 134 jl[k] = jl[i]; 135 jl[i] = k; 136 } 137 iu[k + 1] = iu[k] + nzk; 138 139 /* allocate more space to ju if needed */ 140 if (iu[k + 1] > umax) { 141 /* estimate how much additional space we will need */ 142 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 143 /* just double the memory each time */ 144 maxadd = umax; 145 if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2; 146 umax += maxadd; 147 148 /* allocate a longer ju */ 149 PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&jutmp)); 150 PetscCall(PetscArraycpy(jutmp, ju, iu[k])); 151 PetscCall(PetscShmgetDeallocateArray((void **)&ju)); 152 ju = jutmp; 153 reallocs++; /* count how many times we realloc */ 154 } 155 156 /* save nonzero structure of k-th row in ju */ 157 i = k; 158 while (nzk--) { 159 i = q[i]; 160 ju[juidx++] = i; 161 } 162 } 163 164 #if defined(PETSC_USE_INFO) 165 if (ai[mbs] != 0) { 166 PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 167 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 168 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 169 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 170 PetscCall(PetscInfo(A, "for best performance.\n")); 171 } else { 172 PetscCall(PetscInfo(A, "Empty matrix\n")); 173 } 174 #endif 175 176 PetscCall(ISRestoreIndices(perm, &rip)); 177 PetscCall(PetscFree2(jl, q)); 178 179 /* put together the new matrix */ 180 PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL)); 181 182 b = (Mat_SeqSBAIJ *)F->data; 183 b->free_ij = PETSC_TRUE; 184 PetscCall(PetscShmgetAllocateArray((iu[mbs] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a)); 185 b->free_a = PETSC_TRUE; 186 b->j = ju; 187 b->i = iu; 188 b->diag = NULL; 189 b->ilen = NULL; 190 b->imax = NULL; 191 b->row = perm; 192 193 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 194 195 PetscCall(PetscObjectReference((PetscObject)perm)); 196 197 b->icol = perm; 198 PetscCall(PetscObjectReference((PetscObject)perm)); 199 PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work)); 200 /* In b structure: Free imax, ilen, old a, old j. 201 Allocate idnew, solve_work, new a, new j */ 202 b->maxnz = b->nz = iu[mbs]; 203 204 F->info.factor_mallocs = reallocs; 205 F->info.fill_ratio_given = f; 206 if (ai[mbs] != 0) { 207 F->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 208 } else { 209 F->info.fill_ratio_needed = 0.0; 210 } 211 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity)); 212 PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 /* 215 Symbolic U^T*D*U factorization for SBAIJ format. 216 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure. 217 */ 218 #include <petscbt.h> 219 #include <../src/mat/utils/freespace.h> 220 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 221 { 222 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 223 Mat_SeqSBAIJ *b; 224 PetscBool perm_identity, missing; 225 PetscReal fill = info->fill; 226 const PetscInt *rip, *ai = a->i, *aj = a->j; 227 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow; 228 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 229 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 230 PetscFreeSpaceList free_space = NULL, current_space = NULL; 231 PetscBT lnkbt; 232 233 PetscFunctionBegin; 234 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 235 PetscCall(MatMissingDiagonal(A, &missing, &i)); 236 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 237 if (bs > 1) { 238 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info)); 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 /* check whether perm is the identity mapping */ 243 PetscCall(ISIdentity(perm, &perm_identity)); 244 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 245 a->permute = PETSC_FALSE; 246 PetscCall(ISGetIndices(perm, &rip)); 247 248 /* initialization */ 249 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui)); 250 PetscCall(PetscMalloc1(mbs + 1, &udiag)); 251 ui[0] = 0; 252 253 /* jl: linked list for storing indices of the pivot rows 254 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 255 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 256 for (i = 0; i < mbs; i++) { 257 jl[i] = mbs; 258 il[i] = 0; 259 } 260 261 /* create and initialize a linked list for storing column indices of the active row k */ 262 nlnk = mbs + 1; 263 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 264 265 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 266 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 267 current_space = free_space; 268 269 for (k = 0; k < mbs; k++) { /* for each active row k */ 270 /* initialize lnk by the column indices of row rip[k] of A */ 271 nzk = 0; 272 ncols = ai[k + 1] - ai[k]; 273 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k); 274 for (j = 0; j < ncols; j++) { 275 i = *(aj + ai[k] + j); 276 cols[j] = i; 277 } 278 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 279 nzk += nlnk; 280 281 /* update lnk by computing fill-in for each pivot row to be merged in */ 282 prow = jl[k]; /* 1st pivot row */ 283 284 while (prow < k) { 285 nextprow = jl[prow]; 286 /* merge prow into k-th row */ 287 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 288 jmax = ui[prow + 1]; 289 ncols = jmax - jmin; 290 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 291 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 292 nzk += nlnk; 293 294 /* update il and jl for prow */ 295 if (jmin < jmax) { 296 il[prow] = jmin; 297 j = *uj_ptr; 298 jl[prow] = jl[j]; 299 jl[j] = prow; 300 } 301 prow = nextprow; 302 } 303 304 /* if free space is not available, make more free space */ 305 if (current_space->local_remaining < nzk) { 306 i = mbs - k + 1; /* num of unfactored rows */ 307 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 308 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 309 reallocs++; 310 } 311 312 /* copy data into free space, then initialize lnk */ 313 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 314 315 /* add the k-th row into il and jl */ 316 if (nzk > 1) { 317 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 318 jl[k] = jl[i]; 319 jl[i] = k; 320 il[k] = ui[k] + 1; 321 } 322 ui_ptr[k] = current_space->array; 323 324 current_space->array += nzk; 325 current_space->local_used += nzk; 326 current_space->local_remaining -= nzk; 327 328 ui[k + 1] = ui[k] + nzk; 329 } 330 331 PetscCall(ISRestoreIndices(perm, &rip)); 332 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 333 334 /* destroy list of free space and other temporary array(s) */ 335 PetscCall(PetscShmgetAllocateArray(ui[mbs], sizeof(PetscInt), (void **)&uj)); 336 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */ 337 PetscCall(PetscLLDestroy(lnk, lnkbt)); 338 339 /* put together the new matrix in MATSEQSBAIJ format */ 340 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 341 342 b = (Mat_SeqSBAIJ *)fact->data; 343 b->free_ij = PETSC_TRUE; 344 PetscCall(PetscShmgetAllocateArray(ui[mbs], sizeof(PetscScalar), (void **)&b->a)); 345 b->free_a = PETSC_TRUE; 346 b->j = uj; 347 b->i = ui; 348 b->diag = udiag; 349 b->free_diag = PETSC_TRUE; 350 b->ilen = NULL; 351 b->imax = NULL; 352 b->row = perm; 353 b->icol = perm; 354 355 PetscCall(PetscObjectReference((PetscObject)perm)); 356 PetscCall(PetscObjectReference((PetscObject)perm)); 357 358 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 359 360 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 361 362 b->maxnz = b->nz = ui[mbs]; 363 364 fact->info.factor_mallocs = reallocs; 365 fact->info.fill_ratio_given = fill; 366 if (ai[mbs] != 0) { 367 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 368 } else { 369 fact->info.fill_ratio_needed = 0.0; 370 } 371 #if defined(PETSC_USE_INFO) 372 if (ai[mbs] != 0) { 373 PetscReal af = fact->info.fill_ratio_needed; 374 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 375 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 376 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 377 } else { 378 PetscCall(PetscInfo(A, "Empty matrix\n")); 379 } 380 #endif 381 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 382 PetscFunctionReturn(PETSC_SUCCESS); 383 } 384 385 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 386 { 387 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 388 Mat_SeqSBAIJ *b; 389 PetscBool perm_identity, missing; 390 PetscReal fill = info->fill; 391 const PetscInt *rip, *ai, *aj; 392 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d; 393 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 394 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr; 395 PetscFreeSpaceList free_space = NULL, current_space = NULL; 396 PetscBT lnkbt; 397 398 PetscFunctionBegin; 399 PetscCall(MatMissingDiagonal(A, &missing, &d)); 400 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 401 402 /* 403 This code originally uses Modified Sparse Row (MSR) storage 404 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 405 Then it is rewritten so the factor B takes seqsbaij format. However the associated 406 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 407 thus the original code in MSR format is still used for these cases. 408 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 409 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 410 */ 411 if (bs > 1) { 412 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info)); 413 PetscFunctionReturn(PETSC_SUCCESS); 414 } 415 416 /* check whether perm is the identity mapping */ 417 PetscCall(ISIdentity(perm, &perm_identity)); 418 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 419 a->permute = PETSC_FALSE; 420 ai = a->i; 421 aj = a->j; 422 PetscCall(ISGetIndices(perm, &rip)); 423 424 /* initialization */ 425 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui)); 426 ui[0] = 0; 427 428 /* jl: linked list for storing indices of the pivot rows 429 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 430 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 431 for (i = 0; i < mbs; i++) { 432 jl[i] = mbs; 433 il[i] = 0; 434 } 435 436 /* create and initialize a linked list for storing column indices of the active row k */ 437 nlnk = mbs + 1; 438 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 439 440 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 441 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 442 current_space = free_space; 443 444 for (k = 0; k < mbs; k++) { /* for each active row k */ 445 /* initialize lnk by the column indices of row rip[k] of A */ 446 nzk = 0; 447 ncols = ai[rip[k] + 1] - ai[rip[k]]; 448 for (j = 0; j < ncols; j++) { 449 i = *(aj + ai[rip[k]] + j); 450 cols[j] = rip[i]; 451 } 452 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 453 nzk += nlnk; 454 455 /* update lnk by computing fill-in for each pivot row to be merged in */ 456 prow = jl[k]; /* 1st pivot row */ 457 458 while (prow < k) { 459 nextprow = jl[prow]; 460 /* merge prow into k-th row */ 461 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 462 jmax = ui[prow + 1]; 463 ncols = jmax - jmin; 464 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 465 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 466 nzk += nlnk; 467 468 /* update il and jl for prow */ 469 if (jmin < jmax) { 470 il[prow] = jmin; 471 472 j = *uj_ptr; 473 jl[prow] = jl[j]; 474 jl[j] = prow; 475 } 476 prow = nextprow; 477 } 478 479 /* if free space is not available, make more free space */ 480 if (current_space->local_remaining < nzk) { 481 i = mbs - k + 1; /* num of unfactored rows */ 482 i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 483 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 484 reallocs++; 485 } 486 487 /* copy data into free space, then initialize lnk */ 488 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 489 490 /* add the k-th row into il and jl */ 491 if (nzk - 1 > 0) { 492 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 493 jl[k] = jl[i]; 494 jl[i] = k; 495 il[k] = ui[k] + 1; 496 } 497 ui_ptr[k] = current_space->array; 498 499 current_space->array += nzk; 500 current_space->local_used += nzk; 501 current_space->local_remaining -= nzk; 502 503 ui[k + 1] = ui[k] + nzk; 504 } 505 506 PetscCall(ISRestoreIndices(perm, &rip)); 507 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 508 509 /* destroy list of free space and other temporary array(s) */ 510 PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscInt), (void **)&uj)); 511 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 512 PetscCall(PetscLLDestroy(lnk, lnkbt)); 513 514 /* put together the new matrix in MATSEQSBAIJ format */ 515 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 516 517 b = (Mat_SeqSBAIJ *)fact->data; 518 PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscScalar), (void **)&b->a)); 519 b->free_a = PETSC_TRUE; 520 b->free_ij = PETSC_TRUE; 521 b->j = uj; 522 b->i = ui; 523 b->diag = NULL; 524 b->ilen = NULL; 525 b->imax = NULL; 526 b->row = perm; 527 528 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 529 530 PetscCall(PetscObjectReference((PetscObject)perm)); 531 b->icol = perm; 532 PetscCall(PetscObjectReference((PetscObject)perm)); 533 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 534 b->maxnz = b->nz = ui[mbs]; 535 536 fact->info.factor_mallocs = reallocs; 537 fact->info.fill_ratio_given = fill; 538 if (ai[mbs] != 0) { 539 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 540 } else { 541 fact->info.fill_ratio_needed = 0.0; 542 } 543 #if defined(PETSC_USE_INFO) 544 if (ai[mbs] != 0) { 545 PetscReal af = fact->info.fill_ratio_needed; 546 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 547 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 548 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 549 } else { 550 PetscCall(PetscInfo(A, "Empty matrix\n")); 551 } 552 #endif 553 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity)); 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) 558 { 559 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 560 IS perm = b->row; 561 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j; 562 PetscInt i, j; 563 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 564 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 565 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 566 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 567 MatScalar *work; 568 PetscInt *pivots; 569 PetscBool allowzeropivot, zeropivotdetected; 570 571 PetscFunctionBegin; 572 /* initialization */ 573 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 574 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 575 allowzeropivot = PetscNot(A->erroriffailure); 576 577 il[0] = 0; 578 for (i = 0; i < mbs; i++) jl[i] = mbs; 579 580 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 581 PetscCall(PetscMalloc1(bs, &pivots)); 582 583 PetscCall(ISGetIndices(perm, &perm_ptr)); 584 585 /* check permutation */ 586 if (!a->permute) { 587 ai = a->i; 588 aj = a->j; 589 aa = a->a; 590 } else { 591 ai = a->inew; 592 aj = a->jnew; 593 PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa)); 594 PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs])); 595 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 596 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 597 598 for (i = 0; i < mbs; i++) { 599 jmin = ai[i]; 600 jmax = ai[i + 1]; 601 for (j = jmin; j < jmax; j++) { 602 while (a2anew[j] != j) { 603 k = a2anew[j]; 604 a2anew[j] = a2anew[k]; 605 a2anew[k] = k; 606 for (k1 = 0; k1 < bs2; k1++) { 607 dk[k1] = aa[k * bs2 + k1]; 608 aa[k * bs2 + k1] = aa[j * bs2 + k1]; 609 aa[j * bs2 + k1] = dk[k1]; 610 } 611 } 612 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */ 613 if (i > aj[j]) { 614 ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */ 615 for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 616 for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */ 617 for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1]; 618 } 619 } 620 } 621 } 622 PetscCall(PetscFree(a2anew)); 623 } 624 625 /* for each row k */ 626 for (k = 0; k < mbs; k++) { 627 /*initialize k-th row with elements nonzero in row perm(k) of A */ 628 jmin = ai[perm_ptr[k]]; 629 jmax = ai[perm_ptr[k] + 1]; 630 631 ap = aa + jmin * bs2; 632 for (j = jmin; j < jmax; j++) { 633 vj = perm_ptr[aj[j]]; /* block col. index */ 634 rtmp_ptr = rtmp + vj * bs2; 635 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 636 } 637 638 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 639 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 640 i = jl[k]; /* first row to be added to k_th row */ 641 642 while (i < k) { 643 nexti = jl[i]; /* next row to be added to k_th row */ 644 645 /* compute multiplier */ 646 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 647 648 /* uik = -inv(Di)*U_bar(i,k) */ 649 diag = ba + i * bs2; 650 u = ba + ili * bs2; 651 PetscCall(PetscArrayzero(uik, bs2)); 652 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 653 654 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 655 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 656 PetscCall(PetscLogFlops(4.0 * bs * bs2)); 657 658 /* update -U(i,k) */ 659 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 660 661 /* add multiple of row i to k-th row ... */ 662 jmin = ili + 1; 663 jmax = bi[i + 1]; 664 if (jmin < jmax) { 665 for (j = jmin; j < jmax; j++) { 666 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 667 rtmp_ptr = rtmp + bj[j] * bs2; 668 u = ba + j * bs2; 669 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 670 } 671 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 672 673 /* ... add i to row list for next nonzero entry */ 674 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 675 j = bj[jmin]; 676 jl[i] = jl[j]; 677 jl[j] = i; /* update jl */ 678 } 679 i = nexti; 680 } 681 682 /* save nonzero entries in k-th row of U ... */ 683 684 /* invert diagonal block */ 685 diag = ba + k * bs2; 686 PetscCall(PetscArraycpy(diag, dk, bs2)); 687 688 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 689 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 690 691 jmin = bi[k]; 692 jmax = bi[k + 1]; 693 if (jmin < jmax) { 694 for (j = jmin; j < jmax; j++) { 695 vj = bj[j]; /* block col. index of U */ 696 u = ba + j * bs2; 697 rtmp_ptr = rtmp + vj * bs2; 698 for (k1 = 0; k1 < bs2; k1++) { 699 *u++ = *rtmp_ptr; 700 *rtmp_ptr++ = 0.0; 701 } 702 } 703 704 /* ... add k to row list for first nonzero entry in k-th row */ 705 il[k] = jmin; 706 i = bj[jmin]; 707 jl[k] = jl[i]; 708 jl[i] = k; 709 } 710 } 711 712 PetscCall(PetscFree(rtmp)); 713 PetscCall(PetscFree2(il, jl)); 714 PetscCall(PetscFree3(dk, uik, work)); 715 PetscCall(PetscFree(pivots)); 716 if (a->permute) PetscCall(PetscFree(aa)); 717 718 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 719 720 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace; 721 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace; 722 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace; 723 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace; 724 725 C->assembled = PETSC_TRUE; 726 C->preallocated = PETSC_TRUE; 727 728 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 729 PetscFunctionReturn(PETSC_SUCCESS); 730 } 731 732 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 733 { 734 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 735 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 736 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 737 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 738 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 739 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 740 MatScalar *work; 741 PetscInt *pivots; 742 PetscBool allowzeropivot, zeropivotdetected; 743 744 PetscFunctionBegin; 745 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 746 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 747 il[0] = 0; 748 for (i = 0; i < mbs; i++) jl[i] = mbs; 749 750 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 751 PetscCall(PetscMalloc1(bs, &pivots)); 752 allowzeropivot = PetscNot(A->erroriffailure); 753 754 ai = a->i; 755 aj = a->j; 756 aa = a->a; 757 758 /* for each row k */ 759 for (k = 0; k < mbs; k++) { 760 /*initialize k-th row with elements nonzero in row k of A */ 761 jmin = ai[k]; 762 jmax = ai[k + 1]; 763 ap = aa + jmin * bs2; 764 for (j = jmin; j < jmax; j++) { 765 vj = aj[j]; /* block col. index */ 766 rtmp_ptr = rtmp + vj * bs2; 767 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 768 } 769 770 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 771 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 772 i = jl[k]; /* first row to be added to k_th row */ 773 774 while (i < k) { 775 nexti = jl[i]; /* next row to be added to k_th row */ 776 777 /* compute multiplier */ 778 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 779 780 /* uik = -inv(Di)*U_bar(i,k) */ 781 diag = ba + i * bs2; 782 u = ba + ili * bs2; 783 PetscCall(PetscArrayzero(uik, bs2)); 784 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 785 786 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 787 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 788 PetscCall(PetscLogFlops(2.0 * bs * bs2)); 789 790 /* update -U(i,k) */ 791 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 792 793 /* add multiple of row i to k-th row ... */ 794 jmin = ili + 1; 795 jmax = bi[i + 1]; 796 if (jmin < jmax) { 797 for (j = jmin; j < jmax; j++) { 798 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 799 rtmp_ptr = rtmp + bj[j] * bs2; 800 u = ba + j * bs2; 801 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 802 } 803 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 804 805 /* ... add i to row list for next nonzero entry */ 806 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 807 j = bj[jmin]; 808 jl[i] = jl[j]; 809 jl[j] = i; /* update jl */ 810 } 811 i = nexti; 812 } 813 814 /* save nonzero entries in k-th row of U ... */ 815 816 /* invert diagonal block */ 817 diag = ba + k * bs2; 818 PetscCall(PetscArraycpy(diag, dk, bs2)); 819 820 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 821 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 822 823 jmin = bi[k]; 824 jmax = bi[k + 1]; 825 if (jmin < jmax) { 826 for (j = jmin; j < jmax; j++) { 827 vj = bj[j]; /* block col. index of U */ 828 u = ba + j * bs2; 829 rtmp_ptr = rtmp + vj * bs2; 830 for (k1 = 0; k1 < bs2; k1++) { 831 *u++ = *rtmp_ptr; 832 *rtmp_ptr++ = 0.0; 833 } 834 } 835 836 /* ... add k to row list for first nonzero entry in k-th row */ 837 il[k] = jmin; 838 i = bj[jmin]; 839 jl[k] = jl[i]; 840 jl[i] = k; 841 } 842 } 843 844 PetscCall(PetscFree(rtmp)); 845 PetscCall(PetscFree2(il, jl)); 846 PetscCall(PetscFree3(dk, uik, work)); 847 PetscCall(PetscFree(pivots)); 848 849 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 850 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 851 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 852 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 853 C->assembled = PETSC_TRUE; 854 C->preallocated = PETSC_TRUE; 855 856 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 857 PetscFunctionReturn(PETSC_SUCCESS); 858 } 859 860 /* 861 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 862 Version for blocks 2 by 2. 863 */ 864 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info) 865 { 866 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 867 IS perm = b->row; 868 const PetscInt *ai, *aj, *perm_ptr; 869 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 870 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 871 MatScalar *ba = b->a, *aa, *ap; 872 MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4]; 873 PetscReal shift = info->shiftamount; 874 PetscBool allowzeropivot, zeropivotdetected; 875 876 PetscFunctionBegin; 877 allowzeropivot = PetscNot(A->erroriffailure); 878 879 /* initialization */ 880 /* il and jl record the first nonzero element in each row of the accessing 881 window U(0:k, k:mbs-1). 882 jl: list of rows to be added to uneliminated rows 883 i>= k: jl(i) is the first row to be added to row i 884 i< k: jl(i) is the row following row i in some list of rows 885 jl(i) = mbs indicates the end of a list 886 il(i): points to the first nonzero element in columns k,...,mbs-1 of 887 row i of U */ 888 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 889 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 890 il[0] = 0; 891 for (i = 0; i < mbs; i++) jl[i] = mbs; 892 893 PetscCall(ISGetIndices(perm, &perm_ptr)); 894 895 /* check permutation */ 896 if (!a->permute) { 897 ai = a->i; 898 aj = a->j; 899 aa = a->a; 900 } else { 901 ai = a->inew; 902 aj = a->jnew; 903 PetscCall(PetscMalloc1(4 * ai[mbs], &aa)); 904 PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs])); 905 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 906 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 907 908 for (i = 0; i < mbs; i++) { 909 jmin = ai[i]; 910 jmax = ai[i + 1]; 911 for (j = jmin; j < jmax; j++) { 912 while (a2anew[j] != j) { 913 k = a2anew[j]; 914 a2anew[j] = a2anew[k]; 915 a2anew[k] = k; 916 for (k1 = 0; k1 < 4; k1++) { 917 dk[k1] = aa[k * 4 + k1]; 918 aa[k * 4 + k1] = aa[j * 4 + k1]; 919 aa[j * 4 + k1] = dk[k1]; 920 } 921 } 922 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */ 923 if (i > aj[j]) { 924 ap = aa + j * 4; /* ptr to the beginning of the block */ 925 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 926 ap[1] = ap[2]; 927 ap[2] = dk[1]; 928 } 929 } 930 } 931 PetscCall(PetscFree(a2anew)); 932 } 933 934 /* for each row k */ 935 for (k = 0; k < mbs; k++) { 936 /*initialize k-th row with elements nonzero in row perm(k) of A */ 937 jmin = ai[perm_ptr[k]]; 938 jmax = ai[perm_ptr[k] + 1]; 939 ap = aa + jmin * 4; 940 for (j = jmin; j < jmax; j++) { 941 vj = perm_ptr[aj[j]]; /* block col. index */ 942 rtmp_ptr = rtmp + vj * 4; 943 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 944 } 945 946 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 947 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 948 i = jl[k]; /* first row to be added to k_th row */ 949 950 while (i < k) { 951 nexti = jl[i]; /* next row to be added to k_th row */ 952 953 /* compute multiplier */ 954 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 955 956 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 957 diag = ba + i * 4; 958 u = ba + ili * 4; 959 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 960 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 961 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 962 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 963 964 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 965 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 966 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 967 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 968 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 969 970 PetscCall(PetscLogFlops(16.0 * 2.0)); 971 972 /* update -U(i,k): ba[ili] = uik */ 973 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 974 975 /* add multiple of row i to k-th row ... */ 976 jmin = ili + 1; 977 jmax = bi[i + 1]; 978 if (jmin < jmax) { 979 for (j = jmin; j < jmax; j++) { 980 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 981 rtmp_ptr = rtmp + bj[j] * 4; 982 u = ba + j * 4; 983 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 984 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 985 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 986 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 987 } 988 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 989 990 /* ... add i to row list for next nonzero entry */ 991 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 992 j = bj[jmin]; 993 jl[i] = jl[j]; 994 jl[j] = i; /* update jl */ 995 } 996 i = nexti; 997 } 998 999 /* save nonzero entries in k-th row of U ... */ 1000 1001 /* invert diagonal block */ 1002 diag = ba + k * 4; 1003 PetscCall(PetscArraycpy(diag, dk, 4)); 1004 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1005 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1006 1007 jmin = bi[k]; 1008 jmax = bi[k + 1]; 1009 if (jmin < jmax) { 1010 for (j = jmin; j < jmax; j++) { 1011 vj = bj[j]; /* block col. index of U */ 1012 u = ba + j * 4; 1013 rtmp_ptr = rtmp + vj * 4; 1014 for (k1 = 0; k1 < 4; k1++) { 1015 *u++ = *rtmp_ptr; 1016 *rtmp_ptr++ = 0.0; 1017 } 1018 } 1019 1020 /* ... add k to row list for first nonzero entry in k-th row */ 1021 il[k] = jmin; 1022 i = bj[jmin]; 1023 jl[k] = jl[i]; 1024 jl[i] = k; 1025 } 1026 } 1027 1028 PetscCall(PetscFree(rtmp)); 1029 PetscCall(PetscFree2(il, jl)); 1030 if (a->permute) PetscCall(PetscFree(aa)); 1031 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 1032 1033 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace; 1034 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace; 1035 C->assembled = PETSC_TRUE; 1036 C->preallocated = PETSC_TRUE; 1037 1038 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } 1041 1042 /* 1043 Version for when blocks are 2 by 2 Using natural ordering 1044 */ 1045 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 1046 { 1047 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1048 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 1049 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 1050 MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8]; 1051 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 1052 PetscReal shift = info->shiftamount; 1053 PetscBool allowzeropivot, zeropivotdetected; 1054 1055 PetscFunctionBegin; 1056 allowzeropivot = PetscNot(A->erroriffailure); 1057 1058 /* initialization */ 1059 /* il and jl record the first nonzero element in each row of the accessing 1060 window U(0:k, k:mbs-1). 1061 jl: list of rows to be added to uneliminated rows 1062 i>= k: jl(i) is the first row to be added to row i 1063 i< k: jl(i) is the row following row i in some list of rows 1064 jl(i) = mbs indicates the end of a list 1065 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1066 row i of U */ 1067 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 1068 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1069 il[0] = 0; 1070 for (i = 0; i < mbs; i++) jl[i] = mbs; 1071 1072 ai = a->i; 1073 aj = a->j; 1074 aa = a->a; 1075 1076 /* for each row k */ 1077 for (k = 0; k < mbs; k++) { 1078 /*initialize k-th row with elements nonzero in row k of A */ 1079 jmin = ai[k]; 1080 jmax = ai[k + 1]; 1081 ap = aa + jmin * 4; 1082 for (j = jmin; j < jmax; j++) { 1083 vj = aj[j]; /* block col. index */ 1084 rtmp_ptr = rtmp + vj * 4; 1085 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 1086 } 1087 1088 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1089 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 1090 i = jl[k]; /* first row to be added to k_th row */ 1091 1092 while (i < k) { 1093 nexti = jl[i]; /* next row to be added to k_th row */ 1094 1095 /* compute multiplier */ 1096 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1097 1098 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1099 diag = ba + i * 4; 1100 u = ba + ili * 4; 1101 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 1102 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 1103 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 1104 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 1105 1106 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1107 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 1108 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 1109 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 1110 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 1111 1112 PetscCall(PetscLogFlops(16.0 * 2.0)); 1113 1114 /* update -U(i,k): ba[ili] = uik */ 1115 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 1116 1117 /* add multiple of row i to k-th row ... */ 1118 jmin = ili + 1; 1119 jmax = bi[i + 1]; 1120 if (jmin < jmax) { 1121 for (j = jmin; j < jmax; j++) { 1122 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1123 rtmp_ptr = rtmp + bj[j] * 4; 1124 u = ba + j * 4; 1125 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 1126 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 1127 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 1128 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 1129 } 1130 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 1131 1132 /* ... add i to row list for next nonzero entry */ 1133 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1134 j = bj[jmin]; 1135 jl[i] = jl[j]; 1136 jl[j] = i; /* update jl */ 1137 } 1138 i = nexti; 1139 } 1140 1141 /* save nonzero entries in k-th row of U ... */ 1142 1143 /* invert diagonal block */ 1144 diag = ba + k * 4; 1145 PetscCall(PetscArraycpy(diag, dk, 4)); 1146 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1147 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1148 1149 jmin = bi[k]; 1150 jmax = bi[k + 1]; 1151 if (jmin < jmax) { 1152 for (j = jmin; j < jmax; j++) { 1153 vj = bj[j]; /* block col. index of U */ 1154 u = ba + j * 4; 1155 rtmp_ptr = rtmp + vj * 4; 1156 for (k1 = 0; k1 < 4; k1++) { 1157 *u++ = *rtmp_ptr; 1158 *rtmp_ptr++ = 0.0; 1159 } 1160 } 1161 1162 /* ... add k to row list for first nonzero entry in k-th row */ 1163 il[k] = jmin; 1164 i = bj[jmin]; 1165 jl[k] = jl[i]; 1166 jl[i] = k; 1167 } 1168 } 1169 1170 PetscCall(PetscFree(rtmp)); 1171 PetscCall(PetscFree2(il, jl)); 1172 1173 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1174 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1175 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1176 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1177 C->assembled = PETSC_TRUE; 1178 C->preallocated = PETSC_TRUE; 1179 1180 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1181 PetscFunctionReturn(PETSC_SUCCESS); 1182 } 1183 1184 /* 1185 Numeric U^T*D*U factorization for SBAIJ format. 1186 Version for blocks are 1 by 1. 1187 */ 1188 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) 1189 { 1190 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1191 IS ip = b->row; 1192 const PetscInt *ai, *aj, *rip; 1193 PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol; 1194 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1195 MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi; 1196 PetscReal rs; 1197 FactorShiftCtx sctx; 1198 1199 PetscFunctionBegin; 1200 /* MatPivotSetUp(): initialize shift context sctx */ 1201 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1202 1203 PetscCall(ISGetIndices(ip, &rip)); 1204 if (!a->permute) { 1205 ai = a->i; 1206 aj = a->j; 1207 aa = a->a; 1208 } else { 1209 ai = a->inew; 1210 aj = a->jnew; 1211 nz = ai[mbs]; 1212 PetscCall(PetscMalloc1(nz, &aa)); 1213 a2anew = a->a2anew; 1214 bval = a->a; 1215 for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++); 1216 } 1217 1218 /* initialization */ 1219 /* il and jl record the first nonzero element in each row of the accessing 1220 window U(0:k, k:mbs-1). 1221 jl: list of rows to be added to uneliminated rows 1222 i>= k: jl(i) is the first row to be added to row i 1223 i< k: jl(i) is the row following row i in some list of rows 1224 jl(i) = mbs indicates the end of a list 1225 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1226 row i of U */ 1227 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 1228 1229 do { 1230 sctx.newshift = PETSC_FALSE; 1231 il[0] = 0; 1232 for (i = 0; i < mbs; i++) { 1233 rtmp[i] = 0.0; 1234 jl[i] = mbs; 1235 } 1236 1237 for (k = 0; k < mbs; k++) { 1238 /*initialize k-th row by the perm[k]-th row of A */ 1239 jmin = ai[rip[k]]; 1240 jmax = ai[rip[k] + 1]; 1241 bval = ba + bi[k]; 1242 for (j = jmin; j < jmax; j++) { 1243 col = rip[aj[j]]; 1244 rtmp[col] = aa[j]; 1245 *bval++ = 0.0; /* for in-place factorization */ 1246 } 1247 1248 /* shift the diagonal of the matrix */ 1249 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1250 1251 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1252 dk = rtmp[k]; 1253 i = jl[k]; /* first row to be added to k_th row */ 1254 1255 while (i < k) { 1256 nexti = jl[i]; /* next row to be added to k_th row */ 1257 1258 /* compute multiplier, update diag(k) and U(i,k) */ 1259 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1260 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 1261 dk += uikdi * ba[ili]; 1262 ba[ili] = uikdi; /* -U(i,k) */ 1263 1264 /* add multiple of row i to k-th row */ 1265 jmin = ili + 1; 1266 jmax = bi[i + 1]; 1267 if (jmin < jmax) { 1268 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1269 PetscCall(PetscLogFlops(2.0 * (jmax - jmin))); 1270 1271 /* update il and jl for row i */ 1272 il[i] = jmin; 1273 j = bj[jmin]; 1274 jl[i] = jl[j]; 1275 jl[j] = i; 1276 } 1277 i = nexti; 1278 } 1279 1280 /* shift the diagonals when zero pivot is detected */ 1281 /* compute rs=sum of abs(off-diagonal) */ 1282 rs = 0.0; 1283 jmin = bi[k] + 1; 1284 nz = bi[k + 1] - jmin; 1285 if (nz) { 1286 bcol = bj + jmin; 1287 while (nz--) { 1288 rs += PetscAbsScalar(rtmp[*bcol]); 1289 bcol++; 1290 } 1291 } 1292 1293 sctx.rs = rs; 1294 sctx.pv = dk; 1295 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1296 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1297 dk = sctx.pv; 1298 1299 /* copy data into U(k,:) */ 1300 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 1301 jmin = bi[k] + 1; 1302 jmax = bi[k + 1]; 1303 if (jmin < jmax) { 1304 for (j = jmin; j < jmax; j++) { 1305 col = bj[j]; 1306 ba[j] = rtmp[col]; 1307 rtmp[col] = 0.0; 1308 } 1309 /* add the k-th row into il and jl */ 1310 il[k] = jmin; 1311 i = bj[jmin]; 1312 jl[k] = jl[i]; 1313 jl[i] = k; 1314 } 1315 } 1316 } while (sctx.newshift); 1317 PetscCall(PetscFree3(rtmp, il, jl)); 1318 if (a->permute) PetscCall(PetscFree(aa)); 1319 1320 PetscCall(ISRestoreIndices(ip, &rip)); 1321 1322 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 1323 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1324 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 1325 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 1326 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 1327 C->assembled = PETSC_TRUE; 1328 C->preallocated = PETSC_TRUE; 1329 1330 PetscCall(PetscLogFlops(C->rmap->N)); 1331 if (sctx.nshift) { 1332 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1333 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1334 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1335 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1336 } 1337 } 1338 PetscFunctionReturn(PETSC_SUCCESS); 1339 } 1340 1341 /* 1342 Version for when blocks are 1 by 1 Using natural ordering under new datastructure 1343 Modified from MatCholeskyFactorNumeric_SeqAIJ() 1344 */ 1345 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) 1346 { 1347 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1348 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data; 1349 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1350 PetscInt *ai = a->i, *aj = a->j, *ajtmp; 1351 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1352 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 1353 FactorShiftCtx sctx; 1354 PetscReal rs; 1355 MatScalar d, *v; 1356 1357 PetscFunctionBegin; 1358 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 1359 1360 /* MatPivotSetUp(): initialize shift context sctx */ 1361 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1362 1363 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1364 sctx.shift_top = info->zeropivot; 1365 1366 PetscCall(PetscArrayzero(rtmp, mbs)); 1367 1368 for (i = 0; i < mbs; i++) { 1369 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1370 d = (aa)[a->diag[i]]; 1371 rtmp[i] += -PetscRealPart(d); /* diagonal entry */ 1372 ajtmp = aj + ai[i] + 1; /* exclude diagonal */ 1373 v = aa + ai[i] + 1; 1374 nz = ai[i + 1] - ai[i] - 1; 1375 for (j = 0; j < nz; j++) { 1376 rtmp[i] += PetscAbsScalar(v[j]); 1377 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]); 1378 } 1379 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]); 1380 } 1381 sctx.shift_top *= 1.1; 1382 sctx.nshift_max = 5; 1383 sctx.shift_lo = 0.; 1384 sctx.shift_hi = 1.; 1385 } 1386 1387 /* allocate working arrays 1388 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1389 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1390 */ 1391 do { 1392 sctx.newshift = PETSC_FALSE; 1393 1394 for (i = 0; i < mbs; i++) c2r[i] = mbs; 1395 if (mbs) il[0] = 0; 1396 1397 for (k = 0; k < mbs; k++) { 1398 /* zero rtmp */ 1399 nz = bi[k + 1] - bi[k]; 1400 bjtmp = bj + bi[k]; 1401 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1402 1403 /* load in initial unfactored row */ 1404 bval = ba + bi[k]; 1405 jmin = ai[k]; 1406 jmax = ai[k + 1]; 1407 for (j = jmin; j < jmax; j++) { 1408 col = aj[j]; 1409 rtmp[col] = aa[j]; 1410 *bval++ = 0.0; /* for in-place factorization */ 1411 } 1412 /* shift the diagonal of the matrix: ZeropivotApply() */ 1413 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1414 1415 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1416 dk = rtmp[k]; 1417 i = c2r[k]; /* first row to be added to k_th row */ 1418 1419 while (i < k) { 1420 nexti = c2r[i]; /* next row to be added to k_th row */ 1421 1422 /* compute multiplier, update diag(k) and U(i,k) */ 1423 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1424 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 1425 dk += uikdi * ba[ili]; /* update diag[k] */ 1426 ba[ili] = uikdi; /* -U(i,k) */ 1427 1428 /* add multiple of row i to k-th row */ 1429 jmin = ili + 1; 1430 jmax = bi[i + 1]; 1431 if (jmin < jmax) { 1432 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1433 /* update il and c2r for row i */ 1434 il[i] = jmin; 1435 j = bj[jmin]; 1436 c2r[i] = c2r[j]; 1437 c2r[j] = i; 1438 } 1439 i = nexti; 1440 } 1441 1442 /* copy data into U(k,:) */ 1443 rs = 0.0; 1444 jmin = bi[k]; 1445 jmax = bi[k + 1] - 1; 1446 if (jmin < jmax) { 1447 for (j = jmin; j < jmax; j++) { 1448 col = bj[j]; 1449 ba[j] = rtmp[col]; 1450 rs += PetscAbsScalar(ba[j]); 1451 } 1452 /* add the k-th row into il and c2r */ 1453 il[k] = jmin; 1454 i = bj[jmin]; 1455 c2r[k] = c2r[i]; 1456 c2r[i] = k; 1457 } 1458 1459 sctx.rs = rs; 1460 sctx.pv = dk; 1461 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 1462 if (sctx.newshift) break; 1463 dk = sctx.pv; 1464 1465 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 1466 } 1467 } while (sctx.newshift); 1468 1469 PetscCall(PetscFree3(rtmp, il, c2r)); 1470 1471 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1472 B->ops->solves = MatSolves_SeqSBAIJ_1; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1474 B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering; 1475 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1476 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1477 1478 B->assembled = PETSC_TRUE; 1479 B->preallocated = PETSC_TRUE; 1480 1481 PetscCall(PetscLogFlops(B->rmap->n)); 1482 1483 /* MatPivotView() */ 1484 if (sctx.nshift) { 1485 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1486 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1487 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1488 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1489 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 1490 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 1491 } 1492 } 1493 PetscFunctionReturn(PETSC_SUCCESS); 1494 } 1495 1496 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) 1497 { 1498 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1499 PetscInt i, j, mbs = a->mbs; 1500 PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 1501 PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz; 1502 MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval; 1503 PetscReal rs; 1504 FactorShiftCtx sctx; 1505 1506 PetscFunctionBegin; 1507 /* MatPivotSetUp(): initialize shift context sctx */ 1508 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1509 1510 /* initialization */ 1511 /* il and jl record the first nonzero element in each row of the accessing 1512 window U(0:k, k:mbs-1). 1513 jl: list of rows to be added to uneliminated rows 1514 i>= k: jl(i) is the first row to be added to row i 1515 i< k: jl(i) is the row following row i in some list of rows 1516 jl(i) = mbs indicates the end of a list 1517 il(i): points to the first nonzero element in U(i,k:mbs-1) 1518 */ 1519 PetscCall(PetscMalloc1(mbs, &rtmp)); 1520 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1521 1522 do { 1523 sctx.newshift = PETSC_FALSE; 1524 il[0] = 0; 1525 for (i = 0; i < mbs; i++) { 1526 rtmp[i] = 0.0; 1527 jl[i] = mbs; 1528 } 1529 1530 for (k = 0; k < mbs; k++) { 1531 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1532 nz = ai[k + 1] - ai[k]; 1533 acol = aj + ai[k]; 1534 aval = aa + ai[k]; 1535 bval = ba + bi[k]; 1536 while (nz--) { 1537 rtmp[*acol++] = *aval++; 1538 *bval++ = 0.0; /* for in-place factorization */ 1539 } 1540 1541 /* shift the diagonal of the matrix */ 1542 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1543 1544 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1545 dk = rtmp[k]; 1546 i = jl[k]; /* first row to be added to k_th row */ 1547 1548 while (i < k) { 1549 nexti = jl[i]; /* next row to be added to k_th row */ 1550 /* compute multiplier, update D(k) and U(i,k) */ 1551 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1552 uikdi = -ba[ili] * ba[bi[i]]; 1553 dk += uikdi * ba[ili]; 1554 ba[ili] = uikdi; /* -U(i,k) */ 1555 1556 /* add multiple of row i to k-th row ... */ 1557 jmin = ili + 1; 1558 nz = bi[i + 1] - jmin; 1559 if (nz > 0) { 1560 bcol = bj + jmin; 1561 bval = ba + jmin; 1562 PetscCall(PetscLogFlops(2.0 * nz)); 1563 while (nz--) rtmp[*bcol++] += uikdi * (*bval++); 1564 1565 /* update il and jl for i-th row */ 1566 il[i] = jmin; 1567 j = bj[jmin]; 1568 jl[i] = jl[j]; 1569 jl[j] = i; 1570 } 1571 i = nexti; 1572 } 1573 1574 /* shift the diagonals when zero pivot is detected */ 1575 /* compute rs=sum of abs(off-diagonal) */ 1576 rs = 0.0; 1577 jmin = bi[k] + 1; 1578 nz = bi[k + 1] - jmin; 1579 if (nz) { 1580 bcol = bj + jmin; 1581 while (nz--) { 1582 rs += PetscAbsScalar(rtmp[*bcol]); 1583 bcol++; 1584 } 1585 } 1586 1587 sctx.rs = rs; 1588 sctx.pv = dk; 1589 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1590 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1591 dk = sctx.pv; 1592 1593 /* copy data into U(k,:) */ 1594 ba[bi[k]] = 1.0 / dk; 1595 jmin = bi[k] + 1; 1596 nz = bi[k + 1] - jmin; 1597 if (nz) { 1598 bcol = bj + jmin; 1599 bval = ba + jmin; 1600 while (nz--) { 1601 *bval++ = rtmp[*bcol]; 1602 rtmp[*bcol++] = 0.0; 1603 } 1604 /* add k-th row into il and jl */ 1605 il[k] = jmin; 1606 i = bj[jmin]; 1607 jl[k] = jl[i]; 1608 jl[i] = k; 1609 } 1610 } /* end of for (k = 0; k<mbs; k++) */ 1611 } while (sctx.newshift); 1612 PetscCall(PetscFree(rtmp)); 1613 PetscCall(PetscFree2(il, jl)); 1614 1615 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1616 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1617 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1618 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1619 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1620 1621 C->assembled = PETSC_TRUE; 1622 C->preallocated = PETSC_TRUE; 1623 1624 PetscCall(PetscLogFlops(C->rmap->N)); 1625 if (sctx.nshift) { 1626 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1627 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1628 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1629 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1630 } 1631 } 1632 PetscFunctionReturn(PETSC_SUCCESS); 1633 } 1634 1635 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info) 1636 { 1637 Mat C; 1638 1639 PetscFunctionBegin; 1640 PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C)); 1641 PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info)); 1642 PetscCall(MatCholeskyFactorNumeric(C, A, info)); 1643 1644 A->ops->solve = C->ops->solve; 1645 A->ops->solvetranspose = C->ops->solvetranspose; 1646 1647 PetscCall(MatHeaderMerge(A, &C)); 1648 PetscFunctionReturn(PETSC_SUCCESS); 1649 } 1650