1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petsc/private/kernels/blockinvert.h> 4 #include <petscis.h> 5 6 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 7 { 8 Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data; 9 MatScalar *dd = fact->a; 10 PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag; 11 12 PetscFunctionBegin; 13 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs); 14 15 nneg_tmp = 0; 16 npos_tmp = 0; 17 if (fi) { 18 for (i = 0; i < mbs; i++) { 19 if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++; 20 else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++; 21 fi++; 22 } 23 } else { 24 for (i = 0; i < mbs; i++) { 25 if (PetscRealPart(dd[fact->i[i]]) > 0.0) npos_tmp++; 26 else if (PetscRealPart(dd[fact->i[i]]) < 0.0) nneg_tmp++; 27 } 28 } 29 if (nneg) *nneg = nneg_tmp; 30 if (npos) *npos = npos_tmp; 31 if (nzero) *nzero = mbs - nneg_tmp - npos_tmp; 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 /* 36 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 37 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 38 */ 39 static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info) 40 { 41 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b; 42 const PetscInt *rip, *ai, *aj; 43 PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2; 44 PetscInt m, reallocs = 0, prow; 45 PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd; 46 PetscReal f = info->fill; 47 PetscBool perm_identity; 48 49 PetscFunctionBegin; 50 /* check whether perm is the identity mapping */ 51 PetscCall(ISIdentity(perm, &perm_identity)); 52 PetscCall(ISGetIndices(perm, &rip)); 53 54 if (perm_identity) { /* without permutation */ 55 a->permute = PETSC_FALSE; 56 57 ai = a->i; 58 aj = a->j; 59 } else { /* non-trivial permutation */ 60 a->permute = PETSC_TRUE; 61 62 PetscCall(MatReorderingSeqSBAIJ(A, perm)); 63 64 ai = a->inew; 65 aj = a->jnew; 66 } 67 68 /* initialization */ 69 PetscCall(PetscMalloc1(mbs + 1, &iu)); 70 umax = (PetscInt)(f * ai[mbs] + 1); 71 umax += mbs + 1; 72 PetscCall(PetscMalloc1(umax, &ju)); 73 iu[0] = mbs + 1; 74 juidx = mbs + 1; /* index for ju */ 75 /* jl linked list for pivot row -- linked list for col index */ 76 PetscCall(PetscMalloc2(mbs, &jl, mbs, &q)); 77 for (i = 0; i < mbs; i++) { 78 jl[i] = mbs; 79 q[i] = 0; 80 } 81 82 /* for each row k */ 83 for (k = 0; k < mbs; k++) { 84 for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */ 85 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 86 q[k] = mbs; 87 /* initialize nonzero structure of k-th row to row rip[k] of A */ 88 jmin = ai[rip[k]] + 1; /* exclude diag[k] */ 89 jmax = ai[rip[k] + 1]; 90 for (j = jmin; j < jmax; j++) { 91 vj = rip[aj[j]]; /* col. value */ 92 if (vj > k) { 93 qm = k; 94 do { 95 m = qm; 96 qm = q[m]; 97 } while (qm < vj); 98 PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A"); 99 nzk++; 100 q[m] = vj; 101 q[vj] = qm; 102 } /* if (vj > k) */ 103 } /* for (j=jmin; j<jmax; j++) */ 104 105 /* modify nonzero structure of k-th row by computing fill-in 106 for each row i to be merged in */ 107 prow = k; 108 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 109 110 while (prow < k) { 111 /* merge row prow into k-th row */ 112 jmin = iu[prow] + 1; 113 jmax = iu[prow + 1]; 114 qm = k; 115 for (j = jmin; j < jmax; j++) { 116 vj = ju[j]; 117 do { 118 m = qm; 119 qm = q[m]; 120 } while (qm < vj); 121 if (qm != vj) { 122 nzk++; 123 q[m] = vj; 124 q[vj] = qm; 125 qm = vj; 126 } 127 } 128 prow = jl[prow]; /* next pivot row */ 129 } 130 131 /* add k to row list for first nonzero element in k-th row */ 132 if (nzk > 0) { 133 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 134 jl[k] = jl[i]; 135 jl[i] = k; 136 } 137 iu[k + 1] = iu[k] + nzk; 138 139 /* allocate more space to ju if needed */ 140 if (iu[k + 1] > umax) { 141 /* estimate how much additional space we will need */ 142 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 143 /* just double the memory each time */ 144 maxadd = umax; 145 if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2; 146 umax += maxadd; 147 148 /* allocate a longer ju */ 149 PetscCall(PetscMalloc1(umax, &jutmp)); 150 PetscCall(PetscArraycpy(jutmp, ju, iu[k])); 151 PetscCall(PetscFree(ju)); 152 ju = jutmp; 153 reallocs++; /* count how many times we realloc */ 154 } 155 156 /* save nonzero structure of k-th row in ju */ 157 i = k; 158 while (nzk--) { 159 i = q[i]; 160 ju[juidx++] = i; 161 } 162 } 163 164 #if defined(PETSC_USE_INFO) 165 if (ai[mbs] != 0) { 166 PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 167 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 168 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 169 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 170 PetscCall(PetscInfo(A, "for best performance.\n")); 171 } else { 172 PetscCall(PetscInfo(A, "Empty matrix\n")); 173 } 174 #endif 175 176 PetscCall(ISRestoreIndices(perm, &rip)); 177 PetscCall(PetscFree2(jl, q)); 178 179 /* put together the new matrix */ 180 PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL)); 181 182 b = (Mat_SeqSBAIJ *)(F)->data; 183 b->singlemalloc = PETSC_FALSE; 184 b->free_a = PETSC_TRUE; 185 b->free_ij = PETSC_TRUE; 186 187 PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a)); 188 b->j = ju; 189 b->i = iu; 190 b->diag = NULL; 191 b->ilen = NULL; 192 b->imax = NULL; 193 b->row = perm; 194 195 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 196 197 PetscCall(PetscObjectReference((PetscObject)perm)); 198 199 b->icol = perm; 200 PetscCall(PetscObjectReference((PetscObject)perm)); 201 PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work)); 202 /* In b structure: Free imax, ilen, old a, old j. 203 Allocate idnew, solve_work, new a, new j */ 204 b->maxnz = b->nz = iu[mbs]; 205 206 (F)->info.factor_mallocs = reallocs; 207 (F)->info.fill_ratio_given = f; 208 if (ai[mbs] != 0) { 209 (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 210 } else { 211 (F)->info.fill_ratio_needed = 0.0; 212 } 213 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity)); 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 /* 217 Symbolic U^T*D*U factorization for SBAIJ format. 218 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure. 219 */ 220 #include <petscbt.h> 221 #include <../src/mat/utils/freespace.h> 222 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 223 { 224 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 225 Mat_SeqSBAIJ *b; 226 PetscBool perm_identity, missing; 227 PetscReal fill = info->fill; 228 const PetscInt *rip, *ai = a->i, *aj = a->j; 229 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow; 230 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 231 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 232 PetscFreeSpaceList free_space = NULL, current_space = NULL; 233 PetscBT lnkbt; 234 235 PetscFunctionBegin; 236 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 237 PetscCall(MatMissingDiagonal(A, &missing, &i)); 238 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 239 if (bs > 1) { 240 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info)); 241 PetscFunctionReturn(PETSC_SUCCESS); 242 } 243 244 /* check whether perm is the identity mapping */ 245 PetscCall(ISIdentity(perm, &perm_identity)); 246 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 247 a->permute = PETSC_FALSE; 248 PetscCall(ISGetIndices(perm, &rip)); 249 250 /* initialization */ 251 PetscCall(PetscMalloc1(mbs + 1, &ui)); 252 PetscCall(PetscMalloc1(mbs + 1, &udiag)); 253 ui[0] = 0; 254 255 /* jl: linked list for storing indices of the pivot rows 256 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 257 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 258 for (i = 0; i < mbs; i++) { 259 jl[i] = mbs; 260 il[i] = 0; 261 } 262 263 /* create and initialize a linked list for storing column indices of the active row k */ 264 nlnk = mbs + 1; 265 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 266 267 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 268 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 269 current_space = free_space; 270 271 for (k = 0; k < mbs; k++) { /* for each active row k */ 272 /* initialize lnk by the column indices of row rip[k] of A */ 273 nzk = 0; 274 ncols = ai[k + 1] - ai[k]; 275 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k); 276 for (j = 0; j < ncols; j++) { 277 i = *(aj + ai[k] + j); 278 cols[j] = i; 279 } 280 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 281 nzk += nlnk; 282 283 /* update lnk by computing fill-in for each pivot row to be merged in */ 284 prow = jl[k]; /* 1st pivot row */ 285 286 while (prow < k) { 287 nextprow = jl[prow]; 288 /* merge prow into k-th row */ 289 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 290 jmax = ui[prow + 1]; 291 ncols = jmax - jmin; 292 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 293 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 294 nzk += nlnk; 295 296 /* update il and jl for prow */ 297 if (jmin < jmax) { 298 il[prow] = jmin; 299 j = *uj_ptr; 300 jl[prow] = jl[j]; 301 jl[j] = prow; 302 } 303 prow = nextprow; 304 } 305 306 /* if free space is not available, make more free space */ 307 if (current_space->local_remaining < nzk) { 308 i = mbs - k + 1; /* num of unfactored rows */ 309 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 310 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 311 reallocs++; 312 } 313 314 /* copy data into free space, then initialize lnk */ 315 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 316 317 /* add the k-th row into il and jl */ 318 if (nzk > 1) { 319 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 320 jl[k] = jl[i]; 321 jl[i] = k; 322 il[k] = ui[k] + 1; 323 } 324 ui_ptr[k] = current_space->array; 325 326 current_space->array += nzk; 327 current_space->local_used += nzk; 328 current_space->local_remaining -= nzk; 329 330 ui[k + 1] = ui[k] + nzk; 331 } 332 333 PetscCall(ISRestoreIndices(perm, &rip)); 334 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 335 336 /* destroy list of free space and other temporary array(s) */ 337 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 338 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */ 339 PetscCall(PetscLLDestroy(lnk, lnkbt)); 340 341 /* put together the new matrix in MATSEQSBAIJ format */ 342 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 343 344 b = (Mat_SeqSBAIJ *)fact->data; 345 b->singlemalloc = PETSC_FALSE; 346 b->free_a = PETSC_TRUE; 347 b->free_ij = PETSC_TRUE; 348 349 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 350 351 b->j = uj; 352 b->i = ui; 353 b->diag = udiag; 354 b->free_diag = PETSC_TRUE; 355 b->ilen = NULL; 356 b->imax = NULL; 357 b->row = perm; 358 b->icol = perm; 359 360 PetscCall(PetscObjectReference((PetscObject)perm)); 361 PetscCall(PetscObjectReference((PetscObject)perm)); 362 363 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 364 365 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 366 367 b->maxnz = b->nz = ui[mbs]; 368 369 fact->info.factor_mallocs = reallocs; 370 fact->info.fill_ratio_given = fill; 371 if (ai[mbs] != 0) { 372 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 373 } else { 374 fact->info.fill_ratio_needed = 0.0; 375 } 376 #if defined(PETSC_USE_INFO) 377 if (ai[mbs] != 0) { 378 PetscReal af = fact->info.fill_ratio_needed; 379 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 380 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 381 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 382 } else { 383 PetscCall(PetscInfo(A, "Empty matrix\n")); 384 } 385 #endif 386 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 387 PetscFunctionReturn(PETSC_SUCCESS); 388 } 389 390 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 391 { 392 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 393 Mat_SeqSBAIJ *b; 394 PetscBool perm_identity, missing; 395 PetscReal fill = info->fill; 396 const PetscInt *rip, *ai, *aj; 397 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d; 398 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 399 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr; 400 PetscFreeSpaceList free_space = NULL, current_space = NULL; 401 PetscBT lnkbt; 402 403 PetscFunctionBegin; 404 PetscCall(MatMissingDiagonal(A, &missing, &d)); 405 PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d); 406 407 /* 408 This code originally uses Modified Sparse Row (MSR) storage 409 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 410 Then it is rewritten so the factor B takes seqsbaij format. However the associated 411 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 412 thus the original code in MSR format is still used for these cases. 413 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 414 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 415 */ 416 if (bs > 1) { 417 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info)); 418 PetscFunctionReturn(PETSC_SUCCESS); 419 } 420 421 /* check whether perm is the identity mapping */ 422 PetscCall(ISIdentity(perm, &perm_identity)); 423 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 424 a->permute = PETSC_FALSE; 425 ai = a->i; 426 aj = a->j; 427 PetscCall(ISGetIndices(perm, &rip)); 428 429 /* initialization */ 430 PetscCall(PetscMalloc1(mbs + 1, &ui)); 431 ui[0] = 0; 432 433 /* jl: linked list for storing indices of the pivot rows 434 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 435 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 436 for (i = 0; i < mbs; i++) { 437 jl[i] = mbs; 438 il[i] = 0; 439 } 440 441 /* create and initialize a linked list for storing column indices of the active row k */ 442 nlnk = mbs + 1; 443 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 444 445 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 446 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 447 current_space = free_space; 448 449 for (k = 0; k < mbs; k++) { /* for each active row k */ 450 /* initialize lnk by the column indices of row rip[k] of A */ 451 nzk = 0; 452 ncols = ai[rip[k] + 1] - ai[rip[k]]; 453 for (j = 0; j < ncols; j++) { 454 i = *(aj + ai[rip[k]] + j); 455 cols[j] = rip[i]; 456 } 457 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 458 nzk += nlnk; 459 460 /* update lnk by computing fill-in for each pivot row to be merged in */ 461 prow = jl[k]; /* 1st pivot row */ 462 463 while (prow < k) { 464 nextprow = jl[prow]; 465 /* merge prow into k-th row */ 466 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 467 jmax = ui[prow + 1]; 468 ncols = jmax - jmin; 469 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 470 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 471 nzk += nlnk; 472 473 /* update il and jl for prow */ 474 if (jmin < jmax) { 475 il[prow] = jmin; 476 477 j = *uj_ptr; 478 jl[prow] = jl[j]; 479 jl[j] = prow; 480 } 481 prow = nextprow; 482 } 483 484 /* if free space is not available, make more free space */ 485 if (current_space->local_remaining < nzk) { 486 i = mbs - k + 1; /* num of unfactored rows */ 487 i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 488 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 489 reallocs++; 490 } 491 492 /* copy data into free space, then initialize lnk */ 493 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 494 495 /* add the k-th row into il and jl */ 496 if (nzk - 1 > 0) { 497 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 498 jl[k] = jl[i]; 499 jl[i] = k; 500 il[k] = ui[k] + 1; 501 } 502 ui_ptr[k] = current_space->array; 503 504 current_space->array += nzk; 505 current_space->local_used += nzk; 506 current_space->local_remaining -= nzk; 507 508 ui[k + 1] = ui[k] + nzk; 509 } 510 511 PetscCall(ISRestoreIndices(perm, &rip)); 512 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 513 514 /* destroy list of free space and other temporary array(s) */ 515 PetscCall(PetscMalloc1(ui[mbs] + 1, &uj)); 516 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 517 PetscCall(PetscLLDestroy(lnk, lnkbt)); 518 519 /* put together the new matrix in MATSEQSBAIJ format */ 520 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 521 522 b = (Mat_SeqSBAIJ *)fact->data; 523 b->singlemalloc = PETSC_FALSE; 524 b->free_a = PETSC_TRUE; 525 b->free_ij = PETSC_TRUE; 526 527 PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a)); 528 529 b->j = uj; 530 b->i = ui; 531 b->diag = NULL; 532 b->ilen = NULL; 533 b->imax = NULL; 534 b->row = perm; 535 536 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 537 538 PetscCall(PetscObjectReference((PetscObject)perm)); 539 b->icol = perm; 540 PetscCall(PetscObjectReference((PetscObject)perm)); 541 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 542 b->maxnz = b->nz = ui[mbs]; 543 544 fact->info.factor_mallocs = reallocs; 545 fact->info.fill_ratio_given = fill; 546 if (ai[mbs] != 0) { 547 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 548 } else { 549 fact->info.fill_ratio_needed = 0.0; 550 } 551 #if defined(PETSC_USE_INFO) 552 if (ai[mbs] != 0) { 553 PetscReal af = fact->info.fill_ratio_needed; 554 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 555 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 556 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 557 } else { 558 PetscCall(PetscInfo(A, "Empty matrix\n")); 559 } 560 #endif 561 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity)); 562 PetscFunctionReturn(PETSC_SUCCESS); 563 } 564 565 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) 566 { 567 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 568 IS perm = b->row; 569 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j; 570 PetscInt i, j; 571 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 572 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 573 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 574 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 575 MatScalar *work; 576 PetscInt *pivots; 577 PetscBool allowzeropivot, zeropivotdetected; 578 579 PetscFunctionBegin; 580 /* initialization */ 581 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 582 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 583 allowzeropivot = PetscNot(A->erroriffailure); 584 585 il[0] = 0; 586 for (i = 0; i < mbs; i++) jl[i] = mbs; 587 588 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 589 PetscCall(PetscMalloc1(bs, &pivots)); 590 591 PetscCall(ISGetIndices(perm, &perm_ptr)); 592 593 /* check permutation */ 594 if (!a->permute) { 595 ai = a->i; 596 aj = a->j; 597 aa = a->a; 598 } else { 599 ai = a->inew; 600 aj = a->jnew; 601 PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa)); 602 PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs])); 603 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 604 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 605 606 for (i = 0; i < mbs; i++) { 607 jmin = ai[i]; 608 jmax = ai[i + 1]; 609 for (j = jmin; j < jmax; j++) { 610 while (a2anew[j] != j) { 611 k = a2anew[j]; 612 a2anew[j] = a2anew[k]; 613 a2anew[k] = k; 614 for (k1 = 0; k1 < bs2; k1++) { 615 dk[k1] = aa[k * bs2 + k1]; 616 aa[k * bs2 + k1] = aa[j * bs2 + k1]; 617 aa[j * bs2 + k1] = dk[k1]; 618 } 619 } 620 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */ 621 if (i > aj[j]) { 622 ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */ 623 for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 624 for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */ 625 for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1]; 626 } 627 } 628 } 629 } 630 PetscCall(PetscFree(a2anew)); 631 } 632 633 /* for each row k */ 634 for (k = 0; k < mbs; k++) { 635 /*initialize k-th row with elements nonzero in row perm(k) of A */ 636 jmin = ai[perm_ptr[k]]; 637 jmax = ai[perm_ptr[k] + 1]; 638 639 ap = aa + jmin * bs2; 640 for (j = jmin; j < jmax; j++) { 641 vj = perm_ptr[aj[j]]; /* block col. index */ 642 rtmp_ptr = rtmp + vj * bs2; 643 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 644 } 645 646 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 647 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 648 i = jl[k]; /* first row to be added to k_th row */ 649 650 while (i < k) { 651 nexti = jl[i]; /* next row to be added to k_th row */ 652 653 /* compute multiplier */ 654 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 655 656 /* uik = -inv(Di)*U_bar(i,k) */ 657 diag = ba + i * bs2; 658 u = ba + ili * bs2; 659 PetscCall(PetscArrayzero(uik, bs2)); 660 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 661 662 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 663 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 664 PetscCall(PetscLogFlops(4.0 * bs * bs2)); 665 666 /* update -U(i,k) */ 667 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 668 669 /* add multiple of row i to k-th row ... */ 670 jmin = ili + 1; 671 jmax = bi[i + 1]; 672 if (jmin < jmax) { 673 for (j = jmin; j < jmax; j++) { 674 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 675 rtmp_ptr = rtmp + bj[j] * bs2; 676 u = ba + j * bs2; 677 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 678 } 679 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 680 681 /* ... add i to row list for next nonzero entry */ 682 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 683 j = bj[jmin]; 684 jl[i] = jl[j]; 685 jl[j] = i; /* update jl */ 686 } 687 i = nexti; 688 } 689 690 /* save nonzero entries in k-th row of U ... */ 691 692 /* invert diagonal block */ 693 diag = ba + k * bs2; 694 PetscCall(PetscArraycpy(diag, dk, bs2)); 695 696 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 697 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 698 699 jmin = bi[k]; 700 jmax = bi[k + 1]; 701 if (jmin < jmax) { 702 for (j = jmin; j < jmax; j++) { 703 vj = bj[j]; /* block col. index of U */ 704 u = ba + j * bs2; 705 rtmp_ptr = rtmp + vj * bs2; 706 for (k1 = 0; k1 < bs2; k1++) { 707 *u++ = *rtmp_ptr; 708 *rtmp_ptr++ = 0.0; 709 } 710 } 711 712 /* ... add k to row list for first nonzero entry in k-th row */ 713 il[k] = jmin; 714 i = bj[jmin]; 715 jl[k] = jl[i]; 716 jl[i] = k; 717 } 718 } 719 720 PetscCall(PetscFree(rtmp)); 721 PetscCall(PetscFree2(il, jl)); 722 PetscCall(PetscFree3(dk, uik, work)); 723 PetscCall(PetscFree(pivots)); 724 if (a->permute) PetscCall(PetscFree(aa)); 725 726 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 727 728 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace; 729 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace; 730 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace; 731 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace; 732 733 C->assembled = PETSC_TRUE; 734 C->preallocated = PETSC_TRUE; 735 736 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 741 { 742 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 743 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 744 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 745 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 746 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 747 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 748 MatScalar *work; 749 PetscInt *pivots; 750 PetscBool allowzeropivot, zeropivotdetected; 751 752 PetscFunctionBegin; 753 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 754 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 755 il[0] = 0; 756 for (i = 0; i < mbs; i++) jl[i] = mbs; 757 758 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 759 PetscCall(PetscMalloc1(bs, &pivots)); 760 allowzeropivot = PetscNot(A->erroriffailure); 761 762 ai = a->i; 763 aj = a->j; 764 aa = a->a; 765 766 /* for each row k */ 767 for (k = 0; k < mbs; k++) { 768 /*initialize k-th row with elements nonzero in row k of A */ 769 jmin = ai[k]; 770 jmax = ai[k + 1]; 771 ap = aa + jmin * bs2; 772 for (j = jmin; j < jmax; j++) { 773 vj = aj[j]; /* block col. index */ 774 rtmp_ptr = rtmp + vj * bs2; 775 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 776 } 777 778 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 779 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 780 i = jl[k]; /* first row to be added to k_th row */ 781 782 while (i < k) { 783 nexti = jl[i]; /* next row to be added to k_th row */ 784 785 /* compute multiplier */ 786 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 787 788 /* uik = -inv(Di)*U_bar(i,k) */ 789 diag = ba + i * bs2; 790 u = ba + ili * bs2; 791 PetscCall(PetscArrayzero(uik, bs2)); 792 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 793 794 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 795 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 796 PetscCall(PetscLogFlops(2.0 * bs * bs2)); 797 798 /* update -U(i,k) */ 799 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 800 801 /* add multiple of row i to k-th row ... */ 802 jmin = ili + 1; 803 jmax = bi[i + 1]; 804 if (jmin < jmax) { 805 for (j = jmin; j < jmax; j++) { 806 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 807 rtmp_ptr = rtmp + bj[j] * bs2; 808 u = ba + j * bs2; 809 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 810 } 811 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 812 813 /* ... add i to row list for next nonzero entry */ 814 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 815 j = bj[jmin]; 816 jl[i] = jl[j]; 817 jl[j] = i; /* update jl */ 818 } 819 i = nexti; 820 } 821 822 /* save nonzero entries in k-th row of U ... */ 823 824 /* invert diagonal block */ 825 diag = ba + k * bs2; 826 PetscCall(PetscArraycpy(diag, dk, bs2)); 827 828 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 829 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 830 831 jmin = bi[k]; 832 jmax = bi[k + 1]; 833 if (jmin < jmax) { 834 for (j = jmin; j < jmax; j++) { 835 vj = bj[j]; /* block col. index of U */ 836 u = ba + j * bs2; 837 rtmp_ptr = rtmp + vj * bs2; 838 for (k1 = 0; k1 < bs2; k1++) { 839 *u++ = *rtmp_ptr; 840 *rtmp_ptr++ = 0.0; 841 } 842 } 843 844 /* ... add k to row list for first nonzero entry in k-th row */ 845 il[k] = jmin; 846 i = bj[jmin]; 847 jl[k] = jl[i]; 848 jl[i] = k; 849 } 850 } 851 852 PetscCall(PetscFree(rtmp)); 853 PetscCall(PetscFree2(il, jl)); 854 PetscCall(PetscFree3(dk, uik, work)); 855 PetscCall(PetscFree(pivots)); 856 857 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 858 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 859 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 860 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 861 C->assembled = PETSC_TRUE; 862 C->preallocated = PETSC_TRUE; 863 864 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 865 PetscFunctionReturn(PETSC_SUCCESS); 866 } 867 868 /* 869 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 870 Version for blocks 2 by 2. 871 */ 872 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info) 873 { 874 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 875 IS perm = b->row; 876 const PetscInt *ai, *aj, *perm_ptr; 877 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 878 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 879 MatScalar *ba = b->a, *aa, *ap; 880 MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4]; 881 PetscReal shift = info->shiftamount; 882 PetscBool allowzeropivot, zeropivotdetected; 883 884 PetscFunctionBegin; 885 allowzeropivot = PetscNot(A->erroriffailure); 886 887 /* initialization */ 888 /* il and jl record the first nonzero element in each row of the accessing 889 window U(0:k, k:mbs-1). 890 jl: list of rows to be added to uneliminated rows 891 i>= k: jl(i) is the first row to be added to row i 892 i< k: jl(i) is the row following row i in some list of rows 893 jl(i) = mbs indicates the end of a list 894 il(i): points to the first nonzero element in columns k,...,mbs-1 of 895 row i of U */ 896 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 897 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 898 il[0] = 0; 899 for (i = 0; i < mbs; i++) jl[i] = mbs; 900 901 PetscCall(ISGetIndices(perm, &perm_ptr)); 902 903 /* check permutation */ 904 if (!a->permute) { 905 ai = a->i; 906 aj = a->j; 907 aa = a->a; 908 } else { 909 ai = a->inew; 910 aj = a->jnew; 911 PetscCall(PetscMalloc1(4 * ai[mbs], &aa)); 912 PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs])); 913 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 914 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 915 916 for (i = 0; i < mbs; i++) { 917 jmin = ai[i]; 918 jmax = ai[i + 1]; 919 for (j = jmin; j < jmax; j++) { 920 while (a2anew[j] != j) { 921 k = a2anew[j]; 922 a2anew[j] = a2anew[k]; 923 a2anew[k] = k; 924 for (k1 = 0; k1 < 4; k1++) { 925 dk[k1] = aa[k * 4 + k1]; 926 aa[k * 4 + k1] = aa[j * 4 + k1]; 927 aa[j * 4 + k1] = dk[k1]; 928 } 929 } 930 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */ 931 if (i > aj[j]) { 932 ap = aa + j * 4; /* ptr to the beginning of the block */ 933 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 934 ap[1] = ap[2]; 935 ap[2] = dk[1]; 936 } 937 } 938 } 939 PetscCall(PetscFree(a2anew)); 940 } 941 942 /* for each row k */ 943 for (k = 0; k < mbs; k++) { 944 /*initialize k-th row with elements nonzero in row perm(k) of A */ 945 jmin = ai[perm_ptr[k]]; 946 jmax = ai[perm_ptr[k] + 1]; 947 ap = aa + jmin * 4; 948 for (j = jmin; j < jmax; j++) { 949 vj = perm_ptr[aj[j]]; /* block col. index */ 950 rtmp_ptr = rtmp + vj * 4; 951 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 952 } 953 954 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 955 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 956 i = jl[k]; /* first row to be added to k_th row */ 957 958 while (i < k) { 959 nexti = jl[i]; /* next row to be added to k_th row */ 960 961 /* compute multiplier */ 962 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 963 964 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 965 diag = ba + i * 4; 966 u = ba + ili * 4; 967 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 968 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 969 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 970 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 971 972 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 973 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 974 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 975 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 976 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 977 978 PetscCall(PetscLogFlops(16.0 * 2.0)); 979 980 /* update -U(i,k): ba[ili] = uik */ 981 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 982 983 /* add multiple of row i to k-th row ... */ 984 jmin = ili + 1; 985 jmax = bi[i + 1]; 986 if (jmin < jmax) { 987 for (j = jmin; j < jmax; j++) { 988 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 989 rtmp_ptr = rtmp + bj[j] * 4; 990 u = ba + j * 4; 991 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 992 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 993 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 994 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 995 } 996 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 997 998 /* ... add i to row list for next nonzero entry */ 999 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1000 j = bj[jmin]; 1001 jl[i] = jl[j]; 1002 jl[j] = i; /* update jl */ 1003 } 1004 i = nexti; 1005 } 1006 1007 /* save nonzero entries in k-th row of U ... */ 1008 1009 /* invert diagonal block */ 1010 diag = ba + k * 4; 1011 PetscCall(PetscArraycpy(diag, dk, 4)); 1012 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1013 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1014 1015 jmin = bi[k]; 1016 jmax = bi[k + 1]; 1017 if (jmin < jmax) { 1018 for (j = jmin; j < jmax; j++) { 1019 vj = bj[j]; /* block col. index of U */ 1020 u = ba + j * 4; 1021 rtmp_ptr = rtmp + vj * 4; 1022 for (k1 = 0; k1 < 4; k1++) { 1023 *u++ = *rtmp_ptr; 1024 *rtmp_ptr++ = 0.0; 1025 } 1026 } 1027 1028 /* ... add k to row list for first nonzero entry in k-th row */ 1029 il[k] = jmin; 1030 i = bj[jmin]; 1031 jl[k] = jl[i]; 1032 jl[i] = k; 1033 } 1034 } 1035 1036 PetscCall(PetscFree(rtmp)); 1037 PetscCall(PetscFree2(il, jl)); 1038 if (a->permute) PetscCall(PetscFree(aa)); 1039 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 1040 1041 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace; 1042 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace; 1043 C->assembled = PETSC_TRUE; 1044 C->preallocated = PETSC_TRUE; 1045 1046 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1047 PetscFunctionReturn(PETSC_SUCCESS); 1048 } 1049 1050 /* 1051 Version for when blocks are 2 by 2 Using natural ordering 1052 */ 1053 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 1054 { 1055 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1056 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 1057 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 1058 MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8]; 1059 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 1060 PetscReal shift = info->shiftamount; 1061 PetscBool allowzeropivot, zeropivotdetected; 1062 1063 PetscFunctionBegin; 1064 allowzeropivot = PetscNot(A->erroriffailure); 1065 1066 /* initialization */ 1067 /* il and jl record the first nonzero element in each row of the accessing 1068 window U(0:k, k:mbs-1). 1069 jl: list of rows to be added to uneliminated rows 1070 i>= k: jl(i) is the first row to be added to row i 1071 i< k: jl(i) is the row following row i in some list of rows 1072 jl(i) = mbs indicates the end of a list 1073 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1074 row i of U */ 1075 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 1076 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1077 il[0] = 0; 1078 for (i = 0; i < mbs; i++) jl[i] = mbs; 1079 1080 ai = a->i; 1081 aj = a->j; 1082 aa = a->a; 1083 1084 /* for each row k */ 1085 for (k = 0; k < mbs; k++) { 1086 /*initialize k-th row with elements nonzero in row k of A */ 1087 jmin = ai[k]; 1088 jmax = ai[k + 1]; 1089 ap = aa + jmin * 4; 1090 for (j = jmin; j < jmax; j++) { 1091 vj = aj[j]; /* block col. index */ 1092 rtmp_ptr = rtmp + vj * 4; 1093 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 1094 } 1095 1096 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1097 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 1098 i = jl[k]; /* first row to be added to k_th row */ 1099 1100 while (i < k) { 1101 nexti = jl[i]; /* next row to be added to k_th row */ 1102 1103 /* compute multiplier */ 1104 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1105 1106 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1107 diag = ba + i * 4; 1108 u = ba + ili * 4; 1109 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 1110 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 1111 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 1112 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 1113 1114 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1115 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 1116 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 1117 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 1118 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 1119 1120 PetscCall(PetscLogFlops(16.0 * 2.0)); 1121 1122 /* update -U(i,k): ba[ili] = uik */ 1123 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 1124 1125 /* add multiple of row i to k-th row ... */ 1126 jmin = ili + 1; 1127 jmax = bi[i + 1]; 1128 if (jmin < jmax) { 1129 for (j = jmin; j < jmax; j++) { 1130 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1131 rtmp_ptr = rtmp + bj[j] * 4; 1132 u = ba + j * 4; 1133 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 1134 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 1135 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 1136 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 1137 } 1138 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 1139 1140 /* ... add i to row list for next nonzero entry */ 1141 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1142 j = bj[jmin]; 1143 jl[i] = jl[j]; 1144 jl[j] = i; /* update jl */ 1145 } 1146 i = nexti; 1147 } 1148 1149 /* save nonzero entries in k-th row of U ... */ 1150 1151 /* invert diagonal block */ 1152 diag = ba + k * 4; 1153 PetscCall(PetscArraycpy(diag, dk, 4)); 1154 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1155 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1156 1157 jmin = bi[k]; 1158 jmax = bi[k + 1]; 1159 if (jmin < jmax) { 1160 for (j = jmin; j < jmax; j++) { 1161 vj = bj[j]; /* block col. index of U */ 1162 u = ba + j * 4; 1163 rtmp_ptr = rtmp + vj * 4; 1164 for (k1 = 0; k1 < 4; k1++) { 1165 *u++ = *rtmp_ptr; 1166 *rtmp_ptr++ = 0.0; 1167 } 1168 } 1169 1170 /* ... add k to row list for first nonzero entry in k-th row */ 1171 il[k] = jmin; 1172 i = bj[jmin]; 1173 jl[k] = jl[i]; 1174 jl[i] = k; 1175 } 1176 } 1177 1178 PetscCall(PetscFree(rtmp)); 1179 PetscCall(PetscFree2(il, jl)); 1180 1181 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1182 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1183 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1184 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1185 C->assembled = PETSC_TRUE; 1186 C->preallocated = PETSC_TRUE; 1187 1188 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1189 PetscFunctionReturn(PETSC_SUCCESS); 1190 } 1191 1192 /* 1193 Numeric U^T*D*U factorization for SBAIJ format. 1194 Version for blocks are 1 by 1. 1195 */ 1196 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) 1197 { 1198 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1199 IS ip = b->row; 1200 const PetscInt *ai, *aj, *rip; 1201 PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol; 1202 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1203 MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi; 1204 PetscReal rs; 1205 FactorShiftCtx sctx; 1206 1207 PetscFunctionBegin; 1208 /* MatPivotSetUp(): initialize shift context sctx */ 1209 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1210 1211 PetscCall(ISGetIndices(ip, &rip)); 1212 if (!a->permute) { 1213 ai = a->i; 1214 aj = a->j; 1215 aa = a->a; 1216 } else { 1217 ai = a->inew; 1218 aj = a->jnew; 1219 nz = ai[mbs]; 1220 PetscCall(PetscMalloc1(nz, &aa)); 1221 a2anew = a->a2anew; 1222 bval = a->a; 1223 for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++); 1224 } 1225 1226 /* initialization */ 1227 /* il and jl record the first nonzero element in each row of the accessing 1228 window U(0:k, k:mbs-1). 1229 jl: list of rows to be added to uneliminated rows 1230 i>= k: jl(i) is the first row to be added to row i 1231 i< k: jl(i) is the row following row i in some list of rows 1232 jl(i) = mbs indicates the end of a list 1233 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1234 row i of U */ 1235 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 1236 1237 do { 1238 sctx.newshift = PETSC_FALSE; 1239 il[0] = 0; 1240 for (i = 0; i < mbs; i++) { 1241 rtmp[i] = 0.0; 1242 jl[i] = mbs; 1243 } 1244 1245 for (k = 0; k < mbs; k++) { 1246 /*initialize k-th row by the perm[k]-th row of A */ 1247 jmin = ai[rip[k]]; 1248 jmax = ai[rip[k] + 1]; 1249 bval = ba + bi[k]; 1250 for (j = jmin; j < jmax; j++) { 1251 col = rip[aj[j]]; 1252 rtmp[col] = aa[j]; 1253 *bval++ = 0.0; /* for in-place factorization */ 1254 } 1255 1256 /* shift the diagonal of the matrix */ 1257 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1258 1259 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1260 dk = rtmp[k]; 1261 i = jl[k]; /* first row to be added to k_th row */ 1262 1263 while (i < k) { 1264 nexti = jl[i]; /* next row to be added to k_th row */ 1265 1266 /* compute multiplier, update diag(k) and U(i,k) */ 1267 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1268 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 1269 dk += uikdi * ba[ili]; 1270 ba[ili] = uikdi; /* -U(i,k) */ 1271 1272 /* add multiple of row i to k-th row */ 1273 jmin = ili + 1; 1274 jmax = bi[i + 1]; 1275 if (jmin < jmax) { 1276 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1277 PetscCall(PetscLogFlops(2.0 * (jmax - jmin))); 1278 1279 /* update il and jl for row i */ 1280 il[i] = jmin; 1281 j = bj[jmin]; 1282 jl[i] = jl[j]; 1283 jl[j] = i; 1284 } 1285 i = nexti; 1286 } 1287 1288 /* shift the diagonals when zero pivot is detected */ 1289 /* compute rs=sum of abs(off-diagonal) */ 1290 rs = 0.0; 1291 jmin = bi[k] + 1; 1292 nz = bi[k + 1] - jmin; 1293 if (nz) { 1294 bcol = bj + jmin; 1295 while (nz--) { 1296 rs += PetscAbsScalar(rtmp[*bcol]); 1297 bcol++; 1298 } 1299 } 1300 1301 sctx.rs = rs; 1302 sctx.pv = dk; 1303 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1304 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1305 dk = sctx.pv; 1306 1307 /* copy data into U(k,:) */ 1308 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 1309 jmin = bi[k] + 1; 1310 jmax = bi[k + 1]; 1311 if (jmin < jmax) { 1312 for (j = jmin; j < jmax; j++) { 1313 col = bj[j]; 1314 ba[j] = rtmp[col]; 1315 rtmp[col] = 0.0; 1316 } 1317 /* add the k-th row into il and jl */ 1318 il[k] = jmin; 1319 i = bj[jmin]; 1320 jl[k] = jl[i]; 1321 jl[i] = k; 1322 } 1323 } 1324 } while (sctx.newshift); 1325 PetscCall(PetscFree3(rtmp, il, jl)); 1326 if (a->permute) PetscCall(PetscFree(aa)); 1327 1328 PetscCall(ISRestoreIndices(ip, &rip)); 1329 1330 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 1331 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1332 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 1333 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 1334 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 1335 C->assembled = PETSC_TRUE; 1336 C->preallocated = PETSC_TRUE; 1337 1338 PetscCall(PetscLogFlops(C->rmap->N)); 1339 if (sctx.nshift) { 1340 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1341 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1342 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1343 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1344 } 1345 } 1346 PetscFunctionReturn(PETSC_SUCCESS); 1347 } 1348 1349 /* 1350 Version for when blocks are 1 by 1 Using natural ordering under new datastructure 1351 Modified from MatCholeskyFactorNumeric_SeqAIJ() 1352 */ 1353 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) 1354 { 1355 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1356 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data; 1357 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1358 PetscInt *ai = a->i, *aj = a->j, *ajtmp; 1359 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1360 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 1361 FactorShiftCtx sctx; 1362 PetscReal rs; 1363 MatScalar d, *v; 1364 1365 PetscFunctionBegin; 1366 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 1367 1368 /* MatPivotSetUp(): initialize shift context sctx */ 1369 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1370 1371 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1372 sctx.shift_top = info->zeropivot; 1373 1374 PetscCall(PetscArrayzero(rtmp, mbs)); 1375 1376 for (i = 0; i < mbs; i++) { 1377 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1378 d = (aa)[a->diag[i]]; 1379 rtmp[i] += -PetscRealPart(d); /* diagonal entry */ 1380 ajtmp = aj + ai[i] + 1; /* exclude diagonal */ 1381 v = aa + ai[i] + 1; 1382 nz = ai[i + 1] - ai[i] - 1; 1383 for (j = 0; j < nz; j++) { 1384 rtmp[i] += PetscAbsScalar(v[j]); 1385 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]); 1386 } 1387 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]); 1388 } 1389 sctx.shift_top *= 1.1; 1390 sctx.nshift_max = 5; 1391 sctx.shift_lo = 0.; 1392 sctx.shift_hi = 1.; 1393 } 1394 1395 /* allocate working arrays 1396 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1397 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1398 */ 1399 do { 1400 sctx.newshift = PETSC_FALSE; 1401 1402 for (i = 0; i < mbs; i++) c2r[i] = mbs; 1403 if (mbs) il[0] = 0; 1404 1405 for (k = 0; k < mbs; k++) { 1406 /* zero rtmp */ 1407 nz = bi[k + 1] - bi[k]; 1408 bjtmp = bj + bi[k]; 1409 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1410 1411 /* load in initial unfactored row */ 1412 bval = ba + bi[k]; 1413 jmin = ai[k]; 1414 jmax = ai[k + 1]; 1415 for (j = jmin; j < jmax; j++) { 1416 col = aj[j]; 1417 rtmp[col] = aa[j]; 1418 *bval++ = 0.0; /* for in-place factorization */ 1419 } 1420 /* shift the diagonal of the matrix: ZeropivotApply() */ 1421 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1422 1423 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1424 dk = rtmp[k]; 1425 i = c2r[k]; /* first row to be added to k_th row */ 1426 1427 while (i < k) { 1428 nexti = c2r[i]; /* next row to be added to k_th row */ 1429 1430 /* compute multiplier, update diag(k) and U(i,k) */ 1431 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1432 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 1433 dk += uikdi * ba[ili]; /* update diag[k] */ 1434 ba[ili] = uikdi; /* -U(i,k) */ 1435 1436 /* add multiple of row i to k-th row */ 1437 jmin = ili + 1; 1438 jmax = bi[i + 1]; 1439 if (jmin < jmax) { 1440 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1441 /* update il and c2r for row i */ 1442 il[i] = jmin; 1443 j = bj[jmin]; 1444 c2r[i] = c2r[j]; 1445 c2r[j] = i; 1446 } 1447 i = nexti; 1448 } 1449 1450 /* copy data into U(k,:) */ 1451 rs = 0.0; 1452 jmin = bi[k]; 1453 jmax = bi[k + 1] - 1; 1454 if (jmin < jmax) { 1455 for (j = jmin; j < jmax; j++) { 1456 col = bj[j]; 1457 ba[j] = rtmp[col]; 1458 rs += PetscAbsScalar(ba[j]); 1459 } 1460 /* add the k-th row into il and c2r */ 1461 il[k] = jmin; 1462 i = bj[jmin]; 1463 c2r[k] = c2r[i]; 1464 c2r[i] = k; 1465 } 1466 1467 sctx.rs = rs; 1468 sctx.pv = dk; 1469 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 1470 if (sctx.newshift) break; 1471 dk = sctx.pv; 1472 1473 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 1474 } 1475 } while (sctx.newshift); 1476 1477 PetscCall(PetscFree3(rtmp, il, c2r)); 1478 1479 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1480 B->ops->solves = MatSolves_SeqSBAIJ_1; 1481 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1482 B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering; 1483 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1484 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1485 1486 B->assembled = PETSC_TRUE; 1487 B->preallocated = PETSC_TRUE; 1488 1489 PetscCall(PetscLogFlops(B->rmap->n)); 1490 1491 /* MatPivotView() */ 1492 if (sctx.nshift) { 1493 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1494 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1495 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1496 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1497 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 1498 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 1499 } 1500 } 1501 PetscFunctionReturn(PETSC_SUCCESS); 1502 } 1503 1504 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) 1505 { 1506 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1507 PetscInt i, j, mbs = a->mbs; 1508 PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 1509 PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz; 1510 MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval; 1511 PetscReal rs; 1512 FactorShiftCtx sctx; 1513 1514 PetscFunctionBegin; 1515 /* MatPivotSetUp(): initialize shift context sctx */ 1516 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1517 1518 /* initialization */ 1519 /* il and jl record the first nonzero element in each row of the accessing 1520 window U(0:k, k:mbs-1). 1521 jl: list of rows to be added to uneliminated rows 1522 i>= k: jl(i) is the first row to be added to row i 1523 i< k: jl(i) is the row following row i in some list of rows 1524 jl(i) = mbs indicates the end of a list 1525 il(i): points to the first nonzero element in U(i,k:mbs-1) 1526 */ 1527 PetscCall(PetscMalloc1(mbs, &rtmp)); 1528 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1529 1530 do { 1531 sctx.newshift = PETSC_FALSE; 1532 il[0] = 0; 1533 for (i = 0; i < mbs; i++) { 1534 rtmp[i] = 0.0; 1535 jl[i] = mbs; 1536 } 1537 1538 for (k = 0; k < mbs; k++) { 1539 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1540 nz = ai[k + 1] - ai[k]; 1541 acol = aj + ai[k]; 1542 aval = aa + ai[k]; 1543 bval = ba + bi[k]; 1544 while (nz--) { 1545 rtmp[*acol++] = *aval++; 1546 *bval++ = 0.0; /* for in-place factorization */ 1547 } 1548 1549 /* shift the diagonal of the matrix */ 1550 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1551 1552 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1553 dk = rtmp[k]; 1554 i = jl[k]; /* first row to be added to k_th row */ 1555 1556 while (i < k) { 1557 nexti = jl[i]; /* next row to be added to k_th row */ 1558 /* compute multiplier, update D(k) and U(i,k) */ 1559 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1560 uikdi = -ba[ili] * ba[bi[i]]; 1561 dk += uikdi * ba[ili]; 1562 ba[ili] = uikdi; /* -U(i,k) */ 1563 1564 /* add multiple of row i to k-th row ... */ 1565 jmin = ili + 1; 1566 nz = bi[i + 1] - jmin; 1567 if (nz > 0) { 1568 bcol = bj + jmin; 1569 bval = ba + jmin; 1570 PetscCall(PetscLogFlops(2.0 * nz)); 1571 while (nz--) rtmp[*bcol++] += uikdi * (*bval++); 1572 1573 /* update il and jl for i-th row */ 1574 il[i] = jmin; 1575 j = bj[jmin]; 1576 jl[i] = jl[j]; 1577 jl[j] = i; 1578 } 1579 i = nexti; 1580 } 1581 1582 /* shift the diagonals when zero pivot is detected */ 1583 /* compute rs=sum of abs(off-diagonal) */ 1584 rs = 0.0; 1585 jmin = bi[k] + 1; 1586 nz = bi[k + 1] - jmin; 1587 if (nz) { 1588 bcol = bj + jmin; 1589 while (nz--) { 1590 rs += PetscAbsScalar(rtmp[*bcol]); 1591 bcol++; 1592 } 1593 } 1594 1595 sctx.rs = rs; 1596 sctx.pv = dk; 1597 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1598 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1599 dk = sctx.pv; 1600 1601 /* copy data into U(k,:) */ 1602 ba[bi[k]] = 1.0 / dk; 1603 jmin = bi[k] + 1; 1604 nz = bi[k + 1] - jmin; 1605 if (nz) { 1606 bcol = bj + jmin; 1607 bval = ba + jmin; 1608 while (nz--) { 1609 *bval++ = rtmp[*bcol]; 1610 rtmp[*bcol++] = 0.0; 1611 } 1612 /* add k-th row into il and jl */ 1613 il[k] = jmin; 1614 i = bj[jmin]; 1615 jl[k] = jl[i]; 1616 jl[i] = k; 1617 } 1618 } /* end of for (k = 0; k<mbs; k++) */ 1619 } while (sctx.newshift); 1620 PetscCall(PetscFree(rtmp)); 1621 PetscCall(PetscFree2(il, jl)); 1622 1623 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1624 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1625 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1626 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1627 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1628 1629 C->assembled = PETSC_TRUE; 1630 C->preallocated = PETSC_TRUE; 1631 1632 PetscCall(PetscLogFlops(C->rmap->N)); 1633 if (sctx.nshift) { 1634 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1635 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1636 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1637 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1638 } 1639 } 1640 PetscFunctionReturn(PETSC_SUCCESS); 1641 } 1642 1643 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info) 1644 { 1645 Mat C; 1646 1647 PetscFunctionBegin; 1648 PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C)); 1649 PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info)); 1650 PetscCall(MatCholeskyFactorNumeric(C, A, info)); 1651 1652 A->ops->solve = C->ops->solve; 1653 A->ops->solvetranspose = C->ops->solvetranspose; 1654 1655 PetscCall(MatHeaderMerge(A, &C)); 1656 PetscFunctionReturn(PETSC_SUCCESS); 1657 } 1658