1 #include <../src/mat/impls/baij/seq/baij.h> 2 #include <../src/mat/impls/sbaij/seq/sbaij.h> 3 #include <petsc/private/kernels/blockinvert.h> 4 #include <petscis.h> 5 6 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 7 { 8 Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data; 9 MatScalar *dd = fact->a; 10 PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag; 11 12 PetscFunctionBegin; 13 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs); 14 15 nneg_tmp = 0; 16 npos_tmp = 0; 17 if (fi) { 18 for (i = 0; i < mbs; i++) { 19 if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++; 20 else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++; 21 fi++; 22 } 23 } else { 24 for (i = 0; i < mbs; i++) { 25 if (PetscRealPart(dd[fact->i[i]]) > 0.0) npos_tmp++; 26 else if (PetscRealPart(dd[fact->i[i]]) < 0.0) nneg_tmp++; 27 } 28 } 29 if (nneg) *nneg = nneg_tmp; 30 if (npos) *npos = npos_tmp; 31 if (nzero) *nzero = mbs - nneg_tmp - npos_tmp; 32 PetscFunctionReturn(PETSC_SUCCESS); 33 } 34 35 /* 36 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP. 37 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad. 38 */ 39 static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info) 40 { 41 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b; 42 const PetscInt *rip, *ai, *aj; 43 PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2; 44 PetscInt m, reallocs = 0, prow; 45 PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd; 46 PetscReal f = info->fill; 47 PetscBool perm_identity; 48 49 PetscFunctionBegin; 50 /* check whether perm is the identity mapping */ 51 PetscCall(ISIdentity(perm, &perm_identity)); 52 PetscCall(ISGetIndices(perm, &rip)); 53 54 if (perm_identity) { /* without permutation */ 55 a->permute = PETSC_FALSE; 56 57 ai = a->i; 58 aj = a->j; 59 } else { /* non-trivial permutation */ 60 a->permute = PETSC_TRUE; 61 62 PetscCall(MatReorderingSeqSBAIJ(A, perm)); 63 64 ai = a->inew; 65 aj = a->jnew; 66 } 67 68 /* initialization */ 69 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&iu)); 70 umax = (PetscInt)(f * ai[mbs] + 1); 71 umax += mbs + 1; 72 PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&ju)); 73 iu[0] = mbs + 1; 74 juidx = mbs + 1; /* index for ju */ 75 /* jl linked list for pivot row -- linked list for col index */ 76 PetscCall(PetscMalloc2(mbs, &jl, mbs, &q)); 77 for (i = 0; i < mbs; i++) { 78 jl[i] = mbs; 79 q[i] = 0; 80 } 81 82 /* for each row k */ 83 for (k = 0; k < mbs; k++) { 84 for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */ 85 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */ 86 q[k] = mbs; 87 /* initialize nonzero structure of k-th row to row rip[k] of A */ 88 jmin = ai[rip[k]] + 1; /* exclude diag[k] */ 89 jmax = ai[rip[k] + 1]; 90 for (j = jmin; j < jmax; j++) { 91 vj = rip[aj[j]]; /* col. value */ 92 if (vj > k) { 93 qm = k; 94 do { 95 m = qm; 96 qm = q[m]; 97 } while (qm < vj); 98 PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A"); 99 nzk++; 100 q[m] = vj; 101 q[vj] = qm; 102 } /* if (vj > k) */ 103 } /* for (j=jmin; j<jmax; j++) */ 104 105 /* modify nonzero structure of k-th row by computing fill-in 106 for each row i to be merged in */ 107 prow = k; 108 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */ 109 110 while (prow < k) { 111 /* merge row prow into k-th row */ 112 jmin = iu[prow] + 1; 113 jmax = iu[prow + 1]; 114 qm = k; 115 for (j = jmin; j < jmax; j++) { 116 vj = ju[j]; 117 do { 118 m = qm; 119 qm = q[m]; 120 } while (qm < vj); 121 if (qm != vj) { 122 nzk++; 123 q[m] = vj; 124 q[vj] = qm; 125 qm = vj; 126 } 127 } 128 prow = jl[prow]; /* next pivot row */ 129 } 130 131 /* add k to row list for first nonzero element in k-th row */ 132 if (nzk > 0) { 133 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */ 134 jl[k] = jl[i]; 135 jl[i] = k; 136 } 137 iu[k + 1] = iu[k] + nzk; 138 139 /* allocate more space to ju if needed */ 140 if (iu[k + 1] > umax) { 141 /* estimate how much additional space we will need */ 142 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 143 /* just double the memory each time */ 144 maxadd = umax; 145 if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2; 146 umax += maxadd; 147 148 /* allocate a longer ju */ 149 PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&jutmp)); 150 PetscCall(PetscArraycpy(jutmp, ju, iu[k])); 151 PetscCall(PetscShmgetDeallocateArray((void **)&ju)); 152 ju = jutmp; 153 reallocs++; /* count how many times we realloc */ 154 } 155 156 /* save nonzero structure of k-th row in ju */ 157 i = k; 158 while (nzk--) { 159 i = q[i]; 160 ju[juidx++] = i; 161 } 162 } 163 164 #if defined(PETSC_USE_INFO) 165 if (ai[mbs] != 0) { 166 PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 167 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 168 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 169 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 170 PetscCall(PetscInfo(A, "for best performance.\n")); 171 } else { 172 PetscCall(PetscInfo(A, "Empty matrix\n")); 173 } 174 #endif 175 176 PetscCall(ISRestoreIndices(perm, &rip)); 177 PetscCall(PetscFree2(jl, q)); 178 179 /* put together the new matrix */ 180 PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL)); 181 182 b = (Mat_SeqSBAIJ *)F->data; 183 b->free_ij = PETSC_TRUE; 184 PetscCall(PetscShmgetAllocateArray((iu[mbs] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a)); 185 b->free_a = PETSC_TRUE; 186 b->j = ju; 187 b->i = iu; 188 b->diag = NULL; 189 b->ilen = NULL; 190 b->imax = NULL; 191 b->row = perm; 192 193 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 194 195 PetscCall(PetscObjectReference((PetscObject)perm)); 196 197 b->icol = perm; 198 PetscCall(PetscObjectReference((PetscObject)perm)); 199 PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work)); 200 /* In b structure: Free imax, ilen, old a, old j. 201 Allocate idnew, solve_work, new a, new j */ 202 b->maxnz = b->nz = iu[mbs]; 203 204 F->info.factor_mallocs = reallocs; 205 F->info.fill_ratio_given = f; 206 if (ai[mbs] != 0) { 207 F->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]); 208 } else { 209 F->info.fill_ratio_needed = 0.0; 210 } 211 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity)); 212 PetscFunctionReturn(PETSC_SUCCESS); 213 } 214 /* 215 Symbolic U^T*D*U factorization for SBAIJ format. 216 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure. 217 */ 218 #include <petscbt.h> 219 #include <../src/mat/utils/freespace.h> 220 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 221 { 222 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 223 Mat_SeqSBAIJ *b; 224 PetscBool perm_identity; 225 PetscReal fill = info->fill; 226 const PetscInt *rip, *ai = a->i, *aj = a->j; 227 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow; 228 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 229 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag; 230 PetscFreeSpaceList free_space = NULL, current_space = NULL; 231 PetscBT lnkbt; 232 PetscBool diagDense; 233 234 PetscFunctionBegin; 235 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n); 236 PetscCall(MatGetDiagonalMarkers_SeqSBAIJ(A, NULL, &diagDense)); 237 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry"); 238 if (bs > 1) { 239 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info)); 240 PetscFunctionReturn(PETSC_SUCCESS); 241 } 242 243 /* check whether perm is the identity mapping */ 244 PetscCall(ISIdentity(perm, &perm_identity)); 245 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 246 a->permute = PETSC_FALSE; 247 PetscCall(ISGetIndices(perm, &rip)); 248 249 /* initialization */ 250 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui)); 251 PetscCall(PetscMalloc1(mbs + 1, &udiag)); 252 ui[0] = 0; 253 254 /* jl: linked list for storing indices of the pivot rows 255 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 256 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 257 for (i = 0; i < mbs; i++) { 258 jl[i] = mbs; 259 il[i] = 0; 260 } 261 262 /* create and initialize a linked list for storing column indices of the active row k */ 263 nlnk = mbs + 1; 264 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 265 266 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 267 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 268 current_space = free_space; 269 270 for (k = 0; k < mbs; k++) { /* for each active row k */ 271 /* initialize lnk by the column indices of row rip[k] of A */ 272 nzk = 0; 273 ncols = ai[k + 1] - ai[k]; 274 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k); 275 for (j = 0; j < ncols; j++) { 276 i = *(aj + ai[k] + j); 277 cols[j] = i; 278 } 279 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 280 nzk += nlnk; 281 282 /* update lnk by computing fill-in for each pivot row to be merged in */ 283 prow = jl[k]; /* 1st pivot row */ 284 285 while (prow < k) { 286 nextprow = jl[prow]; 287 /* merge prow into k-th row */ 288 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 289 jmax = ui[prow + 1]; 290 ncols = jmax - jmin; 291 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 292 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 293 nzk += nlnk; 294 295 /* update il and jl for prow */ 296 if (jmin < jmax) { 297 il[prow] = jmin; 298 j = *uj_ptr; 299 jl[prow] = jl[j]; 300 jl[j] = prow; 301 } 302 prow = nextprow; 303 } 304 305 /* if free space is not available, make more free space */ 306 if (current_space->local_remaining < nzk) { 307 i = mbs - k + 1; /* num of unfactored rows */ 308 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 309 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 310 reallocs++; 311 } 312 313 /* copy data into free space, then initialize lnk */ 314 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 315 316 /* add the k-th row into il and jl */ 317 if (nzk > 1) { 318 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 319 jl[k] = jl[i]; 320 jl[i] = k; 321 il[k] = ui[k] + 1; 322 } 323 ui_ptr[k] = current_space->array; 324 325 current_space->array += nzk; 326 current_space->local_used += nzk; 327 current_space->local_remaining -= nzk; 328 329 ui[k + 1] = ui[k] + nzk; 330 } 331 332 PetscCall(ISRestoreIndices(perm, &rip)); 333 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 334 335 /* destroy list of free space and other temporary array(s) */ 336 PetscCall(PetscShmgetAllocateArray(ui[mbs], sizeof(PetscInt), (void **)&uj)); 337 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */ 338 PetscCall(PetscLLDestroy(lnk, lnkbt)); 339 340 /* put together the new matrix in MATSEQSBAIJ format */ 341 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 342 343 b = (Mat_SeqSBAIJ *)fact->data; 344 b->free_ij = PETSC_TRUE; 345 PetscCall(PetscShmgetAllocateArray(ui[mbs], sizeof(PetscScalar), (void **)&b->a)); 346 b->free_a = PETSC_TRUE; 347 b->j = uj; 348 b->i = ui; 349 b->diag = udiag; 350 b->ilen = NULL; 351 b->imax = NULL; 352 b->row = perm; 353 b->icol = perm; 354 355 PetscCall(PetscObjectReference((PetscObject)perm)); 356 PetscCall(PetscObjectReference((PetscObject)perm)); 357 358 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 359 360 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 361 362 b->maxnz = b->nz = ui[mbs]; 363 364 fact->info.factor_mallocs = reallocs; 365 fact->info.fill_ratio_given = fill; 366 if (ai[mbs] != 0) { 367 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 368 } else { 369 fact->info.fill_ratio_needed = 0.0; 370 } 371 #if defined(PETSC_USE_INFO) 372 if (ai[mbs] != 0) { 373 PetscReal af = fact->info.fill_ratio_needed; 374 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 375 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 376 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 377 } else { 378 PetscCall(PetscInfo(A, "Empty matrix\n")); 379 } 380 #endif 381 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 382 PetscFunctionReturn(PETSC_SUCCESS); 383 } 384 385 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info) 386 { 387 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 388 Mat_SeqSBAIJ *b; 389 PetscBool perm_identity; 390 PetscReal fill = info->fill; 391 const PetscInt *rip, *ai, *aj; 392 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow; 393 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow; 394 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr; 395 PetscFreeSpaceList free_space = NULL, current_space = NULL; 396 PetscBT lnkbt; 397 PetscBool diagDense; 398 399 PetscFunctionBegin; 400 PetscCall(MatGetDiagonalMarkers_SeqSBAIJ(A, NULL, &diagDense)); 401 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry"); 402 403 /* 404 This code originally uses Modified Sparse Row (MSR) storage 405 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice! 406 Then it is rewritten so the factor B takes seqsbaij format. However the associated 407 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity, 408 thus the original code in MSR format is still used for these cases. 409 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever 410 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor. 411 */ 412 if (bs > 1) { 413 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info)); 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 /* check whether perm is the identity mapping */ 418 PetscCall(ISIdentity(perm, &perm_identity)); 419 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format"); 420 a->permute = PETSC_FALSE; 421 ai = a->i; 422 aj = a->j; 423 PetscCall(ISGetIndices(perm, &rip)); 424 425 /* initialization */ 426 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui)); 427 ui[0] = 0; 428 429 /* jl: linked list for storing indices of the pivot rows 430 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */ 431 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols)); 432 for (i = 0; i < mbs; i++) { 433 jl[i] = mbs; 434 il[i] = 0; 435 } 436 437 /* create and initialize a linked list for storing column indices of the active row k */ 438 nlnk = mbs + 1; 439 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt)); 440 441 /* initial FreeSpace size is fill*(ai[mbs]+1) */ 442 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space)); 443 current_space = free_space; 444 445 for (k = 0; k < mbs; k++) { /* for each active row k */ 446 /* initialize lnk by the column indices of row rip[k] of A */ 447 nzk = 0; 448 ncols = ai[rip[k] + 1] - ai[rip[k]]; 449 for (j = 0; j < ncols; j++) { 450 i = *(aj + ai[rip[k]] + j); 451 cols[j] = rip[i]; 452 } 453 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt)); 454 nzk += nlnk; 455 456 /* update lnk by computing fill-in for each pivot row to be merged in */ 457 prow = jl[k]; /* 1st pivot row */ 458 459 while (prow < k) { 460 nextprow = jl[prow]; 461 /* merge prow into k-th row */ 462 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */ 463 jmax = ui[prow + 1]; 464 ncols = jmax - jmin; 465 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */ 466 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt)); 467 nzk += nlnk; 468 469 /* update il and jl for prow */ 470 if (jmin < jmax) { 471 il[prow] = jmin; 472 473 j = *uj_ptr; 474 jl[prow] = jl[j]; 475 jl[j] = prow; 476 } 477 prow = nextprow; 478 } 479 480 /* if free space is not available, make more free space */ 481 if (current_space->local_remaining < nzk) { 482 i = mbs - k + 1; /* num of unfactored rows */ 483 i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 484 PetscCall(PetscFreeSpaceGet(i, ¤t_space)); 485 reallocs++; 486 } 487 488 /* copy data into free space, then initialize lnk */ 489 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt)); 490 491 /* add the k-th row into il and jl */ 492 if (nzk - 1 > 0) { 493 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */ 494 jl[k] = jl[i]; 495 jl[i] = k; 496 il[k] = ui[k] + 1; 497 } 498 ui_ptr[k] = current_space->array; 499 500 current_space->array += nzk; 501 current_space->local_used += nzk; 502 current_space->local_remaining -= nzk; 503 504 ui[k + 1] = ui[k] + nzk; 505 } 506 507 PetscCall(ISRestoreIndices(perm, &rip)); 508 PetscCall(PetscFree4(ui_ptr, il, jl, cols)); 509 510 /* destroy list of free space and other temporary array(s) */ 511 PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscInt), (void **)&uj)); 512 PetscCall(PetscFreeSpaceContiguous(&free_space, uj)); 513 PetscCall(PetscLLDestroy(lnk, lnkbt)); 514 515 /* put together the new matrix in MATSEQSBAIJ format */ 516 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL)); 517 518 b = (Mat_SeqSBAIJ *)fact->data; 519 PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscScalar), (void **)&b->a)); 520 b->free_a = PETSC_TRUE; 521 b->free_ij = PETSC_TRUE; 522 b->j = uj; 523 b->i = ui; 524 b->diag = NULL; 525 b->ilen = NULL; 526 b->imax = NULL; 527 b->row = perm; 528 529 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 530 531 PetscCall(PetscObjectReference((PetscObject)perm)); 532 b->icol = perm; 533 PetscCall(PetscObjectReference((PetscObject)perm)); 534 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work)); 535 b->maxnz = b->nz = ui[mbs]; 536 537 fact->info.factor_mallocs = reallocs; 538 fact->info.fill_ratio_given = fill; 539 if (ai[mbs] != 0) { 540 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs]; 541 } else { 542 fact->info.fill_ratio_needed = 0.0; 543 } 544 #if defined(PETSC_USE_INFO) 545 if (ai[mbs] != 0) { 546 PetscReal af = fact->info.fill_ratio_needed; 547 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af)); 548 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 549 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af)); 550 } else { 551 PetscCall(PetscInfo(A, "Empty matrix\n")); 552 } 553 #endif 554 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity)); 555 PetscFunctionReturn(PETSC_SUCCESS); 556 } 557 558 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info) 559 { 560 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 561 IS perm = b->row; 562 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j; 563 PetscInt i, j; 564 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 565 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 566 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 567 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 568 MatScalar *work; 569 PetscInt *pivots; 570 PetscBool allowzeropivot, zeropivotdetected; 571 572 PetscFunctionBegin; 573 /* initialization */ 574 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 575 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 576 allowzeropivot = PetscNot(A->erroriffailure); 577 578 il[0] = 0; 579 for (i = 0; i < mbs; i++) jl[i] = mbs; 580 581 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 582 PetscCall(PetscMalloc1(bs, &pivots)); 583 584 PetscCall(ISGetIndices(perm, &perm_ptr)); 585 586 /* check permutation */ 587 if (!a->permute) { 588 ai = a->i; 589 aj = a->j; 590 aa = a->a; 591 } else { 592 ai = a->inew; 593 aj = a->jnew; 594 PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa)); 595 PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs])); 596 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 597 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 598 599 for (i = 0; i < mbs; i++) { 600 jmin = ai[i]; 601 jmax = ai[i + 1]; 602 for (j = jmin; j < jmax; j++) { 603 while (a2anew[j] != j) { 604 k = a2anew[j]; 605 a2anew[j] = a2anew[k]; 606 a2anew[k] = k; 607 for (k1 = 0; k1 < bs2; k1++) { 608 dk[k1] = aa[k * bs2 + k1]; 609 aa[k * bs2 + k1] = aa[j * bs2 + k1]; 610 aa[j * bs2 + k1] = dk[k1]; 611 } 612 } 613 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */ 614 if (i > aj[j]) { 615 ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */ 616 for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */ 617 for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */ 618 for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1]; 619 } 620 } 621 } 622 } 623 PetscCall(PetscFree(a2anew)); 624 } 625 626 /* for each row k */ 627 for (k = 0; k < mbs; k++) { 628 /*initialize k-th row with elements nonzero in row perm(k) of A */ 629 jmin = ai[perm_ptr[k]]; 630 jmax = ai[perm_ptr[k] + 1]; 631 632 ap = aa + jmin * bs2; 633 for (j = jmin; j < jmax; j++) { 634 vj = perm_ptr[aj[j]]; /* block col. index */ 635 rtmp_ptr = rtmp + vj * bs2; 636 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 637 } 638 639 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 640 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 641 i = jl[k]; /* first row to be added to k_th row */ 642 643 while (i < k) { 644 nexti = jl[i]; /* next row to be added to k_th row */ 645 646 /* compute multiplier */ 647 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 648 649 /* uik = -inv(Di)*U_bar(i,k) */ 650 diag = ba + i * bs2; 651 u = ba + ili * bs2; 652 PetscCall(PetscArrayzero(uik, bs2)); 653 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 654 655 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 656 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 657 PetscCall(PetscLogFlops(4.0 * bs * bs2)); 658 659 /* update -U(i,k) */ 660 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 661 662 /* add multiple of row i to k-th row ... */ 663 jmin = ili + 1; 664 jmax = bi[i + 1]; 665 if (jmin < jmax) { 666 for (j = jmin; j < jmax; j++) { 667 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 668 rtmp_ptr = rtmp + bj[j] * bs2; 669 u = ba + j * bs2; 670 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 671 } 672 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 673 674 /* ... add i to row list for next nonzero entry */ 675 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 676 j = bj[jmin]; 677 jl[i] = jl[j]; 678 jl[j] = i; /* update jl */ 679 } 680 i = nexti; 681 } 682 683 /* save nonzero entries in k-th row of U ... */ 684 685 /* invert diagonal block */ 686 diag = ba + k * bs2; 687 PetscCall(PetscArraycpy(diag, dk, bs2)); 688 689 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 690 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 691 692 jmin = bi[k]; 693 jmax = bi[k + 1]; 694 if (jmin < jmax) { 695 for (j = jmin; j < jmax; j++) { 696 vj = bj[j]; /* block col. index of U */ 697 u = ba + j * bs2; 698 rtmp_ptr = rtmp + vj * bs2; 699 for (k1 = 0; k1 < bs2; k1++) { 700 *u++ = *rtmp_ptr; 701 *rtmp_ptr++ = 0.0; 702 } 703 } 704 705 /* ... add k to row list for first nonzero entry in k-th row */ 706 il[k] = jmin; 707 i = bj[jmin]; 708 jl[k] = jl[i]; 709 jl[i] = k; 710 } 711 } 712 713 PetscCall(PetscFree(rtmp)); 714 PetscCall(PetscFree2(il, jl)); 715 PetscCall(PetscFree3(dk, uik, work)); 716 PetscCall(PetscFree(pivots)); 717 if (a->permute) PetscCall(PetscFree(aa)); 718 719 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 720 721 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace; 722 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace; 723 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace; 724 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace; 725 726 C->assembled = PETSC_TRUE; 727 C->preallocated = PETSC_TRUE; 728 729 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 730 PetscFunctionReturn(PETSC_SUCCESS); 731 } 732 733 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 734 { 735 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 736 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 737 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 738 PetscInt bs = A->rmap->bs, bs2 = a->bs2; 739 MatScalar *ba = b->a, *aa, *ap, *dk, *uik; 740 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 741 MatScalar *work; 742 PetscInt *pivots; 743 PetscBool allowzeropivot, zeropivotdetected; 744 745 PetscFunctionBegin; 746 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp)); 747 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 748 il[0] = 0; 749 for (i = 0; i < mbs; i++) jl[i] = mbs; 750 751 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work)); 752 PetscCall(PetscMalloc1(bs, &pivots)); 753 allowzeropivot = PetscNot(A->erroriffailure); 754 755 ai = a->i; 756 aj = a->j; 757 aa = a->a; 758 759 /* for each row k */ 760 for (k = 0; k < mbs; k++) { 761 /*initialize k-th row with elements nonzero in row k of A */ 762 jmin = ai[k]; 763 jmax = ai[k + 1]; 764 ap = aa + jmin * bs2; 765 for (j = jmin; j < jmax; j++) { 766 vj = aj[j]; /* block col. index */ 767 rtmp_ptr = rtmp + vj * bs2; 768 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++; 769 } 770 771 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 772 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2)); 773 i = jl[k]; /* first row to be added to k_th row */ 774 775 while (i < k) { 776 nexti = jl[i]; /* next row to be added to k_th row */ 777 778 /* compute multiplier */ 779 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 780 781 /* uik = -inv(Di)*U_bar(i,k) */ 782 diag = ba + i * bs2; 783 u = ba + ili * bs2; 784 PetscCall(PetscArrayzero(uik, bs2)); 785 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u); 786 787 /* update D(k) += -U(i,k)^T * U_bar(i,k) */ 788 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u); 789 PetscCall(PetscLogFlops(2.0 * bs * bs2)); 790 791 /* update -U(i,k) */ 792 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2)); 793 794 /* add multiple of row i to k-th row ... */ 795 jmin = ili + 1; 796 jmax = bi[i + 1]; 797 if (jmin < jmax) { 798 for (j = jmin; j < jmax; j++) { 799 /* rtmp += -U(i,k)^T * U_bar(i,j) */ 800 rtmp_ptr = rtmp + bj[j] * bs2; 801 u = ba + j * bs2; 802 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u); 803 } 804 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin))); 805 806 /* ... add i to row list for next nonzero entry */ 807 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 808 j = bj[jmin]; 809 jl[i] = jl[j]; 810 jl[j] = i; /* update jl */ 811 } 812 i = nexti; 813 } 814 815 /* save nonzero entries in k-th row of U ... */ 816 817 /* invert diagonal block */ 818 diag = ba + k * bs2; 819 PetscCall(PetscArraycpy(diag, dk, bs2)); 820 821 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected)); 822 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 823 824 jmin = bi[k]; 825 jmax = bi[k + 1]; 826 if (jmin < jmax) { 827 for (j = jmin; j < jmax; j++) { 828 vj = bj[j]; /* block col. index of U */ 829 u = ba + j * bs2; 830 rtmp_ptr = rtmp + vj * bs2; 831 for (k1 = 0; k1 < bs2; k1++) { 832 *u++ = *rtmp_ptr; 833 *rtmp_ptr++ = 0.0; 834 } 835 } 836 837 /* ... add k to row list for first nonzero entry in k-th row */ 838 il[k] = jmin; 839 i = bj[jmin]; 840 jl[k] = jl[i]; 841 jl[i] = k; 842 } 843 } 844 845 PetscCall(PetscFree(rtmp)); 846 PetscCall(PetscFree2(il, jl)); 847 PetscCall(PetscFree3(dk, uik, work)); 848 PetscCall(PetscFree(pivots)); 849 850 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 851 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 852 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 853 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace; 854 C->assembled = PETSC_TRUE; 855 C->preallocated = PETSC_TRUE; 856 857 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */ 858 PetscFunctionReturn(PETSC_SUCCESS); 859 } 860 861 /* 862 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP. 863 Version for blocks 2 by 2. 864 */ 865 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info) 866 { 867 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 868 IS perm = b->row; 869 const PetscInt *ai, *aj, *perm_ptr; 870 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 871 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 872 MatScalar *ba = b->a, *aa, *ap; 873 MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4]; 874 PetscReal shift = info->shiftamount; 875 PetscBool allowzeropivot, zeropivotdetected; 876 877 PetscFunctionBegin; 878 allowzeropivot = PetscNot(A->erroriffailure); 879 880 /* initialization */ 881 /* il and jl record the first nonzero element in each row of the accessing 882 window U(0:k, k:mbs-1). 883 jl: list of rows to be added to uneliminated rows 884 i>= k: jl(i) is the first row to be added to row i 885 i< k: jl(i) is the row following row i in some list of rows 886 jl(i) = mbs indicates the end of a list 887 il(i): points to the first nonzero element in columns k,...,mbs-1 of 888 row i of U */ 889 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 890 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 891 il[0] = 0; 892 for (i = 0; i < mbs; i++) jl[i] = mbs; 893 894 PetscCall(ISGetIndices(perm, &perm_ptr)); 895 896 /* check permutation */ 897 if (!a->permute) { 898 ai = a->i; 899 aj = a->j; 900 aa = a->a; 901 } else { 902 ai = a->inew; 903 aj = a->jnew; 904 PetscCall(PetscMalloc1(4 * ai[mbs], &aa)); 905 PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs])); 906 PetscCall(PetscMalloc1(ai[mbs], &a2anew)); 907 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs])); 908 909 for (i = 0; i < mbs; i++) { 910 jmin = ai[i]; 911 jmax = ai[i + 1]; 912 for (j = jmin; j < jmax; j++) { 913 while (a2anew[j] != j) { 914 k = a2anew[j]; 915 a2anew[j] = a2anew[k]; 916 a2anew[k] = k; 917 for (k1 = 0; k1 < 4; k1++) { 918 dk[k1] = aa[k * 4 + k1]; 919 aa[k * 4 + k1] = aa[j * 4 + k1]; 920 aa[j * 4 + k1] = dk[k1]; 921 } 922 } 923 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */ 924 if (i > aj[j]) { 925 ap = aa + j * 4; /* ptr to the beginning of the block */ 926 dk[1] = ap[1]; /* swap ap[1] and ap[2] */ 927 ap[1] = ap[2]; 928 ap[2] = dk[1]; 929 } 930 } 931 } 932 PetscCall(PetscFree(a2anew)); 933 } 934 935 /* for each row k */ 936 for (k = 0; k < mbs; k++) { 937 /*initialize k-th row with elements nonzero in row perm(k) of A */ 938 jmin = ai[perm_ptr[k]]; 939 jmax = ai[perm_ptr[k] + 1]; 940 ap = aa + jmin * 4; 941 for (j = jmin; j < jmax; j++) { 942 vj = perm_ptr[aj[j]]; /* block col. index */ 943 rtmp_ptr = rtmp + vj * 4; 944 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 945 } 946 947 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 948 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 949 i = jl[k]; /* first row to be added to k_th row */ 950 951 while (i < k) { 952 nexti = jl[i]; /* next row to be added to k_th row */ 953 954 /* compute multiplier */ 955 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 956 957 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 958 diag = ba + i * 4; 959 u = ba + ili * 4; 960 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 961 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 962 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 963 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 964 965 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 966 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 967 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 968 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 969 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 970 971 PetscCall(PetscLogFlops(16.0 * 2.0)); 972 973 /* update -U(i,k): ba[ili] = uik */ 974 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 975 976 /* add multiple of row i to k-th row ... */ 977 jmin = ili + 1; 978 jmax = bi[i + 1]; 979 if (jmin < jmax) { 980 for (j = jmin; j < jmax; j++) { 981 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 982 rtmp_ptr = rtmp + bj[j] * 4; 983 u = ba + j * 4; 984 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 985 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 986 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 987 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 988 } 989 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 990 991 /* ... add i to row list for next nonzero entry */ 992 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 993 j = bj[jmin]; 994 jl[i] = jl[j]; 995 jl[j] = i; /* update jl */ 996 } 997 i = nexti; 998 } 999 1000 /* save nonzero entries in k-th row of U ... */ 1001 1002 /* invert diagonal block */ 1003 diag = ba + k * 4; 1004 PetscCall(PetscArraycpy(diag, dk, 4)); 1005 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1006 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1007 1008 jmin = bi[k]; 1009 jmax = bi[k + 1]; 1010 if (jmin < jmax) { 1011 for (j = jmin; j < jmax; j++) { 1012 vj = bj[j]; /* block col. index of U */ 1013 u = ba + j * 4; 1014 rtmp_ptr = rtmp + vj * 4; 1015 for (k1 = 0; k1 < 4; k1++) { 1016 *u++ = *rtmp_ptr; 1017 *rtmp_ptr++ = 0.0; 1018 } 1019 } 1020 1021 /* ... add k to row list for first nonzero entry in k-th row */ 1022 il[k] = jmin; 1023 i = bj[jmin]; 1024 jl[k] = jl[i]; 1025 jl[i] = k; 1026 } 1027 } 1028 1029 PetscCall(PetscFree(rtmp)); 1030 PetscCall(PetscFree2(il, jl)); 1031 if (a->permute) PetscCall(PetscFree(aa)); 1032 PetscCall(ISRestoreIndices(perm, &perm_ptr)); 1033 1034 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace; 1035 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace; 1036 C->assembled = PETSC_TRUE; 1037 C->preallocated = PETSC_TRUE; 1038 1039 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 1043 /* 1044 Version for when blocks are 2 by 2 Using natural ordering 1045 */ 1046 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info) 1047 { 1048 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1049 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j; 1050 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili; 1051 MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8]; 1052 MatScalar *u, *diag, *rtmp, *rtmp_ptr; 1053 PetscReal shift = info->shiftamount; 1054 PetscBool allowzeropivot, zeropivotdetected; 1055 1056 PetscFunctionBegin; 1057 allowzeropivot = PetscNot(A->erroriffailure); 1058 1059 /* initialization */ 1060 /* il and jl record the first nonzero element in each row of the accessing 1061 window U(0:k, k:mbs-1). 1062 jl: list of rows to be added to uneliminated rows 1063 i>= k: jl(i) is the first row to be added to row i 1064 i< k: jl(i) is the row following row i in some list of rows 1065 jl(i) = mbs indicates the end of a list 1066 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1067 row i of U */ 1068 PetscCall(PetscCalloc1(4 * mbs, &rtmp)); 1069 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1070 il[0] = 0; 1071 for (i = 0; i < mbs; i++) jl[i] = mbs; 1072 1073 ai = a->i; 1074 aj = a->j; 1075 aa = a->a; 1076 1077 /* for each row k */ 1078 for (k = 0; k < mbs; k++) { 1079 /*initialize k-th row with elements nonzero in row k of A */ 1080 jmin = ai[k]; 1081 jmax = ai[k + 1]; 1082 ap = aa + jmin * 4; 1083 for (j = jmin; j < jmax; j++) { 1084 vj = aj[j]; /* block col. index */ 1085 rtmp_ptr = rtmp + vj * 4; 1086 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++; 1087 } 1088 1089 /* modify k-th row by adding in those rows i with U(i,k) != 0 */ 1090 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4)); 1091 i = jl[k]; /* first row to be added to k_th row */ 1092 1093 while (i < k) { 1094 nexti = jl[i]; /* next row to be added to k_th row */ 1095 1096 /* compute multiplier */ 1097 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1098 1099 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */ 1100 diag = ba + i * 4; 1101 u = ba + ili * 4; 1102 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]); 1103 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]); 1104 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]); 1105 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]); 1106 1107 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */ 1108 dk[0] += uik[0] * u[0] + uik[1] * u[1]; 1109 dk[1] += uik[2] * u[0] + uik[3] * u[1]; 1110 dk[2] += uik[0] * u[2] + uik[1] * u[3]; 1111 dk[3] += uik[2] * u[2] + uik[3] * u[3]; 1112 1113 PetscCall(PetscLogFlops(16.0 * 2.0)); 1114 1115 /* update -U(i,k): ba[ili] = uik */ 1116 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4)); 1117 1118 /* add multiple of row i to k-th row ... */ 1119 jmin = ili + 1; 1120 jmax = bi[i + 1]; 1121 if (jmin < jmax) { 1122 for (j = jmin; j < jmax; j++) { 1123 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */ 1124 rtmp_ptr = rtmp + bj[j] * 4; 1125 u = ba + j * 4; 1126 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1]; 1127 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1]; 1128 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3]; 1129 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3]; 1130 } 1131 PetscCall(PetscLogFlops(16.0 * (jmax - jmin))); 1132 1133 /* ... add i to row list for next nonzero entry */ 1134 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */ 1135 j = bj[jmin]; 1136 jl[i] = jl[j]; 1137 jl[j] = i; /* update jl */ 1138 } 1139 i = nexti; 1140 } 1141 1142 /* save nonzero entries in k-th row of U ... */ 1143 1144 /* invert diagonal block */ 1145 diag = ba + k * 4; 1146 PetscCall(PetscArraycpy(diag, dk, 4)); 1147 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 1148 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 1149 1150 jmin = bi[k]; 1151 jmax = bi[k + 1]; 1152 if (jmin < jmax) { 1153 for (j = jmin; j < jmax; j++) { 1154 vj = bj[j]; /* block col. index of U */ 1155 u = ba + j * 4; 1156 rtmp_ptr = rtmp + vj * 4; 1157 for (k1 = 0; k1 < 4; k1++) { 1158 *u++ = *rtmp_ptr; 1159 *rtmp_ptr++ = 0.0; 1160 } 1161 } 1162 1163 /* ... add k to row list for first nonzero entry in k-th row */ 1164 il[k] = jmin; 1165 i = bj[jmin]; 1166 jl[k] = jl[i]; 1167 jl[i] = k; 1168 } 1169 } 1170 1171 PetscCall(PetscFree(rtmp)); 1172 PetscCall(PetscFree2(il, jl)); 1173 1174 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1175 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1176 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1177 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace; 1178 C->assembled = PETSC_TRUE; 1179 C->preallocated = PETSC_TRUE; 1180 1181 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */ 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 /* 1186 Numeric U^T*D*U factorization for SBAIJ format. 1187 Version for blocks are 1 by 1. 1188 */ 1189 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info) 1190 { 1191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1192 IS ip = b->row; 1193 const PetscInt *ai, *aj, *rip; 1194 PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol; 1195 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz; 1196 MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi; 1197 PetscReal rs; 1198 FactorShiftCtx sctx; 1199 1200 PetscFunctionBegin; 1201 /* MatPivotSetUp(): initialize shift context sctx */ 1202 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1203 1204 PetscCall(ISGetIndices(ip, &rip)); 1205 if (!a->permute) { 1206 ai = a->i; 1207 aj = a->j; 1208 aa = a->a; 1209 } else { 1210 ai = a->inew; 1211 aj = a->jnew; 1212 nz = ai[mbs]; 1213 PetscCall(PetscMalloc1(nz, &aa)); 1214 a2anew = a->a2anew; 1215 bval = a->a; 1216 for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++); 1217 } 1218 1219 /* initialization */ 1220 /* il and jl record the first nonzero element in each row of the accessing 1221 window U(0:k, k:mbs-1). 1222 jl: list of rows to be added to uneliminated rows 1223 i>= k: jl(i) is the first row to be added to row i 1224 i< k: jl(i) is the row following row i in some list of rows 1225 jl(i) = mbs indicates the end of a list 1226 il(i): points to the first nonzero element in columns k,...,mbs-1 of 1227 row i of U */ 1228 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl)); 1229 1230 do { 1231 sctx.newshift = PETSC_FALSE; 1232 il[0] = 0; 1233 for (i = 0; i < mbs; i++) { 1234 rtmp[i] = 0.0; 1235 jl[i] = mbs; 1236 } 1237 1238 for (k = 0; k < mbs; k++) { 1239 /*initialize k-th row by the perm[k]-th row of A */ 1240 jmin = ai[rip[k]]; 1241 jmax = ai[rip[k] + 1]; 1242 bval = ba + bi[k]; 1243 for (j = jmin; j < jmax; j++) { 1244 col = rip[aj[j]]; 1245 rtmp[col] = aa[j]; 1246 *bval++ = 0.0; /* for in-place factorization */ 1247 } 1248 1249 /* shift the diagonal of the matrix */ 1250 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1251 1252 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1253 dk = rtmp[k]; 1254 i = jl[k]; /* first row to be added to k_th row */ 1255 1256 while (i < k) { 1257 nexti = jl[i]; /* next row to be added to k_th row */ 1258 1259 /* compute multiplier, update diag(k) and U(i,k) */ 1260 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1261 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */ 1262 dk += uikdi * ba[ili]; 1263 ba[ili] = uikdi; /* -U(i,k) */ 1264 1265 /* add multiple of row i to k-th row */ 1266 jmin = ili + 1; 1267 jmax = bi[i + 1]; 1268 if (jmin < jmax) { 1269 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1270 PetscCall(PetscLogFlops(2.0 * (jmax - jmin))); 1271 1272 /* update il and jl for row i */ 1273 il[i] = jmin; 1274 j = bj[jmin]; 1275 jl[i] = jl[j]; 1276 jl[j] = i; 1277 } 1278 i = nexti; 1279 } 1280 1281 /* shift the diagonals when zero pivot is detected */ 1282 /* compute rs=sum of abs(off-diagonal) */ 1283 rs = 0.0; 1284 jmin = bi[k] + 1; 1285 nz = bi[k + 1] - jmin; 1286 if (nz) { 1287 bcol = bj + jmin; 1288 while (nz--) { 1289 rs += PetscAbsScalar(rtmp[*bcol]); 1290 bcol++; 1291 } 1292 } 1293 1294 sctx.rs = rs; 1295 sctx.pv = dk; 1296 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1297 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1298 dk = sctx.pv; 1299 1300 /* copy data into U(k,:) */ 1301 ba[bi[k]] = 1.0 / dk; /* U(k,k) */ 1302 jmin = bi[k] + 1; 1303 jmax = bi[k + 1]; 1304 if (jmin < jmax) { 1305 for (j = jmin; j < jmax; j++) { 1306 col = bj[j]; 1307 ba[j] = rtmp[col]; 1308 rtmp[col] = 0.0; 1309 } 1310 /* add the k-th row into il and jl */ 1311 il[k] = jmin; 1312 i = bj[jmin]; 1313 jl[k] = jl[i]; 1314 jl[i] = k; 1315 } 1316 } 1317 } while (sctx.newshift); 1318 PetscCall(PetscFree3(rtmp, il, jl)); 1319 if (a->permute) PetscCall(PetscFree(aa)); 1320 1321 PetscCall(ISRestoreIndices(ip, &rip)); 1322 1323 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 1324 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1325 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 1326 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 1327 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 1328 C->assembled = PETSC_TRUE; 1329 C->preallocated = PETSC_TRUE; 1330 1331 PetscCall(PetscLogFlops(C->rmap->N)); 1332 if (sctx.nshift) { 1333 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1334 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1335 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1336 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1337 } 1338 } 1339 PetscFunctionReturn(PETSC_SUCCESS); 1340 } 1341 1342 /* 1343 Version for when blocks are 1 by 1 Using natural ordering under new datastructure 1344 Modified from MatCholeskyFactorNumeric_SeqAIJ() 1345 */ 1346 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info) 1347 { 1348 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 1349 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data; 1350 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp; 1351 PetscInt *ai = a->i, *aj = a->j, *ajtmp; 1352 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz; 1353 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi; 1354 FactorShiftCtx sctx; 1355 PetscReal rs; 1356 MatScalar d, *v; 1357 const PetscInt *adiag; 1358 1359 PetscFunctionBegin; 1360 PetscCall(MatGetDiagonalMarkers_SeqSBAIJ(A, &adiag, NULL)); 1361 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r)); 1362 1363 /* MatPivotSetUp(): initialize shift context sctx */ 1364 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1365 1366 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 1367 sctx.shift_top = info->zeropivot; 1368 1369 PetscCall(PetscArrayzero(rtmp, mbs)); 1370 1371 for (i = 0; i < mbs; i++) { 1372 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1373 d = aa[adiag[i]]; 1374 rtmp[i] += -PetscRealPart(d); /* diagonal entry */ 1375 ajtmp = aj + ai[i] + 1; /* exclude diagonal */ 1376 v = aa + ai[i] + 1; 1377 nz = ai[i + 1] - ai[i] - 1; 1378 for (j = 0; j < nz; j++) { 1379 rtmp[i] += PetscAbsScalar(v[j]); 1380 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]); 1381 } 1382 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]); 1383 } 1384 sctx.shift_top *= 1.1; 1385 sctx.nshift_max = 5; 1386 sctx.shift_lo = 0.; 1387 sctx.shift_hi = 1.; 1388 } 1389 1390 /* allocate working arrays 1391 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 1392 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 1393 */ 1394 do { 1395 sctx.newshift = PETSC_FALSE; 1396 1397 for (i = 0; i < mbs; i++) c2r[i] = mbs; 1398 if (mbs) il[0] = 0; 1399 1400 for (k = 0; k < mbs; k++) { 1401 /* zero rtmp */ 1402 nz = bi[k + 1] - bi[k]; 1403 bjtmp = bj + bi[k]; 1404 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0; 1405 1406 /* load in initial unfactored row */ 1407 bval = ba + bi[k]; 1408 jmin = ai[k]; 1409 jmax = ai[k + 1]; 1410 for (j = jmin; j < jmax; j++) { 1411 col = aj[j]; 1412 rtmp[col] = aa[j]; 1413 *bval++ = 0.0; /* for in-place factorization */ 1414 } 1415 /* shift the diagonal of the matrix: ZeropivotApply() */ 1416 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1417 1418 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1419 dk = rtmp[k]; 1420 i = c2r[k]; /* first row to be added to k_th row */ 1421 1422 while (i < k) { 1423 nexti = c2r[i]; /* next row to be added to k_th row */ 1424 1425 /* compute multiplier, update diag(k) and U(i,k) */ 1426 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1427 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */ 1428 dk += uikdi * ba[ili]; /* update diag[k] */ 1429 ba[ili] = uikdi; /* -U(i,k) */ 1430 1431 /* add multiple of row i to k-th row */ 1432 jmin = ili + 1; 1433 jmax = bi[i + 1]; 1434 if (jmin < jmax) { 1435 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j]; 1436 /* update il and c2r for row i */ 1437 il[i] = jmin; 1438 j = bj[jmin]; 1439 c2r[i] = c2r[j]; 1440 c2r[j] = i; 1441 } 1442 i = nexti; 1443 } 1444 1445 /* copy data into U(k,:) */ 1446 rs = 0.0; 1447 jmin = bi[k]; 1448 jmax = bi[k + 1] - 1; 1449 if (jmin < jmax) { 1450 for (j = jmin; j < jmax; j++) { 1451 col = bj[j]; 1452 ba[j] = rtmp[col]; 1453 rs += PetscAbsScalar(ba[j]); 1454 } 1455 /* add the k-th row into il and c2r */ 1456 il[k] = jmin; 1457 i = bj[jmin]; 1458 c2r[k] = c2r[i]; 1459 c2r[i] = k; 1460 } 1461 1462 sctx.rs = rs; 1463 sctx.pv = dk; 1464 PetscCall(MatPivotCheck(B, A, info, &sctx, k)); 1465 if (sctx.newshift) break; 1466 dk = sctx.pv; 1467 1468 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */ 1469 } 1470 } while (sctx.newshift); 1471 1472 PetscCall(PetscFree3(rtmp, il, c2r)); 1473 1474 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1475 B->ops->solves = MatSolves_SeqSBAIJ_1; 1476 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1477 B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering; 1478 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1479 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1480 1481 B->assembled = PETSC_TRUE; 1482 B->preallocated = PETSC_TRUE; 1483 1484 PetscCall(PetscLogFlops(B->rmap->n)); 1485 1486 /* MatPivotView() */ 1487 if (sctx.nshift) { 1488 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1489 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top)); 1490 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1491 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1492 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 1493 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount)); 1494 } 1495 } 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info) 1500 { 1501 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data; 1502 PetscInt i, j, mbs = a->mbs; 1503 PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 1504 PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz; 1505 MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval; 1506 PetscReal rs; 1507 FactorShiftCtx sctx; 1508 1509 PetscFunctionBegin; 1510 /* MatPivotSetUp(): initialize shift context sctx */ 1511 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx))); 1512 1513 /* initialization */ 1514 /* il and jl record the first nonzero element in each row of the accessing 1515 window U(0:k, k:mbs-1). 1516 jl: list of rows to be added to uneliminated rows 1517 i>= k: jl(i) is the first row to be added to row i 1518 i< k: jl(i) is the row following row i in some list of rows 1519 jl(i) = mbs indicates the end of a list 1520 il(i): points to the first nonzero element in U(i,k:mbs-1) 1521 */ 1522 PetscCall(PetscMalloc1(mbs, &rtmp)); 1523 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl)); 1524 1525 do { 1526 sctx.newshift = PETSC_FALSE; 1527 il[0] = 0; 1528 for (i = 0; i < mbs; i++) { 1529 rtmp[i] = 0.0; 1530 jl[i] = mbs; 1531 } 1532 1533 for (k = 0; k < mbs; k++) { 1534 /*initialize k-th row with elements nonzero in row perm(k) of A */ 1535 nz = ai[k + 1] - ai[k]; 1536 acol = aj + ai[k]; 1537 aval = aa + ai[k]; 1538 bval = ba + bi[k]; 1539 while (nz--) { 1540 rtmp[*acol++] = *aval++; 1541 *bval++ = 0.0; /* for in-place factorization */ 1542 } 1543 1544 /* shift the diagonal of the matrix */ 1545 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1546 1547 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1548 dk = rtmp[k]; 1549 i = jl[k]; /* first row to be added to k_th row */ 1550 1551 while (i < k) { 1552 nexti = jl[i]; /* next row to be added to k_th row */ 1553 /* compute multiplier, update D(k) and U(i,k) */ 1554 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1555 uikdi = -ba[ili] * ba[bi[i]]; 1556 dk += uikdi * ba[ili]; 1557 ba[ili] = uikdi; /* -U(i,k) */ 1558 1559 /* add multiple of row i to k-th row ... */ 1560 jmin = ili + 1; 1561 nz = bi[i + 1] - jmin; 1562 if (nz > 0) { 1563 bcol = bj + jmin; 1564 bval = ba + jmin; 1565 PetscCall(PetscLogFlops(2.0 * nz)); 1566 while (nz--) rtmp[*bcol++] += uikdi * (*bval++); 1567 1568 /* update il and jl for i-th row */ 1569 il[i] = jmin; 1570 j = bj[jmin]; 1571 jl[i] = jl[j]; 1572 jl[j] = i; 1573 } 1574 i = nexti; 1575 } 1576 1577 /* shift the diagonals when zero pivot is detected */ 1578 /* compute rs=sum of abs(off-diagonal) */ 1579 rs = 0.0; 1580 jmin = bi[k] + 1; 1581 nz = bi[k + 1] - jmin; 1582 if (nz) { 1583 bcol = bj + jmin; 1584 while (nz--) { 1585 rs += PetscAbsScalar(rtmp[*bcol]); 1586 bcol++; 1587 } 1588 } 1589 1590 sctx.rs = rs; 1591 sctx.pv = dk; 1592 PetscCall(MatPivotCheck(C, A, info, &sctx, k)); 1593 if (sctx.newshift) break; /* sctx.shift_amount is updated */ 1594 dk = sctx.pv; 1595 1596 /* copy data into U(k,:) */ 1597 ba[bi[k]] = 1.0 / dk; 1598 jmin = bi[k] + 1; 1599 nz = bi[k + 1] - jmin; 1600 if (nz) { 1601 bcol = bj + jmin; 1602 bval = ba + jmin; 1603 while (nz--) { 1604 *bval++ = rtmp[*bcol]; 1605 rtmp[*bcol++] = 0.0; 1606 } 1607 /* add k-th row into il and jl */ 1608 il[k] = jmin; 1609 i = bj[jmin]; 1610 jl[k] = jl[i]; 1611 jl[i] = k; 1612 } 1613 } /* end of for (k = 0; k<mbs; k++) */ 1614 } while (sctx.newshift); 1615 PetscCall(PetscFree(rtmp)); 1616 PetscCall(PetscFree2(il, jl)); 1617 1618 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1619 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace; 1620 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1621 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1622 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 1623 1624 C->assembled = PETSC_TRUE; 1625 C->preallocated = PETSC_TRUE; 1626 1627 PetscCall(PetscLogFlops(C->rmap->N)); 1628 if (sctx.nshift) { 1629 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 1630 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1631 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 1632 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount)); 1633 } 1634 } 1635 PetscFunctionReturn(PETSC_SUCCESS); 1636 } 1637 1638 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info) 1639 { 1640 Mat C; 1641 1642 PetscFunctionBegin; 1643 PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C)); 1644 PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info)); 1645 PetscCall(MatCholeskyFactorNumeric(C, A, info)); 1646 1647 A->ops->solve = C->ops->solve; 1648 A->ops->solvetranspose = C->ops->solvetranspose; 1649 1650 PetscCall(MatHeaderMerge(A, &C)); 1651 PetscFunctionReturn(PETSC_SUCCESS); 1652 } 1653