1 #include <../src/mat/impls/baij/seq/baij.h>
2 #include <../src/mat/impls/sbaij/seq/sbaij.h>
3 #include <petsc/private/kernels/blockinvert.h>
4 #include <petscis.h>
5
MatGetInertia_SeqSBAIJ(Mat F,PetscInt * nneg,PetscInt * nzero,PetscInt * npos)6 PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
7 {
8 Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
9 MatScalar *dd = fact->a;
10 PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
11
12 PetscFunctionBegin;
13 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
14
15 nneg_tmp = 0;
16 npos_tmp = 0;
17 if (fi) {
18 for (i = 0; i < mbs; i++) {
19 if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
20 else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
21 fi++;
22 }
23 } else {
24 for (i = 0; i < mbs; i++) {
25 if (PetscRealPart(dd[fact->i[i]]) > 0.0) npos_tmp++;
26 else if (PetscRealPart(dd[fact->i[i]]) < 0.0) nneg_tmp++;
27 }
28 }
29 if (nneg) *nneg = nneg_tmp;
30 if (npos) *npos = npos_tmp;
31 if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
32 PetscFunctionReturn(PETSC_SUCCESS);
33 }
34
35 /*
36 Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
37 Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
38 */
MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo * info)39 static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
40 {
41 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b;
42 const PetscInt *rip, *ai, *aj;
43 PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
44 PetscInt m, reallocs = 0, prow;
45 PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
46 PetscReal f = info->fill;
47 PetscBool perm_identity;
48
49 PetscFunctionBegin;
50 /* check whether perm is the identity mapping */
51 PetscCall(ISIdentity(perm, &perm_identity));
52 PetscCall(ISGetIndices(perm, &rip));
53
54 if (perm_identity) { /* without permutation */
55 a->permute = PETSC_FALSE;
56
57 ai = a->i;
58 aj = a->j;
59 } else { /* non-trivial permutation */
60 a->permute = PETSC_TRUE;
61
62 PetscCall(MatReorderingSeqSBAIJ(A, perm));
63
64 ai = a->inew;
65 aj = a->jnew;
66 }
67
68 /* initialization */
69 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&iu));
70 umax = (PetscInt)(f * ai[mbs] + 1);
71 umax += mbs + 1;
72 PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&ju));
73 iu[0] = mbs + 1;
74 juidx = mbs + 1; /* index for ju */
75 /* jl linked list for pivot row -- linked list for col index */
76 PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
77 for (i = 0; i < mbs; i++) {
78 jl[i] = mbs;
79 q[i] = 0;
80 }
81
82 /* for each row k */
83 for (k = 0; k < mbs; k++) {
84 for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
85 nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
86 q[k] = mbs;
87 /* initialize nonzero structure of k-th row to row rip[k] of A */
88 jmin = ai[rip[k]] + 1; /* exclude diag[k] */
89 jmax = ai[rip[k] + 1];
90 for (j = jmin; j < jmax; j++) {
91 vj = rip[aj[j]]; /* col. value */
92 if (vj > k) {
93 qm = k;
94 do {
95 m = qm;
96 qm = q[m];
97 } while (qm < vj);
98 PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
99 nzk++;
100 q[m] = vj;
101 q[vj] = qm;
102 } /* if (vj > k) */
103 } /* for (j=jmin; j<jmax; j++) */
104
105 /* modify nonzero structure of k-th row by computing fill-in
106 for each row i to be merged in */
107 prow = k;
108 prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
109
110 while (prow < k) {
111 /* merge row prow into k-th row */
112 jmin = iu[prow] + 1;
113 jmax = iu[prow + 1];
114 qm = k;
115 for (j = jmin; j < jmax; j++) {
116 vj = ju[j];
117 do {
118 m = qm;
119 qm = q[m];
120 } while (qm < vj);
121 if (qm != vj) {
122 nzk++;
123 q[m] = vj;
124 q[vj] = qm;
125 qm = vj;
126 }
127 }
128 prow = jl[prow]; /* next pivot row */
129 }
130
131 /* add k to row list for first nonzero element in k-th row */
132 if (nzk > 0) {
133 i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
134 jl[k] = jl[i];
135 jl[i] = k;
136 }
137 iu[k + 1] = iu[k] + nzk;
138
139 /* allocate more space to ju if needed */
140 if (iu[k + 1] > umax) {
141 /* estimate how much additional space we will need */
142 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
143 /* just double the memory each time */
144 maxadd = umax;
145 if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
146 umax += maxadd;
147
148 /* allocate a longer ju */
149 PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&jutmp));
150 PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
151 PetscCall(PetscShmgetDeallocateArray((void **)&ju));
152 ju = jutmp;
153 reallocs++; /* count how many times we realloc */
154 }
155
156 /* save nonzero structure of k-th row in ju */
157 i = k;
158 while (nzk--) {
159 i = q[i];
160 ju[juidx++] = i;
161 }
162 }
163
164 #if defined(PETSC_USE_INFO)
165 if (ai[mbs] != 0) {
166 PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
167 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
168 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
169 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
170 PetscCall(PetscInfo(A, "for best performance.\n"));
171 } else {
172 PetscCall(PetscInfo(A, "Empty matrix\n"));
173 }
174 #endif
175
176 PetscCall(ISRestoreIndices(perm, &rip));
177 PetscCall(PetscFree2(jl, q));
178
179 /* put together the new matrix */
180 PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
181
182 b = (Mat_SeqSBAIJ *)F->data;
183 b->free_ij = PETSC_TRUE;
184 PetscCall(PetscShmgetAllocateArray((iu[mbs] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
185 b->free_a = PETSC_TRUE;
186 b->j = ju;
187 b->i = iu;
188 b->diag = NULL;
189 b->ilen = NULL;
190 b->imax = NULL;
191 b->row = perm;
192
193 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
194
195 PetscCall(PetscObjectReference((PetscObject)perm));
196
197 b->icol = perm;
198 PetscCall(PetscObjectReference((PetscObject)perm));
199 PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
200 /* In b structure: Free imax, ilen, old a, old j.
201 Allocate idnew, solve_work, new a, new j */
202 b->maxnz = b->nz = iu[mbs];
203
204 F->info.factor_mallocs = reallocs;
205 F->info.fill_ratio_given = f;
206 if (ai[mbs] != 0) {
207 F->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
208 } else {
209 F->info.fill_ratio_needed = 0.0;
210 }
211 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
212 PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 /*
215 Symbolic U^T*D*U factorization for SBAIJ format.
216 See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
217 */
218 #include <petscbt.h>
219 #include <../src/mat/utils/freespace.h>
MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)220 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
221 {
222 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
223 Mat_SeqSBAIJ *b;
224 PetscBool perm_identity;
225 PetscReal fill = info->fill;
226 const PetscInt *rip, *ai = a->i, *aj = a->j;
227 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
228 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
229 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
230 PetscFreeSpaceList free_space = NULL, current_space = NULL;
231 PetscBT lnkbt;
232 PetscBool diagDense;
233
234 PetscFunctionBegin;
235 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
236 PetscCall(MatGetDiagonalMarkers_SeqSBAIJ(A, NULL, &diagDense));
237 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");
238 if (bs > 1) {
239 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
240 PetscFunctionReturn(PETSC_SUCCESS);
241 }
242
243 /* check whether perm is the identity mapping */
244 PetscCall(ISIdentity(perm, &perm_identity));
245 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
246 a->permute = PETSC_FALSE;
247 PetscCall(ISGetIndices(perm, &rip));
248
249 /* initialization */
250 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui));
251 PetscCall(PetscMalloc1(mbs + 1, &udiag));
252 ui[0] = 0;
253
254 /* jl: linked list for storing indices of the pivot rows
255 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
256 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
257 for (i = 0; i < mbs; i++) {
258 jl[i] = mbs;
259 il[i] = 0;
260 }
261
262 /* create and initialize a linked list for storing column indices of the active row k */
263 nlnk = mbs + 1;
264 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
265
266 /* initial FreeSpace size is fill*(ai[mbs]+1) */
267 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
268 current_space = free_space;
269
270 for (k = 0; k < mbs; k++) { /* for each active row k */
271 /* initialize lnk by the column indices of row rip[k] of A */
272 nzk = 0;
273 ncols = ai[k + 1] - ai[k];
274 PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
275 for (j = 0; j < ncols; j++) {
276 i = *(aj + ai[k] + j);
277 cols[j] = i;
278 }
279 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
280 nzk += nlnk;
281
282 /* update lnk by computing fill-in for each pivot row to be merged in */
283 prow = jl[k]; /* 1st pivot row */
284
285 while (prow < k) {
286 nextprow = jl[prow];
287 /* merge prow into k-th row */
288 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
289 jmax = ui[prow + 1];
290 ncols = jmax - jmin;
291 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
292 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
293 nzk += nlnk;
294
295 /* update il and jl for prow */
296 if (jmin < jmax) {
297 il[prow] = jmin;
298 j = *uj_ptr;
299 jl[prow] = jl[j];
300 jl[j] = prow;
301 }
302 prow = nextprow;
303 }
304
305 /* if free space is not available, make more free space */
306 if (current_space->local_remaining < nzk) {
307 i = mbs - k + 1; /* num of unfactored rows */
308 i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
309 PetscCall(PetscFreeSpaceGet(i, ¤t_space));
310 reallocs++;
311 }
312
313 /* copy data into free space, then initialize lnk */
314 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
315
316 /* add the k-th row into il and jl */
317 if (nzk > 1) {
318 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
319 jl[k] = jl[i];
320 jl[i] = k;
321 il[k] = ui[k] + 1;
322 }
323 ui_ptr[k] = current_space->array;
324
325 current_space->array += nzk;
326 current_space->local_used += nzk;
327 current_space->local_remaining -= nzk;
328
329 ui[k + 1] = ui[k] + nzk;
330 }
331
332 PetscCall(ISRestoreIndices(perm, &rip));
333 PetscCall(PetscFree4(ui_ptr, il, jl, cols));
334
335 /* destroy list of free space and other temporary array(s) */
336 PetscCall(PetscShmgetAllocateArray(ui[mbs], sizeof(PetscInt), (void **)&uj));
337 PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
338 PetscCall(PetscLLDestroy(lnk, lnkbt));
339
340 /* put together the new matrix in MATSEQSBAIJ format */
341 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
342
343 b = (Mat_SeqSBAIJ *)fact->data;
344 b->free_ij = PETSC_TRUE;
345 PetscCall(PetscShmgetAllocateArray(ui[mbs], sizeof(PetscScalar), (void **)&b->a));
346 b->free_a = PETSC_TRUE;
347 b->j = uj;
348 b->i = ui;
349 b->diag = udiag;
350 b->ilen = NULL;
351 b->imax = NULL;
352 b->row = perm;
353 b->icol = perm;
354
355 PetscCall(PetscObjectReference((PetscObject)perm));
356 PetscCall(PetscObjectReference((PetscObject)perm));
357
358 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
359
360 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
361
362 b->maxnz = b->nz = ui[mbs];
363
364 fact->info.factor_mallocs = reallocs;
365 fact->info.fill_ratio_given = fill;
366 if (ai[mbs] != 0) {
367 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
368 } else {
369 fact->info.fill_ratio_needed = 0.0;
370 }
371 #if defined(PETSC_USE_INFO)
372 if (ai[mbs] != 0) {
373 PetscReal af = fact->info.fill_ratio_needed;
374 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
375 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
376 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
377 } else {
378 PetscCall(PetscInfo(A, "Empty matrix\n"));
379 }
380 #endif
381 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
382 PetscFunctionReturn(PETSC_SUCCESS);
383 }
384
MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo * info)385 PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
386 {
387 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
388 Mat_SeqSBAIJ *b;
389 PetscBool perm_identity;
390 PetscReal fill = info->fill;
391 const PetscInt *rip, *ai, *aj;
392 PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
393 PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
394 PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
395 PetscFreeSpaceList free_space = NULL, current_space = NULL;
396 PetscBT lnkbt;
397 PetscBool diagDense;
398
399 PetscFunctionBegin;
400 PetscCall(MatGetDiagonalMarkers_SeqSBAIJ(A, NULL, &diagDense));
401 PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");
402
403 /*
404 This code originally uses Modified Sparse Row (MSR) storage
405 (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
406 Then it is rewritten so the factor B takes seqsbaij format. However the associated
407 MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
408 thus the original code in MSR format is still used for these cases.
409 The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
410 MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
411 */
412 if (bs > 1) {
413 PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
414 PetscFunctionReturn(PETSC_SUCCESS);
415 }
416
417 /* check whether perm is the identity mapping */
418 PetscCall(ISIdentity(perm, &perm_identity));
419 PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
420 a->permute = PETSC_FALSE;
421 ai = a->i;
422 aj = a->j;
423 PetscCall(ISGetIndices(perm, &rip));
424
425 /* initialization */
426 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui));
427 ui[0] = 0;
428
429 /* jl: linked list for storing indices of the pivot rows
430 il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
431 PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
432 for (i = 0; i < mbs; i++) {
433 jl[i] = mbs;
434 il[i] = 0;
435 }
436
437 /* create and initialize a linked list for storing column indices of the active row k */
438 nlnk = mbs + 1;
439 PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
440
441 /* initial FreeSpace size is fill*(ai[mbs]+1) */
442 PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
443 current_space = free_space;
444
445 for (k = 0; k < mbs; k++) { /* for each active row k */
446 /* initialize lnk by the column indices of row rip[k] of A */
447 nzk = 0;
448 ncols = ai[rip[k] + 1] - ai[rip[k]];
449 for (j = 0; j < ncols; j++) {
450 i = *(aj + ai[rip[k]] + j);
451 cols[j] = rip[i];
452 }
453 PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
454 nzk += nlnk;
455
456 /* update lnk by computing fill-in for each pivot row to be merged in */
457 prow = jl[k]; /* 1st pivot row */
458
459 while (prow < k) {
460 nextprow = jl[prow];
461 /* merge prow into k-th row */
462 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
463 jmax = ui[prow + 1];
464 ncols = jmax - jmin;
465 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
466 PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
467 nzk += nlnk;
468
469 /* update il and jl for prow */
470 if (jmin < jmax) {
471 il[prow] = jmin;
472
473 j = *uj_ptr;
474 jl[prow] = jl[j];
475 jl[j] = prow;
476 }
477 prow = nextprow;
478 }
479
480 /* if free space is not available, make more free space */
481 if (current_space->local_remaining < nzk) {
482 i = mbs - k + 1; /* num of unfactored rows */
483 i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
484 PetscCall(PetscFreeSpaceGet(i, ¤t_space));
485 reallocs++;
486 }
487
488 /* copy data into free space, then initialize lnk */
489 PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
490
491 /* add the k-th row into il and jl */
492 if (nzk - 1 > 0) {
493 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
494 jl[k] = jl[i];
495 jl[i] = k;
496 il[k] = ui[k] + 1;
497 }
498 ui_ptr[k] = current_space->array;
499
500 current_space->array += nzk;
501 current_space->local_used += nzk;
502 current_space->local_remaining -= nzk;
503
504 ui[k + 1] = ui[k] + nzk;
505 }
506
507 PetscCall(ISRestoreIndices(perm, &rip));
508 PetscCall(PetscFree4(ui_ptr, il, jl, cols));
509
510 /* destroy list of free space and other temporary array(s) */
511 PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscInt), (void **)&uj));
512 PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
513 PetscCall(PetscLLDestroy(lnk, lnkbt));
514
515 /* put together the new matrix in MATSEQSBAIJ format */
516 PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
517
518 b = (Mat_SeqSBAIJ *)fact->data;
519 PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscScalar), (void **)&b->a));
520 b->free_a = PETSC_TRUE;
521 b->free_ij = PETSC_TRUE;
522 b->j = uj;
523 b->i = ui;
524 b->diag = NULL;
525 b->ilen = NULL;
526 b->imax = NULL;
527 b->row = perm;
528
529 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
530
531 PetscCall(PetscObjectReference((PetscObject)perm));
532 b->icol = perm;
533 PetscCall(PetscObjectReference((PetscObject)perm));
534 PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
535 b->maxnz = b->nz = ui[mbs];
536
537 fact->info.factor_mallocs = reallocs;
538 fact->info.fill_ratio_given = fill;
539 if (ai[mbs] != 0) {
540 fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
541 } else {
542 fact->info.fill_ratio_needed = 0.0;
543 }
544 #if defined(PETSC_USE_INFO)
545 if (ai[mbs] != 0) {
546 PetscReal af = fact->info.fill_ratio_needed;
547 PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
548 PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
549 PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
550 } else {
551 PetscCall(PetscInfo(A, "Empty matrix\n"));
552 }
553 #endif
554 PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
555 PetscFunctionReturn(PETSC_SUCCESS);
556 }
557
MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo * info)558 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
559 {
560 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
561 IS perm = b->row;
562 const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
563 PetscInt i, j;
564 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
565 PetscInt bs = A->rmap->bs, bs2 = a->bs2;
566 MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
567 MatScalar *u, *diag, *rtmp, *rtmp_ptr;
568 MatScalar *work;
569 PetscInt *pivots;
570 PetscBool allowzeropivot, zeropivotdetected;
571
572 PetscFunctionBegin;
573 /* initialization */
574 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
575 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
576 allowzeropivot = PetscNot(A->erroriffailure);
577
578 il[0] = 0;
579 for (i = 0; i < mbs; i++) jl[i] = mbs;
580
581 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
582 PetscCall(PetscMalloc1(bs, &pivots));
583
584 PetscCall(ISGetIndices(perm, &perm_ptr));
585
586 /* check permutation */
587 if (!a->permute) {
588 ai = a->i;
589 aj = a->j;
590 aa = a->a;
591 } else {
592 ai = a->inew;
593 aj = a->jnew;
594 PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
595 PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
596 PetscCall(PetscMalloc1(ai[mbs], &a2anew));
597 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
598
599 for (i = 0; i < mbs; i++) {
600 jmin = ai[i];
601 jmax = ai[i + 1];
602 for (j = jmin; j < jmax; j++) {
603 while (a2anew[j] != j) {
604 k = a2anew[j];
605 a2anew[j] = a2anew[k];
606 a2anew[k] = k;
607 for (k1 = 0; k1 < bs2; k1++) {
608 dk[k1] = aa[k * bs2 + k1];
609 aa[k * bs2 + k1] = aa[j * bs2 + k1];
610 aa[j * bs2 + k1] = dk[k1];
611 }
612 }
613 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
614 if (i > aj[j]) {
615 ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */
616 for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
617 for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */
618 for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
619 }
620 }
621 }
622 }
623 PetscCall(PetscFree(a2anew));
624 }
625
626 /* for each row k */
627 for (k = 0; k < mbs; k++) {
628 /*initialize k-th row with elements nonzero in row perm(k) of A */
629 jmin = ai[perm_ptr[k]];
630 jmax = ai[perm_ptr[k] + 1];
631
632 ap = aa + jmin * bs2;
633 for (j = jmin; j < jmax; j++) {
634 vj = perm_ptr[aj[j]]; /* block col. index */
635 rtmp_ptr = rtmp + vj * bs2;
636 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
637 }
638
639 /* modify k-th row by adding in those rows i with U(i,k) != 0 */
640 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
641 i = jl[k]; /* first row to be added to k_th row */
642
643 while (i < k) {
644 nexti = jl[i]; /* next row to be added to k_th row */
645
646 /* compute multiplier */
647 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
648
649 /* uik = -inv(Di)*U_bar(i,k) */
650 diag = ba + i * bs2;
651 u = ba + ili * bs2;
652 PetscCall(PetscArrayzero(uik, bs2));
653 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
654
655 /* update D(k) += -U(i,k)^T * U_bar(i,k) */
656 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
657 PetscCall(PetscLogFlops(4.0 * bs * bs2));
658
659 /* update -U(i,k) */
660 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
661
662 /* add multiple of row i to k-th row ... */
663 jmin = ili + 1;
664 jmax = bi[i + 1];
665 if (jmin < jmax) {
666 for (j = jmin; j < jmax; j++) {
667 /* rtmp += -U(i,k)^T * U_bar(i,j) */
668 rtmp_ptr = rtmp + bj[j] * bs2;
669 u = ba + j * bs2;
670 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
671 }
672 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
673
674 /* ... add i to row list for next nonzero entry */
675 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
676 j = bj[jmin];
677 jl[i] = jl[j];
678 jl[j] = i; /* update jl */
679 }
680 i = nexti;
681 }
682
683 /* save nonzero entries in k-th row of U ... */
684
685 /* invert diagonal block */
686 diag = ba + k * bs2;
687 PetscCall(PetscArraycpy(diag, dk, bs2));
688
689 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
690 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
691
692 jmin = bi[k];
693 jmax = bi[k + 1];
694 if (jmin < jmax) {
695 for (j = jmin; j < jmax; j++) {
696 vj = bj[j]; /* block col. index of U */
697 u = ba + j * bs2;
698 rtmp_ptr = rtmp + vj * bs2;
699 for (k1 = 0; k1 < bs2; k1++) {
700 *u++ = *rtmp_ptr;
701 *rtmp_ptr++ = 0.0;
702 }
703 }
704
705 /* ... add k to row list for first nonzero entry in k-th row */
706 il[k] = jmin;
707 i = bj[jmin];
708 jl[k] = jl[i];
709 jl[i] = k;
710 }
711 }
712
713 PetscCall(PetscFree(rtmp));
714 PetscCall(PetscFree2(il, jl));
715 PetscCall(PetscFree3(dk, uik, work));
716 PetscCall(PetscFree(pivots));
717 if (a->permute) PetscCall(PetscFree(aa));
718
719 PetscCall(ISRestoreIndices(perm, &perm_ptr));
720
721 C->ops->solve = MatSolve_SeqSBAIJ_N_inplace;
722 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
723 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace;
724 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace;
725
726 C->assembled = PETSC_TRUE;
727 C->preallocated = PETSC_TRUE;
728
729 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
730 PetscFunctionReturn(PETSC_SUCCESS);
731 }
732
MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo * info)733 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
734 {
735 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
736 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
737 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
738 PetscInt bs = A->rmap->bs, bs2 = a->bs2;
739 MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
740 MatScalar *u, *diag, *rtmp, *rtmp_ptr;
741 MatScalar *work;
742 PetscInt *pivots;
743 PetscBool allowzeropivot, zeropivotdetected;
744
745 PetscFunctionBegin;
746 PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
747 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
748 il[0] = 0;
749 for (i = 0; i < mbs; i++) jl[i] = mbs;
750
751 PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
752 PetscCall(PetscMalloc1(bs, &pivots));
753 allowzeropivot = PetscNot(A->erroriffailure);
754
755 ai = a->i;
756 aj = a->j;
757 aa = a->a;
758
759 /* for each row k */
760 for (k = 0; k < mbs; k++) {
761 /*initialize k-th row with elements nonzero in row k of A */
762 jmin = ai[k];
763 jmax = ai[k + 1];
764 ap = aa + jmin * bs2;
765 for (j = jmin; j < jmax; j++) {
766 vj = aj[j]; /* block col. index */
767 rtmp_ptr = rtmp + vj * bs2;
768 for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
769 }
770
771 /* modify k-th row by adding in those rows i with U(i,k) != 0 */
772 PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
773 i = jl[k]; /* first row to be added to k_th row */
774
775 while (i < k) {
776 nexti = jl[i]; /* next row to be added to k_th row */
777
778 /* compute multiplier */
779 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
780
781 /* uik = -inv(Di)*U_bar(i,k) */
782 diag = ba + i * bs2;
783 u = ba + ili * bs2;
784 PetscCall(PetscArrayzero(uik, bs2));
785 PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
786
787 /* update D(k) += -U(i,k)^T * U_bar(i,k) */
788 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
789 PetscCall(PetscLogFlops(2.0 * bs * bs2));
790
791 /* update -U(i,k) */
792 PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
793
794 /* add multiple of row i to k-th row ... */
795 jmin = ili + 1;
796 jmax = bi[i + 1];
797 if (jmin < jmax) {
798 for (j = jmin; j < jmax; j++) {
799 /* rtmp += -U(i,k)^T * U_bar(i,j) */
800 rtmp_ptr = rtmp + bj[j] * bs2;
801 u = ba + j * bs2;
802 PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
803 }
804 PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
805
806 /* ... add i to row list for next nonzero entry */
807 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
808 j = bj[jmin];
809 jl[i] = jl[j];
810 jl[j] = i; /* update jl */
811 }
812 i = nexti;
813 }
814
815 /* save nonzero entries in k-th row of U ... */
816
817 /* invert diagonal block */
818 diag = ba + k * bs2;
819 PetscCall(PetscArraycpy(diag, dk, bs2));
820
821 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
822 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
823
824 jmin = bi[k];
825 jmax = bi[k + 1];
826 if (jmin < jmax) {
827 for (j = jmin; j < jmax; j++) {
828 vj = bj[j]; /* block col. index of U */
829 u = ba + j * bs2;
830 rtmp_ptr = rtmp + vj * bs2;
831 for (k1 = 0; k1 < bs2; k1++) {
832 *u++ = *rtmp_ptr;
833 *rtmp_ptr++ = 0.0;
834 }
835 }
836
837 /* ... add k to row list for first nonzero entry in k-th row */
838 il[k] = jmin;
839 i = bj[jmin];
840 jl[k] = jl[i];
841 jl[i] = k;
842 }
843 }
844
845 PetscCall(PetscFree(rtmp));
846 PetscCall(PetscFree2(il, jl));
847 PetscCall(PetscFree3(dk, uik, work));
848 PetscCall(PetscFree(pivots));
849
850 C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
851 C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
854 C->assembled = PETSC_TRUE;
855 C->preallocated = PETSC_TRUE;
856
857 PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
858 PetscFunctionReturn(PETSC_SUCCESS);
859 }
860
861 /*
862 Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
863 Version for blocks 2 by 2.
864 */
MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo * info)865 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
866 {
867 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
868 IS perm = b->row;
869 const PetscInt *ai, *aj, *perm_ptr;
870 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
871 PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
872 MatScalar *ba = b->a, *aa, *ap;
873 MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
874 PetscReal shift = info->shiftamount;
875 PetscBool allowzeropivot, zeropivotdetected;
876
877 PetscFunctionBegin;
878 allowzeropivot = PetscNot(A->erroriffailure);
879
880 /* initialization */
881 /* il and jl record the first nonzero element in each row of the accessing
882 window U(0:k, k:mbs-1).
883 jl: list of rows to be added to uneliminated rows
884 i>= k: jl(i) is the first row to be added to row i
885 i< k: jl(i) is the row following row i in some list of rows
886 jl(i) = mbs indicates the end of a list
887 il(i): points to the first nonzero element in columns k,...,mbs-1 of
888 row i of U */
889 PetscCall(PetscCalloc1(4 * mbs, &rtmp));
890 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
891 il[0] = 0;
892 for (i = 0; i < mbs; i++) jl[i] = mbs;
893
894 PetscCall(ISGetIndices(perm, &perm_ptr));
895
896 /* check permutation */
897 if (!a->permute) {
898 ai = a->i;
899 aj = a->j;
900 aa = a->a;
901 } else {
902 ai = a->inew;
903 aj = a->jnew;
904 PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
905 PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
906 PetscCall(PetscMalloc1(ai[mbs], &a2anew));
907 PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
908
909 for (i = 0; i < mbs; i++) {
910 jmin = ai[i];
911 jmax = ai[i + 1];
912 for (j = jmin; j < jmax; j++) {
913 while (a2anew[j] != j) {
914 k = a2anew[j];
915 a2anew[j] = a2anew[k];
916 a2anew[k] = k;
917 for (k1 = 0; k1 < 4; k1++) {
918 dk[k1] = aa[k * 4 + k1];
919 aa[k * 4 + k1] = aa[j * 4 + k1];
920 aa[j * 4 + k1] = dk[k1];
921 }
922 }
923 /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
924 if (i > aj[j]) {
925 ap = aa + j * 4; /* ptr to the beginning of the block */
926 dk[1] = ap[1]; /* swap ap[1] and ap[2] */
927 ap[1] = ap[2];
928 ap[2] = dk[1];
929 }
930 }
931 }
932 PetscCall(PetscFree(a2anew));
933 }
934
935 /* for each row k */
936 for (k = 0; k < mbs; k++) {
937 /*initialize k-th row with elements nonzero in row perm(k) of A */
938 jmin = ai[perm_ptr[k]];
939 jmax = ai[perm_ptr[k] + 1];
940 ap = aa + jmin * 4;
941 for (j = jmin; j < jmax; j++) {
942 vj = perm_ptr[aj[j]]; /* block col. index */
943 rtmp_ptr = rtmp + vj * 4;
944 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
945 }
946
947 /* modify k-th row by adding in those rows i with U(i,k) != 0 */
948 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
949 i = jl[k]; /* first row to be added to k_th row */
950
951 while (i < k) {
952 nexti = jl[i]; /* next row to be added to k_th row */
953
954 /* compute multiplier */
955 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
956
957 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
958 diag = ba + i * 4;
959 u = ba + ili * 4;
960 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
961 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
962 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
963 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
964
965 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
966 dk[0] += uik[0] * u[0] + uik[1] * u[1];
967 dk[1] += uik[2] * u[0] + uik[3] * u[1];
968 dk[2] += uik[0] * u[2] + uik[1] * u[3];
969 dk[3] += uik[2] * u[2] + uik[3] * u[3];
970
971 PetscCall(PetscLogFlops(16.0 * 2.0));
972
973 /* update -U(i,k): ba[ili] = uik */
974 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
975
976 /* add multiple of row i to k-th row ... */
977 jmin = ili + 1;
978 jmax = bi[i + 1];
979 if (jmin < jmax) {
980 for (j = jmin; j < jmax; j++) {
981 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
982 rtmp_ptr = rtmp + bj[j] * 4;
983 u = ba + j * 4;
984 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
985 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
986 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
987 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
988 }
989 PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
990
991 /* ... add i to row list for next nonzero entry */
992 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
993 j = bj[jmin];
994 jl[i] = jl[j];
995 jl[j] = i; /* update jl */
996 }
997 i = nexti;
998 }
999
1000 /* save nonzero entries in k-th row of U ... */
1001
1002 /* invert diagonal block */
1003 diag = ba + k * 4;
1004 PetscCall(PetscArraycpy(diag, dk, 4));
1005 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1006 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1007
1008 jmin = bi[k];
1009 jmax = bi[k + 1];
1010 if (jmin < jmax) {
1011 for (j = jmin; j < jmax; j++) {
1012 vj = bj[j]; /* block col. index of U */
1013 u = ba + j * 4;
1014 rtmp_ptr = rtmp + vj * 4;
1015 for (k1 = 0; k1 < 4; k1++) {
1016 *u++ = *rtmp_ptr;
1017 *rtmp_ptr++ = 0.0;
1018 }
1019 }
1020
1021 /* ... add k to row list for first nonzero entry in k-th row */
1022 il[k] = jmin;
1023 i = bj[jmin];
1024 jl[k] = jl[i];
1025 jl[i] = k;
1026 }
1027 }
1028
1029 PetscCall(PetscFree(rtmp));
1030 PetscCall(PetscFree2(il, jl));
1031 if (a->permute) PetscCall(PetscFree(aa));
1032 PetscCall(ISRestoreIndices(perm, &perm_ptr));
1033
1034 C->ops->solve = MatSolve_SeqSBAIJ_2_inplace;
1035 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1036 C->assembled = PETSC_TRUE;
1037 C->preallocated = PETSC_TRUE;
1038
1039 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1040 PetscFunctionReturn(PETSC_SUCCESS);
1041 }
1042
1043 /*
1044 Version for when blocks are 2 by 2 Using natural ordering
1045 */
MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo * info)1046 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1047 {
1048 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1049 PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1050 PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1051 MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8];
1052 MatScalar *u, *diag, *rtmp, *rtmp_ptr;
1053 PetscReal shift = info->shiftamount;
1054 PetscBool allowzeropivot, zeropivotdetected;
1055
1056 PetscFunctionBegin;
1057 allowzeropivot = PetscNot(A->erroriffailure);
1058
1059 /* initialization */
1060 /* il and jl record the first nonzero element in each row of the accessing
1061 window U(0:k, k:mbs-1).
1062 jl: list of rows to be added to uneliminated rows
1063 i>= k: jl(i) is the first row to be added to row i
1064 i< k: jl(i) is the row following row i in some list of rows
1065 jl(i) = mbs indicates the end of a list
1066 il(i): points to the first nonzero element in columns k,...,mbs-1 of
1067 row i of U */
1068 PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1069 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1070 il[0] = 0;
1071 for (i = 0; i < mbs; i++) jl[i] = mbs;
1072
1073 ai = a->i;
1074 aj = a->j;
1075 aa = a->a;
1076
1077 /* for each row k */
1078 for (k = 0; k < mbs; k++) {
1079 /*initialize k-th row with elements nonzero in row k of A */
1080 jmin = ai[k];
1081 jmax = ai[k + 1];
1082 ap = aa + jmin * 4;
1083 for (j = jmin; j < jmax; j++) {
1084 vj = aj[j]; /* block col. index */
1085 rtmp_ptr = rtmp + vj * 4;
1086 for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1087 }
1088
1089 /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1090 PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1091 i = jl[k]; /* first row to be added to k_th row */
1092
1093 while (i < k) {
1094 nexti = jl[i]; /* next row to be added to k_th row */
1095
1096 /* compute multiplier */
1097 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1098
1099 /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1100 diag = ba + i * 4;
1101 u = ba + ili * 4;
1102 uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1103 uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1104 uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1105 uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1106
1107 /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1108 dk[0] += uik[0] * u[0] + uik[1] * u[1];
1109 dk[1] += uik[2] * u[0] + uik[3] * u[1];
1110 dk[2] += uik[0] * u[2] + uik[1] * u[3];
1111 dk[3] += uik[2] * u[2] + uik[3] * u[3];
1112
1113 PetscCall(PetscLogFlops(16.0 * 2.0));
1114
1115 /* update -U(i,k): ba[ili] = uik */
1116 PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1117
1118 /* add multiple of row i to k-th row ... */
1119 jmin = ili + 1;
1120 jmax = bi[i + 1];
1121 if (jmin < jmax) {
1122 for (j = jmin; j < jmax; j++) {
1123 /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1124 rtmp_ptr = rtmp + bj[j] * 4;
1125 u = ba + j * 4;
1126 rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1127 rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1128 rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1129 rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1130 }
1131 PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1132
1133 /* ... add i to row list for next nonzero entry */
1134 il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1135 j = bj[jmin];
1136 jl[i] = jl[j];
1137 jl[j] = i; /* update jl */
1138 }
1139 i = nexti;
1140 }
1141
1142 /* save nonzero entries in k-th row of U ... */
1143
1144 /* invert diagonal block */
1145 diag = ba + k * 4;
1146 PetscCall(PetscArraycpy(diag, dk, 4));
1147 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1148 if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1149
1150 jmin = bi[k];
1151 jmax = bi[k + 1];
1152 if (jmin < jmax) {
1153 for (j = jmin; j < jmax; j++) {
1154 vj = bj[j]; /* block col. index of U */
1155 u = ba + j * 4;
1156 rtmp_ptr = rtmp + vj * 4;
1157 for (k1 = 0; k1 < 4; k1++) {
1158 *u++ = *rtmp_ptr;
1159 *rtmp_ptr++ = 0.0;
1160 }
1161 }
1162
1163 /* ... add k to row list for first nonzero entry in k-th row */
1164 il[k] = jmin;
1165 i = bj[jmin];
1166 jl[k] = jl[i];
1167 jl[i] = k;
1168 }
1169 }
1170
1171 PetscCall(PetscFree(rtmp));
1172 PetscCall(PetscFree2(il, jl));
1173
1174 C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1175 C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1178 C->assembled = PETSC_TRUE;
1179 C->preallocated = PETSC_TRUE;
1180
1181 PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1182 PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184
1185 /*
1186 Numeric U^T*D*U factorization for SBAIJ format.
1187 Version for blocks are 1 by 1.
1188 */
MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo * info)1189 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1190 {
1191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1192 IS ip = b->row;
1193 const PetscInt *ai, *aj, *rip;
1194 PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1195 PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1196 MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1197 PetscReal rs;
1198 FactorShiftCtx sctx;
1199
1200 PetscFunctionBegin;
1201 /* MatPivotSetUp(): initialize shift context sctx */
1202 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1203
1204 PetscCall(ISGetIndices(ip, &rip));
1205 if (!a->permute) {
1206 ai = a->i;
1207 aj = a->j;
1208 aa = a->a;
1209 } else {
1210 ai = a->inew;
1211 aj = a->jnew;
1212 nz = ai[mbs];
1213 PetscCall(PetscMalloc1(nz, &aa));
1214 a2anew = a->a2anew;
1215 bval = a->a;
1216 for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1217 }
1218
1219 /* initialization */
1220 /* il and jl record the first nonzero element in each row of the accessing
1221 window U(0:k, k:mbs-1).
1222 jl: list of rows to be added to uneliminated rows
1223 i>= k: jl(i) is the first row to be added to row i
1224 i< k: jl(i) is the row following row i in some list of rows
1225 jl(i) = mbs indicates the end of a list
1226 il(i): points to the first nonzero element in columns k,...,mbs-1 of
1227 row i of U */
1228 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1229
1230 do {
1231 sctx.newshift = PETSC_FALSE;
1232 il[0] = 0;
1233 for (i = 0; i < mbs; i++) {
1234 rtmp[i] = 0.0;
1235 jl[i] = mbs;
1236 }
1237
1238 for (k = 0; k < mbs; k++) {
1239 /*initialize k-th row by the perm[k]-th row of A */
1240 jmin = ai[rip[k]];
1241 jmax = ai[rip[k] + 1];
1242 bval = ba + bi[k];
1243 for (j = jmin; j < jmax; j++) {
1244 col = rip[aj[j]];
1245 rtmp[col] = aa[j];
1246 *bval++ = 0.0; /* for in-place factorization */
1247 }
1248
1249 /* shift the diagonal of the matrix */
1250 if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1251
1252 /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1253 dk = rtmp[k];
1254 i = jl[k]; /* first row to be added to k_th row */
1255
1256 while (i < k) {
1257 nexti = jl[i]; /* next row to be added to k_th row */
1258
1259 /* compute multiplier, update diag(k) and U(i,k) */
1260 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1261 uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1262 dk += uikdi * ba[ili];
1263 ba[ili] = uikdi; /* -U(i,k) */
1264
1265 /* add multiple of row i to k-th row */
1266 jmin = ili + 1;
1267 jmax = bi[i + 1];
1268 if (jmin < jmax) {
1269 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1270 PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1271
1272 /* update il and jl for row i */
1273 il[i] = jmin;
1274 j = bj[jmin];
1275 jl[i] = jl[j];
1276 jl[j] = i;
1277 }
1278 i = nexti;
1279 }
1280
1281 /* shift the diagonals when zero pivot is detected */
1282 /* compute rs=sum of abs(off-diagonal) */
1283 rs = 0.0;
1284 jmin = bi[k] + 1;
1285 nz = bi[k + 1] - jmin;
1286 if (nz) {
1287 bcol = bj + jmin;
1288 while (nz--) {
1289 rs += PetscAbsScalar(rtmp[*bcol]);
1290 bcol++;
1291 }
1292 }
1293
1294 sctx.rs = rs;
1295 sctx.pv = dk;
1296 PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1297 if (sctx.newshift) break; /* sctx.shift_amount is updated */
1298 dk = sctx.pv;
1299
1300 /* copy data into U(k,:) */
1301 ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1302 jmin = bi[k] + 1;
1303 jmax = bi[k + 1];
1304 if (jmin < jmax) {
1305 for (j = jmin; j < jmax; j++) {
1306 col = bj[j];
1307 ba[j] = rtmp[col];
1308 rtmp[col] = 0.0;
1309 }
1310 /* add the k-th row into il and jl */
1311 il[k] = jmin;
1312 i = bj[jmin];
1313 jl[k] = jl[i];
1314 jl[i] = k;
1315 }
1316 }
1317 } while (sctx.newshift);
1318 PetscCall(PetscFree3(rtmp, il, jl));
1319 if (a->permute) PetscCall(PetscFree(aa));
1320
1321 PetscCall(ISRestoreIndices(ip, &rip));
1322
1323 C->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
1324 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1325 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1326 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
1327 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
1328 C->assembled = PETSC_TRUE;
1329 C->preallocated = PETSC_TRUE;
1330
1331 PetscCall(PetscLogFlops(C->rmap->N));
1332 if (sctx.nshift) {
1333 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1334 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1335 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1336 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1337 }
1338 }
1339 PetscFunctionReturn(PETSC_SUCCESS);
1340 }
1341
1342 /*
1343 Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1344 Modified from MatCholeskyFactorNumeric_SeqAIJ()
1345 */
MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo * info)1346 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1347 {
1348 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1349 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data;
1350 PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1351 PetscInt *ai = a->i, *aj = a->j, *ajtmp;
1352 PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1353 MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1354 FactorShiftCtx sctx;
1355 PetscReal rs;
1356 MatScalar d, *v;
1357 const PetscInt *adiag;
1358
1359 PetscFunctionBegin;
1360 PetscCall(MatGetDiagonalMarkers_SeqSBAIJ(A, &adiag, NULL));
1361 PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1362
1363 /* MatPivotSetUp(): initialize shift context sctx */
1364 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1365
1366 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1367 sctx.shift_top = info->zeropivot;
1368
1369 PetscCall(PetscArrayzero(rtmp, mbs));
1370
1371 for (i = 0; i < mbs; i++) {
1372 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1373 d = aa[adiag[i]];
1374 rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1375 ajtmp = aj + ai[i] + 1; /* exclude diagonal */
1376 v = aa + ai[i] + 1;
1377 nz = ai[i + 1] - ai[i] - 1;
1378 for (j = 0; j < nz; j++) {
1379 rtmp[i] += PetscAbsScalar(v[j]);
1380 rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1381 }
1382 if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1383 }
1384 sctx.shift_top *= 1.1;
1385 sctx.nshift_max = 5;
1386 sctx.shift_lo = 0.;
1387 sctx.shift_hi = 1.;
1388 }
1389
1390 /* allocate working arrays
1391 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1392 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1393 */
1394 do {
1395 sctx.newshift = PETSC_FALSE;
1396
1397 for (i = 0; i < mbs; i++) c2r[i] = mbs;
1398 if (mbs) il[0] = 0;
1399
1400 for (k = 0; k < mbs; k++) {
1401 /* zero rtmp */
1402 nz = bi[k + 1] - bi[k];
1403 bjtmp = bj + bi[k];
1404 for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1405
1406 /* load in initial unfactored row */
1407 bval = ba + bi[k];
1408 jmin = ai[k];
1409 jmax = ai[k + 1];
1410 for (j = jmin; j < jmax; j++) {
1411 col = aj[j];
1412 rtmp[col] = aa[j];
1413 *bval++ = 0.0; /* for in-place factorization */
1414 }
1415 /* shift the diagonal of the matrix: ZeropivotApply() */
1416 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1417
1418 /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1419 dk = rtmp[k];
1420 i = c2r[k]; /* first row to be added to k_th row */
1421
1422 while (i < k) {
1423 nexti = c2r[i]; /* next row to be added to k_th row */
1424
1425 /* compute multiplier, update diag(k) and U(i,k) */
1426 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1427 uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1428 dk += uikdi * ba[ili]; /* update diag[k] */
1429 ba[ili] = uikdi; /* -U(i,k) */
1430
1431 /* add multiple of row i to k-th row */
1432 jmin = ili + 1;
1433 jmax = bi[i + 1];
1434 if (jmin < jmax) {
1435 for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1436 /* update il and c2r for row i */
1437 il[i] = jmin;
1438 j = bj[jmin];
1439 c2r[i] = c2r[j];
1440 c2r[j] = i;
1441 }
1442 i = nexti;
1443 }
1444
1445 /* copy data into U(k,:) */
1446 rs = 0.0;
1447 jmin = bi[k];
1448 jmax = bi[k + 1] - 1;
1449 if (jmin < jmax) {
1450 for (j = jmin; j < jmax; j++) {
1451 col = bj[j];
1452 ba[j] = rtmp[col];
1453 rs += PetscAbsScalar(ba[j]);
1454 }
1455 /* add the k-th row into il and c2r */
1456 il[k] = jmin;
1457 i = bj[jmin];
1458 c2r[k] = c2r[i];
1459 c2r[i] = k;
1460 }
1461
1462 sctx.rs = rs;
1463 sctx.pv = dk;
1464 PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1465 if (sctx.newshift) break;
1466 dk = sctx.pv;
1467
1468 ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1469 }
1470 } while (sctx.newshift);
1471
1472 PetscCall(PetscFree3(rtmp, il, c2r));
1473
1474 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1475 B->ops->solves = MatSolves_SeqSBAIJ_1;
1476 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1477 B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1478 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1479 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1480
1481 B->assembled = PETSC_TRUE;
1482 B->preallocated = PETSC_TRUE;
1483
1484 PetscCall(PetscLogFlops(B->rmap->n));
1485
1486 /* MatPivotView() */
1487 if (sctx.nshift) {
1488 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1489 PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1490 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1491 PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1492 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1493 PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1494 }
1495 }
1496 PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498
MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo * info)1499 PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1500 {
1501 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1502 PetscInt i, j, mbs = a->mbs;
1503 PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1504 PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1505 MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1506 PetscReal rs;
1507 FactorShiftCtx sctx;
1508
1509 PetscFunctionBegin;
1510 /* MatPivotSetUp(): initialize shift context sctx */
1511 PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1512
1513 /* initialization */
1514 /* il and jl record the first nonzero element in each row of the accessing
1515 window U(0:k, k:mbs-1).
1516 jl: list of rows to be added to uneliminated rows
1517 i>= k: jl(i) is the first row to be added to row i
1518 i< k: jl(i) is the row following row i in some list of rows
1519 jl(i) = mbs indicates the end of a list
1520 il(i): points to the first nonzero element in U(i,k:mbs-1)
1521 */
1522 PetscCall(PetscMalloc1(mbs, &rtmp));
1523 PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1524
1525 do {
1526 sctx.newshift = PETSC_FALSE;
1527 il[0] = 0;
1528 for (i = 0; i < mbs; i++) {
1529 rtmp[i] = 0.0;
1530 jl[i] = mbs;
1531 }
1532
1533 for (k = 0; k < mbs; k++) {
1534 /*initialize k-th row with elements nonzero in row perm(k) of A */
1535 nz = ai[k + 1] - ai[k];
1536 acol = aj + ai[k];
1537 aval = aa + ai[k];
1538 bval = ba + bi[k];
1539 while (nz--) {
1540 rtmp[*acol++] = *aval++;
1541 *bval++ = 0.0; /* for in-place factorization */
1542 }
1543
1544 /* shift the diagonal of the matrix */
1545 if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1546
1547 /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1548 dk = rtmp[k];
1549 i = jl[k]; /* first row to be added to k_th row */
1550
1551 while (i < k) {
1552 nexti = jl[i]; /* next row to be added to k_th row */
1553 /* compute multiplier, update D(k) and U(i,k) */
1554 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1555 uikdi = -ba[ili] * ba[bi[i]];
1556 dk += uikdi * ba[ili];
1557 ba[ili] = uikdi; /* -U(i,k) */
1558
1559 /* add multiple of row i to k-th row ... */
1560 jmin = ili + 1;
1561 nz = bi[i + 1] - jmin;
1562 if (nz > 0) {
1563 bcol = bj + jmin;
1564 bval = ba + jmin;
1565 PetscCall(PetscLogFlops(2.0 * nz));
1566 while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1567
1568 /* update il and jl for i-th row */
1569 il[i] = jmin;
1570 j = bj[jmin];
1571 jl[i] = jl[j];
1572 jl[j] = i;
1573 }
1574 i = nexti;
1575 }
1576
1577 /* shift the diagonals when zero pivot is detected */
1578 /* compute rs=sum of abs(off-diagonal) */
1579 rs = 0.0;
1580 jmin = bi[k] + 1;
1581 nz = bi[k + 1] - jmin;
1582 if (nz) {
1583 bcol = bj + jmin;
1584 while (nz--) {
1585 rs += PetscAbsScalar(rtmp[*bcol]);
1586 bcol++;
1587 }
1588 }
1589
1590 sctx.rs = rs;
1591 sctx.pv = dk;
1592 PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1593 if (sctx.newshift) break; /* sctx.shift_amount is updated */
1594 dk = sctx.pv;
1595
1596 /* copy data into U(k,:) */
1597 ba[bi[k]] = 1.0 / dk;
1598 jmin = bi[k] + 1;
1599 nz = bi[k + 1] - jmin;
1600 if (nz) {
1601 bcol = bj + jmin;
1602 bval = ba + jmin;
1603 while (nz--) {
1604 *bval++ = rtmp[*bcol];
1605 rtmp[*bcol++] = 0.0;
1606 }
1607 /* add k-th row into il and jl */
1608 il[k] = jmin;
1609 i = bj[jmin];
1610 jl[k] = jl[i];
1611 jl[i] = k;
1612 }
1613 } /* end of for (k = 0; k<mbs; k++) */
1614 } while (sctx.newshift);
1615 PetscCall(PetscFree(rtmp));
1616 PetscCall(PetscFree2(il, jl));
1617
1618 C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1619 C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1620 C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1621 C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1622 C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1623
1624 C->assembled = PETSC_TRUE;
1625 C->preallocated = PETSC_TRUE;
1626
1627 PetscCall(PetscLogFlops(C->rmap->N));
1628 if (sctx.nshift) {
1629 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1630 PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1631 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1632 PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1633 }
1634 }
1635 PetscFunctionReturn(PETSC_SUCCESS);
1636 }
1637
MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo * info)1638 PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1639 {
1640 Mat C;
1641
1642 PetscFunctionBegin;
1643 PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1644 PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1645 PetscCall(MatCholeskyFactorNumeric(C, A, info));
1646
1647 A->ops->solve = C->ops->solve;
1648 A->ops->solvetranspose = C->ops->solvetranspose;
1649
1650 PetscCall(MatHeaderMerge(A, &C));
1651 PetscFunctionReturn(PETSC_SUCCESS);
1652 }
1653