15d517e7eSBarry Smith /*
2ec3a8b7bSBarry Smith Factorization code for BAIJ format.
35d517e7eSBarry Smith */
4c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
5af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
6ec3a8b7bSBarry Smith
MatLUFactorNumeric_SeqBAIJ_2(Mat B,Mat A,const MatFactorInfo * info)7d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B, Mat A, const MatFactorInfo *info)
8d71ae5a4SJacob Faibussowitsch {
9b588c5a2SHong Zhang Mat C = B;
10b588c5a2SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
11b588c5a2SHong Zhang IS isrow = b->row, isicol = b->icol;
125a586d82SBarry Smith const PetscInt *r, *ic;
13bbd65245SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, bs2 = a->bs2;
14bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag;
15bbd65245SShri Abhyankar MatScalar *rtmp, *pc, *mwork, *pv;
16bbd65245SShri Abhyankar MatScalar *aa = a->a, *v;
17182b8fbaSHong Zhang PetscReal shift = info->shiftamount;
18ff6a9541SJacob Faibussowitsch const PetscBool allowzeropivot = PetscNot(A->erroriffailure);
19b588c5a2SHong Zhang
20b588c5a2SHong Zhang PetscFunctionBegin;
219566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r));
229566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic));
23b588c5a2SHong Zhang
244c000e2eSHong Zhang /* generate work space needed by the factorization */
259566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork));
269566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, bs2 * n));
27b588c5a2SHong Zhang
28ff6a9541SJacob Faibussowitsch for (PetscInt i = 0; i < n; i++) {
29b588c5a2SHong Zhang /* zero rtmp */
30b588c5a2SHong Zhang /* L part */
31ff6a9541SJacob Faibussowitsch PetscInt *pj;
32ff6a9541SJacob Faibussowitsch PetscInt nzL, nz = bi[i + 1] - bi[i];
33b588c5a2SHong Zhang bjtmp = bj + bi[i];
34ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));
35b588c5a2SHong Zhang
36b588c5a2SHong Zhang /* U part */
370c4413a7SShri Abhyankar nz = bdiag[i] - bdiag[i + 1];
380c4413a7SShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1;
39ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));
400c4413a7SShri Abhyankar
410c4413a7SShri Abhyankar /* load in initial (unfactored row) */
420c4413a7SShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]];
430c4413a7SShri Abhyankar ajtmp = aj + ai[r[i]];
440c4413a7SShri Abhyankar v = aa + bs2 * ai[r[i]];
45ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ic[ajtmp[j]], v + bs2 * j, bs2));
460c4413a7SShri Abhyankar
470c4413a7SShri Abhyankar /* elimination */
480c4413a7SShri Abhyankar bjtmp = bj + bi[i];
490c4413a7SShri Abhyankar nzL = bi[i + 1] - bi[i];
50ff6a9541SJacob Faibussowitsch for (PetscInt k = 0; k < nzL; k++) {
51ff6a9541SJacob Faibussowitsch PetscBool flg = PETSC_FALSE;
52ff6a9541SJacob Faibussowitsch PetscInt row = bjtmp[k];
53ff6a9541SJacob Faibussowitsch
540c4413a7SShri Abhyankar pc = rtmp + bs2 * row;
55ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < bs2; j++) {
56c35f09e5SBarry Smith if (pc[j] != (PetscScalar)0.0) {
57ff6a9541SJacob Faibussowitsch flg = PETSC_TRUE;
58c35f09e5SBarry Smith break;
59c35f09e5SBarry Smith }
60c35f09e5SBarry Smith }
610c4413a7SShri Abhyankar if (flg) {
620c4413a7SShri Abhyankar pv = b->a + bs2 * bdiag[row];
6396b95a6bSBarry Smith /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */
649566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_times_B_2(pc, pv, mwork));
650c4413a7SShri Abhyankar
66a5b23f4aSJose E. Roman pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
670c4413a7SShri Abhyankar pv = b->a + bs2 * (bdiag[row + 1] + 1);
680c4413a7SShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */
69ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) {
7096b95a6bSBarry Smith /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */
710c4413a7SShri Abhyankar /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */
720c4413a7SShri Abhyankar v = rtmp + 4 * pj[j];
739566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_minus_B_times_C_2(v, pc, pv));
740c4413a7SShri Abhyankar pv += 4;
750c4413a7SShri Abhyankar }
769566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * nz + 12)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */
770c4413a7SShri Abhyankar }
780c4413a7SShri Abhyankar }
790c4413a7SShri Abhyankar
800c4413a7SShri Abhyankar /* finished row so stick it into b->a */
810c4413a7SShri Abhyankar /* L part */
820c4413a7SShri Abhyankar pv = b->a + bs2 * bi[i];
830c4413a7SShri Abhyankar pj = b->j + bi[i];
840c4413a7SShri Abhyankar nz = bi[i + 1] - bi[i];
85ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
860c4413a7SShri Abhyankar
87a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */
880c4413a7SShri Abhyankar pv = b->a + bs2 * bdiag[i];
890c4413a7SShri Abhyankar pj = b->j + bdiag[i];
909566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2));
91ff6a9541SJacob Faibussowitsch {
92ff6a9541SJacob Faibussowitsch PetscBool zeropivotdetected;
93ff6a9541SJacob Faibussowitsch
949566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(pv, shift, allowzeropivot, &zeropivotdetected));
957b6c816cSBarry Smith if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
96ff6a9541SJacob Faibussowitsch }
970c4413a7SShri Abhyankar
980c4413a7SShri Abhyankar /* U part */
990c4413a7SShri Abhyankar pv = b->a + bs2 * (bdiag[i + 1] + 1);
1000c4413a7SShri Abhyankar pj = b->j + bdiag[i + 1] + 1;
1010c4413a7SShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1;
102ff6a9541SJacob Faibussowitsch for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
1030c4413a7SShri Abhyankar }
1040c4413a7SShri Abhyankar
1059566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, mwork));
1069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic));
1079566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r));
10826fbe8dcSKarl Rupp
1094dd39f65SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_2;
1104dd39f65SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2;
1110c4413a7SShri Abhyankar C->assembled = PETSC_TRUE;
11226fbe8dcSKarl Rupp
1139566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 2 * 2 * 2 * n)); /* from inverting diagonal blocks */
1143ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1150c4413a7SShri Abhyankar }
1160c4413a7SShri Abhyankar
MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat B,Mat A,const MatFactorInfo * info)117d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
118d71ae5a4SJacob Faibussowitsch {
1194c000e2eSHong Zhang Mat C = B;
1204c000e2eSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
121bbd65245SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj;
122bbd65245SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, bs2 = a->bs2;
123bbd65245SShri Abhyankar const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag;
124bbd65245SShri Abhyankar MatScalar *rtmp, *pc, *mwork, *pv;
125bbd65245SShri Abhyankar MatScalar *aa = a->a, *v;
126bbd65245SShri Abhyankar PetscInt flg;
127182b8fbaSHong Zhang PetscReal shift = info->shiftamount;
128a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected;
1294c000e2eSHong Zhang
1304c000e2eSHong Zhang PetscFunctionBegin;
1310164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure);
1320164db54SHong Zhang
1334c000e2eSHong Zhang /* generate work space needed by the factorization */
1349566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork));
1359566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(rtmp, bs2 * n));
1364c000e2eSHong Zhang
1374c000e2eSHong Zhang for (i = 0; i < n; i++) {
1384c000e2eSHong Zhang /* zero rtmp */
1394c000e2eSHong Zhang /* L part */
1404c000e2eSHong Zhang nz = bi[i + 1] - bi[i];
1414c000e2eSHong Zhang bjtmp = bj + bi[i];
14248a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));
1434c000e2eSHong Zhang
1444c000e2eSHong Zhang /* U part */
145b2b2dd24SShri Abhyankar nz = bdiag[i] - bdiag[i + 1];
146b2b2dd24SShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1;
14748a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));
148b2b2dd24SShri Abhyankar
149b2b2dd24SShri Abhyankar /* load in initial (unfactored row) */
150b2b2dd24SShri Abhyankar nz = ai[i + 1] - ai[i];
151b2b2dd24SShri Abhyankar ajtmp = aj + ai[i];
152b2b2dd24SShri Abhyankar v = aa + bs2 * ai[i];
15348a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ajtmp[j], v + bs2 * j, bs2));
154b2b2dd24SShri Abhyankar
155b2b2dd24SShri Abhyankar /* elimination */
156b2b2dd24SShri Abhyankar bjtmp = bj + bi[i];
157b2b2dd24SShri Abhyankar nzL = bi[i + 1] - bi[i];
158b2b2dd24SShri Abhyankar for (k = 0; k < nzL; k++) {
159b2b2dd24SShri Abhyankar row = bjtmp[k];
160b2b2dd24SShri Abhyankar pc = rtmp + bs2 * row;
161c35f09e5SBarry Smith for (flg = 0, j = 0; j < bs2; j++) {
162c35f09e5SBarry Smith if (pc[j] != (PetscScalar)0.0) {
163c35f09e5SBarry Smith flg = 1;
164c35f09e5SBarry Smith break;
165c35f09e5SBarry Smith }
166c35f09e5SBarry Smith }
167b2b2dd24SShri Abhyankar if (flg) {
168b2b2dd24SShri Abhyankar pv = b->a + bs2 * bdiag[row];
16996b95a6bSBarry Smith /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */
1709566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_times_B_2(pc, pv, mwork));
171b2b2dd24SShri Abhyankar
172b2b2dd24SShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
173b2b2dd24SShri Abhyankar pv = b->a + bs2 * (bdiag[row + 1] + 1);
174b2b2dd24SShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
175b2b2dd24SShri Abhyankar for (j = 0; j < nz; j++) {
17696b95a6bSBarry Smith /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */
177b2b2dd24SShri Abhyankar /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */
178b2b2dd24SShri Abhyankar v = rtmp + 4 * pj[j];
1799566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_A_minus_B_times_C_2(v, pc, pv));
180b2b2dd24SShri Abhyankar pv += 4;
181b2b2dd24SShri Abhyankar }
1829566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * nz + 12)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */
183b2b2dd24SShri Abhyankar }
184b2b2dd24SShri Abhyankar }
185b2b2dd24SShri Abhyankar
186b2b2dd24SShri Abhyankar /* finished row so stick it into b->a */
187b2b2dd24SShri Abhyankar /* L part */
188b2b2dd24SShri Abhyankar pv = b->a + bs2 * bi[i];
189b2b2dd24SShri Abhyankar pj = b->j + bi[i];
190b2b2dd24SShri Abhyankar nz = bi[i + 1] - bi[i];
19148a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
192b2b2dd24SShri Abhyankar
193a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */
194b2b2dd24SShri Abhyankar pv = b->a + bs2 * bdiag[i];
195b2b2dd24SShri Abhyankar pj = b->j + bdiag[i];
1969566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2));
1979566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(pv, shift, allowzeropivot, &zeropivotdetected));
1987b6c816cSBarry Smith if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
199b2b2dd24SShri Abhyankar
200b2b2dd24SShri Abhyankar /* U part */
201b2b2dd24SShri Abhyankar /*
202b2b2dd24SShri Abhyankar pv = b->a + bs2*bi[2*n-i];
203b2b2dd24SShri Abhyankar pj = b->j + bi[2*n-i];
204b2b2dd24SShri Abhyankar nz = bi[2*n-i+1] - bi[2*n-i] - 1;
205b2b2dd24SShri Abhyankar */
206b2b2dd24SShri Abhyankar pv = b->a + bs2 * (bdiag[i + 1] + 1);
207b2b2dd24SShri Abhyankar pj = b->j + bdiag[i + 1] + 1;
208b2b2dd24SShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1;
20948a46eb9SPierre Jolivet for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
210b2b2dd24SShri Abhyankar }
2119566063dSJacob Faibussowitsch PetscCall(PetscFree2(rtmp, mwork));
212ae3d28f0SHong Zhang
2134dd39f65SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering;
2149f5c0bcdSBarry Smith C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_2_NaturalOrdering;
2159f5c0bcdSBarry Smith C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_2_NaturalOrdering;
2164dd39f65SShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
217b2b2dd24SShri Abhyankar C->assembled = PETSC_TRUE;
21826fbe8dcSKarl Rupp
2199566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 2 * 2 * 2 * n)); /* from inverting diagonal blocks */
2203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
221b2b2dd24SShri Abhyankar }
222b2b2dd24SShri Abhyankar
MatILUFactorNumeric_SeqBAIJ_2_inplace(Mat B,Mat A,const MatFactorInfo * info)223421480d9SBarry Smith PetscErrorCode MatILUFactorNumeric_SeqBAIJ_2_inplace(Mat B, Mat A, const MatFactorInfo *info)
224d71ae5a4SJacob Faibussowitsch {
225719d5645SBarry Smith Mat C = B;
2264eeb42bcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
2277cdf2dbbSSatish Balay IS isrow = b->row, isicol = b->icol;
2285d0c19d7SBarry Smith const PetscInt *r, *ic;
2295d0c19d7SBarry Smith PetscInt i, j, n = a->mbs, *bi = b->i, *bj = b->j;
230690b6cddSBarry Smith PetscInt *ajtmpold, *ajtmp, nz, row;
231421480d9SBarry Smith PetscInt idx, *ai = a->i, *aj = a->j, *pj;
232421480d9SBarry Smith const PetscInt *diag_offset;
233329f5518SBarry Smith MatScalar *pv, *v, *rtmp, m1, m2, m3, m4, *pc, *w, *x, x1, x2, x3, x4;
2342515f8d2SSatish Balay MatScalar p1, p2, p3, p4;
2353f1db9ecSBarry Smith MatScalar *ba = b->a, *aa = a->a;
236182b8fbaSHong Zhang PetscReal shift = info->shiftamount;
237a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected;
2384eeb42bcSBarry Smith
2393a40ed3dSBarry Smith PetscFunctionBegin;
240421480d9SBarry Smith /* Since A is C and C is labeled as a factored matrix we need to lie to MatGetDiagonalMarkers_SeqBAIJ() to get it to compute the diagonals */
241421480d9SBarry Smith A->factortype = MAT_FACTOR_NONE;
242421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &diag_offset, NULL));
243421480d9SBarry Smith A->factortype = MAT_FACTOR_ILU;
2440164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure);
2459566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r));
2469566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic));
2479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(4 * (n + 1), &rtmp));
2484eeb42bcSBarry Smith
2494eeb42bcSBarry Smith for (i = 0; i < n; i++) {
2504078e994SBarry Smith nz = bi[i + 1] - bi[i];
2514078e994SBarry Smith ajtmp = bj + bi[i];
2524eeb42bcSBarry Smith for (j = 0; j < nz; j++) {
2539371c9d4SSatish Balay x = rtmp + 4 * ajtmp[j];
2549371c9d4SSatish Balay x[0] = x[1] = x[2] = x[3] = 0.0;
2554eeb42bcSBarry Smith }
2564eeb42bcSBarry Smith /* load in initial (unfactored row) */
2574eeb42bcSBarry Smith idx = r[i];
2584078e994SBarry Smith nz = ai[idx + 1] - ai[idx];
2594078e994SBarry Smith ajtmpold = aj + ai[idx];
2604078e994SBarry Smith v = aa + 4 * ai[idx];
2614eeb42bcSBarry Smith for (j = 0; j < nz; j++) {
2624eeb42bcSBarry Smith x = rtmp + 4 * ic[ajtmpold[j]];
2639371c9d4SSatish Balay x[0] = v[0];
2649371c9d4SSatish Balay x[1] = v[1];
2659371c9d4SSatish Balay x[2] = v[2];
2669371c9d4SSatish Balay x[3] = v[3];
2674eeb42bcSBarry Smith v += 4;
2684eeb42bcSBarry Smith }
2694eeb42bcSBarry Smith row = *ajtmp++;
2704eeb42bcSBarry Smith while (row < i) {
2714eeb42bcSBarry Smith pc = rtmp + 4 * row;
2729371c9d4SSatish Balay p1 = pc[0];
2739371c9d4SSatish Balay p2 = pc[1];
2749371c9d4SSatish Balay p3 = pc[2];
2759371c9d4SSatish Balay p4 = pc[3];
276d4a378daSJed Brown if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) {
2774078e994SBarry Smith pv = ba + 4 * diag_offset[row];
2784078e994SBarry Smith pj = bj + diag_offset[row] + 1;
2799371c9d4SSatish Balay x1 = pv[0];
2809371c9d4SSatish Balay x2 = pv[1];
2819371c9d4SSatish Balay x3 = pv[2];
2829371c9d4SSatish Balay x4 = pv[3];
2834eeb42bcSBarry Smith pc[0] = m1 = p1 * x1 + p3 * x2;
2844eeb42bcSBarry Smith pc[1] = m2 = p2 * x1 + p4 * x2;
2854eeb42bcSBarry Smith pc[2] = m3 = p1 * x3 + p3 * x4;
2864eeb42bcSBarry Smith pc[3] = m4 = p2 * x3 + p4 * x4;
2874078e994SBarry Smith nz = bi[row + 1] - diag_offset[row] - 1;
2884eeb42bcSBarry Smith pv += 4;
2894eeb42bcSBarry Smith for (j = 0; j < nz; j++) {
2909371c9d4SSatish Balay x1 = pv[0];
2919371c9d4SSatish Balay x2 = pv[1];
2929371c9d4SSatish Balay x3 = pv[2];
2939371c9d4SSatish Balay x4 = pv[3];
2944eeb42bcSBarry Smith x = rtmp + 4 * pj[j];
2954eeb42bcSBarry Smith x[0] -= m1 * x1 + m3 * x2;
2964eeb42bcSBarry Smith x[1] -= m2 * x1 + m4 * x2;
2974eeb42bcSBarry Smith x[2] -= m1 * x3 + m3 * x4;
2984eeb42bcSBarry Smith x[3] -= m2 * x3 + m4 * x4;
2994eeb42bcSBarry Smith pv += 4;
3004eeb42bcSBarry Smith }
3019566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * nz + 12.0));
3024eeb42bcSBarry Smith }
3034eeb42bcSBarry Smith row = *ajtmp++;
3044eeb42bcSBarry Smith }
3054eeb42bcSBarry Smith /* finished row so stick it into b->a */
3064078e994SBarry Smith pv = ba + 4 * bi[i];
3074078e994SBarry Smith pj = bj + bi[i];
3084078e994SBarry Smith nz = bi[i + 1] - bi[i];
3094eeb42bcSBarry Smith for (j = 0; j < nz; j++) {
3104eeb42bcSBarry Smith x = rtmp + 4 * pj[j];
3119371c9d4SSatish Balay pv[0] = x[0];
3129371c9d4SSatish Balay pv[1] = x[1];
3139371c9d4SSatish Balay pv[2] = x[2];
3149371c9d4SSatish Balay pv[3] = x[3];
3154eeb42bcSBarry Smith pv += 4;
3164eeb42bcSBarry Smith }
3174eeb42bcSBarry Smith /* invert diagonal block */
3184078e994SBarry Smith w = ba + 4 * diag_offset[i];
3199566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(w, shift, allowzeropivot, &zeropivotdetected));
3207b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3214eeb42bcSBarry Smith }
3224eeb42bcSBarry Smith
3239566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp));
3249566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic));
3259566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r));
32626fbe8dcSKarl Rupp
32706e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_2_inplace;
32806e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_inplace;
3294eeb42bcSBarry Smith C->assembled = PETSC_TRUE;
33026fbe8dcSKarl Rupp
3319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 8 * b->mbs)); /* from inverting diagonal blocks */
3323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
3334eeb42bcSBarry Smith }
3344cd38560SBarry Smith /*
3354cd38560SBarry Smith Version for when blocks are 2 by 2 Using natural ordering
3364cd38560SBarry Smith */
MatILUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo * info)337421480d9SBarry Smith PetscErrorCode MatILUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
338d71ae5a4SJacob Faibussowitsch {
3394cd38560SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
340690b6cddSBarry Smith PetscInt i, j, n = a->mbs, *bi = b->i, *bj = b->j;
341690b6cddSBarry Smith PetscInt *ajtmpold, *ajtmp, nz, row;
342421480d9SBarry Smith PetscInt *ai = a->i, *aj = a->j, *pj;
343421480d9SBarry Smith const PetscInt *diag_offset;
344329f5518SBarry Smith MatScalar *pv, *v, *rtmp, *pc, *w, *x;
3452515f8d2SSatish Balay MatScalar p1, p2, p3, p4, m1, m2, m3, m4, x1, x2, x3, x4;
3464cd38560SBarry Smith MatScalar *ba = b->a, *aa = a->a;
347182b8fbaSHong Zhang PetscReal shift = info->shiftamount;
348a455e926SHong Zhang PetscBool allowzeropivot, zeropivotdetected;
3494cd38560SBarry Smith
3504cd38560SBarry Smith PetscFunctionBegin;
351421480d9SBarry Smith /* Since A is C and C is labeled as a factored matrix we need to lie to MatGetDiagonalMarkers_SeqBAIJ() to get it to compute the diagonals */
352421480d9SBarry Smith A->factortype = MAT_FACTOR_NONE;
353421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &diag_offset, NULL));
354421480d9SBarry Smith A->factortype = MAT_FACTOR_ILU;
3550164db54SHong Zhang allowzeropivot = PetscNot(A->erroriffailure);
3569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(4 * (n + 1), &rtmp));
3574cd38560SBarry Smith for (i = 0; i < n; i++) {
3584cd38560SBarry Smith nz = bi[i + 1] - bi[i];
3594cd38560SBarry Smith ajtmp = bj + bi[i];
3604cd38560SBarry Smith for (j = 0; j < nz; j++) {
3614cd38560SBarry Smith x = rtmp + 4 * ajtmp[j];
3624cd38560SBarry Smith x[0] = x[1] = x[2] = x[3] = 0.0;
3634cd38560SBarry Smith }
3644cd38560SBarry Smith /* load in initial (unfactored row) */
3654cd38560SBarry Smith nz = ai[i + 1] - ai[i];
3664cd38560SBarry Smith ajtmpold = aj + ai[i];
3674cd38560SBarry Smith v = aa + 4 * ai[i];
3684cd38560SBarry Smith for (j = 0; j < nz; j++) {
3694cd38560SBarry Smith x = rtmp + 4 * ajtmpold[j];
3709371c9d4SSatish Balay x[0] = v[0];
3719371c9d4SSatish Balay x[1] = v[1];
3729371c9d4SSatish Balay x[2] = v[2];
3739371c9d4SSatish Balay x[3] = v[3];
3744cd38560SBarry Smith v += 4;
3754cd38560SBarry Smith }
3764cd38560SBarry Smith row = *ajtmp++;
3774cd38560SBarry Smith while (row < i) {
3784cd38560SBarry Smith pc = rtmp + 4 * row;
3799371c9d4SSatish Balay p1 = pc[0];
3809371c9d4SSatish Balay p2 = pc[1];
3819371c9d4SSatish Balay p3 = pc[2];
3829371c9d4SSatish Balay p4 = pc[3];
383d4a378daSJed Brown if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) {
3844cd38560SBarry Smith pv = ba + 4 * diag_offset[row];
3854cd38560SBarry Smith pj = bj + diag_offset[row] + 1;
3869371c9d4SSatish Balay x1 = pv[0];
3879371c9d4SSatish Balay x2 = pv[1];
3889371c9d4SSatish Balay x3 = pv[2];
3899371c9d4SSatish Balay x4 = pv[3];
3904cd38560SBarry Smith pc[0] = m1 = p1 * x1 + p3 * x2;
3914cd38560SBarry Smith pc[1] = m2 = p2 * x1 + p4 * x2;
3924cd38560SBarry Smith pc[2] = m3 = p1 * x3 + p3 * x4;
3934cd38560SBarry Smith pc[3] = m4 = p2 * x3 + p4 * x4;
3944cd38560SBarry Smith nz = bi[row + 1] - diag_offset[row] - 1;
3954cd38560SBarry Smith pv += 4;
3964cd38560SBarry Smith for (j = 0; j < nz; j++) {
3979371c9d4SSatish Balay x1 = pv[0];
3989371c9d4SSatish Balay x2 = pv[1];
3999371c9d4SSatish Balay x3 = pv[2];
4009371c9d4SSatish Balay x4 = pv[3];
4014cd38560SBarry Smith x = rtmp + 4 * pj[j];
4024cd38560SBarry Smith x[0] -= m1 * x1 + m3 * x2;
4034cd38560SBarry Smith x[1] -= m2 * x1 + m4 * x2;
4044cd38560SBarry Smith x[2] -= m1 * x3 + m3 * x4;
4054cd38560SBarry Smith x[3] -= m2 * x3 + m4 * x4;
4064cd38560SBarry Smith pv += 4;
4074cd38560SBarry Smith }
4089566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(16.0 * nz + 12.0));
4094cd38560SBarry Smith }
4104cd38560SBarry Smith row = *ajtmp++;
4114cd38560SBarry Smith }
4124cd38560SBarry Smith /* finished row so stick it into b->a */
4134cd38560SBarry Smith pv = ba + 4 * bi[i];
4144cd38560SBarry Smith pj = bj + bi[i];
4154cd38560SBarry Smith nz = bi[i + 1] - bi[i];
4164cd38560SBarry Smith for (j = 0; j < nz; j++) {
4174cd38560SBarry Smith x = rtmp + 4 * pj[j];
4189371c9d4SSatish Balay pv[0] = x[0];
4199371c9d4SSatish Balay pv[1] = x[1];
4209371c9d4SSatish Balay pv[2] = x[2];
4219371c9d4SSatish Balay pv[3] = x[3];
4222efa7f71SHong Zhang /*
4232efa7f71SHong Zhang printf(" col %d:",pj[j]);
4242efa7f71SHong Zhang PetscInt j1;
4252efa7f71SHong Zhang for (j1=0; j1<4; j1++) printf(" %g,",*(pv+j1));
4262efa7f71SHong Zhang printf("\n");
4272efa7f71SHong Zhang */
4284cd38560SBarry Smith pv += 4;
4294cd38560SBarry Smith }
4304cd38560SBarry Smith /* invert diagonal block */
4314cd38560SBarry Smith w = ba + 4 * diag_offset[i];
4329566063dSJacob Faibussowitsch PetscCall(PetscKernel_A_gets_inverse_A_2(w, shift, allowzeropivot, &zeropivotdetected));
4337b6c816cSBarry Smith if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
4344cd38560SBarry Smith }
4354cd38560SBarry Smith
4369566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp));
43726fbe8dcSKarl Rupp
43806e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering_inplace;
43906e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace;
4404cd38560SBarry Smith C->assembled = PETSC_TRUE;
44126fbe8dcSKarl Rupp
4429566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.333333333333 * 8 * b->mbs)); /* from inverting diagonal blocks */
4433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4444cd38560SBarry Smith }
4457fc0212eSBarry Smith
4467fc0212eSBarry Smith /*
4477fc0212eSBarry Smith Version for when blocks are 1 by 1.
4487fc0212eSBarry Smith */
MatLUFactorNumeric_SeqBAIJ_1(Mat B,Mat A,const MatFactorInfo * info)449d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat B, Mat A, const MatFactorInfo *info)
450d71ae5a4SJacob Faibussowitsch {
451048b5e81SShri Abhyankar Mat C = B;
452048b5e81SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
453048b5e81SShri Abhyankar IS isrow = b->row, isicol = b->icol;
454048b5e81SShri Abhyankar const PetscInt *r, *ic, *ics;
455048b5e81SShri Abhyankar const PetscInt n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
456048b5e81SShri Abhyankar PetscInt i, j, k, nz, nzL, row, *pj;
457048b5e81SShri Abhyankar const PetscInt *ajtmp, *bjtmp;
458048b5e81SShri Abhyankar MatScalar *rtmp, *pc, multiplier, *pv;
459048b5e81SShri Abhyankar const MatScalar *aa = a->a, *v;
460ace3abfcSBarry Smith PetscBool row_identity, col_identity;
461048b5e81SShri Abhyankar FactorShiftCtx sctx;
462048b5e81SShri Abhyankar const PetscInt *ddiag;
463048b5e81SShri Abhyankar PetscReal rs;
464048b5e81SShri Abhyankar MatScalar d;
465048b5e81SShri Abhyankar
466048b5e81SShri Abhyankar PetscFunctionBegin;
467048b5e81SShri Abhyankar /* MatPivotSetUp(): initialize shift context sctx */
4689566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
469048b5e81SShri Abhyankar
470048b5e81SShri Abhyankar if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
471421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &ddiag, NULL));
472048b5e81SShri Abhyankar sctx.shift_top = info->zeropivot;
473048b5e81SShri Abhyankar for (i = 0; i < n; i++) {
474048b5e81SShri Abhyankar /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
475048b5e81SShri Abhyankar d = (aa)[ddiag[i]];
476048b5e81SShri Abhyankar rs = -PetscAbsScalar(d) - PetscRealPart(d);
477048b5e81SShri Abhyankar v = aa + ai[i];
478048b5e81SShri Abhyankar nz = ai[i + 1] - ai[i];
47926fbe8dcSKarl Rupp for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
480048b5e81SShri Abhyankar if (rs > sctx.shift_top) sctx.shift_top = rs;
481048b5e81SShri Abhyankar }
482048b5e81SShri Abhyankar sctx.shift_top *= 1.1;
483048b5e81SShri Abhyankar sctx.nshift_max = 5;
484048b5e81SShri Abhyankar sctx.shift_lo = 0.;
485048b5e81SShri Abhyankar sctx.shift_hi = 1.;
486048b5e81SShri Abhyankar }
487048b5e81SShri Abhyankar
4889566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r));
4899566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic));
4909566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp));
491048b5e81SShri Abhyankar ics = ic;
492048b5e81SShri Abhyankar
493048b5e81SShri Abhyankar do {
494048b5e81SShri Abhyankar sctx.newshift = PETSC_FALSE;
495048b5e81SShri Abhyankar for (i = 0; i < n; i++) {
496048b5e81SShri Abhyankar /* zero rtmp */
497048b5e81SShri Abhyankar /* L part */
498048b5e81SShri Abhyankar nz = bi[i + 1] - bi[i];
499048b5e81SShri Abhyankar bjtmp = bj + bi[i];
500048b5e81SShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
501048b5e81SShri Abhyankar
502048b5e81SShri Abhyankar /* U part */
503048b5e81SShri Abhyankar nz = bdiag[i] - bdiag[i + 1];
504048b5e81SShri Abhyankar bjtmp = bj + bdiag[i + 1] + 1;
505048b5e81SShri Abhyankar for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
506048b5e81SShri Abhyankar
507048b5e81SShri Abhyankar /* load in initial (unfactored row) */
508048b5e81SShri Abhyankar nz = ai[r[i] + 1] - ai[r[i]];
509048b5e81SShri Abhyankar ajtmp = aj + ai[r[i]];
510048b5e81SShri Abhyankar v = aa + ai[r[i]];
51126fbe8dcSKarl Rupp for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
51226fbe8dcSKarl Rupp
513048b5e81SShri Abhyankar /* ZeropivotApply() */
514048b5e81SShri Abhyankar rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
515048b5e81SShri Abhyankar
516048b5e81SShri Abhyankar /* elimination */
517048b5e81SShri Abhyankar bjtmp = bj + bi[i];
518048b5e81SShri Abhyankar row = *bjtmp++;
519048b5e81SShri Abhyankar nzL = bi[i + 1] - bi[i];
520048b5e81SShri Abhyankar for (k = 0; k < nzL; k++) {
521048b5e81SShri Abhyankar pc = rtmp + row;
522d4a378daSJed Brown if (*pc != (PetscScalar)0.0) {
523048b5e81SShri Abhyankar pv = b->a + bdiag[row];
524048b5e81SShri Abhyankar multiplier = *pc * (*pv);
525048b5e81SShri Abhyankar *pc = multiplier;
52626fbe8dcSKarl Rupp
527048b5e81SShri Abhyankar pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
528048b5e81SShri Abhyankar pv = b->a + bdiag[row + 1] + 1;
529048b5e81SShri Abhyankar nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
530048b5e81SShri Abhyankar for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
5319566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * nz));
532048b5e81SShri Abhyankar }
533048b5e81SShri Abhyankar row = *bjtmp++;
534048b5e81SShri Abhyankar }
535048b5e81SShri Abhyankar
536048b5e81SShri Abhyankar /* finished row so stick it into b->a */
537048b5e81SShri Abhyankar rs = 0.0;
538048b5e81SShri Abhyankar /* L part */
539048b5e81SShri Abhyankar pv = b->a + bi[i];
540048b5e81SShri Abhyankar pj = b->j + bi[i];
541048b5e81SShri Abhyankar nz = bi[i + 1] - bi[i];
542048b5e81SShri Abhyankar for (j = 0; j < nz; j++) {
5439371c9d4SSatish Balay pv[j] = rtmp[pj[j]];
5449371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]);
545048b5e81SShri Abhyankar }
546048b5e81SShri Abhyankar
547048b5e81SShri Abhyankar /* U part */
548048b5e81SShri Abhyankar pv = b->a + bdiag[i + 1] + 1;
549048b5e81SShri Abhyankar pj = b->j + bdiag[i + 1] + 1;
550048b5e81SShri Abhyankar nz = bdiag[i] - bdiag[i + 1] - 1;
551048b5e81SShri Abhyankar for (j = 0; j < nz; j++) {
5529371c9d4SSatish Balay pv[j] = rtmp[pj[j]];
5539371c9d4SSatish Balay rs += PetscAbsScalar(pv[j]);
554048b5e81SShri Abhyankar }
555048b5e81SShri Abhyankar
556048b5e81SShri Abhyankar sctx.rs = rs;
557048b5e81SShri Abhyankar sctx.pv = rtmp[i];
5589566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(B, A, info, &sctx, i));
559048b5e81SShri Abhyankar if (sctx.newshift) break; /* break for-loop */
560048b5e81SShri Abhyankar rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
561048b5e81SShri Abhyankar
562a5b23f4aSJose E. Roman /* Mark diagonal and invert diagonal for simpler triangular solves */
563048b5e81SShri Abhyankar pv = b->a + bdiag[i];
564d4a378daSJed Brown *pv = (PetscScalar)1.0 / rtmp[i];
565048b5e81SShri Abhyankar
566048b5e81SShri Abhyankar } /* endof for (i=0; i<n; i++) { */
567048b5e81SShri Abhyankar
568048b5e81SShri Abhyankar /* MatPivotRefine() */
569048b5e81SShri Abhyankar if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
570048b5e81SShri Abhyankar /*
571048b5e81SShri Abhyankar * if no shift in this attempt & shifting & started shifting & can refine,
572048b5e81SShri Abhyankar * then try lower shift
573048b5e81SShri Abhyankar */
574048b5e81SShri Abhyankar sctx.shift_hi = sctx.shift_fraction;
575048b5e81SShri Abhyankar sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
576048b5e81SShri Abhyankar sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
577048b5e81SShri Abhyankar sctx.newshift = PETSC_TRUE;
578048b5e81SShri Abhyankar sctx.nshift++;
579048b5e81SShri Abhyankar }
580048b5e81SShri Abhyankar } while (sctx.newshift);
581048b5e81SShri Abhyankar
5829566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp));
5839566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic));
5849566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r));
585048b5e81SShri Abhyankar
5869566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity));
5879566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity));
588048b5e81SShri Abhyankar if (row_identity && col_identity) {
589048b5e81SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering;
5909f5c0bcdSBarry Smith C->ops->forwardsolve = MatForwardSolve_SeqBAIJ_1_NaturalOrdering;
5919f5c0bcdSBarry Smith C->ops->backwardsolve = MatBackwardSolve_SeqBAIJ_1_NaturalOrdering;
59293fd935bSShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
593048b5e81SShri Abhyankar } else {
594048b5e81SShri Abhyankar C->ops->solve = MatSolve_SeqBAIJ_1;
59593fd935bSShri Abhyankar C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1;
596048b5e81SShri Abhyankar }
597048b5e81SShri Abhyankar C->assembled = PETSC_TRUE;
5989566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n));
599048b5e81SShri Abhyankar
600048b5e81SShri Abhyankar /* MatShiftView(A,info,&sctx) */
601048b5e81SShri Abhyankar if (sctx.nshift) {
602048b5e81SShri Abhyankar if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
6039566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
604048b5e81SShri Abhyankar } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
6059566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
606048b5e81SShri Abhyankar } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
6079566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
608048b5e81SShri Abhyankar }
609048b5e81SShri Abhyankar }
6103ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
611048b5e81SShri Abhyankar }
612048b5e81SShri Abhyankar
MatILUFactorNumeric_SeqBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo * info)613421480d9SBarry Smith PetscErrorCode MatILUFactorNumeric_SeqBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
614d71ae5a4SJacob Faibussowitsch {
6157fc0212eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
6167cdf2dbbSSatish Balay IS isrow = b->row, isicol = b->icol;
6175d0c19d7SBarry Smith const PetscInt *r, *ic;
6185d0c19d7SBarry Smith PetscInt i, j, n = a->mbs, *bi = b->i, *bj = b->j;
619690b6cddSBarry Smith PetscInt *ajtmpold, *ajtmp, nz, row, *ai = a->i, *aj = a->j;
620421480d9SBarry Smith PetscInt diag, *pj;
621421480d9SBarry Smith const PetscInt *diag_offset;
622329f5518SBarry Smith MatScalar *pv, *v, *rtmp, multiplier, *pc;
6233f1db9ecSBarry Smith MatScalar *ba = b->a, *aa = a->a;
624ace3abfcSBarry Smith PetscBool row_identity, col_identity;
6257fc0212eSBarry Smith
6263a40ed3dSBarry Smith PetscFunctionBegin;
627421480d9SBarry Smith /* Since A is C and C is labeled as a factored matrix we need to lie to MatGetDiagonalMarkers_SeqBAIJ() to get it to compute the diagonals */
628421480d9SBarry Smith A->factortype = MAT_FACTOR_NONE;
629421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &diag_offset, NULL));
630421480d9SBarry Smith A->factortype = MAT_FACTOR_ILU;
6319566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r));
6329566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic));
6339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &rtmp));
6347fc0212eSBarry Smith
6357fc0212eSBarry Smith for (i = 0; i < n; i++) {
6364078e994SBarry Smith nz = bi[i + 1] - bi[i];
6374078e994SBarry Smith ajtmp = bj + bi[i];
6387fc0212eSBarry Smith for (j = 0; j < nz; j++) rtmp[ajtmp[j]] = 0.0;
6397fc0212eSBarry Smith
6407fc0212eSBarry Smith /* load in initial (unfactored row) */
6414078e994SBarry Smith nz = ai[r[i] + 1] - ai[r[i]];
6424078e994SBarry Smith ajtmpold = aj + ai[r[i]];
6434078e994SBarry Smith v = aa + ai[r[i]];
6447fc0212eSBarry Smith for (j = 0; j < nz; j++) rtmp[ic[ajtmpold[j]]] = v[j];
6457fc0212eSBarry Smith
6467fc0212eSBarry Smith row = *ajtmp++;
6477fc0212eSBarry Smith while (row < i) {
6487fc0212eSBarry Smith pc = rtmp + row;
6497fc0212eSBarry Smith if (*pc != 0.0) {
6504078e994SBarry Smith pv = ba + diag_offset[row];
6514078e994SBarry Smith pj = bj + diag_offset[row] + 1;
6527fc0212eSBarry Smith multiplier = *pc * *pv++;
6537fc0212eSBarry Smith *pc = multiplier;
6544078e994SBarry Smith nz = bi[row + 1] - diag_offset[row] - 1;
6557fc0212eSBarry Smith for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
6569566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(1.0 + 2.0 * nz));
6577fc0212eSBarry Smith }
6587fc0212eSBarry Smith row = *ajtmp++;
6597fc0212eSBarry Smith }
6607fc0212eSBarry Smith /* finished row so stick it into b->a */
6614078e994SBarry Smith pv = ba + bi[i];
6624078e994SBarry Smith pj = bj + bi[i];
6634078e994SBarry Smith nz = bi[i + 1] - bi[i];
66426fbe8dcSKarl Rupp for (j = 0; j < nz; j++) pv[j] = rtmp[pj[j]];
6654078e994SBarry Smith diag = diag_offset[i] - bi[i];
6667fc0212eSBarry Smith /* check pivot entry for current row */
66708401ef6SPierre Jolivet PetscCheck(pv[diag] != 0.0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
6687fc0212eSBarry Smith pv[diag] = 1.0 / pv[diag];
6697fc0212eSBarry Smith }
6707fc0212eSBarry Smith
6719566063dSJacob Faibussowitsch PetscCall(PetscFree(rtmp));
6729566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic));
6739566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r));
6749566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity));
6759566063dSJacob Faibussowitsch PetscCall(ISIdentity(isicol, &col_identity));
676f58c8c74SBarry Smith if (row_identity && col_identity) {
67706e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering_inplace;
67806e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace;
679f58c8c74SBarry Smith } else {
68006e38f1dSHong Zhang C->ops->solve = MatSolve_SeqBAIJ_1_inplace;
68106e38f1dSHong Zhang C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_inplace;
682f58c8c74SBarry Smith }
6837fc0212eSBarry Smith C->assembled = PETSC_TRUE;
6849566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->cmap->n));
6853ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
6867fc0212eSBarry Smith }
6877fc0212eSBarry Smith
MatFactorGetSolverType_petsc(Mat A,MatSolverType * type)688d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
689d71ae5a4SJacob Faibussowitsch {
6904ac6704cSBarry Smith PetscFunctionBegin;
6914ac6704cSBarry Smith *type = MATSOLVERPETSC;
6923ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
6934ac6704cSBarry Smith }
6944ac6704cSBarry Smith
MatGetFactor_seqbaij_petsc(Mat A,MatFactorType ftype,Mat * B)695d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A, MatFactorType ftype, Mat *B)
696d71ae5a4SJacob Faibussowitsch {
697d0f46423SBarry Smith PetscInt n = A->rmap->n;
698b24902e0SBarry Smith
699b24902e0SBarry Smith PetscFunctionBegin;
700*fc2fb351SPierre Jolivet PetscCheck(!PetscDefined(USE_COMPLEX) || A->hermitian != PETSC_BOOL3_TRUE || !(ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian Factor is not supported");
7019566063dSJacob Faibussowitsch PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
7029566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*B, n, n, n, n));
703c8342467SHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
7049566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQBAIJ));
70526fbe8dcSKarl Rupp
7064dd39f65SShri Abhyankar (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqBAIJ;
7074dd39f65SShri Abhyankar (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ;
7089566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
7099566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
7109566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
711b24902e0SBarry Smith } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
7129566063dSJacob Faibussowitsch PetscCall(MatSetType(*B, MATSEQSBAIJ));
7139566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(*B, A->rmap->bs, MAT_SKIP_ALLOCATION, NULL));
71426fbe8dcSKarl Rupp
7155c9eb25fSBarry Smith (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqBAIJ;
7165c9eb25fSBarry Smith (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ;
7174ac6704cSBarry Smith /* Future optimization would be direct symbolic and numerical factorization for BAIJ to support orderings and Cholesky, instead of first converting to SBAIJ */
7189566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
7199566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
720e32f2f54SBarry Smith } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
721d5f3da31SBarry Smith (*B)->factortype = ftype;
722f73b0415SBarry Smith (*B)->canuseordering = PETSC_TRUE;
72300c67f3bSHong Zhang
7249566063dSJacob Faibussowitsch PetscCall(PetscFree((*B)->solvertype));
7259566063dSJacob Faibussowitsch PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
7269566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
7273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
728b24902e0SBarry Smith }
729273d9f13SBarry Smith
MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,const MatFactorInfo * info)730d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqBAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
731d71ae5a4SJacob Faibussowitsch {
7325d517e7eSBarry Smith Mat C;
7335d517e7eSBarry Smith
7343a40ed3dSBarry Smith PetscFunctionBegin;
7359566063dSJacob Faibussowitsch PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
7369566063dSJacob Faibussowitsch PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
7379566063dSJacob Faibussowitsch PetscCall(MatLUFactorNumeric(C, A, info));
73826fbe8dcSKarl Rupp
739db4efbfdSBarry Smith A->ops->solve = C->ops->solve;
740db4efbfdSBarry Smith A->ops->solvetranspose = C->ops->solvetranspose;
74126fbe8dcSKarl Rupp
7429566063dSJacob Faibussowitsch PetscCall(MatHeaderMerge(A, &C));
7433ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
7445d517e7eSBarry Smith }
7454cd38560SBarry Smith
746c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C,Mat A,const MatFactorInfo * info)747d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
748d71ae5a4SJacob Faibussowitsch {
74978910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
75078910aadSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
75178910aadSHong Zhang IS ip = b->row;
7525d0c19d7SBarry Smith const PetscInt *rip;
7535d0c19d7SBarry Smith PetscInt i, j, mbs = a->mbs, bs = A->rmap->bs, *bi = b->i, *bj = b->j, *bcol;
75478910aadSHong Zhang PetscInt *ai = a->i, *aj = a->j;
75578910aadSHong Zhang PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
75678910aadSHong Zhang MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
75707b50cabSHong Zhang PetscReal rs;
7580e95ead3SHong Zhang FactorShiftCtx sctx;
75978910aadSHong Zhang
760c05c3958SHong Zhang PetscFunctionBegin;
76107b50cabSHong Zhang if (bs > 1) { /* convert A to a SBAIJ matrix and apply Cholesky factorization from it */
762b0c98d1dSPierre Jolivet if (!a->sbaijMat) {
763b0c98d1dSPierre Jolivet A->symmetric = PETSC_BOOL3_TRUE;
764b0c98d1dSPierre Jolivet if (!PetscDefined(USE_COMPLEX)) A->hermitian = PETSC_BOOL3_TRUE;
765b0c98d1dSPierre Jolivet PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat));
766b0c98d1dSPierre Jolivet }
767f4f49eeaSPierre Jolivet PetscCall(a->sbaijMat->ops->choleskyfactornumeric(C, a->sbaijMat, info));
7689566063dSJacob Faibussowitsch PetscCall(MatDestroy(&a->sbaijMat));
7693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
7706ad2eaddSHong Zhang }
77178910aadSHong Zhang
77207b50cabSHong Zhang /* MatPivotSetUp(): initialize shift context sctx */
7739566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
7743cea4cbeSHong Zhang
7759566063dSJacob Faibussowitsch PetscCall(ISGetIndices(ip, &rip));
7769566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
77778910aadSHong Zhang
77875567043SBarry Smith sctx.shift_amount = 0.;
7793cea4cbeSHong Zhang sctx.nshift = 0;
78078910aadSHong Zhang do {
78107b50cabSHong Zhang sctx.newshift = PETSC_FALSE;
78278910aadSHong Zhang for (i = 0; i < mbs; i++) {
7839371c9d4SSatish Balay rtmp[i] = 0.0;
7849371c9d4SSatish Balay jl[i] = mbs;
7859371c9d4SSatish Balay il[0] = 0;
78678910aadSHong Zhang }
78778910aadSHong Zhang
78878910aadSHong Zhang for (k = 0; k < mbs; k++) {
78978910aadSHong Zhang bval = ba + bi[k];
79078910aadSHong Zhang /* initialize k-th row by the perm[k]-th row of A */
7919371c9d4SSatish Balay jmin = ai[rip[k]];
7929371c9d4SSatish Balay jmax = ai[rip[k] + 1];
79378910aadSHong Zhang for (j = jmin; j < jmax; j++) {
79478910aadSHong Zhang col = rip[aj[j]];
79578910aadSHong Zhang if (col >= k) { /* only take upper triangular entry */
79678910aadSHong Zhang rtmp[col] = aa[j];
79778910aadSHong Zhang *bval++ = 0.0; /* for in-place factorization */
79878910aadSHong Zhang }
79978910aadSHong Zhang }
8003cea4cbeSHong Zhang
8013cea4cbeSHong Zhang /* shift the diagonal of the matrix */
8023cea4cbeSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount;
80378910aadSHong Zhang
80478910aadSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */
80578910aadSHong Zhang dk = rtmp[k];
80678910aadSHong Zhang i = jl[k]; /* first row to be added to k_th row */
80778910aadSHong Zhang
80878910aadSHong Zhang while (i < k) {
80978910aadSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */
81078910aadSHong Zhang
81178910aadSHong Zhang /* compute multiplier, update diag(k) and U(i,k) */
81278910aadSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
81378910aadSHong Zhang uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
81478910aadSHong Zhang dk += uikdi * ba[ili];
81578910aadSHong Zhang ba[ili] = uikdi; /* -U(i,k) */
81678910aadSHong Zhang
81778910aadSHong Zhang /* add multiple of row i to k-th row */
8189371c9d4SSatish Balay jmin = ili + 1;
8199371c9d4SSatish Balay jmax = bi[i + 1];
82078910aadSHong Zhang if (jmin < jmax) {
82178910aadSHong Zhang for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
82278910aadSHong Zhang /* update il and jl for row i */
82378910aadSHong Zhang il[i] = jmin;
8249371c9d4SSatish Balay j = bj[jmin];
8259371c9d4SSatish Balay jl[i] = jl[j];
8269371c9d4SSatish Balay jl[j] = i;
82778910aadSHong Zhang }
82878910aadSHong Zhang i = nexti;
82978910aadSHong Zhang }
83078910aadSHong Zhang
8313cea4cbeSHong Zhang /* shift the diagonals when zero pivot is detected */
8323cea4cbeSHong Zhang /* compute rs=sum of abs(off-diagonal) */
8333cea4cbeSHong Zhang rs = 0.0;
83478910aadSHong Zhang jmin = bi[k] + 1;
83578910aadSHong Zhang nz = bi[k + 1] - jmin;
83678910aadSHong Zhang if (nz) {
83778910aadSHong Zhang bcol = bj + jmin;
83878910aadSHong Zhang while (nz--) {
83989f845c8SHong Zhang rs += PetscAbsScalar(rtmp[*bcol]);
84089f845c8SHong Zhang bcol++;
84178910aadSHong Zhang }
84278910aadSHong Zhang }
8433cea4cbeSHong Zhang
8443cea4cbeSHong Zhang sctx.rs = rs;
8453cea4cbeSHong Zhang sctx.pv = dk;
8469566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(C, A, info, &sctx, k));
84707b50cabSHong Zhang if (sctx.newshift) break;
8480e95ead3SHong Zhang dk = sctx.pv;
84978910aadSHong Zhang
85078910aadSHong Zhang /* copy data into U(k,:) */
85178910aadSHong Zhang ba[bi[k]] = 1.0 / dk; /* U(k,k) */
8529371c9d4SSatish Balay jmin = bi[k] + 1;
8539371c9d4SSatish Balay jmax = bi[k + 1];
85478910aadSHong Zhang if (jmin < jmax) {
85578910aadSHong Zhang for (j = jmin; j < jmax; j++) {
8569371c9d4SSatish Balay col = bj[j];
8579371c9d4SSatish Balay ba[j] = rtmp[col];
8589371c9d4SSatish Balay rtmp[col] = 0.0;
85978910aadSHong Zhang }
86078910aadSHong Zhang /* add the k-th row into il and jl */
86178910aadSHong Zhang il[k] = jmin;
8629371c9d4SSatish Balay i = bj[jmin];
8639371c9d4SSatish Balay jl[k] = jl[i];
8649371c9d4SSatish Balay jl[i] = k;
86578910aadSHong Zhang }
86678910aadSHong Zhang }
86707b50cabSHong Zhang } while (sctx.newshift);
8689566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl));
86978910aadSHong Zhang
8709566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(ip, &rip));
87126fbe8dcSKarl Rupp
87278910aadSHong Zhang C->assembled = PETSC_TRUE;
87378910aadSHong Zhang C->preallocated = PETSC_TRUE;
87426fbe8dcSKarl Rupp
8759566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->N));
8763cea4cbeSHong Zhang if (sctx.nshift) {
877f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
8789566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
879f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
8809566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
88178910aadSHong Zhang }
88278910aadSHong Zhang }
8833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
884c05c3958SHong Zhang }
8854cd38560SBarry Smith
MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo * info)886d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
887d71ae5a4SJacob Faibussowitsch {
88878910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
88978910aadSHong Zhang Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
8903cea4cbeSHong Zhang PetscInt i, j, am = a->mbs;
89178910aadSHong Zhang PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
8923cea4cbeSHong Zhang PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
89378910aadSHong Zhang MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
8940e95ead3SHong Zhang PetscReal rs;
8950e95ead3SHong Zhang FactorShiftCtx sctx;
89678910aadSHong Zhang
897c05c3958SHong Zhang PetscFunctionBegin;
8980e95ead3SHong Zhang /* MatPivotSetUp(): initialize shift context sctx */
8999566063dSJacob Faibussowitsch PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
9003cea4cbeSHong Zhang
9019566063dSJacob Faibussowitsch PetscCall(PetscMalloc3(am, &rtmp, am, &il, am, &jl));
90278910aadSHong Zhang
90378910aadSHong Zhang do {
90407b50cabSHong Zhang sctx.newshift = PETSC_FALSE;
90578910aadSHong Zhang for (i = 0; i < am; i++) {
9069371c9d4SSatish Balay rtmp[i] = 0.0;
9079371c9d4SSatish Balay jl[i] = am;
9089371c9d4SSatish Balay il[0] = 0;
90978910aadSHong Zhang }
91078910aadSHong Zhang
91178910aadSHong Zhang for (k = 0; k < am; k++) {
91278910aadSHong Zhang /* initialize k-th row with elements nonzero in row perm(k) of A */
91378910aadSHong Zhang nz = ai[k + 1] - ai[k];
91478910aadSHong Zhang acol = aj + ai[k];
91578910aadSHong Zhang aval = aa + ai[k];
91678910aadSHong Zhang bval = ba + bi[k];
91778910aadSHong Zhang while (nz--) {
91878910aadSHong Zhang if (*acol < k) { /* skip lower triangular entries */
9199371c9d4SSatish Balay acol++;
9209371c9d4SSatish Balay aval++;
92178910aadSHong Zhang } else {
92278910aadSHong Zhang rtmp[*acol++] = *aval++;
92378910aadSHong Zhang *bval++ = 0.0; /* for in-place factorization */
92478910aadSHong Zhang }
92578910aadSHong Zhang }
9263cea4cbeSHong Zhang
9273cea4cbeSHong Zhang /* shift the diagonal of the matrix */
9283cea4cbeSHong Zhang if (sctx.nshift) rtmp[k] += sctx.shift_amount;
92978910aadSHong Zhang
93078910aadSHong Zhang /* modify k-th row by adding in those rows i with U(i,k)!=0 */
93178910aadSHong Zhang dk = rtmp[k];
93278910aadSHong Zhang i = jl[k]; /* first row to be added to k_th row */
93378910aadSHong Zhang
93478910aadSHong Zhang while (i < k) {
93578910aadSHong Zhang nexti = jl[i]; /* next row to be added to k_th row */
93678910aadSHong Zhang /* compute multiplier, update D(k) and U(i,k) */
93778910aadSHong Zhang ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
93878910aadSHong Zhang uikdi = -ba[ili] * ba[bi[i]];
93978910aadSHong Zhang dk += uikdi * ba[ili];
94078910aadSHong Zhang ba[ili] = uikdi; /* -U(i,k) */
94178910aadSHong Zhang
94278910aadSHong Zhang /* add multiple of row i to k-th row ... */
94378910aadSHong Zhang jmin = ili + 1;
94478910aadSHong Zhang nz = bi[i + 1] - jmin;
94578910aadSHong Zhang if (nz > 0) {
94678910aadSHong Zhang bcol = bj + jmin;
94778910aadSHong Zhang bval = ba + jmin;
94878910aadSHong Zhang while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
94978910aadSHong Zhang /* update il and jl for i-th row */
95078910aadSHong Zhang il[i] = jmin;
9519371c9d4SSatish Balay j = bj[jmin];
9529371c9d4SSatish Balay jl[i] = jl[j];
9539371c9d4SSatish Balay jl[j] = i;
95478910aadSHong Zhang }
95578910aadSHong Zhang i = nexti;
95678910aadSHong Zhang }
95778910aadSHong Zhang
9583cea4cbeSHong Zhang /* shift the diagonals when zero pivot is detected */
9593cea4cbeSHong Zhang /* compute rs=sum of abs(off-diagonal) */
9603cea4cbeSHong Zhang rs = 0.0;
96178910aadSHong Zhang jmin = bi[k] + 1;
96278910aadSHong Zhang nz = bi[k + 1] - jmin;
96378910aadSHong Zhang if (nz) {
96478910aadSHong Zhang bcol = bj + jmin;
96578910aadSHong Zhang while (nz--) {
96689f845c8SHong Zhang rs += PetscAbsScalar(rtmp[*bcol]);
96789f845c8SHong Zhang bcol++;
96878910aadSHong Zhang }
96978910aadSHong Zhang }
9703cea4cbeSHong Zhang
9713cea4cbeSHong Zhang sctx.rs = rs;
9723cea4cbeSHong Zhang sctx.pv = dk;
9739566063dSJacob Faibussowitsch PetscCall(MatPivotCheck(C, A, info, &sctx, k));
97407b50cabSHong Zhang if (sctx.newshift) break; /* sctx.shift_amount is updated */
9750e95ead3SHong Zhang dk = sctx.pv;
97678910aadSHong Zhang
97778910aadSHong Zhang /* copy data into U(k,:) */
97878910aadSHong Zhang ba[bi[k]] = 1.0 / dk;
97978910aadSHong Zhang jmin = bi[k] + 1;
98078910aadSHong Zhang nz = bi[k + 1] - jmin;
98178910aadSHong Zhang if (nz) {
98278910aadSHong Zhang bcol = bj + jmin;
98378910aadSHong Zhang bval = ba + jmin;
98478910aadSHong Zhang while (nz--) {
98578910aadSHong Zhang *bval++ = rtmp[*bcol];
98678910aadSHong Zhang rtmp[*bcol++] = 0.0;
98778910aadSHong Zhang }
98878910aadSHong Zhang /* add k-th row into il and jl */
98978910aadSHong Zhang il[k] = jmin;
9909371c9d4SSatish Balay i = bj[jmin];
9919371c9d4SSatish Balay jl[k] = jl[i];
9929371c9d4SSatish Balay jl[i] = k;
99378910aadSHong Zhang }
99478910aadSHong Zhang }
99507b50cabSHong Zhang } while (sctx.newshift);
9969566063dSJacob Faibussowitsch PetscCall(PetscFree3(rtmp, il, jl));
99778910aadSHong Zhang
9980a3c351aSHong Zhang C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
9990a3c351aSHong Zhang C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
100078910aadSHong Zhang C->assembled = PETSC_TRUE;
100178910aadSHong Zhang C->preallocated = PETSC_TRUE;
100226fbe8dcSKarl Rupp
10039566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(C->rmap->N));
10043cea4cbeSHong Zhang if (sctx.nshift) {
1005f4db908eSBarry Smith if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
10069566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1007f4db908eSBarry Smith } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
10089566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
100978910aadSHong Zhang }
101078910aadSHong Zhang }
10113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1012c05c3958SHong Zhang }
1013c05c3958SHong Zhang
1014c6db04a5SJed Brown #include <petscbt.h>
1015c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
MatICCFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)1016d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
1017d71ae5a4SJacob Faibussowitsch {
101878910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
101978910aadSHong Zhang Mat_SeqSBAIJ *b;
102078910aadSHong Zhang Mat B;
1021421480d9SBarry Smith PetscBool perm_identity;
10225d0c19d7SBarry Smith PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = a->mbs, bs = A->rmap->bs, *ui;
10235d0c19d7SBarry Smith const PetscInt *rip;
102478910aadSHong Zhang PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
10250298fd71SBarry Smith PetscInt nlnk, *lnk, *lnk_lvl = NULL, ncols, ncols_upper, *cols, *cols_lvl, *uj, **uj_ptr, **uj_lvl_ptr;
102678910aadSHong Zhang PetscReal fill = info->fill, levels = info->levels;
10270298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL;
10280298fd71SBarry Smith PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
102978910aadSHong Zhang PetscBT lnkbt;
1030421480d9SBarry Smith PetscBool diagDense;
1031421480d9SBarry Smith const PetscInt *adiag;
103278910aadSHong Zhang
1033c05c3958SHong Zhang PetscFunctionBegin;
1034421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, &diagDense));
1035421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");
103648dd3d27SHong Zhang
10376ad2eaddSHong Zhang if (bs > 1) {
1038b0c98d1dSPierre Jolivet if (!a->sbaijMat) {
1039b0c98d1dSPierre Jolivet A->symmetric = PETSC_BOOL3_TRUE;
1040b0c98d1dSPierre Jolivet if (!PetscDefined(USE_COMPLEX)) A->hermitian = PETSC_BOOL3_TRUE;
1041b0c98d1dSPierre Jolivet PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat));
1042b0c98d1dSPierre Jolivet }
104357508eceSPierre Jolivet fact->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */
104426fbe8dcSKarl Rupp
10459566063dSJacob Faibussowitsch PetscCall(MatICCFactorSymbolic(fact, a->sbaijMat, perm, info));
10463ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
10476ad2eaddSHong Zhang }
10486ad2eaddSHong Zhang
10499566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity));
10509566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip));
105178910aadSHong Zhang
105278910aadSHong Zhang /* special case that simply copies fill pattern */
105378910aadSHong Zhang if (!levels && perm_identity) {
10549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui));
1055421480d9SBarry Smith for (i = 0; i < am; i++) ui[i] = ai[i + 1] - adiag[i]; /* ui: rowlengths - changes when !perm_identity */
1056719d5645SBarry Smith B = fact;
10579566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(B, 1, 0, ui));
105878910aadSHong Zhang
105978910aadSHong Zhang b = (Mat_SeqSBAIJ *)B->data;
106078910aadSHong Zhang uj = b->j;
106178910aadSHong Zhang for (i = 0; i < am; i++) {
1062421480d9SBarry Smith aj = a->j + adiag[i];
106326fbe8dcSKarl Rupp for (j = 0; j < ui[i]; j++) *uj++ = *aj++;
106478910aadSHong Zhang b->ilen[i] = ui[i];
106578910aadSHong Zhang }
10669566063dSJacob Faibussowitsch PetscCall(PetscFree(ui));
106726fbe8dcSKarl Rupp
1068d5f3da31SBarry Smith B->factortype = MAT_FACTOR_NONE;
106926fbe8dcSKarl Rupp
10709566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
10719566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1072d5f3da31SBarry Smith B->factortype = MAT_FACTOR_ICC;
107378910aadSHong Zhang
107478910aadSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
10753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
107678910aadSHong Zhang }
107778910aadSHong Zhang
107878910aadSHong Zhang /* initialization */
10799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &ui));
1080e60cf9a0SBarry Smith ui[0] = 0;
10819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(2 * am + 1, &cols_lvl));
108278910aadSHong Zhang
108378910aadSHong Zhang /* jl: linked list for storing indices of the pivot rows
108478910aadSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
10859566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &il, am, &jl));
108678910aadSHong Zhang for (i = 0; i < am; i++) {
10879371c9d4SSatish Balay jl[i] = am;
10889371c9d4SSatish Balay il[i] = 0;
108978910aadSHong Zhang }
109078910aadSHong Zhang
109178910aadSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */
109278910aadSHong Zhang nlnk = am + 1;
10939566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
109478910aadSHong Zhang
109595b5ac22SHong Zhang /* initial FreeSpace size is fill*(ai[am]+am)/2 */
10969566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am] / 2, am / 2)), &free_space));
109726fbe8dcSKarl Rupp
109878910aadSHong Zhang current_space = free_space;
109926fbe8dcSKarl Rupp
11009566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am] / 2, am / 2)), &free_space_lvl));
110178910aadSHong Zhang current_space_lvl = free_space_lvl;
110278910aadSHong Zhang
110378910aadSHong Zhang for (k = 0; k < am; k++) { /* for each active row k */
110478910aadSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */
110578910aadSHong Zhang nzk = 0;
110678910aadSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]];
110778910aadSHong Zhang ncols_upper = 0;
110878910aadSHong Zhang cols = cols_lvl + am;
110978910aadSHong Zhang for (j = 0; j < ncols; j++) {
111078910aadSHong Zhang i = rip[*(aj + ai[rip[k]] + j)];
111178910aadSHong Zhang if (i >= k) { /* only take upper triangular entry */
111278910aadSHong Zhang cols[ncols_upper] = i;
111378910aadSHong Zhang cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */
111478910aadSHong Zhang ncols_upper++;
111578910aadSHong Zhang }
111678910aadSHong Zhang }
11179566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLAdd(ncols_upper, cols, levels, cols_lvl, am, &nlnk, lnk, lnk_lvl, lnkbt));
111878910aadSHong Zhang nzk += nlnk;
111978910aadSHong Zhang
112078910aadSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */
112178910aadSHong Zhang prow = jl[k]; /* 1st pivot row */
112278910aadSHong Zhang
112378910aadSHong Zhang while (prow < k) {
112478910aadSHong Zhang nextprow = jl[prow];
112578910aadSHong Zhang
112678910aadSHong Zhang /* merge prow into k-th row */
112778910aadSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
112878910aadSHong Zhang jmax = ui[prow + 1];
112978910aadSHong Zhang ncols = jmax - jmin;
113078910aadSHong Zhang i = jmin - ui[prow];
113178910aadSHong Zhang cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
113278910aadSHong Zhang for (j = 0; j < ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
11339566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLAddSorted(ncols, cols, levels, cols_lvl, am, &nlnk, lnk, lnk_lvl, lnkbt));
113478910aadSHong Zhang nzk += nlnk;
113578910aadSHong Zhang
113678910aadSHong Zhang /* update il and jl for prow */
113778910aadSHong Zhang if (jmin < jmax) {
113878910aadSHong Zhang il[prow] = jmin;
113926fbe8dcSKarl Rupp
11409371c9d4SSatish Balay j = *cols;
11419371c9d4SSatish Balay jl[prow] = jl[j];
11429371c9d4SSatish Balay jl[j] = prow;
114378910aadSHong Zhang }
114478910aadSHong Zhang prow = nextprow;
114578910aadSHong Zhang }
114678910aadSHong Zhang
114778910aadSHong Zhang /* if free space is not available, make more free space */
114878910aadSHong Zhang if (current_space->local_remaining < nzk) {
114978910aadSHong Zhang i = am - k + 1; /* num of unfactored rows */
1150f91af8c7SBarry Smith i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
11519566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space));
11529566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
115378910aadSHong Zhang reallocs++;
115478910aadSHong Zhang }
115578910aadSHong Zhang
115678910aadSHong Zhang /* copy data into free_space and free_space_lvl, then initialize lnk */
11579566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
115878910aadSHong Zhang
115978910aadSHong Zhang /* add the k-th row into il and jl */
116078910aadSHong Zhang if (nzk - 1 > 0) {
116178910aadSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
11629371c9d4SSatish Balay jl[k] = jl[i];
11639371c9d4SSatish Balay jl[i] = k;
116478910aadSHong Zhang il[k] = ui[k] + 1;
116578910aadSHong Zhang }
116678910aadSHong Zhang uj_ptr[k] = current_space->array;
116778910aadSHong Zhang uj_lvl_ptr[k] = current_space_lvl->array;
116878910aadSHong Zhang
116978910aadSHong Zhang current_space->array += nzk;
117078910aadSHong Zhang current_space->local_used += nzk;
117178910aadSHong Zhang current_space->local_remaining -= nzk;
117278910aadSHong Zhang
117378910aadSHong Zhang current_space_lvl->array += nzk;
117478910aadSHong Zhang current_space_lvl->local_used += nzk;
117578910aadSHong Zhang current_space_lvl->local_remaining -= nzk;
117678910aadSHong Zhang
117778910aadSHong Zhang ui[k + 1] = ui[k] + nzk;
117878910aadSHong Zhang }
117978910aadSHong Zhang
11809566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip));
11819566063dSJacob Faibussowitsch PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, il, jl));
11829566063dSJacob Faibussowitsch PetscCall(PetscFree(cols_lvl));
118378910aadSHong Zhang
11849263d837SHong Zhang /* copy free_space into uj and free free_space; set uj in new datastructure; */
11859566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &uj));
11869566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
11879566063dSJacob Faibussowitsch PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
11889566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
118978910aadSHong Zhang
119078910aadSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */
1191719d5645SBarry Smith B = fact;
11929566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(B, 1, MAT_SKIP_ALLOCATION, NULL));
119378910aadSHong Zhang
119478910aadSHong Zhang b = (Mat_SeqSBAIJ *)B->data;
1195e6b907acSBarry Smith b->free_a = PETSC_TRUE;
1196e6b907acSBarry Smith b->free_ij = PETSC_TRUE;
119726fbe8dcSKarl Rupp
11989566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[am] + 1, &b->a));
119926fbe8dcSKarl Rupp
120078910aadSHong Zhang b->j = uj;
120178910aadSHong Zhang b->i = ui;
1202f4259b30SLisandro Dalcin b->diag = NULL;
1203f4259b30SLisandro Dalcin b->ilen = NULL;
1204f4259b30SLisandro Dalcin b->imax = NULL;
120578910aadSHong Zhang b->row = perm;
120678910aadSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
120726fbe8dcSKarl Rupp
12089566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm));
120926fbe8dcSKarl Rupp
121078910aadSHong Zhang b->icol = perm;
121126fbe8dcSKarl Rupp
12129566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm));
12139566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(am + 1, &b->solve_work));
121426fbe8dcSKarl Rupp
121578910aadSHong Zhang b->maxnz = b->nz = ui[am];
121678910aadSHong Zhang
121778910aadSHong Zhang B->info.factor_mallocs = reallocs;
121878910aadSHong Zhang B->info.fill_ratio_given = fill;
121975567043SBarry Smith if (ai[am] != 0.) {
1220da81f932SPierre Jolivet /* nonzeros in lower triangular part of A (including diagonals)= (ai[am]+am)/2 */
122195b5ac22SHong Zhang B->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
122278910aadSHong Zhang } else {
122378910aadSHong Zhang B->info.fill_ratio_needed = 0.0;
122478910aadSHong Zhang }
12259263d837SHong Zhang #if defined(PETSC_USE_INFO)
12269263d837SHong Zhang if (ai[am] != 0) {
122795b5ac22SHong Zhang PetscReal af = B->info.fill_ratio_needed;
12289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
12299566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
12309566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
12319263d837SHong Zhang } else {
12329566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n"));
12339263d837SHong Zhang }
12349263d837SHong Zhang #endif
123578910aadSHong Zhang if (perm_identity) {
12360a3c351aSHong Zhang B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
12370a3c351aSHong Zhang B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
123878910aadSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
123978910aadSHong Zhang } else {
124057508eceSPierre Jolivet fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
124178910aadSHong Zhang }
12423ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1243c05c3958SHong Zhang }
1244c05c3958SHong Zhang
MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)1245d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
1246d71ae5a4SJacob Faibussowitsch {
124778910aadSHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
124878910aadSHong Zhang Mat_SeqSBAIJ *b;
124978910aadSHong Zhang Mat B;
1250421480d9SBarry Smith PetscBool perm_identity;
125178910aadSHong Zhang PetscReal fill = info->fill;
12525d0c19d7SBarry Smith const PetscInt *rip;
12535d0c19d7SBarry Smith PetscInt i, mbs = a->mbs, bs = A->rmap->bs, *ai = a->i, *aj = a->j, reallocs = 0, prow;
125478910aadSHong Zhang PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
125578910aadSHong Zhang PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
12560298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL;
125778910aadSHong Zhang PetscBT lnkbt;
1258421480d9SBarry Smith PetscBool diagDense;
125978910aadSHong Zhang
1260c05c3958SHong Zhang PetscFunctionBegin;
12616ad2eaddSHong Zhang if (bs > 1) { /* convert to seqsbaij */
1262b0c98d1dSPierre Jolivet if (!a->sbaijMat) {
1263b0c98d1dSPierre Jolivet A->symmetric = PETSC_BOOL3_TRUE;
1264b0c98d1dSPierre Jolivet if (!PetscDefined(USE_COMPLEX)) A->hermitian = PETSC_BOOL3_TRUE;
1265b0c98d1dSPierre Jolivet PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat));
1266b0c98d1dSPierre Jolivet }
126757508eceSPierre Jolivet fact->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */
126826fbe8dcSKarl Rupp
12699566063dSJacob Faibussowitsch PetscCall(MatCholeskyFactorSymbolic(fact, a->sbaijMat, perm, info));
12703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
12716ad2eaddSHong Zhang }
1272421480d9SBarry Smith PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, NULL, &diagDense));
1273421480d9SBarry Smith PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");
12749186b105SHong Zhang
127578910aadSHong Zhang /* check whether perm is the identity mapping */
12769566063dSJacob Faibussowitsch PetscCall(ISIdentity(perm, &perm_identity));
127728b400f6SJacob Faibussowitsch PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported");
12789566063dSJacob Faibussowitsch PetscCall(ISGetIndices(perm, &rip));
127978910aadSHong Zhang
128078910aadSHong Zhang /* initialization */
12819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs + 1, &ui));
1282e60cf9a0SBarry Smith ui[0] = 0;
128378910aadSHong Zhang
128478910aadSHong Zhang /* jl: linked list for storing indices of the pivot rows
128578910aadSHong Zhang il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
12869566063dSJacob Faibussowitsch PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
128778910aadSHong Zhang for (i = 0; i < mbs; i++) {
12889371c9d4SSatish Balay jl[i] = mbs;
12899371c9d4SSatish Balay il[i] = 0;
129078910aadSHong Zhang }
129178910aadSHong Zhang
129278910aadSHong Zhang /* create and initialize a linked list for storing column indices of the active row k */
129378910aadSHong Zhang nlnk = mbs + 1;
12949566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
129578910aadSHong Zhang
12966a69fef8SHong Zhang /* initial FreeSpace size is fill* (ai[mbs]+mbs)/2 */
12979566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[mbs] / 2, mbs / 2)), &free_space));
129826fbe8dcSKarl Rupp
129978910aadSHong Zhang current_space = free_space;
130078910aadSHong Zhang
130178910aadSHong Zhang for (k = 0; k < mbs; k++) { /* for each active row k */
130278910aadSHong Zhang /* initialize lnk by the column indices of row rip[k] of A */
130378910aadSHong Zhang nzk = 0;
130478910aadSHong Zhang ncols = ai[rip[k] + 1] - ai[rip[k]];
1305e60cf9a0SBarry Smith ncols_upper = 0;
130678910aadSHong Zhang for (j = 0; j < ncols; j++) {
130778910aadSHong Zhang i = rip[*(aj + ai[rip[k]] + j)];
130878910aadSHong Zhang if (i >= k) { /* only take upper triangular entry */
130978910aadSHong Zhang cols[ncols_upper] = i;
131078910aadSHong Zhang ncols_upper++;
131178910aadSHong Zhang }
131278910aadSHong Zhang }
13139566063dSJacob Faibussowitsch PetscCall(PetscLLAdd(ncols_upper, cols, mbs, &nlnk, lnk, lnkbt));
131478910aadSHong Zhang nzk += nlnk;
131578910aadSHong Zhang
131678910aadSHong Zhang /* update lnk by computing fill-in for each pivot row to be merged in */
131778910aadSHong Zhang prow = jl[k]; /* 1st pivot row */
131878910aadSHong Zhang
131978910aadSHong Zhang while (prow < k) {
132078910aadSHong Zhang nextprow = jl[prow];
132178910aadSHong Zhang /* merge prow into k-th row */
132278910aadSHong Zhang jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
132378910aadSHong Zhang jmax = ui[prow + 1];
132478910aadSHong Zhang ncols = jmax - jmin;
132578910aadSHong Zhang uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
13269566063dSJacob Faibussowitsch PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
132778910aadSHong Zhang nzk += nlnk;
132878910aadSHong Zhang
132978910aadSHong Zhang /* update il and jl for prow */
133078910aadSHong Zhang if (jmin < jmax) {
133178910aadSHong Zhang il[prow] = jmin;
133226fbe8dcSKarl Rupp j = *uj_ptr;
133326fbe8dcSKarl Rupp jl[prow] = jl[j];
133426fbe8dcSKarl Rupp jl[j] = prow;
133578910aadSHong Zhang }
133678910aadSHong Zhang prow = nextprow;
133778910aadSHong Zhang }
133878910aadSHong Zhang
133978910aadSHong Zhang /* if free space is not available, make more free space */
134078910aadSHong Zhang if (current_space->local_remaining < nzk) {
134178910aadSHong Zhang i = mbs - k + 1; /* num of unfactored rows */
1342f91af8c7SBarry Smith i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
13439566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(i, ¤t_space));
134478910aadSHong Zhang reallocs++;
134578910aadSHong Zhang }
134678910aadSHong Zhang
134778910aadSHong Zhang /* copy data into free space, then initialize lnk */
13489566063dSJacob Faibussowitsch PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
134978910aadSHong Zhang
135078910aadSHong Zhang /* add the k-th row into il and jl */
135178910aadSHong Zhang if (nzk - 1 > 0) {
135278910aadSHong Zhang i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
13539371c9d4SSatish Balay jl[k] = jl[i];
13549371c9d4SSatish Balay jl[i] = k;
135578910aadSHong Zhang il[k] = ui[k] + 1;
135678910aadSHong Zhang }
135778910aadSHong Zhang ui_ptr[k] = current_space->array;
135878910aadSHong Zhang current_space->array += nzk;
135978910aadSHong Zhang current_space->local_used += nzk;
136078910aadSHong Zhang current_space->local_remaining -= nzk;
136178910aadSHong Zhang
136278910aadSHong Zhang ui[k + 1] = ui[k] + nzk;
136378910aadSHong Zhang }
136478910aadSHong Zhang
13659566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(perm, &rip));
13669566063dSJacob Faibussowitsch PetscCall(PetscFree4(ui_ptr, il, jl, cols));
136778910aadSHong Zhang
13689263d837SHong Zhang /* copy free_space into uj and free free_space; set uj in new datastructure; */
13699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
13709566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
13719566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt));
137278910aadSHong Zhang
137378910aadSHong Zhang /* put together the new matrix in MATSEQSBAIJ format */
1374719d5645SBarry Smith B = fact;
13759566063dSJacob Faibussowitsch PetscCall(MatSeqSBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
137678910aadSHong Zhang
137778910aadSHong Zhang b = (Mat_SeqSBAIJ *)B->data;
1378e6b907acSBarry Smith b->free_a = PETSC_TRUE;
1379e6b907acSBarry Smith b->free_ij = PETSC_TRUE;
138026fbe8dcSKarl Rupp
13819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));
138226fbe8dcSKarl Rupp
138378910aadSHong Zhang b->j = uj;
138478910aadSHong Zhang b->i = ui;
1385f4259b30SLisandro Dalcin b->diag = NULL;
1386f4259b30SLisandro Dalcin b->ilen = NULL;
1387f4259b30SLisandro Dalcin b->imax = NULL;
138878910aadSHong Zhang b->row = perm;
138978910aadSHong Zhang b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
139026fbe8dcSKarl Rupp
13919566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm));
139278910aadSHong Zhang b->icol = perm;
13939566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)perm));
13949566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
139578910aadSHong Zhang b->maxnz = b->nz = ui[mbs];
139678910aadSHong Zhang
139778910aadSHong Zhang B->info.factor_mallocs = reallocs;
139878910aadSHong Zhang B->info.fill_ratio_given = fill;
139975567043SBarry Smith if (ai[mbs] != 0.) {
140095b5ac22SHong Zhang /* nonzeros in lower triangular part of A = (ai[mbs]+mbs)/2 */
140195b5ac22SHong Zhang B->info.fill_ratio_needed = ((PetscReal)2 * ui[mbs]) / (ai[mbs] + mbs);
140278910aadSHong Zhang } else {
140378910aadSHong Zhang B->info.fill_ratio_needed = 0.0;
140478910aadSHong Zhang }
14059263d837SHong Zhang #if defined(PETSC_USE_INFO)
14069263d837SHong Zhang if (ai[mbs] != 0.) {
14079263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed;
14089566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
14099566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
14109566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
14119263d837SHong Zhang } else {
14129566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n"));
14139263d837SHong Zhang }
14149263d837SHong Zhang #endif
141578910aadSHong Zhang if (perm_identity) {
14166ad2eaddSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
141778910aadSHong Zhang } else {
14186ad2eaddSHong Zhang B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
141978910aadSHong Zhang }
14203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1421c05c3958SHong Zhang }
1422c8342467SHong Zhang
MatSolve_SeqBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)1423d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering(Mat A, Vec bb, Vec xx)
1424d71ae5a4SJacob Faibussowitsch {
14251a83e813SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14261a83e813SShri Abhyankar const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
14271a83e813SShri Abhyankar PetscInt i, k, n = a->mbs;
14281a83e813SShri Abhyankar PetscInt nz, bs = A->rmap->bs, bs2 = a->bs2;
1429d9ca1df4SBarry Smith const MatScalar *aa = a->a, *v;
1430d9ca1df4SBarry Smith PetscScalar *x, *s, *t, *ls;
1431d9ca1df4SBarry Smith const PetscScalar *b;
14321a83e813SShri Abhyankar
14331a83e813SShri Abhyankar PetscFunctionBegin;
1434ce6b3536SJed Brown PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs);
14359566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b));
14369566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x));
14371a83e813SShri Abhyankar t = a->solve_work;
14381a83e813SShri Abhyankar
14391a83e813SShri Abhyankar /* forward solve the lower triangular */
14409566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b, bs)); /* copy 1st block of b to t */
14411a83e813SShri Abhyankar
14421a83e813SShri Abhyankar for (i = 1; i < n; i++) {
14431a83e813SShri Abhyankar v = aa + bs2 * ai[i];
14441a83e813SShri Abhyankar vi = aj + ai[i];
14451a83e813SShri Abhyankar nz = ai[i + 1] - ai[i];
14461a83e813SShri Abhyankar s = t + bs * i;
14479566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(s, b + bs * i, bs)); /* copy i_th block of b to t */
14481a83e813SShri Abhyankar for (k = 0; k < nz; k++) {
144996b95a6bSBarry Smith PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * vi[k]);
14501a83e813SShri Abhyankar v += bs2;
14511a83e813SShri Abhyankar }
14521a83e813SShri Abhyankar }
14531a83e813SShri Abhyankar
14541a83e813SShri Abhyankar /* backward solve the upper triangular */
14551a83e813SShri Abhyankar ls = a->solve_work + A->cmap->n;
14561a83e813SShri Abhyankar for (i = n - 1; i >= 0; i--) {
14571a83e813SShri Abhyankar v = aa + bs2 * (adiag[i + 1] + 1);
14581a83e813SShri Abhyankar vi = aj + adiag[i + 1] + 1;
14591a83e813SShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1;
14609566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ls, t + i * bs, bs));
14611a83e813SShri Abhyankar for (k = 0; k < nz; k++) {
146296b95a6bSBarry Smith PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * vi[k]);
14631a83e813SShri Abhyankar v += bs2;
14641a83e813SShri Abhyankar }
146596b95a6bSBarry Smith PetscKernel_w_gets_A_times_v(bs, ls, aa + bs2 * adiag[i], t + i * bs); /* *inv(diagonal[i]) */
14669566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x + i * bs, t + i * bs, bs));
14671a83e813SShri Abhyankar }
14681a83e813SShri Abhyankar
14699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b));
14709566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x));
14719566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n));
14723ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
14731a83e813SShri Abhyankar }
14741a83e813SShri Abhyankar
MatSolve_SeqBAIJ_N(Mat A,Vec bb,Vec xx)1475d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqBAIJ_N(Mat A, Vec bb, Vec xx)
1476d71ae5a4SJacob Faibussowitsch {
147735aa4fcfSShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
147835aa4fcfSShri Abhyankar IS iscol = a->col, isrow = a->row;
147935aa4fcfSShri Abhyankar const PetscInt *r, *c, *rout, *cout, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
148035aa4fcfSShri Abhyankar PetscInt i, m, n = a->mbs;
148135aa4fcfSShri Abhyankar PetscInt nz, bs = A->rmap->bs, bs2 = a->bs2;
1482d9ca1df4SBarry Smith const MatScalar *aa = a->a, *v;
1483d9ca1df4SBarry Smith PetscScalar *x, *s, *t, *ls;
1484d9ca1df4SBarry Smith const PetscScalar *b;
148535aa4fcfSShri Abhyankar
148635aa4fcfSShri Abhyankar PetscFunctionBegin;
1487ce6b3536SJed Brown PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs);
14889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(bb, &b));
14899566063dSJacob Faibussowitsch PetscCall(VecGetArray(xx, &x));
149035aa4fcfSShri Abhyankar t = a->solve_work;
149135aa4fcfSShri Abhyankar
14929371c9d4SSatish Balay PetscCall(ISGetIndices(isrow, &rout));
14939371c9d4SSatish Balay r = rout;
14949371c9d4SSatish Balay PetscCall(ISGetIndices(iscol, &cout));
14959371c9d4SSatish Balay c = cout;
149635aa4fcfSShri Abhyankar
149735aa4fcfSShri Abhyankar /* forward solve the lower triangular */
14989566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(t, b + bs * r[0], bs));
149935aa4fcfSShri Abhyankar for (i = 1; i < n; i++) {
150035aa4fcfSShri Abhyankar v = aa + bs2 * ai[i];
150135aa4fcfSShri Abhyankar vi = aj + ai[i];
150235aa4fcfSShri Abhyankar nz = ai[i + 1] - ai[i];
150335aa4fcfSShri Abhyankar s = t + bs * i;
15049566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(s, b + bs * r[i], bs));
150535aa4fcfSShri Abhyankar for (m = 0; m < nz; m++) {
150696b95a6bSBarry Smith PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * vi[m]);
150735aa4fcfSShri Abhyankar v += bs2;
150835aa4fcfSShri Abhyankar }
150935aa4fcfSShri Abhyankar }
151035aa4fcfSShri Abhyankar
151135aa4fcfSShri Abhyankar /* backward solve the upper triangular */
151235aa4fcfSShri Abhyankar ls = a->solve_work + A->cmap->n;
151335aa4fcfSShri Abhyankar for (i = n - 1; i >= 0; i--) {
151435aa4fcfSShri Abhyankar v = aa + bs2 * (adiag[i + 1] + 1);
151535aa4fcfSShri Abhyankar vi = aj + adiag[i + 1] + 1;
151635aa4fcfSShri Abhyankar nz = adiag[i] - adiag[i + 1] - 1;
15179566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(ls, t + i * bs, bs));
151835aa4fcfSShri Abhyankar for (m = 0; m < nz; m++) {
151996b95a6bSBarry Smith PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * vi[m]);
152035aa4fcfSShri Abhyankar v += bs2;
152135aa4fcfSShri Abhyankar }
152296b95a6bSBarry Smith PetscKernel_w_gets_A_times_v(bs, ls, v, t + i * bs); /* *inv(diagonal[i]) */
15239566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(x + bs * c[i], t + i * bs, bs));
152435aa4fcfSShri Abhyankar }
15259566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &rout));
15269566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(iscol, &cout));
15279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(bb, &b));
15289566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(xx, &x));
15299566063dSJacob Faibussowitsch PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n));
15303ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
153135aa4fcfSShri Abhyankar }
1532