xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision ae1ee55146a7ad071171b861759b85940c7e5c67)
1c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>
2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
3c6db04a5SJed Brown #include <petscbt.h>
4c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
53b2fbd54SBarry Smith 
MatFactorGetSolverType_petsc(Mat A,MatSolverType * type)6d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
7d71ae5a4SJacob Faibussowitsch {
84ac6704cSBarry Smith   PetscFunctionBegin;
94ac6704cSBarry Smith   *type = MATSOLVERPETSC;
103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
114ac6704cSBarry Smith }
124ac6704cSBarry Smith 
MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat * B)13d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
14d71ae5a4SJacob Faibussowitsch {
15d0f46423SBarry Smith   PetscInt n = A->rmap->n;
16b24902e0SBarry Smith 
17b24902e0SBarry Smith   PetscFunctionBegin;
18*fc2fb351SPierre Jolivet   if (PetscDefined(USE_COMPLEX) && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
1903e5aca4SStefano Zampini     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
2003e5aca4SStefano Zampini     *B = NULL;
2103e5aca4SStefano Zampini     PetscFunctionReturn(PETSC_SUCCESS);
2203e5aca4SStefano Zampini   }
2303e5aca4SStefano Zampini 
249566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
259566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, n, n, n, n));
26599ef60dSHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
279566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQAIJ));
282205254eSKarl Rupp 
2935233d99SShri Abhyankar     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
3035233d99SShri Abhyankar     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
312205254eSKarl Rupp 
329566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
339566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
349566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
359566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
36b24902e0SBarry Smith   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
379566063dSJacob Faibussowitsch     PetscCall(MatSetType(*B, MATSEQSBAIJ));
389566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
392205254eSKarl Rupp 
4035233d99SShri Abhyankar     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
4135233d99SShri Abhyankar     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
429566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
439566063dSJacob Faibussowitsch     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
44e32f2f54SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
45d5f3da31SBarry Smith   (*B)->factortype = ftype;
4600c67f3bSHong Zhang 
479566063dSJacob Faibussowitsch   PetscCall(PetscFree((*B)->solvertype));
489566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
49f73b0415SBarry Smith   (*B)->canuseordering = PETSC_TRUE;
509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
513ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
52b24902e0SBarry Smith }
53b24902e0SBarry Smith 
MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)54d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
55d71ae5a4SJacob Faibussowitsch {
56ce3d78c0SShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
57ce3d78c0SShri Abhyankar   IS                 isicol;
588758e1faSBarry Smith   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
598758e1faSBarry Smith   PetscInt           i, n = A->rmap->n;
608758e1faSBarry Smith   PetscInt          *bi, *bj;
61ce3d78c0SShri Abhyankar   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
62ce3d78c0SShri Abhyankar   PetscReal          f;
63ce3d78c0SShri Abhyankar   PetscInt           nlnk, *lnk, k, **bi_ptr;
640298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
65ce3d78c0SShri Abhyankar   PetscBT            lnkbt;
66421480d9SBarry Smith   PetscBool          diagDense;
67ce3d78c0SShri Abhyankar 
68ce3d78c0SShri Abhyankar   PetscFunctionBegin;
6908401ef6SPierre Jolivet   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
70421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
71421480d9SBarry Smith   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
72ece542f9SHong Zhang 
739566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
749566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
759566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
76ce3d78c0SShri Abhyankar 
771bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
789f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
80a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
819b48462bSShri Abhyankar 
829b48462bSShri Abhyankar   /* linked list for storing column indices of the active row */
839b48462bSShri Abhyankar   nlnk = n + 1;
849566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
859b48462bSShri Abhyankar 
869566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
879b48462bSShri Abhyankar 
889b48462bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
899b48462bSShri Abhyankar   f = info->fill;
90aa91b3e7SRichard Tran Mills   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
919566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
929b48462bSShri Abhyankar   current_space = free_space;
939b48462bSShri Abhyankar 
949b48462bSShri Abhyankar   for (i = 0; i < n; i++) {
959b48462bSShri Abhyankar     /* copy previous fill into linked list */
969b48462bSShri Abhyankar     nzi   = 0;
979b48462bSShri Abhyankar     nnz   = ai[r[i] + 1] - ai[r[i]];
989b48462bSShri Abhyankar     ajtmp = aj + ai[r[i]];
999566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
1009b48462bSShri Abhyankar     nzi += nlnk;
1019b48462bSShri Abhyankar 
1029b48462bSShri Abhyankar     /* add pivot rows into linked list */
1039b48462bSShri Abhyankar     row = lnk[n];
1049b48462bSShri Abhyankar     while (row < i) {
1059b48462bSShri Abhyankar       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
1069b48462bSShri Abhyankar       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
1079566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
1089b48462bSShri Abhyankar       nzi += nlnk;
1099b48462bSShri Abhyankar       row = lnk[row];
1109b48462bSShri Abhyankar     }
1119b48462bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
1129b48462bSShri Abhyankar     im[i]     = nzi;
1139b48462bSShri Abhyankar 
1149b48462bSShri Abhyankar     /* mark bdiag */
1159b48462bSShri Abhyankar     nzbd = 0;
1169b48462bSShri Abhyankar     nnz  = nzi;
1179b48462bSShri Abhyankar     k    = lnk[n];
1189b48462bSShri Abhyankar     while (nnz-- && k < i) {
1199b48462bSShri Abhyankar       nzbd++;
1209b48462bSShri Abhyankar       k = lnk[k];
1219b48462bSShri Abhyankar     }
1229b48462bSShri Abhyankar     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
1239b48462bSShri Abhyankar 
1249b48462bSShri Abhyankar     /* if free space is not available, make more free space */
1259b48462bSShri Abhyankar     if (current_space->local_remaining < nzi) {
1262f18eb33SBarry Smith       /* estimated additional space needed */
1271df4278eSBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
1289566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1299b48462bSShri Abhyankar       reallocs++;
1309b48462bSShri Abhyankar     }
1319b48462bSShri Abhyankar 
1329b48462bSShri Abhyankar     /* copy data into free space, then initialize lnk */
1339566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
1342205254eSKarl Rupp 
1359b48462bSShri Abhyankar     bi_ptr[i] = current_space->array;
1369b48462bSShri Abhyankar     current_space->array += nzi;
1379b48462bSShri Abhyankar     current_space->local_used += nzi;
1389b48462bSShri Abhyankar     current_space->local_remaining -= nzi;
1399b48462bSShri Abhyankar   }
1409b48462bSShri Abhyankar 
1419566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
1429566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
1439b48462bSShri Abhyankar 
1449263d837SHong Zhang   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
14584648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1469566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1479566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
1489566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
1499b48462bSShri Abhyankar 
1509b48462bSShri Abhyankar   /* put together the new matrix */
1519566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
15257508eceSPierre Jolivet   b          = (Mat_SeqAIJ *)B->data;
1539b48462bSShri Abhyankar   b->free_ij = PETSC_TRUE;
1549f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1559f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
1569b48462bSShri Abhyankar   b->j      = bj;
1579b48462bSShri Abhyankar   b->i      = bi;
1589b48462bSShri Abhyankar   b->diag   = bdiag;
159f4259b30SLisandro Dalcin   b->ilen   = NULL;
160f4259b30SLisandro Dalcin   b->imax   = NULL;
1619b48462bSShri Abhyankar   b->row    = isrow;
1629b48462bSShri Abhyankar   b->col    = iscol;
1639566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
1649566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1659b48462bSShri Abhyankar   b->icol = isicol;
16684648c2dSPierre Jolivet   PetscCall(PetscMalloc1(n, &b->solve_work));
1679b48462bSShri Abhyankar 
1689b48462bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
1699b48462bSShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
1702205254eSKarl Rupp 
171d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_LU;
172f284f12aSHong Zhang   B->info.factor_mallocs   = reallocs;
173f284f12aSHong Zhang   B->info.fill_ratio_given = f;
1749b48462bSShri Abhyankar 
1759b48462bSShri Abhyankar   if (ai[n]) {
176f284f12aSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1779b48462bSShri Abhyankar   } else {
178f284f12aSHong Zhang     B->info.fill_ratio_needed = 0.0;
1799b48462bSShri Abhyankar   }
1809263d837SHong Zhang #if defined(PETSC_USE_INFO)
1819263d837SHong Zhang   if (ai[n] != 0) {
1829263d837SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
1839566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1849566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
1859566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
1869566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
1879263d837SHong Zhang   } else {
1889566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
1899263d837SHong Zhang   }
1909263d837SHong Zhang #endif
19135233d99SShri Abhyankar   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1924d12350bSJunchao Zhang   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1939566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
1943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1959b48462bSShri Abhyankar }
1969b48462bSShri Abhyankar 
1975b5bc046SBarry Smith /*
1985b5bc046SBarry Smith     Trouble in factorization, should we dump the original matrix?
1995b5bc046SBarry Smith */
MatFactorDumpMatrix(Mat A)200d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorDumpMatrix(Mat A)
201d71ae5a4SJacob Faibussowitsch {
202ace3abfcSBarry Smith   PetscBool flg = PETSC_FALSE;
2035b5bc046SBarry Smith 
2045b5bc046SBarry Smith   PetscFunctionBegin;
2059566063dSJacob Faibussowitsch   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
2065b5bc046SBarry Smith   if (flg) {
2075b5bc046SBarry Smith     PetscViewer viewer;
2085b5bc046SBarry Smith     char        filename[PETSC_MAX_PATH_LEN];
2095b5bc046SBarry Smith 
2109566063dSJacob Faibussowitsch     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
2119566063dSJacob Faibussowitsch     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
2129566063dSJacob Faibussowitsch     PetscCall(MatView(A, viewer));
2139566063dSJacob Faibussowitsch     PetscCall(PetscViewerDestroy(&viewer));
2145b5bc046SBarry Smith   }
2153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2165b5bc046SBarry Smith }
2175b5bc046SBarry Smith 
MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo * info)218d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
219d71ae5a4SJacob Faibussowitsch {
220c9c72f8fSHong Zhang   Mat              C = B;
221c9c72f8fSHong Zhang   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
222c9c72f8fSHong Zhang   IS               isrow = b->row, isicol = b->icol;
223c9c72f8fSHong Zhang   const PetscInt  *r, *ic, *ics;
224bbd65245SShri Abhyankar   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
225bbd65245SShri Abhyankar   PetscInt         i, j, k, nz, nzL, row, *pj;
226bbd65245SShri Abhyankar   const PetscInt  *ajtmp, *bjtmp;
227bbd65245SShri Abhyankar   MatScalar       *rtmp, *pc, multiplier, *pv;
228b65878eeSJunchao Zhang   const MatScalar *aa, *v;
229b65878eeSJunchao Zhang   MatScalar       *ba;
230ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
231d90ac83dSHong Zhang   FactorShiftCtx   sctx;
2324f81c4b7SBarry Smith   const PetscInt  *ddiag;
233d4ad06f7SHong Zhang   PetscReal        rs;
234d4ad06f7SHong Zhang   MatScalar        d;
235d4ad06f7SHong Zhang 
236c9c72f8fSHong Zhang   PetscFunctionBegin;
237b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
238b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
239d6e5152cSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
2409566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
241d4ad06f7SHong Zhang 
242f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
243421480d9SBarry Smith     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
244d4ad06f7SHong Zhang     sctx.shift_top = info->zeropivot;
245d4ad06f7SHong Zhang     for (i = 0; i < n; i++) {
246d4ad06f7SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
247421480d9SBarry Smith       d  = aa[ddiag[i]];
248d4ad06f7SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
249d4ad06f7SHong Zhang       v  = aa + ai[i];
250d4ad06f7SHong Zhang       nz = ai[i + 1] - ai[i];
2512205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
252d4ad06f7SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
253d4ad06f7SHong Zhang     }
254d4ad06f7SHong Zhang     sctx.shift_top *= 1.1;
255d4ad06f7SHong Zhang     sctx.nshift_max = 5;
256d4ad06f7SHong Zhang     sctx.shift_lo   = 0.;
257d4ad06f7SHong Zhang     sctx.shift_hi   = 1.;
258d4ad06f7SHong Zhang   }
259d4ad06f7SHong Zhang 
2609566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
2619566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
2629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
263c9c72f8fSHong Zhang   ics = ic;
264c9c72f8fSHong Zhang 
265d4ad06f7SHong Zhang   do {
26607b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
267c9c72f8fSHong Zhang     for (i = 0; i < n; i++) {
268c9c72f8fSHong Zhang       /* zero rtmp */
269c9c72f8fSHong Zhang       /* L part */
270c9c72f8fSHong Zhang       nz    = bi[i + 1] - bi[i];
271c9c72f8fSHong Zhang       bjtmp = bj + bi[i];
272c9c72f8fSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
273c9c72f8fSHong Zhang 
274c9c72f8fSHong Zhang       /* U part */
27562fbd6afSShri Abhyankar       nz    = bdiag[i] - bdiag[i + 1];
27662fbd6afSShri Abhyankar       bjtmp = bj + bdiag[i + 1] + 1;
277813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
278813a1f2bSShri Abhyankar 
279813a1f2bSShri Abhyankar       /* load in initial (unfactored row) */
280813a1f2bSShri Abhyankar       nz    = ai[r[i] + 1] - ai[r[i]];
281813a1f2bSShri Abhyankar       ajtmp = aj + ai[r[i]];
282813a1f2bSShri Abhyankar       v     = aa + ai[r[i]];
283ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
284813a1f2bSShri Abhyankar       /* ZeropivotApply() */
28598a93161SHong Zhang       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
286813a1f2bSShri Abhyankar 
287813a1f2bSShri Abhyankar       /* elimination */
288813a1f2bSShri Abhyankar       bjtmp = bj + bi[i];
289813a1f2bSShri Abhyankar       row   = *bjtmp++;
290813a1f2bSShri Abhyankar       nzL   = bi[i + 1] - bi[i];
291f268cba8SShri Abhyankar       for (k = 0; k < nzL; k++) {
292813a1f2bSShri Abhyankar         pc = rtmp + row;
293813a1f2bSShri Abhyankar         if (*pc != 0.0) {
294b65878eeSJunchao Zhang           pv         = ba + bdiag[row];
295813a1f2bSShri Abhyankar           multiplier = *pc * (*pv);
296813a1f2bSShri Abhyankar           *pc        = multiplier;
2972205254eSKarl Rupp 
29862fbd6afSShri Abhyankar           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
299b65878eeSJunchao Zhang           pv = ba + bdiag[row + 1] + 1;
30062fbd6afSShri Abhyankar           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
3012205254eSKarl Rupp 
302813a1f2bSShri Abhyankar           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
3039566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
304813a1f2bSShri Abhyankar         }
305f268cba8SShri Abhyankar         row = *bjtmp++;
306813a1f2bSShri Abhyankar       }
307813a1f2bSShri Abhyankar 
308813a1f2bSShri Abhyankar       /* finished row so stick it into b->a */
309813a1f2bSShri Abhyankar       rs = 0.0;
310813a1f2bSShri Abhyankar       /* L part */
311b65878eeSJunchao Zhang       pv = ba + bi[i];
312813a1f2bSShri Abhyankar       pj = b->j + bi[i];
313813a1f2bSShri Abhyankar       nz = bi[i + 1] - bi[i];
314813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
3159371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
3169371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
317813a1f2bSShri Abhyankar       }
318813a1f2bSShri Abhyankar 
319813a1f2bSShri Abhyankar       /* U part */
320b65878eeSJunchao Zhang       pv = ba + bdiag[i + 1] + 1;
32162fbd6afSShri Abhyankar       pj = b->j + bdiag[i + 1] + 1;
32262fbd6afSShri Abhyankar       nz = bdiag[i] - bdiag[i + 1] - 1;
323813a1f2bSShri Abhyankar       for (j = 0; j < nz; j++) {
3249371c9d4SSatish Balay         pv[j] = rtmp[pj[j]];
3259371c9d4SSatish Balay         rs += PetscAbsScalar(pv[j]);
326813a1f2bSShri Abhyankar       }
327813a1f2bSShri Abhyankar 
328813a1f2bSShri Abhyankar       sctx.rs = rs;
329813a1f2bSShri Abhyankar       sctx.pv = rtmp[i];
3309566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
33107b50cabSHong Zhang       if (sctx.newshift) break; /* break for-loop */
33207b50cabSHong Zhang       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
333813a1f2bSShri Abhyankar 
334a5b23f4aSJose E. Roman       /* Mark diagonal and invert diagonal for simpler triangular solves */
335b65878eeSJunchao Zhang       pv  = ba + bdiag[i];
336813a1f2bSShri Abhyankar       *pv = 1.0 / rtmp[i];
337813a1f2bSShri Abhyankar 
338813a1f2bSShri Abhyankar     } /* endof for (i=0; i<n; i++) { */
339813a1f2bSShri Abhyankar 
3408ff23777SHong Zhang     /* MatPivotRefine() */
34107b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
342813a1f2bSShri Abhyankar       /*
343813a1f2bSShri Abhyankar        * if no shift in this attempt & shifting & started shifting & can refine,
344813a1f2bSShri Abhyankar        * then try lower shift
345813a1f2bSShri Abhyankar        */
346813a1f2bSShri Abhyankar       sctx.shift_hi       = sctx.shift_fraction;
347813a1f2bSShri Abhyankar       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
348813a1f2bSShri Abhyankar       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
34907b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
350813a1f2bSShri Abhyankar       sctx.nshift++;
351813a1f2bSShri Abhyankar     }
35207b50cabSHong Zhang   } while (sctx.newshift);
353813a1f2bSShri Abhyankar 
354b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
355b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
356b65878eeSJunchao Zhang 
3579566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
3589566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
3599566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
360d3ac4fa3SBarry Smith 
3619566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
3629566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
3634d12350bSJunchao Zhang   if (b->inode.size_csr) {
364abb87a52SBarry Smith     C->ops->solve = MatSolve_SeqAIJ_Inode;
365abb87a52SBarry Smith   } else if (row_identity && col_identity) {
36635233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
367813a1f2bSShri Abhyankar   } else {
36835233d99SShri Abhyankar     C->ops->solve = MatSolve_SeqAIJ;
369813a1f2bSShri Abhyankar   }
37035233d99SShri Abhyankar   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
37135233d99SShri Abhyankar   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
37235233d99SShri Abhyankar   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
37335233d99SShri Abhyankar   C->ops->matsolve          = MatMatSolve_SeqAIJ;
374a3d9026eSPierre Jolivet   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
375813a1f2bSShri Abhyankar   C->assembled              = PETSC_TRUE;
376813a1f2bSShri Abhyankar   C->preallocated           = PETSC_TRUE;
3772205254eSKarl Rupp 
3789566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
379813a1f2bSShri Abhyankar 
38014d2772eSHong Zhang   /* MatShiftView(A,info,&sctx) */
381813a1f2bSShri Abhyankar   if (sctx.nshift) {
382f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
3839566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
384f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
3859566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
386f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
3879566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
388813a1f2bSShri Abhyankar     }
389813a1f2bSShri Abhyankar   }
3903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
391813a1f2bSShri Abhyankar }
392813a1f2bSShri Abhyankar 
393ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
394ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
395ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
396ff6a9541SJacob Faibussowitsch 
MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo * info)397d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
398d71ae5a4SJacob Faibussowitsch {
399719d5645SBarry Smith   Mat              C = B;
400416022c9SBarry Smith   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
40182bf6240SBarry Smith   IS               isrow = b->row, isicol = b->icol;
4025d0c19d7SBarry Smith   const PetscInt  *r, *ic, *ics;
403d278edc7SBarry Smith   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
404d278edc7SBarry Smith   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
405421480d9SBarry Smith   const PetscInt  *ajtmp, *bjtmp, *ddiag, *pj;
406d278edc7SBarry Smith   MatScalar       *pv, *rtmp, *pc, multiplier, d;
407b65878eeSJunchao Zhang   const MatScalar *v, *aa;
408b65878eeSJunchao Zhang   MatScalar       *ba;
40933f9893dSHong Zhang   PetscReal        rs = 0.0;
410d90ac83dSHong Zhang   FactorShiftCtx   sctx;
411ace3abfcSBarry Smith   PetscBool        row_identity, col_identity;
412289bc588SBarry Smith 
4133a40ed3dSBarry Smith   PetscFunctionBegin;
414421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
415421480d9SBarry Smith 
416b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
417b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
418504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
4199566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
420ada7143aSSatish Balay 
421f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
422421480d9SBarry Smith     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
4239f7d0b68SHong Zhang     sctx.shift_top = info->zeropivot;
4246cc28720Svictorle     for (i = 0; i < n; i++) {
4259f95998fSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
426421480d9SBarry Smith       d  = aa[ddiag[i]];
4271a890ab1SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
4289f7d0b68SHong Zhang       v  = aa + ai[i];
4299f7d0b68SHong Zhang       nz = ai[i + 1] - ai[i];
4302205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
4311a890ab1SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
4326cc28720Svictorle     }
433118f5789SBarry Smith     sctx.shift_top *= 1.1;
434d2276718SHong Zhang     sctx.nshift_max = 5;
435d2276718SHong Zhang     sctx.shift_lo   = 0.;
436d2276718SHong Zhang     sctx.shift_hi   = 1.;
437a0a356efSHong Zhang   }
438a0a356efSHong Zhang 
4399566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
4409566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
4419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
442504a3fadSHong Zhang   ics = ic;
443504a3fadSHong Zhang 
444085256b3SBarry Smith   do {
44507b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
446289bc588SBarry Smith     for (i = 0; i < n; i++) {
447b3bf805bSHong Zhang       nz    = bi[i + 1] - bi[i];
448b3bf805bSHong Zhang       bjtmp = bj + bi[i];
4499f7d0b68SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
450289bc588SBarry Smith 
451289bc588SBarry Smith       /* load in initial (unfactored row) */
4529f7d0b68SHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
4539f7d0b68SHong Zhang       ajtmp = aj + ai[r[i]];
4549f7d0b68SHong Zhang       v     = aa + ai[r[i]];
455ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
456d2276718SHong Zhang       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
457289bc588SBarry Smith 
458b3bf805bSHong Zhang       row = *bjtmp++;
459f2582111SSatish Balay       while (row < i) {
4608c37ef55SBarry Smith         pc = rtmp + row;
4618c37ef55SBarry Smith         if (*pc != 0.0) {
462421480d9SBarry Smith           pv         = ba + ddiag[row];
463421480d9SBarry Smith           pj         = b->j + ddiag[row] + 1;
46435aab85fSBarry Smith           multiplier = *pc / *pv++;
4658c37ef55SBarry Smith           *pc        = multiplier;
466421480d9SBarry Smith           nz         = bi[row + 1] - ddiag[row] - 1;
4679f7d0b68SHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
4689566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
469289bc588SBarry Smith         }
470b3bf805bSHong Zhang         row = *bjtmp++;
471289bc588SBarry Smith       }
472416022c9SBarry Smith       /* finished row so stick it into b->a */
473b65878eeSJunchao Zhang       pv   = ba + bi[i];
474b3bf805bSHong Zhang       pj   = b->j + bi[i];
475b3bf805bSHong Zhang       nz   = bi[i + 1] - bi[i];
476421480d9SBarry Smith       diag = ddiag[i] - bi[i];
477e57ff13aSBarry Smith       rs   = 0.0;
478d3d32019SBarry Smith       for (j = 0; j < nz; j++) {
4799f7d0b68SHong Zhang         pv[j] = rtmp[pj[j]];
4809f7d0b68SHong Zhang         rs += PetscAbsScalar(pv[j]);
481d3d32019SBarry Smith       }
482e57ff13aSBarry Smith       rs -= PetscAbsScalar(pv[diag]);
483d2276718SHong Zhang 
484d2276718SHong Zhang       sctx.rs = rs;
485d2276718SHong Zhang       sctx.pv = pv[diag];
4869566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
48707b50cabSHong Zhang       if (sctx.newshift) break;
488504a3fadSHong Zhang       pv[diag] = sctx.pv;
48935aab85fSBarry Smith     }
490d2276718SHong Zhang 
49107b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
4926cc28720Svictorle       /*
4936c037d1bSvictorle        * if no shift in this attempt & shifting & started shifting & can refine,
4946cc28720Svictorle        * then try lower shift
4956cc28720Svictorle        */
4960481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
4970481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
4980481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
49907b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
500d2276718SHong Zhang       sctx.nshift++;
5016cc28720Svictorle     }
50207b50cabSHong Zhang   } while (sctx.newshift);
5030f11f9aeSSatish Balay 
504a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
505421480d9SBarry Smith   for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]];
5069566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
5079566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
5089566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
509b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
510b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
511d3ac4fa3SBarry Smith 
5129566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
5139566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isicol, &col_identity));
5148d8c7c43SBarry Smith   if (row_identity && col_identity) {
515ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
5168d8c7c43SBarry Smith   } else {
517ad04f41aSHong Zhang     C->ops->solve = MatSolve_SeqAIJ_inplace;
518db4efbfdSBarry Smith   }
519ad04f41aSHong Zhang   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
520ad04f41aSHong Zhang   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
521ad04f41aSHong Zhang   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
522ad04f41aSHong Zhang   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
523a3d9026eSPierre Jolivet   C->ops->matsolvetranspose = NULL;
5242205254eSKarl Rupp 
525c456f294SBarry Smith   C->assembled    = PETSC_TRUE;
5265c9eb25fSBarry Smith   C->preallocated = PETSC_TRUE;
5272205254eSKarl Rupp 
5289566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->cmap->n));
529d2276718SHong Zhang   if (sctx.nshift) {
530f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
5319566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
532f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
5339566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
5346cc28720Svictorle     }
5350697b6caSHong Zhang   }
53657508eceSPierre Jolivet   C->ops->solve          = MatSolve_SeqAIJ_inplace;
53757508eceSPierre Jolivet   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
5382205254eSKarl Rupp 
5399566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode(C));
5403ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
541289bc588SBarry Smith }
5420889b5dcSvictorle 
543ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
544ff6a9541SJacob Faibussowitsch 
545137fb511SHong Zhang /*
546137fb511SHong Zhang    This routine implements inplace ILU(0) with row or/and column permutations.
547137fb511SHong Zhang    Input:
548137fb511SHong Zhang      A - original matrix
549137fb511SHong Zhang    Output;
550137fb511SHong Zhang      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
551137fb511SHong Zhang          a->j (col index) is permuted by the inverse of colperm, then sorted
552137fb511SHong Zhang          a->a reordered accordingly with a->j
553137fb511SHong Zhang          a->diag (ptr to diagonal elements) is updated.
554137fb511SHong Zhang */
MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo * info)555d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
556d71ae5a4SJacob Faibussowitsch {
557b051339dSHong Zhang   Mat_SeqAIJ     *a     = (Mat_SeqAIJ *)A->data;
558b051339dSHong Zhang   IS              isrow = a->row, isicol = a->icol;
5595d0c19d7SBarry Smith   const PetscInt *r, *ic, *ics;
5605d0c19d7SBarry Smith   PetscInt        i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
5615d0c19d7SBarry Smith   PetscInt       *ajtmp, nz, row;
562421480d9SBarry Smith   PetscInt        nbdiag, *pj;
563a77337e4SBarry Smith   PetscScalar    *rtmp, *pc, multiplier, d;
564504a3fadSHong Zhang   MatScalar      *pv, *v;
565137fb511SHong Zhang   PetscReal       rs;
566d90ac83dSHong Zhang   FactorShiftCtx  sctx;
567b65878eeSJunchao Zhang   MatScalar      *aa, *vtmp;
568421480d9SBarry Smith   PetscInt       *diag;
569137fb511SHong Zhang 
570137fb511SHong Zhang   PetscFunctionBegin;
57108401ef6SPierre Jolivet   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
572504a3fadSHong Zhang 
573421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL));
574b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArray(A, &aa));
575504a3fadSHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
5769566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
577504a3fadSHong Zhang 
578504a3fadSHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
579421480d9SBarry Smith     const PetscInt *ddiag;
580421480d9SBarry Smith 
581421480d9SBarry Smith     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
582504a3fadSHong Zhang     sctx.shift_top = info->zeropivot;
583504a3fadSHong Zhang     for (i = 0; i < n; i++) {
584504a3fadSHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
585421480d9SBarry Smith       d    = aa[ddiag[i]];
586504a3fadSHong Zhang       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
587504a3fadSHong Zhang       vtmp = aa + ai[i];
588504a3fadSHong Zhang       nz   = ai[i + 1] - ai[i];
5892205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
590504a3fadSHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
591504a3fadSHong Zhang     }
592504a3fadSHong Zhang     sctx.shift_top *= 1.1;
593504a3fadSHong Zhang     sctx.nshift_max = 5;
594504a3fadSHong Zhang     sctx.shift_lo   = 0.;
595504a3fadSHong Zhang     sctx.shift_hi   = 1.;
596504a3fadSHong Zhang   }
597504a3fadSHong Zhang 
5989566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
5999566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
6009566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &rtmp));
6019566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(rtmp, n + 1));
602b051339dSHong Zhang   ics = ic;
603137fb511SHong Zhang 
604504a3fadSHong Zhang #if defined(MV)
60575567043SBarry Smith   sctx.shift_top      = 0.;
606e60cf9a0SBarry Smith   sctx.nshift_max     = 0;
60775567043SBarry Smith   sctx.shift_lo       = 0.;
60875567043SBarry Smith   sctx.shift_hi       = 0.;
60975567043SBarry Smith   sctx.shift_fraction = 0.;
610137fb511SHong Zhang 
611f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
61275567043SBarry Smith     sctx.shift_top = 0.;
613137fb511SHong Zhang     for (i = 0; i < n; i++) {
614137fb511SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
615b65878eeSJunchao Zhang       d  = aa[diag[i]];
616137fb511SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
617b65878eeSJunchao Zhang       v  = aa + ai[i];
618b051339dSHong Zhang       nz = ai[i + 1] - ai[i];
6192205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
620137fb511SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
621137fb511SHong Zhang     }
622137fb511SHong Zhang     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
623137fb511SHong Zhang     sctx.shift_top *= 1.1;
624137fb511SHong Zhang     sctx.nshift_max = 5;
625137fb511SHong Zhang     sctx.shift_lo   = 0.;
626137fb511SHong Zhang     sctx.shift_hi   = 1.;
627137fb511SHong Zhang   }
628137fb511SHong Zhang 
62975567043SBarry Smith   sctx.shift_amount = 0.;
630137fb511SHong Zhang   sctx.nshift       = 0;
631504a3fadSHong Zhang #endif
632504a3fadSHong Zhang 
633137fb511SHong Zhang   do {
63407b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
635137fb511SHong Zhang     for (i = 0; i < n; i++) {
636b051339dSHong Zhang       /* load in initial unfactored row */
637b051339dSHong Zhang       nz    = ai[r[i] + 1] - ai[r[i]];
638b051339dSHong Zhang       ajtmp = aj + ai[r[i]];
639b65878eeSJunchao Zhang       v     = aa + ai[r[i]];
640137fb511SHong Zhang       /* sort permuted ajtmp and values v accordingly */
641b051339dSHong Zhang       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
6429566063dSJacob Faibussowitsch       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
643137fb511SHong Zhang 
644b051339dSHong Zhang       diag[r[i]] = ai[r[i]];
645137fb511SHong Zhang       for (j = 0; j < nz; j++) {
646137fb511SHong Zhang         rtmp[ajtmp[j]] = v[j];
647b051339dSHong Zhang         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
648137fb511SHong Zhang       }
649137fb511SHong Zhang       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
650137fb511SHong Zhang 
651b051339dSHong Zhang       row = *ajtmp++;
652137fb511SHong Zhang       while (row < i) {
653137fb511SHong Zhang         pc = rtmp + row;
654137fb511SHong Zhang         if (*pc != 0.0) {
655b65878eeSJunchao Zhang           pv = aa + diag[r[row]];
656b051339dSHong Zhang           pj = aj + diag[r[row]] + 1;
657137fb511SHong Zhang 
658137fb511SHong Zhang           multiplier = *pc / *pv++;
659137fb511SHong Zhang           *pc        = multiplier;
660b051339dSHong Zhang           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
661b051339dSHong Zhang           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
6629566063dSJacob Faibussowitsch           PetscCall(PetscLogFlops(1 + 2.0 * nz));
663137fb511SHong Zhang         }
664b051339dSHong Zhang         row = *ajtmp++;
665137fb511SHong Zhang       }
666b65878eeSJunchao Zhang       /* finished row so overwrite it onto aa */
667b65878eeSJunchao Zhang       pv     = aa + ai[r[i]];
668b051339dSHong Zhang       pj     = aj + ai[r[i]];
669b051339dSHong Zhang       nz     = ai[r[i] + 1] - ai[r[i]];
670b051339dSHong Zhang       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
671137fb511SHong Zhang 
672137fb511SHong Zhang       rs = 0.0;
673137fb511SHong Zhang       for (j = 0; j < nz; j++) {
674b051339dSHong Zhang         pv[j] = rtmp[pj[j]];
675b051339dSHong Zhang         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
676137fb511SHong Zhang       }
677137fb511SHong Zhang 
678137fb511SHong Zhang       sctx.rs = rs;
679b051339dSHong Zhang       sctx.pv = pv[nbdiag];
6809566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
68107b50cabSHong Zhang       if (sctx.newshift) break;
682504a3fadSHong Zhang       pv[nbdiag] = sctx.pv;
683137fb511SHong Zhang     }
684137fb511SHong Zhang 
68507b50cabSHong Zhang     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
686137fb511SHong Zhang       /*
687137fb511SHong Zhang        * if no shift in this attempt & shifting & started shifting & can refine,
688137fb511SHong Zhang        * then try lower shift
689137fb511SHong Zhang        */
6900481f469SBarry Smith       sctx.shift_hi       = sctx.shift_fraction;
6910481f469SBarry Smith       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
6920481f469SBarry Smith       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
69307b50cabSHong Zhang       sctx.newshift       = PETSC_TRUE;
694137fb511SHong Zhang       sctx.nshift++;
695137fb511SHong Zhang     }
69607b50cabSHong Zhang   } while (sctx.newshift);
697137fb511SHong Zhang 
698a5b23f4aSJose E. Roman   /* invert diagonal entries for simpler triangular solves */
699b65878eeSJunchao Zhang   for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
700137fb511SHong Zhang 
701b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArray(A, &aa));
7029566063dSJacob Faibussowitsch   PetscCall(PetscFree(rtmp));
7039566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
7049566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
7052205254eSKarl Rupp 
706b051339dSHong Zhang   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
707ad04f41aSHong Zhang   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
708ad04f41aSHong Zhang   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
709ad04f41aSHong Zhang   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
7102205254eSKarl Rupp 
711b051339dSHong Zhang   A->assembled    = PETSC_TRUE;
7125c9eb25fSBarry Smith   A->preallocated = PETSC_TRUE;
7132205254eSKarl Rupp 
7149566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(A->cmap->n));
715137fb511SHong Zhang   if (sctx.nshift) {
716f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
7179566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
718f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
7199566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
720137fb511SHong Zhang     }
721137fb511SHong Zhang   }
7223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
723137fb511SHong Zhang }
724137fb511SHong Zhang 
MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo * info)725d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
726d71ae5a4SJacob Faibussowitsch {
727416022c9SBarry Smith   Mat C;
728416022c9SBarry Smith 
7293a40ed3dSBarry Smith   PetscFunctionBegin;
7309566063dSJacob Faibussowitsch   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
7319566063dSJacob Faibussowitsch   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
7329566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(C, A, info));
7332205254eSKarl Rupp 
734db4efbfdSBarry Smith   A->ops->solve          = C->ops->solve;
735db4efbfdSBarry Smith   A->ops->solvetranspose = C->ops->solvetranspose;
7362205254eSKarl Rupp 
7379566063dSJacob Faibussowitsch   PetscCall(MatHeaderMerge(A, &C));
7383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
739da3a660dSBarry Smith }
7401d098868SHong Zhang 
MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)741d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
742d71ae5a4SJacob Faibussowitsch {
743416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
744416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
7455d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
7465d0c19d7SBarry Smith   PetscInt           nz;
7475d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
748dd6ea824SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
749d9fead3dSBarry Smith   const PetscScalar *b;
750b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
751421480d9SBarry Smith   const PetscInt    *adiag;
7528c37ef55SBarry Smith 
7533a40ed3dSBarry Smith   PetscFunctionBegin;
7543ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
75591bf9adeSBarry Smith 
756421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
757b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
7589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
7599566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
760416022c9SBarry Smith   tmp = a->solve_work;
7618c37ef55SBarry Smith 
7629371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
7639371c9d4SSatish Balay   r = rout;
7649371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
7659371c9d4SSatish Balay   c = cout + (n - 1);
7668c37ef55SBarry Smith 
7678c37ef55SBarry Smith   /* forward solve the lower triangular */
7688c37ef55SBarry Smith   tmp[0] = b[*r++];
769010a20caSHong Zhang   tmps   = tmp;
7708c37ef55SBarry Smith   for (i = 1; i < n; i++) {
771010a20caSHong Zhang     v   = aa + ai[i];
772010a20caSHong Zhang     vi  = aj + ai[i];
773421480d9SBarry Smith     nz  = adiag[i] - ai[i];
77453e7425aSBarry Smith     sum = b[*r++];
775003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
7768c37ef55SBarry Smith     tmp[i] = sum;
7778c37ef55SBarry Smith   }
7788c37ef55SBarry Smith 
7798c37ef55SBarry Smith   /* backward solve the upper triangular */
7808c37ef55SBarry Smith   for (i = n - 1; i >= 0; i--) {
781421480d9SBarry Smith     v   = aa + adiag[i] + 1;
782421480d9SBarry Smith     vi  = aj + adiag[i] + 1;
783421480d9SBarry Smith     nz  = ai[i + 1] - adiag[i] - 1;
7848c37ef55SBarry Smith     sum = tmp[i];
785003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
786421480d9SBarry Smith     x[*c--] = tmp[i] = sum * aa[adiag[i]];
7878c37ef55SBarry Smith   }
788b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
7899566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
7909566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
7919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
7929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
7939566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
7943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7958c37ef55SBarry Smith }
796026e39d0SSatish Balay 
MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)797ff6a9541SJacob Faibussowitsch static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
798d71ae5a4SJacob Faibussowitsch {
7992bebee5dSHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
8002bebee5dSHong Zhang   IS                 iscol = a->col, isrow = a->row;
8015d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
802910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
8035d0c19d7SBarry Smith   const PetscInt    *rout, *cout, *r, *c;
804910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
805b65878eeSJunchao Zhang   const PetscScalar *b, *aa, *v;
806910cf402Sprj-   PetscBool          isdense;
807421480d9SBarry Smith   const PetscInt    *adiag;
8082bebee5dSHong Zhang 
8092bebee5dSHong Zhang   PetscFunctionBegin;
8103ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
8119566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
81228b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
813f9fb9879SHong Zhang   if (X != B) {
8149566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
81528b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
816f9fb9879SHong Zhang   }
817421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
818b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
8199566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
8209566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
8219566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
8229566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
8239371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
8249371c9d4SSatish Balay   r = rout;
8259371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
8269371c9d4SSatish Balay   c = cout;
827a36b22ccSSatish Balay   for (neq = 0; neq < B->cmap->n; neq++) {
8282bebee5dSHong Zhang     /* forward solve the lower triangular */
8292bebee5dSHong Zhang     tmp[0] = b[r[0]];
8302bebee5dSHong Zhang     tmps   = tmp;
8312bebee5dSHong Zhang     for (i = 1; i < n; i++) {
8322bebee5dSHong Zhang       v   = aa + ai[i];
8332bebee5dSHong Zhang       vi  = aj + ai[i];
834421480d9SBarry Smith       nz  = adiag[i] - ai[i];
8352bebee5dSHong Zhang       sum = b[r[i]];
836003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
8372bebee5dSHong Zhang       tmp[i] = sum;
8382bebee5dSHong Zhang     }
8392bebee5dSHong Zhang     /* backward solve the upper triangular */
8402bebee5dSHong Zhang     for (i = n - 1; i >= 0; i--) {
841421480d9SBarry Smith       v   = aa + adiag[i] + 1;
842421480d9SBarry Smith       vi  = aj + adiag[i] + 1;
843421480d9SBarry Smith       nz  = ai[i + 1] - adiag[i] - 1;
8442bebee5dSHong Zhang       sum = tmp[i];
845003131ecSBarry Smith       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
846421480d9SBarry Smith       x[c[i]] = tmp[i] = sum * aa[adiag[i]];
8472bebee5dSHong Zhang     }
848910cf402Sprj-     b += ldb;
849910cf402Sprj-     x += ldx;
8502bebee5dSHong Zhang   }
851b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
8529566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
8539566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
8549566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
8559566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
8569566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
8573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
8582bebee5dSHong Zhang }
8592bebee5dSHong Zhang 
MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)860d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
861d71ae5a4SJacob Faibussowitsch {
8629bd0847aSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
8639bd0847aSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
864421480d9SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
865421480d9SBarry Smith   const PetscInt    *adiag;
866910cf402Sprj-   PetscInt           nz, neq, ldb, ldx;
8679bd0847aSShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
868910cf402Sprj-   PetscScalar       *x, *tmp = a->solve_work, sum;
869b65878eeSJunchao Zhang   const PetscScalar *b, *aa, *v;
870910cf402Sprj-   PetscBool          isdense;
8719bd0847aSShri Abhyankar 
8729bd0847aSShri Abhyankar   PetscFunctionBegin;
8733ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
8749566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
87528b400f6SJacob Faibussowitsch   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
876f9fb9879SHong Zhang   if (X != B) {
8779566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
87828b400f6SJacob Faibussowitsch     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
879f9fb9879SHong Zhang   }
880421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
881b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
8829566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArrayRead(B, &b));
8839566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(B, &ldb));
8849566063dSJacob Faibussowitsch   PetscCall(MatDenseGetArray(X, &x));
8859566063dSJacob Faibussowitsch   PetscCall(MatDenseGetLDA(X, &ldx));
8869371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
8879371c9d4SSatish Balay   r = rout;
8889371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
8899371c9d4SSatish Balay   c = cout;
8909bd0847aSShri Abhyankar   for (neq = 0; neq < B->cmap->n; neq++) {
8919bd0847aSShri Abhyankar     /* forward solve the lower triangular */
8929bd0847aSShri Abhyankar     tmp[0] = b[r[0]];
8939bd0847aSShri Abhyankar     v      = aa;
8949bd0847aSShri Abhyankar     vi     = aj;
8959bd0847aSShri Abhyankar     for (i = 1; i < n; i++) {
8969bd0847aSShri Abhyankar       nz  = ai[i + 1] - ai[i];
8979bd0847aSShri Abhyankar       sum = b[r[i]];
8989bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
8999bd0847aSShri Abhyankar       tmp[i] = sum;
9009371c9d4SSatish Balay       v += nz;
9019371c9d4SSatish Balay       vi += nz;
9029bd0847aSShri Abhyankar     }
9039bd0847aSShri Abhyankar     /* backward solve the upper triangular */
9049bd0847aSShri Abhyankar     for (i = n - 1; i >= 0; i--) {
9059bd0847aSShri Abhyankar       v   = aa + adiag[i + 1] + 1;
9069bd0847aSShri Abhyankar       vi  = aj + adiag[i + 1] + 1;
9079bd0847aSShri Abhyankar       nz  = adiag[i] - adiag[i + 1] - 1;
9089bd0847aSShri Abhyankar       sum = tmp[i];
9099bd0847aSShri Abhyankar       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
9109bd0847aSShri Abhyankar       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
9119bd0847aSShri Abhyankar     }
912910cf402Sprj-     b += ldb;
913910cf402Sprj-     x += ldx;
9149bd0847aSShri Abhyankar   }
915b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
9169566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
9179566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
9189566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArrayRead(B, &b));
9199566063dSJacob Faibussowitsch   PetscCall(MatDenseRestoreArray(X, &x));
9209566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
9213ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9229bd0847aSShri Abhyankar }
9239bd0847aSShri Abhyankar 
MatMatSolveTranspose_SeqAIJ(Mat A,Mat B,Mat X)924a3d9026eSPierre Jolivet PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
925a3d9026eSPierre Jolivet {
926a3d9026eSPierre Jolivet   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
927a3d9026eSPierre Jolivet   IS                 iscol = a->col, isrow = a->row;
928421480d9SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j;
929421480d9SBarry Smith   const PetscInt    *adiag = a->diag;
930a3d9026eSPierre Jolivet   PetscInt           nz, neq, ldb, ldx;
931a3d9026eSPierre Jolivet   const PetscInt    *rout, *cout, *r, *c;
932a3d9026eSPierre Jolivet   PetscScalar       *x, *tmp = a->solve_work, s1;
933b65878eeSJunchao Zhang   const PetscScalar *b, *aa, *v;
934a3d9026eSPierre Jolivet   PetscBool          isdense;
935a3d9026eSPierre Jolivet 
936a3d9026eSPierre Jolivet   PetscFunctionBegin;
937a3d9026eSPierre Jolivet   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
938a3d9026eSPierre Jolivet   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
939a3d9026eSPierre Jolivet   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
940a3d9026eSPierre Jolivet   if (X != B) {
941a3d9026eSPierre Jolivet     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
942a3d9026eSPierre Jolivet     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
943a3d9026eSPierre Jolivet   }
944421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
945b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
946a3d9026eSPierre Jolivet   PetscCall(MatDenseGetArrayRead(B, &b));
947a3d9026eSPierre Jolivet   PetscCall(MatDenseGetLDA(B, &ldb));
948a3d9026eSPierre Jolivet   PetscCall(MatDenseGetArray(X, &x));
949a3d9026eSPierre Jolivet   PetscCall(MatDenseGetLDA(X, &ldx));
950a3d9026eSPierre Jolivet   PetscCall(ISGetIndices(isrow, &rout));
951a3d9026eSPierre Jolivet   r = rout;
952a3d9026eSPierre Jolivet   PetscCall(ISGetIndices(iscol, &cout));
953a3d9026eSPierre Jolivet   c = cout;
954a3d9026eSPierre Jolivet   for (neq = 0; neq < B->cmap->n; neq++) {
955a3d9026eSPierre Jolivet     /* copy the b into temp work space according to permutation */
956a3d9026eSPierre Jolivet     for (i = 0; i < n; i++) tmp[i] = b[c[i]];
957a3d9026eSPierre Jolivet 
958a3d9026eSPierre Jolivet     /* forward solve the U^T */
959a3d9026eSPierre Jolivet     for (i = 0; i < n; i++) {
960a3d9026eSPierre Jolivet       v  = aa + adiag[i + 1] + 1;
961a3d9026eSPierre Jolivet       vi = aj + adiag[i + 1] + 1;
962a3d9026eSPierre Jolivet       nz = adiag[i] - adiag[i + 1] - 1;
963a3d9026eSPierre Jolivet       s1 = tmp[i];
964a3d9026eSPierre Jolivet       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
965a3d9026eSPierre Jolivet       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
966a3d9026eSPierre Jolivet       tmp[i] = s1;
967a3d9026eSPierre Jolivet     }
968a3d9026eSPierre Jolivet 
969a3d9026eSPierre Jolivet     /* backward solve the L^T */
970a3d9026eSPierre Jolivet     for (i = n - 1; i >= 0; i--) {
971a3d9026eSPierre Jolivet       v  = aa + ai[i];
972a3d9026eSPierre Jolivet       vi = aj + ai[i];
973a3d9026eSPierre Jolivet       nz = ai[i + 1] - ai[i];
974a3d9026eSPierre Jolivet       s1 = tmp[i];
975a3d9026eSPierre Jolivet       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
976a3d9026eSPierre Jolivet     }
977a3d9026eSPierre Jolivet 
978a3d9026eSPierre Jolivet     /* copy tmp into x according to permutation */
979a3d9026eSPierre Jolivet     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
980a3d9026eSPierre Jolivet     b += ldb;
981a3d9026eSPierre Jolivet     x += ldx;
982a3d9026eSPierre Jolivet   }
983b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
984a3d9026eSPierre Jolivet   PetscCall(ISRestoreIndices(isrow, &rout));
985a3d9026eSPierre Jolivet   PetscCall(ISRestoreIndices(iscol, &cout));
986a3d9026eSPierre Jolivet   PetscCall(MatDenseRestoreArrayRead(B, &b));
987a3d9026eSPierre Jolivet   PetscCall(MatDenseRestoreArray(X, &x));
988a3d9026eSPierre Jolivet   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
989a3d9026eSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
990a3d9026eSPierre Jolivet }
991a3d9026eSPierre Jolivet 
MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)992ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
993d71ae5a4SJacob Faibussowitsch {
994137fb511SHong Zhang   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
995137fb511SHong Zhang   IS                 iscol = a->col, isrow = a->row;
996421480d9SBarry Smith   const PetscInt    *r, *c, *rout, *cout, *adiag;
9975d0c19d7SBarry Smith   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
9985d0c19d7SBarry Smith   PetscInt           nz, row;
999fdc842d1SBarry Smith   PetscScalar       *x, *tmp, *tmps, sum;
1000fdc842d1SBarry Smith   const PetscScalar *b;
1001b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1002137fb511SHong Zhang 
1003137fb511SHong Zhang   PetscFunctionBegin;
10043ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1005137fb511SHong Zhang 
1006421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1007b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
10089566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
10099566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1010137fb511SHong Zhang   tmp = a->solve_work;
1011137fb511SHong Zhang 
10129371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
10139371c9d4SSatish Balay   r = rout;
10149371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
10159371c9d4SSatish Balay   c = cout + (n - 1);
1016137fb511SHong Zhang 
1017137fb511SHong Zhang   /* forward solve the lower triangular */
1018137fb511SHong Zhang   tmp[0] = b[*r++];
1019137fb511SHong Zhang   tmps   = tmp;
1020137fb511SHong Zhang   for (row = 1; row < n; row++) {
1021137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1022137fb511SHong Zhang     v   = aa + ai[i];
1023137fb511SHong Zhang     vi  = aj + ai[i];
1024421480d9SBarry Smith     nz  = adiag[i] - ai[i];
1025137fb511SHong Zhang     sum = b[*r++];
1026003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1027137fb511SHong Zhang     tmp[row] = sum;
1028137fb511SHong Zhang   }
1029137fb511SHong Zhang 
1030137fb511SHong Zhang   /* backward solve the upper triangular */
1031137fb511SHong Zhang   for (row = n - 1; row >= 0; row--) {
1032137fb511SHong Zhang     i   = rout[row]; /* permuted row */
1033421480d9SBarry Smith     v   = aa + adiag[i] + 1;
1034421480d9SBarry Smith     vi  = aj + adiag[i] + 1;
1035421480d9SBarry Smith     nz  = ai[i + 1] - adiag[i] - 1;
1036137fb511SHong Zhang     sum = tmp[row];
1037003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1038421480d9SBarry Smith     x[*c--] = tmp[row] = sum * aa[adiag[i]];
1039137fb511SHong Zhang   }
1040b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
10419566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
10429566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
10439566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
10449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
10459566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
10463ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1047137fb511SHong Zhang }
1048137fb511SHong Zhang 
1049c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)1050ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1051d71ae5a4SJacob Faibussowitsch {
1052930ae53cSBarry Smith   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1053d0f46423SBarry Smith   PetscInt           n  = A->rmap->n;
1054421480d9SBarry Smith   const PetscInt    *ai = a->i, *aj = a->j, *adiag;
1055356650c2SBarry Smith   PetscScalar       *x;
1056356650c2SBarry Smith   const PetscScalar *b;
1057b65878eeSJunchao Zhang   const MatScalar   *aa;
1058aa482453SBarry Smith #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1059356650c2SBarry Smith   PetscInt         adiag_i, i, nz, ai_i;
1060b377110cSBarry Smith   const PetscInt  *vi;
1061dd6ea824SBarry Smith   const MatScalar *v;
1062dd6ea824SBarry Smith   PetscScalar      sum;
1063d85afc42SSatish Balay #endif
1064930ae53cSBarry Smith 
10653a40ed3dSBarry Smith   PetscFunctionBegin;
10663ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1067421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1068b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
10699566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
10709566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1071930ae53cSBarry Smith 
1072aa482453SBarry Smith #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
10731c4feecaSSatish Balay   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
10741c4feecaSSatish Balay #else
1075930ae53cSBarry Smith   /* forward solve the lower triangular */
1076930ae53cSBarry Smith   x[0] = b[0];
1077930ae53cSBarry Smith   for (i = 1; i < n; i++) {
1078930ae53cSBarry Smith     ai_i = ai[i];
1079930ae53cSBarry Smith     v    = aa + ai_i;
1080930ae53cSBarry Smith     vi   = aj + ai_i;
1081930ae53cSBarry Smith     nz   = adiag[i] - ai_i;
1082930ae53cSBarry Smith     sum  = b[i];
1083003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1084930ae53cSBarry Smith     x[i] = sum;
1085930ae53cSBarry Smith   }
1086930ae53cSBarry Smith 
1087930ae53cSBarry Smith   /* backward solve the upper triangular */
1088930ae53cSBarry Smith   for (i = n - 1; i >= 0; i--) {
1089930ae53cSBarry Smith     adiag_i = adiag[i];
1090d708749eSBarry Smith     v       = aa + adiag_i + 1;
1091d708749eSBarry Smith     vi      = aj + adiag_i + 1;
1092930ae53cSBarry Smith     nz      = ai[i + 1] - adiag_i - 1;
1093930ae53cSBarry Smith     sum     = x[i];
1094003131ecSBarry Smith     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1095930ae53cSBarry Smith     x[i] = sum * aa[adiag_i];
1096930ae53cSBarry Smith   }
10971c4feecaSSatish Balay #endif
10989566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1099b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
11009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
11019566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
11023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1103930ae53cSBarry Smith }
1104930ae53cSBarry Smith 
MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)1105ff6a9541SJacob Faibussowitsch static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1106d71ae5a4SJacob Faibussowitsch {
1107416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1108416022c9SBarry Smith   IS                 iscol = a->col, isrow = a->row;
110904fbf559SBarry Smith   PetscInt           i, n                  = A->rmap->n, j;
11105d0c19d7SBarry Smith   PetscInt           nz;
1111421480d9SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
111204fbf559SBarry Smith   PetscScalar       *x, *tmp, sum;
111304fbf559SBarry Smith   const PetscScalar *b;
1114b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1115da3a660dSBarry Smith 
11163a40ed3dSBarry Smith   PetscFunctionBegin;
11179566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
1118da3a660dSBarry Smith 
1119421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1120b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
11219566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11229566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1123416022c9SBarry Smith   tmp = a->solve_work;
1124da3a660dSBarry Smith 
11259371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
11269371c9d4SSatish Balay   r = rout;
11279371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11289371c9d4SSatish Balay   c = cout + (n - 1);
1129da3a660dSBarry Smith 
1130da3a660dSBarry Smith   /* forward solve the lower triangular */
1131da3a660dSBarry Smith   tmp[0] = b[*r++];
1132da3a660dSBarry Smith   for (i = 1; i < n; i++) {
1133010a20caSHong Zhang     v   = aa + ai[i];
1134010a20caSHong Zhang     vi  = aj + ai[i];
1135421480d9SBarry Smith     nz  = adiag[i] - ai[i];
1136da3a660dSBarry Smith     sum = b[*r++];
113704fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1138da3a660dSBarry Smith     tmp[i] = sum;
1139da3a660dSBarry Smith   }
1140da3a660dSBarry Smith 
1141da3a660dSBarry Smith   /* backward solve the upper triangular */
1142da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1143421480d9SBarry Smith     v   = aa + adiag[i] + 1;
1144421480d9SBarry Smith     vi  = aj + adiag[i] + 1;
1145421480d9SBarry Smith     nz  = ai[i + 1] - adiag[i] - 1;
1146da3a660dSBarry Smith     sum = tmp[i];
114704fbf559SBarry Smith     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1148421480d9SBarry Smith     tmp[i] = sum * aa[adiag[i]];
1149da3a660dSBarry Smith     x[*c--] += tmp[i];
1150da3a660dSBarry Smith   }
1151da3a660dSBarry Smith 
1152b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
11539566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
11549566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
11559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
11569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
11579566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
11583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1159da3a660dSBarry Smith }
116004fbf559SBarry Smith 
MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)1161d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1162d71ae5a4SJacob Faibussowitsch {
11633c0119dfSShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
11643c0119dfSShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
11653c0119dfSShri Abhyankar   PetscInt           i, n                  = A->rmap->n, j;
11663c0119dfSShri Abhyankar   PetscInt           nz;
1167421480d9SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
11683c0119dfSShri Abhyankar   PetscScalar       *x, *tmp, sum;
11693c0119dfSShri Abhyankar   const PetscScalar *b;
1170b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
11713c0119dfSShri Abhyankar 
11723c0119dfSShri Abhyankar   PetscFunctionBegin;
11739566063dSJacob Faibussowitsch   if (yy != xx) PetscCall(VecCopy(yy, xx));
11743c0119dfSShri Abhyankar 
1175421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1176b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
11779566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
11789566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
11793c0119dfSShri Abhyankar   tmp = a->solve_work;
11803c0119dfSShri Abhyankar 
11819371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
11829371c9d4SSatish Balay   r = rout;
11839371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
11849371c9d4SSatish Balay   c = cout;
11853c0119dfSShri Abhyankar 
11863c0119dfSShri Abhyankar   /* forward solve the lower triangular */
11873c0119dfSShri Abhyankar   tmp[0] = b[r[0]];
11883c0119dfSShri Abhyankar   v      = aa;
11893c0119dfSShri Abhyankar   vi     = aj;
11903c0119dfSShri Abhyankar   for (i = 1; i < n; i++) {
11913c0119dfSShri Abhyankar     nz  = ai[i + 1] - ai[i];
11923c0119dfSShri Abhyankar     sum = b[r[i]];
11933c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
11943c0119dfSShri Abhyankar     tmp[i] = sum;
11952205254eSKarl Rupp     v += nz;
11962205254eSKarl Rupp     vi += nz;
11973c0119dfSShri Abhyankar   }
11983c0119dfSShri Abhyankar 
11993c0119dfSShri Abhyankar   /* backward solve the upper triangular */
12003c0119dfSShri Abhyankar   v  = aa + adiag[n - 1];
12013c0119dfSShri Abhyankar   vi = aj + adiag[n - 1];
12023c0119dfSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
12033c0119dfSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
12043c0119dfSShri Abhyankar     sum = tmp[i];
12053c0119dfSShri Abhyankar     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
12063c0119dfSShri Abhyankar     tmp[i] = sum * v[nz];
12073c0119dfSShri Abhyankar     x[c[i]] += tmp[i];
12089371c9d4SSatish Balay     v += nz + 1;
12099371c9d4SSatish Balay     vi += nz + 1;
12103c0119dfSShri Abhyankar   }
12113c0119dfSShri Abhyankar 
1212b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
12139566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12149566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
12159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12179566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz));
12183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
12193c0119dfSShri Abhyankar }
12203c0119dfSShri Abhyankar 
MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)1221d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1222d71ae5a4SJacob Faibussowitsch {
1223416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
12242235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
122504fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
122604fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
122704fbf559SBarry Smith   PetscInt           nz;
122804fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1229b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
123004fbf559SBarry Smith   const PetscScalar *b;
1231da3a660dSBarry Smith 
12323a40ed3dSBarry Smith   PetscFunctionBegin;
1233b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
12349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12359566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1236416022c9SBarry Smith   tmp = a->solve_work;
1237da3a660dSBarry Smith 
12389371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12399371c9d4SSatish Balay   r = rout;
12409371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
12419371c9d4SSatish Balay   c = cout;
1242da3a660dSBarry Smith 
1243da3a660dSBarry Smith   /* copy the b into temp work space according to permutation */
12442235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1245da3a660dSBarry Smith 
1246da3a660dSBarry Smith   /* forward solve the U^T */
1247da3a660dSBarry Smith   for (i = 0; i < n; i++) {
1248010a20caSHong Zhang     v  = aa + diag[i];
1249010a20caSHong Zhang     vi = aj + diag[i] + 1;
1250f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
1251c38d4ed2SBarry Smith     s1 = tmp[i];
1252ef66eb69SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
125304fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1254c38d4ed2SBarry Smith     tmp[i] = s1;
1255da3a660dSBarry Smith   }
1256da3a660dSBarry Smith 
1257da3a660dSBarry Smith   /* backward solve the L^T */
1258da3a660dSBarry Smith   for (i = n - 1; i >= 0; i--) {
1259010a20caSHong Zhang     v  = aa + diag[i] - 1;
1260010a20caSHong Zhang     vi = aj + diag[i] - 1;
1261f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
1262f1af5d2fSBarry Smith     s1 = tmp[i];
126304fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1264da3a660dSBarry Smith   }
1265da3a660dSBarry Smith 
1266da3a660dSBarry Smith   /* copy tmp into x according to permutation */
1267591294e4SBarry Smith   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1268da3a660dSBarry Smith 
12699566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
12709566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1271b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
12729566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
12739566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1274da3a660dSBarry Smith 
12759566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
12763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1277da3a660dSBarry Smith }
1278da3a660dSBarry Smith 
MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)1279d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1280d71ae5a4SJacob Faibussowitsch {
1281d1fa9404SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1282d1fa9404SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1283421480d9SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1284d1fa9404SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1285d1fa9404SShri Abhyankar   PetscInt           nz;
1286d1fa9404SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1287b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1288d1fa9404SShri Abhyankar   const PetscScalar *b;
1289d1fa9404SShri Abhyankar 
1290d1fa9404SShri Abhyankar   PetscFunctionBegin;
1291421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1292b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
12939566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
12949566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
1295d1fa9404SShri Abhyankar   tmp = a->solve_work;
1296d1fa9404SShri Abhyankar 
12979371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
12989371c9d4SSatish Balay   r = rout;
12999371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13009371c9d4SSatish Balay   c = cout;
1301d1fa9404SShri Abhyankar 
1302d1fa9404SShri Abhyankar   /* copy the b into temp work space according to permutation */
1303d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1304d1fa9404SShri Abhyankar 
1305d1fa9404SShri Abhyankar   /* forward solve the U^T */
1306d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) {
1307d1fa9404SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1308d1fa9404SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1309d1fa9404SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1310d1fa9404SShri Abhyankar     s1 = tmp[i];
1311d1fa9404SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1312d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1313d1fa9404SShri Abhyankar     tmp[i] = s1;
1314d1fa9404SShri Abhyankar   }
1315d1fa9404SShri Abhyankar 
1316d1fa9404SShri Abhyankar   /* backward solve the L^T */
1317d1fa9404SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1318d1fa9404SShri Abhyankar     v  = aa + ai[i];
1319d1fa9404SShri Abhyankar     vi = aj + ai[i];
1320d1fa9404SShri Abhyankar     nz = ai[i + 1] - ai[i];
1321d1fa9404SShri Abhyankar     s1 = tmp[i];
1322d1fa9404SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1323d1fa9404SShri Abhyankar   }
1324d1fa9404SShri Abhyankar 
1325d1fa9404SShri Abhyankar   /* copy tmp into x according to permutation */
1326d1fa9404SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1327d1fa9404SShri Abhyankar 
13289566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13299566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1330b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
13319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13329566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
1333d1fa9404SShri Abhyankar 
13349566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
13353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1336d1fa9404SShri Abhyankar }
1337d1fa9404SShri Abhyankar 
MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)1338d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1339d71ae5a4SJacob Faibussowitsch {
1340416022c9SBarry Smith   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
13412235e667SBarry Smith   IS                 iscol = a->col, isrow = a->row;
134204fbf559SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
134304fbf559SBarry Smith   PetscInt           i, n = A->rmap->n, j;
134404fbf559SBarry Smith   PetscInt           nz;
134504fbf559SBarry Smith   PetscScalar       *x, *tmp, s1;
1346b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
134704fbf559SBarry Smith   const PetscScalar *b;
13486abc6512SBarry Smith 
13493a40ed3dSBarry Smith   PetscFunctionBegin;
13509566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
1351b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
13529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
13539566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1354416022c9SBarry Smith   tmp = a->solve_work;
13556abc6512SBarry Smith 
13569371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
13579371c9d4SSatish Balay   r = rout;
13589371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
13599371c9d4SSatish Balay   c = cout;
13606abc6512SBarry Smith 
13616abc6512SBarry Smith   /* copy the b into temp work space according to permutation */
13622235e667SBarry Smith   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
13636abc6512SBarry Smith 
13646abc6512SBarry Smith   /* forward solve the U^T */
13656abc6512SBarry Smith   for (i = 0; i < n; i++) {
1366010a20caSHong Zhang     v  = aa + diag[i];
1367010a20caSHong Zhang     vi = aj + diag[i] + 1;
1368f1af5d2fSBarry Smith     nz = ai[i + 1] - diag[i] - 1;
136904fbf559SBarry Smith     s1 = tmp[i];
137004fbf559SBarry Smith     s1 *= (*v++); /* multiply by inverse of diagonal entry */
137104fbf559SBarry Smith     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
137204fbf559SBarry Smith     tmp[i] = s1;
13736abc6512SBarry Smith   }
13746abc6512SBarry Smith 
13756abc6512SBarry Smith   /* backward solve the L^T */
13766abc6512SBarry Smith   for (i = n - 1; i >= 0; i--) {
1377010a20caSHong Zhang     v  = aa + diag[i] - 1;
1378010a20caSHong Zhang     vi = aj + diag[i] - 1;
1379f1af5d2fSBarry Smith     nz = diag[i] - ai[i];
138004fbf559SBarry Smith     s1 = tmp[i];
138104fbf559SBarry Smith     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
13826abc6512SBarry Smith   }
13836abc6512SBarry Smith 
13846abc6512SBarry Smith   /* copy tmp into x according to permutation */
13856abc6512SBarry Smith   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
13866abc6512SBarry Smith 
13879566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
13889566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1389b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
13909566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
13919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
13926abc6512SBarry Smith 
13939566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
13943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1395da3a660dSBarry Smith }
139604fbf559SBarry Smith 
MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)1397d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1398d71ae5a4SJacob Faibussowitsch {
1399533e6011SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1400533e6011SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
1401421480d9SBarry Smith   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1402533e6011SShri Abhyankar   PetscInt           i, n = A->rmap->n, j;
1403533e6011SShri Abhyankar   PetscInt           nz;
1404533e6011SShri Abhyankar   PetscScalar       *x, *tmp, s1;
1405b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
1406533e6011SShri Abhyankar   const PetscScalar *b;
1407533e6011SShri Abhyankar 
1408533e6011SShri Abhyankar   PetscFunctionBegin;
14099566063dSJacob Faibussowitsch   if (zz != xx) PetscCall(VecCopy(zz, xx));
1410421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1411b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
14129566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
14139566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
1414533e6011SShri Abhyankar   tmp = a->solve_work;
1415533e6011SShri Abhyankar 
14169371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
14179371c9d4SSatish Balay   r = rout;
14189371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
14199371c9d4SSatish Balay   c = cout;
1420533e6011SShri Abhyankar 
1421533e6011SShri Abhyankar   /* copy the b into temp work space according to permutation */
1422533e6011SShri Abhyankar   for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1423533e6011SShri Abhyankar 
1424533e6011SShri Abhyankar   /* forward solve the U^T */
1425533e6011SShri Abhyankar   for (i = 0; i < n; i++) {
1426533e6011SShri Abhyankar     v  = aa + adiag[i + 1] + 1;
1427533e6011SShri Abhyankar     vi = aj + adiag[i + 1] + 1;
1428533e6011SShri Abhyankar     nz = adiag[i] - adiag[i + 1] - 1;
1429533e6011SShri Abhyankar     s1 = tmp[i];
1430533e6011SShri Abhyankar     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1431533e6011SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1432533e6011SShri Abhyankar     tmp[i] = s1;
1433533e6011SShri Abhyankar   }
1434533e6011SShri Abhyankar 
1435533e6011SShri Abhyankar   /* backward solve the L^T */
1436533e6011SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1437533e6011SShri Abhyankar     v  = aa + ai[i];
1438533e6011SShri Abhyankar     vi = aj + ai[i];
1439533e6011SShri Abhyankar     nz = ai[i + 1] - ai[i];
1440533e6011SShri Abhyankar     s1 = tmp[i];
1441c5e3b2a3SShri Abhyankar     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1442533e6011SShri Abhyankar   }
1443533e6011SShri Abhyankar 
1444533e6011SShri Abhyankar   /* copy tmp into x according to permutation */
1445533e6011SShri Abhyankar   for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1446533e6011SShri Abhyankar 
14479566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
14489566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
1449b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
14509566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
14519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
1452533e6011SShri Abhyankar 
14539566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
14543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1455533e6011SShri Abhyankar }
1456533e6011SShri Abhyankar 
14578fc3a347SHong Zhang /*
14588a76ed4fSHong Zhang    ilu() under revised new data structure.
1459813a1f2bSShri Abhyankar    Factored arrays bj and ba are stored as
1460813a1f2bSShri Abhyankar      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1461813a1f2bSShri Abhyankar 
1462813a1f2bSShri Abhyankar    bi=fact->i is an array of size n+1, in which
1463baabb860SHong Zhang      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1464baabb860SHong Zhang      bi[n]:  points to L(n-1,n-1)+1
1465baabb860SHong Zhang 
1466813a1f2bSShri Abhyankar   bdiag=fact->diag is an array of size n+1,in which
1467baabb860SHong Zhang      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1468a24f213cSHong Zhang      bdiag[n]: points to entry of U(n-1,0)-1
1469813a1f2bSShri Abhyankar 
1470813a1f2bSShri Abhyankar    U(i,:) contains bdiag[i] as its last entry, i.e.,
1471813a1f2bSShri Abhyankar     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1472813a1f2bSShri Abhyankar */
MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)1473d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1474d71ae5a4SJacob Faibussowitsch {
1475813a1f2bSShri Abhyankar   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1476421480d9SBarry Smith   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag;
1477bbd65245SShri Abhyankar   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
14781df811f5SHong Zhang   IS             isicol;
1479813a1f2bSShri Abhyankar 
1480813a1f2bSShri Abhyankar   PetscFunctionBegin;
14819566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1482421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
14839566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
148457508eceSPierre Jolivet   b = (Mat_SeqAIJ *)fact->data;
1485813a1f2bSShri Abhyankar 
1486813a1f2bSShri Abhyankar   /* allocate matrix arrays for new data structure */
148784648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
148884648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
14899f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
14909f0612e4SBarry Smith   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
14919f0612e4SBarry Smith   b->free_a  = PETSC_TRUE;
14929f0612e4SBarry Smith   b->free_ij = PETSC_TRUE;
14932205254eSKarl Rupp 
1494aa624791SPierre Jolivet   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1495813a1f2bSShri Abhyankar   bdiag = b->diag;
1496813a1f2bSShri Abhyankar 
1497813a1f2bSShri Abhyankar   /* set bi and bj with new data structure */
1498813a1f2bSShri Abhyankar   bi = b->i;
1499813a1f2bSShri Abhyankar   bj = b->j;
1500813a1f2bSShri Abhyankar 
1501813a1f2bSShri Abhyankar   /* L part */
1502e60cf9a0SBarry Smith   bi[0] = 0;
1503813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1504813a1f2bSShri Abhyankar     nz        = adiag[i] - ai[i];
1505813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nz;
1506813a1f2bSShri Abhyankar     aj        = a->j + ai[i];
1507ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1508813a1f2bSShri Abhyankar   }
1509813a1f2bSShri Abhyankar 
1510813a1f2bSShri Abhyankar   /* U part */
151162fbd6afSShri Abhyankar   bdiag[n] = bi[n] - 1;
1512813a1f2bSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
1513813a1f2bSShri Abhyankar     nz = ai[i + 1] - adiag[i] - 1;
1514813a1f2bSShri Abhyankar     aj = a->j + adiag[i] + 1;
1515ad540459SPierre Jolivet     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1516813a1f2bSShri Abhyankar     /* diag[i] */
1517bbd65245SShri Abhyankar     bj[k++]  = i;
1518a24f213cSHong Zhang     bdiag[i] = bdiag[i + 1] + nz + 1;
1519813a1f2bSShri Abhyankar   }
15201df811f5SHong Zhang 
1521d5f3da31SBarry Smith   fact->factortype             = MAT_FACTOR_ILU;
15221df811f5SHong Zhang   fact->info.factor_mallocs    = 0;
15231df811f5SHong Zhang   fact->info.fill_ratio_given  = info->fill;
15241df811f5SHong Zhang   fact->info.fill_ratio_needed = 1.0;
152535233d99SShri Abhyankar   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
15269566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
15271df811f5SHong Zhang 
152857508eceSPierre Jolivet   b       = (Mat_SeqAIJ *)fact->data;
15291df811f5SHong Zhang   b->row  = isrow;
15301df811f5SHong Zhang   b->col  = iscol;
15311df811f5SHong Zhang   b->icol = isicol;
153284648c2dSPierre Jolivet   PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
15339566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
15349566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
15353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1536813a1f2bSShri Abhyankar }
1537813a1f2bSShri Abhyankar 
MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo * info)1538d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1539d71ae5a4SJacob Faibussowitsch {
1540813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1541813a1f2bSShri Abhyankar   IS                 isicol;
1542813a1f2bSShri Abhyankar   const PetscInt    *r, *ic;
15431df811f5SHong Zhang   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1544813a1f2bSShri Abhyankar   PetscInt          *bi, *cols, nnz, *cols_lvl;
1545813a1f2bSShri Abhyankar   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1546813a1f2bSShri Abhyankar   PetscInt           i, levels, diagonal_fill;
1547421480d9SBarry Smith   PetscBool          col_identity, row_identity;
1548813a1f2bSShri Abhyankar   PetscReal          f;
15490298fd71SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1550813a1f2bSShri Abhyankar   PetscBT            lnkbt;
1551813a1f2bSShri Abhyankar   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
15520298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
15530298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1554421480d9SBarry Smith   PetscBool          diagDense;
1555813a1f2bSShri Abhyankar 
1556813a1f2bSShri Abhyankar   PetscFunctionBegin;
155708401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1558421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
1559421480d9SBarry Smith   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
1560ad04f41aSHong Zhang 
1561813a1f2bSShri Abhyankar   levels = (PetscInt)info->levels;
15629566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
15639566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
1564813a1f2bSShri Abhyankar   if (!levels && row_identity && col_identity) {
1565813a1f2bSShri Abhyankar     /* special case: ilu(0) with natural ordering */
15669566063dSJacob Faibussowitsch     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
15674d12350bSJunchao Zhang     if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
15683ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
1569813a1f2bSShri Abhyankar   }
1570813a1f2bSShri Abhyankar 
15719566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
15729566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
15739566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
1574813a1f2bSShri Abhyankar 
15751bfa06eaSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
15769f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
15779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
1578e60cf9a0SBarry Smith   bi[0] = bdiag[0] = 0;
15799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1580813a1f2bSShri Abhyankar 
1581813a1f2bSShri Abhyankar   /* create a linked list for storing column indices of the active row */
1582813a1f2bSShri Abhyankar   nlnk = n + 1;
15839566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1584813a1f2bSShri Abhyankar 
1585813a1f2bSShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
15861df811f5SHong Zhang   f             = info->fill;
15871df811f5SHong Zhang   diagonal_fill = (PetscInt)info->diagonal_fill;
15889566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1589813a1f2bSShri Abhyankar   current_space = free_space;
15909566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1591813a1f2bSShri Abhyankar   current_space_lvl = free_space_lvl;
1592813a1f2bSShri Abhyankar   for (i = 0; i < n; i++) {
1593813a1f2bSShri Abhyankar     nzi = 0;
1594813a1f2bSShri Abhyankar     /* copy current row into linked list */
1595813a1f2bSShri Abhyankar     nnz = ai[r[i] + 1] - ai[r[i]];
159628b400f6SJacob Faibussowitsch     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1597813a1f2bSShri Abhyankar     cols   = aj + ai[r[i]];
1598813a1f2bSShri Abhyankar     lnk[i] = -1; /* marker to indicate if diagonal exists */
15999566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1600813a1f2bSShri Abhyankar     nzi += nlnk;
1601813a1f2bSShri Abhyankar 
1602813a1f2bSShri Abhyankar     /* make sure diagonal entry is included */
1603813a1f2bSShri Abhyankar     if (diagonal_fill && lnk[i] == -1) {
1604813a1f2bSShri Abhyankar       fm = n;
1605813a1f2bSShri Abhyankar       while (lnk[fm] < i) fm = lnk[fm];
1606813a1f2bSShri Abhyankar       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1607813a1f2bSShri Abhyankar       lnk[fm]    = i;
1608813a1f2bSShri Abhyankar       lnk_lvl[i] = 0;
16099371c9d4SSatish Balay       nzi++;
16109371c9d4SSatish Balay       dcount++;
1611813a1f2bSShri Abhyankar     }
1612813a1f2bSShri Abhyankar 
1613813a1f2bSShri Abhyankar     /* add pivot rows into the active row */
1614813a1f2bSShri Abhyankar     nzbd = 0;
1615813a1f2bSShri Abhyankar     prow = lnk[n];
1616813a1f2bSShri Abhyankar     while (prow < i) {
1617813a1f2bSShri Abhyankar       nnz      = bdiag[prow];
1618813a1f2bSShri Abhyankar       cols     = bj_ptr[prow] + nnz + 1;
1619813a1f2bSShri Abhyankar       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1620813a1f2bSShri Abhyankar       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
16219566063dSJacob Faibussowitsch       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1622813a1f2bSShri Abhyankar       nzi += nlnk;
1623813a1f2bSShri Abhyankar       prow = lnk[prow];
1624813a1f2bSShri Abhyankar       nzbd++;
1625813a1f2bSShri Abhyankar     }
1626813a1f2bSShri Abhyankar     bdiag[i]  = nzbd;
1627813a1f2bSShri Abhyankar     bi[i + 1] = bi[i] + nzi;
1628813a1f2bSShri Abhyankar     /* if free space is not available, make more free space */
1629813a1f2bSShri Abhyankar     if (current_space->local_remaining < nzi) {
1630f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
16319566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
16329566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1633813a1f2bSShri Abhyankar       reallocs++;
1634813a1f2bSShri Abhyankar     }
1635813a1f2bSShri Abhyankar 
1636813a1f2bSShri Abhyankar     /* copy data into free_space and free_space_lvl, then initialize lnk */
16379566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1638813a1f2bSShri Abhyankar     bj_ptr[i]    = current_space->array;
1639813a1f2bSShri Abhyankar     bjlvl_ptr[i] = current_space_lvl->array;
1640813a1f2bSShri Abhyankar 
1641813a1f2bSShri Abhyankar     /* make sure the active row i has diagonal entry */
164200045ab3SPierre Jolivet     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1643813a1f2bSShri Abhyankar 
1644813a1f2bSShri Abhyankar     current_space->array += nzi;
1645813a1f2bSShri Abhyankar     current_space->local_used += nzi;
1646813a1f2bSShri Abhyankar     current_space->local_remaining -= nzi;
1647813a1f2bSShri Abhyankar     current_space_lvl->array += nzi;
1648813a1f2bSShri Abhyankar     current_space_lvl->local_used += nzi;
1649813a1f2bSShri Abhyankar     current_space_lvl->local_remaining -= nzi;
1650813a1f2bSShri Abhyankar   }
1651813a1f2bSShri Abhyankar 
16529566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
16539566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
1654813a1f2bSShri Abhyankar   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
165584648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
16569566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1657813a1f2bSShri Abhyankar 
16589566063dSJacob Faibussowitsch   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
16599566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
16609566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1661813a1f2bSShri Abhyankar 
1662813a1f2bSShri Abhyankar #if defined(PETSC_USE_INFO)
1663813a1f2bSShri Abhyankar   {
1664aef85c9fSShri Abhyankar     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
16659566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
16669566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
16679566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
16689566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
166948a46eb9SPierre Jolivet     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1670813a1f2bSShri Abhyankar   }
1671813a1f2bSShri Abhyankar #endif
1672813a1f2bSShri Abhyankar   /* put together the new matrix */
16739566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
167457508eceSPierre Jolivet   b          = (Mat_SeqAIJ *)fact->data;
1675813a1f2bSShri Abhyankar   b->free_ij = PETSC_TRUE;
16769f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
16779f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
1678813a1f2bSShri Abhyankar   b->j      = bj;
1679813a1f2bSShri Abhyankar   b->i      = bi;
1680813a1f2bSShri Abhyankar   b->diag   = bdiag;
1681f4259b30SLisandro Dalcin   b->ilen   = NULL;
1682f4259b30SLisandro Dalcin   b->imax   = NULL;
1683813a1f2bSShri Abhyankar   b->row    = isrow;
1684813a1f2bSShri Abhyankar   b->col    = iscol;
16859566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
16869566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
1687813a1f2bSShri Abhyankar   b->icol = isicol;
16882205254eSKarl Rupp 
168984648c2dSPierre Jolivet   PetscCall(PetscMalloc1(n, &b->solve_work));
1690813a1f2bSShri Abhyankar   /* In b structure:  Free imax, ilen, old a, old j.
1691813a1f2bSShri Abhyankar      Allocate bdiag, solve_work, new a, new j */
1692f268cba8SShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
16932205254eSKarl Rupp 
169457508eceSPierre Jolivet   fact->info.factor_mallocs    = reallocs;
169557508eceSPierre Jolivet   fact->info.fill_ratio_given  = f;
169657508eceSPierre Jolivet   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
169757508eceSPierre Jolivet   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
16984d12350bSJunchao Zhang   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
16999566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
17003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1701813a1f2bSShri Abhyankar }
1702813a1f2bSShri Abhyankar 
MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo * info)1703d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1704d71ae5a4SJacob Faibussowitsch {
17055f44c854SHong Zhang   Mat              C  = B;
17065f44c854SHong Zhang   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
17075f44c854SHong Zhang   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
17085f44c854SHong Zhang   IS               ip = b->row, iip = b->icol;
17095f44c854SHong Zhang   const PetscInt  *rip, *riip;
17105f44c854SHong Zhang   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
17115f44c854SHong Zhang   PetscInt        *ai = a->i, *aj = a->j;
17125f44c854SHong Zhang   PetscInt         k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1713b65878eeSJunchao Zhang   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1714ace3abfcSBarry Smith   PetscBool        perm_identity;
1715d90ac83dSHong Zhang   FactorShiftCtx   sctx;
171698a93161SHong Zhang   PetscReal        rs;
1717b65878eeSJunchao Zhang   const MatScalar *aa, *v;
1718b65878eeSJunchao Zhang   MatScalar        d;
1719421480d9SBarry Smith   const PetscInt  *adiag;
172098a93161SHong Zhang 
17215f44c854SHong Zhang   PetscFunctionBegin;
1722421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1723b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
172498a93161SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
17259566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
172698a93161SHong Zhang 
1727f4db908eSBarry Smith   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
172898a93161SHong Zhang     sctx.shift_top = info->zeropivot;
172998a93161SHong Zhang     for (i = 0; i < mbs; i++) {
173098a93161SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1731421480d9SBarry Smith       d  = aa[adiag[i]];
173298a93161SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
173398a93161SHong Zhang       v  = aa + ai[i];
173498a93161SHong Zhang       nz = ai[i + 1] - ai[i];
17352205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
173698a93161SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
173798a93161SHong Zhang     }
173898a93161SHong Zhang     sctx.shift_top *= 1.1;
173998a93161SHong Zhang     sctx.nshift_max = 5;
174098a93161SHong Zhang     sctx.shift_lo   = 0.;
174198a93161SHong Zhang     sctx.shift_hi   = 1.;
174298a93161SHong Zhang   }
17435f44c854SHong Zhang 
17449566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
17459566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
17465f44c854SHong Zhang 
17475f44c854SHong Zhang   /* allocate working arrays
17485f44c854SHong Zhang      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
17495f44c854SHong Zhang      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
17505f44c854SHong Zhang   */
17519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
17525f44c854SHong Zhang 
175398a93161SHong Zhang   do {
175407b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
175598a93161SHong Zhang 
1756c5546cabSHong Zhang     for (i = 0; i < mbs; i++) c2r[i] = mbs;
17572e987568SSatish Balay     if (mbs) il[0] = 0;
17585f44c854SHong Zhang 
17595f44c854SHong Zhang     for (k = 0; k < mbs; k++) {
17605f44c854SHong Zhang       /* zero rtmp */
17615f44c854SHong Zhang       nz    = bi[k + 1] - bi[k];
17625f44c854SHong Zhang       bjtmp = bj + bi[k];
17635f44c854SHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
17645f44c854SHong Zhang 
17655f44c854SHong Zhang       /* load in initial unfactored row */
17665f44c854SHong Zhang       bval = ba + bi[k];
17679371c9d4SSatish Balay       jmin = ai[rip[k]];
17689371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
17695f44c854SHong Zhang       for (j = jmin; j < jmax; j++) {
17705f44c854SHong Zhang         col = riip[aj[j]];
17715f44c854SHong Zhang         if (col >= k) { /* only take upper triangular entry */
17725f44c854SHong Zhang           rtmp[col] = aa[j];
17735f44c854SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
17745f44c854SHong Zhang         }
17755f44c854SHong Zhang       }
177698a93161SHong Zhang       /* shift the diagonal of the matrix: ZeropivotApply() */
177798a93161SHong Zhang       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
17785f44c854SHong Zhang 
17795f44c854SHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
17805f44c854SHong Zhang       dk = rtmp[k];
17815f44c854SHong Zhang       i  = c2r[k]; /* first row to be added to k_th row  */
17825f44c854SHong Zhang 
17835f44c854SHong Zhang       while (i < k) {
17845f44c854SHong Zhang         nexti = c2r[i]; /* next row to be added to k_th row */
17855f44c854SHong Zhang 
17865f44c854SHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
17875f44c854SHong Zhang         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
17885f44c854SHong Zhang         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
17895f44c854SHong Zhang         dk += uikdi * ba[ili];           /* update diag[k] */
17905f44c854SHong Zhang         ba[ili] = uikdi;                 /* -U(i,k) */
17915f44c854SHong Zhang 
17925f44c854SHong Zhang         /* add multiple of row i to k-th row */
17939371c9d4SSatish Balay         jmin = ili + 1;
17949371c9d4SSatish Balay         jmax = bi[i + 1];
17955f44c854SHong Zhang         if (jmin < jmax) {
17965f44c854SHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
17975f44c854SHong Zhang           /* update il and c2r for row i */
17985f44c854SHong Zhang           il[i]  = jmin;
17999371c9d4SSatish Balay           j      = bj[jmin];
18009371c9d4SSatish Balay           c2r[i] = c2r[j];
18019371c9d4SSatish Balay           c2r[j] = i;
18025f44c854SHong Zhang         }
18035f44c854SHong Zhang         i = nexti;
18045f44c854SHong Zhang       }
18055f44c854SHong Zhang 
18065f44c854SHong Zhang       /* copy data into U(k,:) */
180798a93161SHong Zhang       rs   = 0.0;
18089371c9d4SSatish Balay       jmin = bi[k];
18099371c9d4SSatish Balay       jmax = bi[k + 1] - 1;
18105f44c854SHong Zhang       if (jmin < jmax) {
18115f44c854SHong Zhang         for (j = jmin; j < jmax; j++) {
18129371c9d4SSatish Balay           col   = bj[j];
18139371c9d4SSatish Balay           ba[j] = rtmp[col];
18149371c9d4SSatish Balay           rs += PetscAbsScalar(ba[j]);
18155f44c854SHong Zhang         }
18165f44c854SHong Zhang         /* add the k-th row into il and c2r */
18175f44c854SHong Zhang         il[k]  = jmin;
18189371c9d4SSatish Balay         i      = bj[jmin];
18199371c9d4SSatish Balay         c2r[k] = c2r[i];
18209371c9d4SSatish Balay         c2r[i] = k;
18215f44c854SHong Zhang       }
182298a93161SHong Zhang 
182398a93161SHong Zhang       /* MatPivotCheck() */
182498a93161SHong Zhang       sctx.rs = rs;
182598a93161SHong Zhang       sctx.pv = dk;
18269566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
182707b50cabSHong Zhang       if (sctx.newshift) break;
182898a93161SHong Zhang       dk = sctx.pv;
182998a93161SHong Zhang 
183098a93161SHong Zhang       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
183198a93161SHong Zhang     }
183207b50cabSHong Zhang   } while (sctx.newshift);
18335f44c854SHong Zhang 
1834b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
18359566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, c2r));
18369566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
18379566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
18385f44c854SHong Zhang 
18399566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
18405f44c854SHong Zhang   if (perm_identity) {
184135233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
184235233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
18432169352eSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
18442169352eSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
18455f44c854SHong Zhang   } else {
184635233d99SShri Abhyankar     B->ops->solve          = MatSolve_SeqSBAIJ_1;
184735233d99SShri Abhyankar     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
184835233d99SShri Abhyankar     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
184935233d99SShri Abhyankar     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
18505f44c854SHong Zhang   }
18515f44c854SHong Zhang 
18525f44c854SHong Zhang   C->assembled    = PETSC_TRUE;
18535f44c854SHong Zhang   C->preallocated = PETSC_TRUE;
18542205254eSKarl Rupp 
18559566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
185698a93161SHong Zhang 
185798a93161SHong Zhang   /* MatPivotView() */
185898a93161SHong Zhang   if (sctx.nshift) {
1859f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
18609566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1861f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
18629566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1863f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
18649566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
186598a93161SHong Zhang     }
186698a93161SHong Zhang   }
18673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
18685f44c854SHong Zhang }
18695f44c854SHong Zhang 
MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo * info)1870d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
1871d71ae5a4SJacob Faibussowitsch {
1872719d5645SBarry Smith   Mat              C  = B;
18730968510aSHong Zhang   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
18742ed133dbSHong Zhang   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
18759bfd6278SHong Zhang   IS               ip = b->row, iip = b->icol;
18765d0c19d7SBarry Smith   const PetscInt  *rip, *riip;
1877c5546cabSHong Zhang   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
18782ed133dbSHong Zhang   PetscInt        *ai = a->i, *aj = a->j;
18792ed133dbSHong Zhang   PetscInt         k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1880b65878eeSJunchao Zhang   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1881b65878eeSJunchao Zhang   const MatScalar *aa, *v;
1882ace3abfcSBarry Smith   PetscBool        perm_identity;
18830e95ead3SHong Zhang   FactorShiftCtx   sctx;
18840e95ead3SHong Zhang   PetscReal        rs;
1885b65878eeSJunchao Zhang   MatScalar        d;
1886421480d9SBarry Smith   const PetscInt  *adiag;
1887d5d45c9bSBarry Smith 
1888a6175056SHong Zhang   PetscFunctionBegin;
1889421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1890b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
18910e95ead3SHong Zhang   /* MatPivotSetUp(): initialize shift context sctx */
18929566063dSJacob Faibussowitsch   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
18930e95ead3SHong Zhang 
18940e95ead3SHong Zhang   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
18950e95ead3SHong Zhang     sctx.shift_top = info->zeropivot;
18960e95ead3SHong Zhang     for (i = 0; i < mbs; i++) {
18970e95ead3SHong Zhang       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1898421480d9SBarry Smith       d  = aa[adiag[i]];
18990e95ead3SHong Zhang       rs = -PetscAbsScalar(d) - PetscRealPart(d);
19000e95ead3SHong Zhang       v  = aa + ai[i];
19010e95ead3SHong Zhang       nz = ai[i + 1] - ai[i];
19022205254eSKarl Rupp       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
19030e95ead3SHong Zhang       if (rs > sctx.shift_top) sctx.shift_top = rs;
19040e95ead3SHong Zhang     }
19050e95ead3SHong Zhang     sctx.shift_top *= 1.1;
19060e95ead3SHong Zhang     sctx.nshift_max = 5;
19070e95ead3SHong Zhang     sctx.shift_lo   = 0.;
19080e95ead3SHong Zhang     sctx.shift_hi   = 1.;
19090e95ead3SHong Zhang   }
1910ee45ca4aSHong Zhang 
19119566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(ip, &rip));
19129566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iip, &riip));
19132ed133dbSHong Zhang 
19142ed133dbSHong Zhang   /* initialization */
19159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
19160e95ead3SHong Zhang 
19172ed133dbSHong Zhang   do {
191807b50cabSHong Zhang     sctx.newshift = PETSC_FALSE;
19190e95ead3SHong Zhang 
1920c5546cabSHong Zhang     for (i = 0; i < mbs; i++) jl[i] = mbs;
1921c5546cabSHong Zhang     il[0] = 0;
19222ed133dbSHong Zhang 
19232ed133dbSHong Zhang     for (k = 0; k < mbs; k++) {
1924c5546cabSHong Zhang       /* zero rtmp */
1925c5546cabSHong Zhang       nz    = bi[k + 1] - bi[k];
1926c5546cabSHong Zhang       bjtmp = bj + bi[k];
1927c5546cabSHong Zhang       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1928c5546cabSHong Zhang 
1929c05c3958SHong Zhang       bval = ba + bi[k];
19302ed133dbSHong Zhang       /* initialize k-th row by the perm[k]-th row of A */
19319371c9d4SSatish Balay       jmin = ai[rip[k]];
19329371c9d4SSatish Balay       jmax = ai[rip[k] + 1];
19332ed133dbSHong Zhang       for (j = jmin; j < jmax; j++) {
19349bfd6278SHong Zhang         col = riip[aj[j]];
19352ed133dbSHong Zhang         if (col >= k) { /* only take upper triangular entry */
19362ed133dbSHong Zhang           rtmp[col] = aa[j];
1937c05c3958SHong Zhang           *bval++   = 0.0; /* for in-place factorization */
19382ed133dbSHong Zhang         }
19392ed133dbSHong Zhang       }
194039e3d630SHong Zhang       /* shift the diagonal of the matrix */
1941540703acSHong Zhang       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
19422ed133dbSHong Zhang 
19432ed133dbSHong Zhang       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
19442ed133dbSHong Zhang       dk = rtmp[k];
19452ed133dbSHong Zhang       i  = jl[k]; /* first row to be added to k_th row  */
19462ed133dbSHong Zhang 
19472ed133dbSHong Zhang       while (i < k) {
19482ed133dbSHong Zhang         nexti = jl[i]; /* next row to be added to k_th row */
19492ed133dbSHong Zhang 
19502ed133dbSHong Zhang         /* compute multiplier, update diag(k) and U(i,k) */
19512ed133dbSHong Zhang         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
19522ed133dbSHong Zhang         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
19532ed133dbSHong Zhang         dk += uikdi * ba[ili];
19542ed133dbSHong Zhang         ba[ili] = uikdi; /* -U(i,k) */
19552ed133dbSHong Zhang 
19562ed133dbSHong Zhang         /* add multiple of row i to k-th row */
19579371c9d4SSatish Balay         jmin = ili + 1;
19589371c9d4SSatish Balay         jmax = bi[i + 1];
19592ed133dbSHong Zhang         if (jmin < jmax) {
19602ed133dbSHong Zhang           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
19612ed133dbSHong Zhang           /* update il and jl for row i */
19622ed133dbSHong Zhang           il[i] = jmin;
19639371c9d4SSatish Balay           j     = bj[jmin];
19649371c9d4SSatish Balay           jl[i] = jl[j];
19659371c9d4SSatish Balay           jl[j] = i;
19662ed133dbSHong Zhang         }
19672ed133dbSHong Zhang         i = nexti;
19682ed133dbSHong Zhang       }
19692ed133dbSHong Zhang 
19700697b6caSHong Zhang       /* shift the diagonals when zero pivot is detected */
19710697b6caSHong Zhang       /* compute rs=sum of abs(off-diagonal) */
19720697b6caSHong Zhang       rs   = 0.0;
19733c9e8598SHong Zhang       jmin = bi[k] + 1;
19743c9e8598SHong Zhang       nz   = bi[k + 1] - jmin;
19753c9e8598SHong Zhang       bcol = bj + jmin;
1976ad540459SPierre Jolivet       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
1977540703acSHong Zhang 
1978540703acSHong Zhang       sctx.rs = rs;
1979540703acSHong Zhang       sctx.pv = dk;
19809566063dSJacob Faibussowitsch       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
198107b50cabSHong Zhang       if (sctx.newshift) break;
19820e95ead3SHong Zhang       dk = sctx.pv;
19833c9e8598SHong Zhang 
19843c9e8598SHong Zhang       /* copy data into U(k,:) */
198539e3d630SHong Zhang       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
19869371c9d4SSatish Balay       jmin      = bi[k] + 1;
19879371c9d4SSatish Balay       jmax      = bi[k + 1];
198839e3d630SHong Zhang       if (jmin < jmax) {
198939e3d630SHong Zhang         for (j = jmin; j < jmax; j++) {
19909371c9d4SSatish Balay           col   = bj[j];
19919371c9d4SSatish Balay           ba[j] = rtmp[col];
19923c9e8598SHong Zhang         }
199339e3d630SHong Zhang         /* add the k-th row into il and jl */
19943c9e8598SHong Zhang         il[k] = jmin;
19959371c9d4SSatish Balay         i     = bj[jmin];
19969371c9d4SSatish Balay         jl[k] = jl[i];
19979371c9d4SSatish Balay         jl[i] = k;
19983c9e8598SHong Zhang       }
19993c9e8598SHong Zhang     }
200007b50cabSHong Zhang   } while (sctx.newshift);
20010e95ead3SHong Zhang 
20029566063dSJacob Faibussowitsch   PetscCall(PetscFree3(rtmp, il, jl));
20039566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(ip, &rip));
20049566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iip, &riip));
2005b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2006db4efbfdSBarry Smith 
20079566063dSJacob Faibussowitsch   PetscCall(ISIdentity(ip, &perm_identity));
2008db4efbfdSBarry Smith   if (perm_identity) {
20090a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
20100a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
20110a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
20120a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2013db4efbfdSBarry Smith   } else {
20140a3c351aSHong Zhang     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
20150a3c351aSHong Zhang     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
20160a3c351aSHong Zhang     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
20170a3c351aSHong Zhang     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2018db4efbfdSBarry Smith   }
2019db4efbfdSBarry Smith 
20203c9e8598SHong Zhang   C->assembled    = PETSC_TRUE;
20213c9e8598SHong Zhang   C->preallocated = PETSC_TRUE;
20222205254eSKarl Rupp 
20239566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(C->rmap->n));
2024540703acSHong Zhang   if (sctx.nshift) {
2025f4db908eSBarry Smith     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
20269566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2027f4db908eSBarry Smith     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
20289566063dSJacob Faibussowitsch       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
20290697b6caSHong Zhang     }
20303c9e8598SHong Zhang   }
20313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
20323c9e8598SHong Zhang }
2033a6175056SHong Zhang 
20348a76ed4fSHong Zhang /*
20358a76ed4fSHong Zhang    icc() under revised new data structure.
20368a76ed4fSHong Zhang    Factored arrays bj and ba are stored as
20378a76ed4fSHong Zhang      U(0,:),...,U(i,:),U(n-1,:)
20388a76ed4fSHong Zhang 
20398a76ed4fSHong Zhang    ui=fact->i is an array of size n+1, in which
20408a76ed4fSHong Zhang    ui+
20418a76ed4fSHong Zhang      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
20428a76ed4fSHong Zhang      ui[n]:  points to U(n-1,n-1)+1
20438a76ed4fSHong Zhang 
20448a76ed4fSHong Zhang   udiag=fact->diag is an array of size n,in which
20458a76ed4fSHong Zhang      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
20468a76ed4fSHong Zhang 
20478a76ed4fSHong Zhang    U(i,:) contains udiag[i] as its last entry, i.e.,
20488a76ed4fSHong Zhang     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
20498a76ed4fSHong Zhang */
20508a76ed4fSHong Zhang 
MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)2051d71ae5a4SJacob Faibussowitsch PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2052d71ae5a4SJacob Faibussowitsch {
20530968510aSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2054ed59401aSHong Zhang   Mat_SeqSBAIJ      *b;
2055421480d9SBarry Smith   PetscBool          perm_identity;
20565f44c854SHong Zhang   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2057421480d9SBarry Smith   const PetscInt    *rip, *riip, *adiag;
2058ed59401aSHong Zhang   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2059421480d9SBarry Smith   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
20605a8e39fbSHong Zhang   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2061ed59401aSHong Zhang   PetscReal          fill = info->fill, levels = info->levels;
20620298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
20630298fd71SBarry Smith   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
20647a48dd6fSHong Zhang   PetscBT            lnkbt;
2065b635d36bSHong Zhang   IS                 iperm;
2066421480d9SBarry Smith   PetscBool          diagDense;
2067a6175056SHong Zhang 
2068a6175056SHong Zhang   PetscFunctionBegin;
206908401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2070421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
2071421480d9SBarry Smith   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
20729566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
20739566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
20747a48dd6fSHong Zhang 
20759f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
20769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
207739e3d630SHong Zhang   ui[0] = 0;
207839e3d630SHong Zhang 
20798a76ed4fSHong Zhang   /* ICC(0) without matrix ordering: simply rearrange column indices */
2080622e2028SHong Zhang   if (!levels && perm_identity) {
20815f44c854SHong Zhang     for (i = 0; i < am; i++) {
2082421480d9SBarry Smith       ncols     = ai[i + 1] - adiag[i];
2083c5546cabSHong Zhang       ui[i + 1] = ui[i] + ncols;
2084c5546cabSHong Zhang       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2085dc1e2a3fSHong Zhang     }
20869566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2087dc1e2a3fSHong Zhang     cols = uj;
2088dc1e2a3fSHong Zhang     for (i = 0; i < am; i++) {
2089421480d9SBarry Smith       aj    = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2090421480d9SBarry Smith       ncols = ai[i + 1] - adiag[i] - 1;
20915f44c854SHong Zhang       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2092910cf402Sprj-       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
20935f44c854SHong Zhang     }
20945f44c854SHong Zhang   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
20959566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(iperm, &riip));
20969566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(perm, &rip));
20975f44c854SHong Zhang 
20985f44c854SHong Zhang     /* initialization */
20999566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(am + 1, &ajtmp));
21005f44c854SHong Zhang 
21015f44c854SHong Zhang     /* jl: linked list for storing indices of the pivot rows
21025f44c854SHong Zhang        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
21039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
21045f44c854SHong Zhang     for (i = 0; i < am; i++) {
21059371c9d4SSatish Balay       jl[i] = am;
21069371c9d4SSatish Balay       il[i] = 0;
21075f44c854SHong Zhang     }
21085f44c854SHong Zhang 
21095f44c854SHong Zhang     /* create and initialize a linked list for storing column indices of the active row k */
21105f44c854SHong Zhang     nlnk = am + 1;
21119566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
21125f44c854SHong Zhang 
211395b5ac22SHong Zhang     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
21149566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
21155f44c854SHong Zhang     current_space = free_space;
21169566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
21175f44c854SHong Zhang     current_space_lvl = free_space_lvl;
21185f44c854SHong Zhang 
21195f44c854SHong Zhang     for (k = 0; k < am; k++) { /* for each active row k */
21205f44c854SHong Zhang       /* initialize lnk by the column indices of row rip[k] of A */
21215f44c854SHong Zhang       nzk   = 0;
21225f44c854SHong Zhang       ncols = ai[rip[k] + 1] - ai[rip[k]];
212328b400f6SJacob Faibussowitsch       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
21245f44c854SHong Zhang       ncols_upper = 0;
21255f44c854SHong Zhang       for (j = 0; j < ncols; j++) {
21265f44c854SHong Zhang         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
21275f44c854SHong Zhang         if (riip[i] >= k) {         /* only take upper triangular entry */
21285f44c854SHong Zhang           ajtmp[ncols_upper] = i;
21295f44c854SHong Zhang           ncols_upper++;
21305f44c854SHong Zhang         }
21315f44c854SHong Zhang       }
21329566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
21335f44c854SHong Zhang       nzk += nlnk;
21345f44c854SHong Zhang 
21355f44c854SHong Zhang       /* update lnk by computing fill-in for each pivot row to be merged in */
21365f44c854SHong Zhang       prow = jl[k]; /* 1st pivot row */
21375f44c854SHong Zhang 
21385f44c854SHong Zhang       while (prow < k) {
21395f44c854SHong Zhang         nextprow = jl[prow];
21405f44c854SHong Zhang 
21415f44c854SHong Zhang         /* merge prow into k-th row */
21425f44c854SHong Zhang         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
21435f44c854SHong Zhang         jmax  = ui[prow + 1];
21445f44c854SHong Zhang         ncols = jmax - jmin;
21455f44c854SHong Zhang         i     = jmin - ui[prow];
21465f44c854SHong Zhang         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
21475f44c854SHong Zhang         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
21485f44c854SHong Zhang         j     = *(uj - 1);
21499566063dSJacob Faibussowitsch         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
21505f44c854SHong Zhang         nzk += nlnk;
21515f44c854SHong Zhang 
21525f44c854SHong Zhang         /* update il and jl for prow */
21535f44c854SHong Zhang         if (jmin < jmax) {
21545f44c854SHong Zhang           il[prow] = jmin;
21559371c9d4SSatish Balay           j        = *cols;
21569371c9d4SSatish Balay           jl[prow] = jl[j];
21579371c9d4SSatish Balay           jl[j]    = prow;
21585f44c854SHong Zhang         }
21595f44c854SHong Zhang         prow = nextprow;
21605f44c854SHong Zhang       }
21615f44c854SHong Zhang 
21625f44c854SHong Zhang       /* if free space is not available, make more free space */
21635f44c854SHong Zhang       if (current_space->local_remaining < nzk) {
21645f44c854SHong Zhang         i = am - k + 1;                                    /* num of unfactored rows */
2165f91af8c7SBarry Smith         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
21669566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space));
21679566063dSJacob Faibussowitsch         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
21685f44c854SHong Zhang         reallocs++;
21695f44c854SHong Zhang       }
21705f44c854SHong Zhang 
21715f44c854SHong Zhang       /* copy data into free_space and free_space_lvl, then initialize lnk */
217208401ef6SPierre Jolivet       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
21739566063dSJacob Faibussowitsch       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
21745f44c854SHong Zhang 
21755f44c854SHong Zhang       /* add the k-th row into il and jl */
21765f44c854SHong Zhang       if (nzk > 1) {
21775f44c854SHong Zhang         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
21789371c9d4SSatish Balay         jl[k] = jl[i];
21799371c9d4SSatish Balay         jl[i] = k;
21805f44c854SHong Zhang         il[k] = ui[k] + 1;
21815f44c854SHong Zhang       }
21825f44c854SHong Zhang       uj_ptr[k]     = current_space->array;
21835f44c854SHong Zhang       uj_lvl_ptr[k] = current_space_lvl->array;
21845f44c854SHong Zhang 
21855f44c854SHong Zhang       current_space->array += nzk;
21865f44c854SHong Zhang       current_space->local_used += nzk;
21875f44c854SHong Zhang       current_space->local_remaining -= nzk;
21885f44c854SHong Zhang 
21895f44c854SHong Zhang       current_space_lvl->array += nzk;
21905f44c854SHong Zhang       current_space_lvl->local_used += nzk;
21915f44c854SHong Zhang       current_space_lvl->local_remaining -= nzk;
21925f44c854SHong Zhang 
21935f44c854SHong Zhang       ui[k + 1] = ui[k] + nzk;
21945f44c854SHong Zhang     }
21955f44c854SHong Zhang 
21969566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(perm, &rip));
21979566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(iperm, &riip));
21989566063dSJacob Faibussowitsch     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
21999566063dSJacob Faibussowitsch     PetscCall(PetscFree(ajtmp));
22005f44c854SHong Zhang 
22019263d837SHong Zhang     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
22029f0612e4SBarry Smith     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
22039566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
22049566063dSJacob Faibussowitsch     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
22059566063dSJacob Faibussowitsch     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
22065f44c854SHong Zhang 
22075f44c854SHong Zhang   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
22085f44c854SHong Zhang 
22095f44c854SHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
221057508eceSPierre Jolivet   b          = (Mat_SeqSBAIJ *)fact->data;
22119f0612e4SBarry Smith   b->free_ij = PETSC_TRUE;
221284648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
22139f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
22145f44c854SHong Zhang   b->j      = uj;
22155f44c854SHong Zhang   b->i      = ui;
22165f44c854SHong Zhang   b->diag   = udiag;
2217f4259b30SLisandro Dalcin   b->ilen   = NULL;
2218f4259b30SLisandro Dalcin   b->imax   = NULL;
22195f44c854SHong Zhang   b->row    = perm;
22205f44c854SHong Zhang   b->col    = perm;
22219566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
22229566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
22235f44c854SHong Zhang   b->icol          = iperm;
22245f44c854SHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
22252205254eSKarl Rupp 
222684648c2dSPierre Jolivet   PetscCall(PetscMalloc1(am, &b->solve_work));
22272205254eSKarl Rupp 
22285f44c854SHong Zhang   b->maxnz = b->nz = ui[am];
22295f44c854SHong Zhang 
2230f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2231f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
22325f44c854SHong Zhang   if (ai[am] != 0) {
22336a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
223495b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
22355f44c854SHong Zhang   } else {
2236f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
22375f44c854SHong Zhang   }
22389263d837SHong Zhang #if defined(PETSC_USE_INFO)
22399263d837SHong Zhang   if (ai[am] != 0) {
22409263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
22419566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
22429566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
22439566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
22449263d837SHong Zhang   } else {
22459566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
22469263d837SHong Zhang   }
22479263d837SHong Zhang #endif
224835233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
22493ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
22505f44c854SHong Zhang }
22515f44c854SHong Zhang 
MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo * info)2252d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2253d71ae5a4SJacob Faibussowitsch {
22540be760fbSHong Zhang   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
22550be760fbSHong Zhang   Mat_SeqSBAIJ      *b;
2256421480d9SBarry Smith   PetscBool          perm_identity;
22570be760fbSHong Zhang   PetscReal          fill = info->fill;
22580be760fbSHong Zhang   const PetscInt    *rip, *riip;
22590be760fbSHong Zhang   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
22600be760fbSHong Zhang   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
22610be760fbSHong Zhang   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
22620298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
22630be760fbSHong Zhang   PetscBT            lnkbt;
22640be760fbSHong Zhang   IS                 iperm;
2265421480d9SBarry Smith   PetscBool          diagDense;
22660be760fbSHong Zhang 
22670be760fbSHong Zhang   PetscFunctionBegin;
226808401ef6SPierre Jolivet   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2269421480d9SBarry Smith   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
2270421480d9SBarry Smith   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
22719186b105SHong Zhang 
22720be760fbSHong Zhang   /* check whether perm is the identity mapping */
22739566063dSJacob Faibussowitsch   PetscCall(ISIdentity(perm, &perm_identity));
22749566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
22759566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iperm, &riip));
22769566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(perm, &rip));
22770be760fbSHong Zhang 
22780be760fbSHong Zhang   /* initialization */
22799f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
22809566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(am + 1, &udiag));
22810be760fbSHong Zhang   ui[0] = 0;
22820be760fbSHong Zhang 
22830be760fbSHong Zhang   /* jl: linked list for storing indices of the pivot rows
22840be760fbSHong Zhang      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
22859566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
22860be760fbSHong Zhang   for (i = 0; i < am; i++) {
22879371c9d4SSatish Balay     jl[i] = am;
22889371c9d4SSatish Balay     il[i] = 0;
22890be760fbSHong Zhang   }
22900be760fbSHong Zhang 
22910be760fbSHong Zhang   /* create and initialize a linked list for storing column indices of the active row k */
22920be760fbSHong Zhang   nlnk = am + 1;
22939566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
22940be760fbSHong Zhang 
229595b5ac22SHong Zhang   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
22969566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
22970be760fbSHong Zhang   current_space = free_space;
22980be760fbSHong Zhang 
22990be760fbSHong Zhang   for (k = 0; k < am; k++) { /* for each active row k */
23000be760fbSHong Zhang     /* initialize lnk by the column indices of row rip[k] of A */
23010be760fbSHong Zhang     nzk   = 0;
23020be760fbSHong Zhang     ncols = ai[rip[k] + 1] - ai[rip[k]];
230328b400f6SJacob Faibussowitsch     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
23040be760fbSHong Zhang     ncols_upper = 0;
23050be760fbSHong Zhang     for (j = 0; j < ncols; j++) {
23060be760fbSHong Zhang       i = riip[*(aj + ai[rip[k]] + j)];
23070be760fbSHong Zhang       if (i >= k) { /* only take upper triangular entry */
23080be760fbSHong Zhang         cols[ncols_upper] = i;
23090be760fbSHong Zhang         ncols_upper++;
23100be760fbSHong Zhang       }
23110be760fbSHong Zhang     }
23129566063dSJacob Faibussowitsch     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
23130be760fbSHong Zhang     nzk += nlnk;
23140be760fbSHong Zhang 
23150be760fbSHong Zhang     /* update lnk by computing fill-in for each pivot row to be merged in */
23160be760fbSHong Zhang     prow = jl[k]; /* 1st pivot row */
23170be760fbSHong Zhang 
23180be760fbSHong Zhang     while (prow < k) {
23190be760fbSHong Zhang       nextprow = jl[prow];
23200be760fbSHong Zhang       /* merge prow into k-th row */
23210be760fbSHong Zhang       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
23220be760fbSHong Zhang       jmax   = ui[prow + 1];
23230be760fbSHong Zhang       ncols  = jmax - jmin;
23240be760fbSHong Zhang       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
23259566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
23260be760fbSHong Zhang       nzk += nlnk;
23270be760fbSHong Zhang 
23280be760fbSHong Zhang       /* update il and jl for prow */
23290be760fbSHong Zhang       if (jmin < jmax) {
23300be760fbSHong Zhang         il[prow] = jmin;
23312205254eSKarl Rupp         j        = *uj_ptr;
23322205254eSKarl Rupp         jl[prow] = jl[j];
23332205254eSKarl Rupp         jl[j]    = prow;
23340be760fbSHong Zhang       }
23350be760fbSHong Zhang       prow = nextprow;
23360be760fbSHong Zhang     }
23370be760fbSHong Zhang 
23380be760fbSHong Zhang     /* if free space is not available, make more free space */
23390be760fbSHong Zhang     if (current_space->local_remaining < nzk) {
23400be760fbSHong Zhang       i = am - k + 1;                                    /* num of unfactored rows */
2341f91af8c7SBarry Smith       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
23429566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(i, &current_space));
23430be760fbSHong Zhang       reallocs++;
23440be760fbSHong Zhang     }
23450be760fbSHong Zhang 
23460be760fbSHong Zhang     /* copy data into free space, then initialize lnk */
23479566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
23480be760fbSHong Zhang 
23490be760fbSHong Zhang     /* add the k-th row into il and jl */
23507b056e98SHong Zhang     if (nzk > 1) {
23510be760fbSHong Zhang       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
23529371c9d4SSatish Balay       jl[k] = jl[i];
23539371c9d4SSatish Balay       jl[i] = k;
23540be760fbSHong Zhang       il[k] = ui[k] + 1;
23550be760fbSHong Zhang     }
23560be760fbSHong Zhang     ui_ptr[k] = current_space->array;
23572205254eSKarl Rupp 
23580be760fbSHong Zhang     current_space->array += nzk;
23590be760fbSHong Zhang     current_space->local_used += nzk;
23600be760fbSHong Zhang     current_space->local_remaining -= nzk;
23610be760fbSHong Zhang 
23620be760fbSHong Zhang     ui[k + 1] = ui[k] + nzk;
23630be760fbSHong Zhang   }
23640be760fbSHong Zhang 
23659566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(perm, &rip));
23669566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iperm, &riip));
23679566063dSJacob Faibussowitsch   PetscCall(PetscFree4(ui_ptr, jl, il, cols));
23680be760fbSHong Zhang 
23699263d837SHong Zhang   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
237084648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
23719566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
23729566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
23730be760fbSHong Zhang 
23740be760fbSHong Zhang   /* put together the new matrix in MATSEQSBAIJ format */
2375f284f12aSHong Zhang   b          = (Mat_SeqSBAIJ *)fact->data;
23760be760fbSHong Zhang   b->free_ij = PETSC_TRUE;
237784648c2dSPierre Jolivet   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
23789f0612e4SBarry Smith   b->free_a = PETSC_TRUE;
23790be760fbSHong Zhang   b->j      = uj;
23800be760fbSHong Zhang   b->i      = ui;
23810be760fbSHong Zhang   b->diag   = udiag;
2382f4259b30SLisandro Dalcin   b->ilen   = NULL;
2383f4259b30SLisandro Dalcin   b->imax   = NULL;
23840be760fbSHong Zhang   b->row    = perm;
23850be760fbSHong Zhang   b->col    = perm;
238626fbe8dcSKarl Rupp 
23879566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
23889566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)perm));
23892205254eSKarl Rupp 
23900be760fbSHong Zhang   b->icol          = iperm;
23910be760fbSHong Zhang   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
239226fbe8dcSKarl Rupp 
239384648c2dSPierre Jolivet   PetscCall(PetscMalloc1(am, &b->solve_work));
23942205254eSKarl Rupp 
23950be760fbSHong Zhang   b->maxnz = b->nz = ui[am];
23960be760fbSHong Zhang 
2397f284f12aSHong Zhang   fact->info.factor_mallocs   = reallocs;
2398f284f12aSHong Zhang   fact->info.fill_ratio_given = fill;
23990be760fbSHong Zhang   if (ai[am] != 0) {
24006a69fef8SHong Zhang     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
240195b5ac22SHong Zhang     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
24020be760fbSHong Zhang   } else {
2403f284f12aSHong Zhang     fact->info.fill_ratio_needed = 0.0;
24040be760fbSHong Zhang   }
24059263d837SHong Zhang #if defined(PETSC_USE_INFO)
24069263d837SHong Zhang   if (ai[am] != 0) {
24079263d837SHong Zhang     PetscReal af = fact->info.fill_ratio_needed;
24089566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
24099566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
24109566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
24119263d837SHong Zhang   } else {
24129566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
24139263d837SHong Zhang   }
24149263d837SHong Zhang #endif
241535233d99SShri Abhyankar   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
24163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
24170be760fbSHong Zhang }
24180be760fbSHong Zhang 
MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)2419d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
2420d71ae5a4SJacob Faibussowitsch {
2421813a1f2bSShri Abhyankar   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
2422813a1f2bSShri Abhyankar   PetscInt           n  = A->rmap->n;
2423813a1f2bSShri Abhyankar   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
2424813a1f2bSShri Abhyankar   PetscScalar       *x, sum;
2425813a1f2bSShri Abhyankar   const PetscScalar *b;
2426b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
2427813a1f2bSShri Abhyankar   PetscInt           i, nz;
2428813a1f2bSShri Abhyankar 
2429813a1f2bSShri Abhyankar   PetscFunctionBegin;
24303ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2431813a1f2bSShri Abhyankar 
2432b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
24339566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
24349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
2435813a1f2bSShri Abhyankar 
2436813a1f2bSShri Abhyankar   /* forward solve the lower triangular */
2437813a1f2bSShri Abhyankar   x[0] = b[0];
2438813a1f2bSShri Abhyankar   v    = aa;
2439813a1f2bSShri Abhyankar   vi   = aj;
2440813a1f2bSShri Abhyankar   for (i = 1; i < n; i++) {
2441813a1f2bSShri Abhyankar     nz  = ai[i + 1] - ai[i];
2442813a1f2bSShri Abhyankar     sum = b[i];
2443813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2444813a1f2bSShri Abhyankar     v += nz;
2445813a1f2bSShri Abhyankar     vi += nz;
2446813a1f2bSShri Abhyankar     x[i] = sum;
2447813a1f2bSShri Abhyankar   }
2448813a1f2bSShri Abhyankar 
2449813a1f2bSShri Abhyankar   /* backward solve the upper triangular */
245062fbd6afSShri Abhyankar   for (i = n - 1; i >= 0; i--) {
24513c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
24523c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
2453813a1f2bSShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
2454813a1f2bSShri Abhyankar     sum = x[i];
2455813a1f2bSShri Abhyankar     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
24563c0119dfSShri Abhyankar     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
2457813a1f2bSShri Abhyankar   }
2458813a1f2bSShri Abhyankar 
24599566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2460b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
24619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
24629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
24633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2464813a1f2bSShri Abhyankar }
2465813a1f2bSShri Abhyankar 
MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)2466d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
2467d71ae5a4SJacob Faibussowitsch {
2468f268cba8SShri Abhyankar   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
2469f268cba8SShri Abhyankar   IS                 iscol = a->col, isrow = a->row;
2470f268cba8SShri Abhyankar   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
2471f268cba8SShri Abhyankar   const PetscInt    *rout, *cout, *r, *c;
2472f268cba8SShri Abhyankar   PetscScalar       *x, *tmp, sum;
2473f268cba8SShri Abhyankar   const PetscScalar *b;
2474b65878eeSJunchao Zhang   const MatScalar   *aa, *v;
2475f268cba8SShri Abhyankar 
2476f268cba8SShri Abhyankar   PetscFunctionBegin;
24773ba16761SJacob Faibussowitsch   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2478f268cba8SShri Abhyankar 
2479b65878eeSJunchao Zhang   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
24809566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
24819566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(xx, &x));
2482f268cba8SShri Abhyankar   tmp = a->solve_work;
2483f268cba8SShri Abhyankar 
24849371c9d4SSatish Balay   PetscCall(ISGetIndices(isrow, &rout));
24859371c9d4SSatish Balay   r = rout;
24869371c9d4SSatish Balay   PetscCall(ISGetIndices(iscol, &cout));
24879371c9d4SSatish Balay   c = cout;
2488f268cba8SShri Abhyankar 
2489f268cba8SShri Abhyankar   /* forward solve the lower triangular */
2490f268cba8SShri Abhyankar   tmp[0] = b[r[0]];
2491f268cba8SShri Abhyankar   v      = aa;
2492f268cba8SShri Abhyankar   vi     = aj;
2493f268cba8SShri Abhyankar   for (i = 1; i < n; i++) {
2494f268cba8SShri Abhyankar     nz  = ai[i + 1] - ai[i];
2495f268cba8SShri Abhyankar     sum = b[r[i]];
2496f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2497f268cba8SShri Abhyankar     tmp[i] = sum;
24989371c9d4SSatish Balay     v += nz;
24999371c9d4SSatish Balay     vi += nz;
2500f268cba8SShri Abhyankar   }
2501f268cba8SShri Abhyankar 
2502f268cba8SShri Abhyankar   /* backward solve the upper triangular */
2503f268cba8SShri Abhyankar   for (i = n - 1; i >= 0; i--) {
25043c0119dfSShri Abhyankar     v   = aa + adiag[i + 1] + 1;
25053c0119dfSShri Abhyankar     vi  = aj + adiag[i + 1] + 1;
2506f268cba8SShri Abhyankar     nz  = adiag[i] - adiag[i + 1] - 1;
2507f268cba8SShri Abhyankar     sum = tmp[i];
2508f268cba8SShri Abhyankar     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2509f268cba8SShri Abhyankar     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
2510f268cba8SShri Abhyankar   }
2511f268cba8SShri Abhyankar 
25129566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &rout));
25139566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol, &cout));
2514b65878eeSJunchao Zhang   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
25159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
25169566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(xx, &x));
25179566063dSJacob Faibussowitsch   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
25183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2519f268cba8SShri Abhyankar }
2520