xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision fdf6c4e30aafdbc795e4f76379caa977fd5cdf5a)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 /*
8       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9 
10       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11 */
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
15   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
16   const PetscInt    *ai = a->i, *aj = a->j;
17   const PetscScalar *aa = a->a;
18   PetscBool         *done;
19   PetscReal         best,past = 0,future;
20 
21   PetscFunctionBegin;
22   /* pick initial row */
23   best = -1;
24   for (i=0; i<n; i++) {
25     future = 0.0;
26     for (j=ai[i]; j<ai[i+1]; j++) {
27       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28       else              past  = PetscAbsScalar(aa[j]);
29     }
30     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31     if (past/future > best) {
32       best    = past/future;
33       current = i;
34     }
35   }
36 
37   PetscCall(PetscMalloc1(n,&done));
38   PetscCall(PetscArrayzero(done,n));
39   PetscCall(PetscMalloc1(n,&order));
40   order[0] = current;
41   for (i=0; i<n-1; i++) {
42     done[current] = PETSC_TRUE;
43     best          = -1;
44     /* loop over all neighbors of current pivot */
45     for (j=ai[current]; j<ai[current+1]; j++) {
46       jj = aj[j];
47       if (done[jj]) continue;
48       /* loop over columns of potential next row computing weights for below and above diagonal */
49       past = future = 0.0;
50       for (k=ai[jj]; k<ai[jj+1]; k++) {
51         kk = aj[k];
52         if (done[kk]) past += PetscAbsScalar(aa[k]);
53         else if (kk != jj) future += PetscAbsScalar(aa[k]);
54       }
55       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56       if (past/future > best) {
57         best       = past/future;
58         newcurrent = jj;
59       }
60     }
61     if (best == -1) { /* no neighbors to select from so select best of all that remain */
62       best = -1;
63       for (k=0; k<n; k++) {
64         if (done[k]) continue;
65         future = 0.0;
66         past   = 0.0;
67         for (j=ai[k]; j<ai[k+1]; j++) {
68           kk = aj[j];
69           if (done[kk])       past += PetscAbsScalar(aa[j]);
70           else if (kk != k) future += PetscAbsScalar(aa[j]);
71         }
72         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73         if (past/future > best) {
74           best       = past/future;
75           newcurrent = k;
76         }
77       }
78     }
79     PetscCheck(current != newcurrent,PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
80     current    = newcurrent;
81     order[i+1] = current;
82   }
83   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow));
84   *icol = *irow;
85   PetscCall(PetscObjectReference((PetscObject)*irow));
86   PetscCall(PetscFree(done));
87   PetscCall(PetscFree(order));
88   PetscFunctionReturn(0);
89 }
90 
91 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A,MatSolverType *type)
92 {
93   PetscFunctionBegin;
94   *type = MATSOLVERPETSC;
95   PetscFunctionReturn(0);
96 }
97 
98 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
99 {
100   PetscInt       n = A->rmap->n;
101 
102   PetscFunctionBegin;
103 #if defined(PETSC_USE_COMPLEX)
104   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || (ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC),PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported");
105 #endif
106   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B));
107   PetscCall(MatSetSizes(*B,n,n,n,n));
108   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
109     PetscCall(MatSetType(*B,MATSEQAIJ));
110 
111     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
112     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
113 
114     PetscCall(MatSetBlockSizesFromMats(*B,A,A));
115     PetscCall(PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_LU]));
116     PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILU]));
117     PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
118   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
119     PetscCall(MatSetType(*B,MATSEQSBAIJ));
120     PetscCall(MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL));
121 
122     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
123     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
124     PetscCall(PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
125     PetscCall(PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ICC]));
126   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
127   (*B)->factortype = ftype;
128 
129   PetscCall(PetscFree((*B)->solvertype));
130   PetscCall(PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype));
131   (*B)->canuseordering = PETSC_TRUE;
132   PetscCall(PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverType_C",MatFactorGetSolverType_petsc));
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
137 {
138   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
139   IS                 isicol;
140   const PetscInt     *r,*ic;
141   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
142   PetscInt           *bi,*bj,*ajtmp;
143   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
144   PetscReal          f;
145   PetscInt           nlnk,*lnk,k,**bi_ptr;
146   PetscFreeSpaceList free_space=NULL,current_space=NULL;
147   PetscBT            lnkbt;
148   PetscBool          missing;
149 
150   PetscFunctionBegin;
151   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
152   PetscCall(MatMissingDiagonal(A,&missing,&i));
153   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
154 
155   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
156   PetscCall(ISGetIndices(isrow,&r));
157   PetscCall(ISGetIndices(isicol,&ic));
158 
159   /* get new row pointers */
160   PetscCall(PetscMalloc1(n+1,&bi));
161   bi[0] = 0;
162 
163   /* bdiag is location of diagonal in factor */
164   PetscCall(PetscMalloc1(n+1,&bdiag));
165   bdiag[0] = 0;
166 
167   /* linked list for storing column indices of the active row */
168   nlnk = n + 1;
169   PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt));
170 
171   PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im));
172 
173   /* initial FreeSpace size is f*(ai[n]+1) */
174   f             = info->fill;
175   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
176   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
177   current_space = free_space;
178 
179   for (i=0; i<n; i++) {
180     /* copy previous fill into linked list */
181     nzi = 0;
182     nnz = ai[r[i]+1] - ai[r[i]];
183     ajtmp = aj + ai[r[i]];
184     PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt));
185     nzi  += nlnk;
186 
187     /* add pivot rows into linked list */
188     row = lnk[n];
189     while (row < i) {
190       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
191       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
192       PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im));
193       nzi  += nlnk;
194       row   = lnk[row];
195     }
196     bi[i+1] = bi[i] + nzi;
197     im[i]   = nzi;
198 
199     /* mark bdiag */
200     nzbd = 0;
201     nnz  = nzi;
202     k    = lnk[n];
203     while (nnz-- && k < i) {
204       nzbd++;
205       k = lnk[k];
206     }
207     bdiag[i] = bi[i] + nzbd;
208 
209     /* if free space is not available, make more free space */
210     if (current_space->local_remaining<nzi) {
211       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
212       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
213       reallocs++;
214     }
215 
216     /* copy data into free space, then initialize lnk */
217     PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt));
218 
219     bi_ptr[i]                       = current_space->array;
220     current_space->array           += nzi;
221     current_space->local_used      += nzi;
222     current_space->local_remaining -= nzi;
223   }
224 #if defined(PETSC_USE_INFO)
225   if (ai[n] != 0) {
226     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
227     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
228     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
229     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af));
230     PetscCall(PetscInfo(A,"for best performance.\n"));
231   } else {
232     PetscCall(PetscInfo(A,"Empty matrix\n"));
233   }
234 #endif
235 
236   PetscCall(ISRestoreIndices(isrow,&r));
237   PetscCall(ISRestoreIndices(isicol,&ic));
238 
239   /* destroy list of free space and other temporary array(s) */
240   PetscCall(PetscMalloc1(bi[n]+1,&bj));
241   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
242   PetscCall(PetscLLDestroy(lnk,lnkbt));
243   PetscCall(PetscFree2(bi_ptr,im));
244 
245   /* put together the new matrix */
246   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL));
247   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol));
248   b    = (Mat_SeqAIJ*)(B)->data;
249 
250   b->free_a       = PETSC_TRUE;
251   b->free_ij      = PETSC_TRUE;
252   b->singlemalloc = PETSC_FALSE;
253 
254   PetscCall(PetscMalloc1(bi[n]+1,&b->a));
255   b->j    = bj;
256   b->i    = bi;
257   b->diag = bdiag;
258   b->ilen = NULL;
259   b->imax = NULL;
260   b->row  = isrow;
261   b->col  = iscol;
262   PetscCall(PetscObjectReference((PetscObject)isrow));
263   PetscCall(PetscObjectReference((PetscObject)iscol));
264   b->icol = isicol;
265   PetscCall(PetscMalloc1(n+1,&b->solve_work));
266 
267   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
268   PetscCall(PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar))));
269   b->maxnz = b->nz = bi[n];
270 
271   (B)->factortype            = MAT_FACTOR_LU;
272   (B)->info.factor_mallocs   = reallocs;
273   (B)->info.fill_ratio_given = f;
274 
275   if (ai[n]) {
276     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
277   } else {
278     (B)->info.fill_ratio_needed = 0.0;
279   }
280   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
281   if (a->inode.size) {
282     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
283   }
284   PetscFunctionReturn(0);
285 }
286 
287 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
288 {
289   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
290   IS                 isicol;
291   const PetscInt     *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
292   PetscInt           i,n=A->rmap->n;
293   PetscInt           *bi,*bj;
294   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
295   PetscReal          f;
296   PetscInt           nlnk,*lnk,k,**bi_ptr;
297   PetscFreeSpaceList free_space=NULL,current_space=NULL;
298   PetscBT            lnkbt;
299   PetscBool          missing;
300 
301   PetscFunctionBegin;
302   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
303   PetscCall(MatMissingDiagonal(A,&missing,&i));
304   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
305 
306   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
307   PetscCall(ISGetIndices(isrow,&r));
308   PetscCall(ISGetIndices(isicol,&ic));
309 
310   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
311   PetscCall(PetscMalloc1(n+1,&bi));
312   PetscCall(PetscMalloc1(n+1,&bdiag));
313   bi[0] = bdiag[0] = 0;
314 
315   /* linked list for storing column indices of the active row */
316   nlnk = n + 1;
317   PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt));
318 
319   PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im));
320 
321   /* initial FreeSpace size is f*(ai[n]+1) */
322   f             = info->fill;
323   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
324   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
325   current_space = free_space;
326 
327   for (i=0; i<n; i++) {
328     /* copy previous fill into linked list */
329     nzi = 0;
330     nnz = ai[r[i]+1] - ai[r[i]];
331     ajtmp = aj + ai[r[i]];
332     PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt));
333     nzi  += nlnk;
334 
335     /* add pivot rows into linked list */
336     row = lnk[n];
337     while (row < i) {
338       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
339       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
340       PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im));
341       nzi  += nlnk;
342       row   = lnk[row];
343     }
344     bi[i+1] = bi[i] + nzi;
345     im[i]   = nzi;
346 
347     /* mark bdiag */
348     nzbd = 0;
349     nnz  = nzi;
350     k    = lnk[n];
351     while (nnz-- && k < i) {
352       nzbd++;
353       k = lnk[k];
354     }
355     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
356 
357     /* if free space is not available, make more free space */
358     if (current_space->local_remaining<nzi) {
359       /* estimated additional space needed */
360       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
361       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
362       reallocs++;
363     }
364 
365     /* copy data into free space, then initialize lnk */
366     PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt));
367 
368     bi_ptr[i]                       = current_space->array;
369     current_space->array           += nzi;
370     current_space->local_used      += nzi;
371     current_space->local_remaining -= nzi;
372   }
373 
374   PetscCall(ISRestoreIndices(isrow,&r));
375   PetscCall(ISRestoreIndices(isicol,&ic));
376 
377   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
378   PetscCall(PetscMalloc1(bi[n]+1,&bj));
379   PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag));
380   PetscCall(PetscLLDestroy(lnk,lnkbt));
381   PetscCall(PetscFree2(bi_ptr,im));
382 
383   /* put together the new matrix */
384   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL));
385   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol));
386   b    = (Mat_SeqAIJ*)(B)->data;
387 
388   b->free_a       = PETSC_TRUE;
389   b->free_ij      = PETSC_TRUE;
390   b->singlemalloc = PETSC_FALSE;
391 
392   PetscCall(PetscMalloc1(bdiag[0]+1,&b->a));
393 
394   b->j    = bj;
395   b->i    = bi;
396   b->diag = bdiag;
397   b->ilen = NULL;
398   b->imax = NULL;
399   b->row  = isrow;
400   b->col  = iscol;
401   PetscCall(PetscObjectReference((PetscObject)isrow));
402   PetscCall(PetscObjectReference((PetscObject)iscol));
403   b->icol = isicol;
404   PetscCall(PetscMalloc1(n+1,&b->solve_work));
405 
406   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
407   PetscCall(PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar))));
408   b->maxnz = b->nz = bdiag[0]+1;
409 
410   B->factortype            = MAT_FACTOR_LU;
411   B->info.factor_mallocs   = reallocs;
412   B->info.fill_ratio_given = f;
413 
414   if (ai[n]) {
415     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
416   } else {
417     B->info.fill_ratio_needed = 0.0;
418   }
419 #if defined(PETSC_USE_INFO)
420   if (ai[n] != 0) {
421     PetscReal af = B->info.fill_ratio_needed;
422     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
423     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
424     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af));
425     PetscCall(PetscInfo(A,"for best performance.\n"));
426   } else {
427     PetscCall(PetscInfo(A,"Empty matrix\n"));
428   }
429 #endif
430   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
431   if (a->inode.size) {
432     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
433   }
434   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
435   PetscFunctionReturn(0);
436 }
437 
438 /*
439     Trouble in factorization, should we dump the original matrix?
440 */
441 PetscErrorCode MatFactorDumpMatrix(Mat A)
442 {
443   PetscBool      flg = PETSC_FALSE;
444 
445   PetscFunctionBegin;
446   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL));
447   if (flg) {
448     PetscViewer viewer;
449     char        filename[PETSC_MAX_PATH_LEN];
450 
451     PetscCall(PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank));
452     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer));
453     PetscCall(MatView(A,viewer));
454     PetscCall(PetscViewerDestroy(&viewer));
455   }
456   PetscFunctionReturn(0);
457 }
458 
459 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
460 {
461   Mat             C     =B;
462   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
463   IS              isrow = b->row,isicol = b->icol;
464   const PetscInt  *r,*ic,*ics;
465   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
466   PetscInt        i,j,k,nz,nzL,row,*pj;
467   const PetscInt  *ajtmp,*bjtmp;
468   MatScalar       *rtmp,*pc,multiplier,*pv;
469   const MatScalar *aa=a->a,*v;
470   PetscBool       row_identity,col_identity;
471   FactorShiftCtx  sctx;
472   const PetscInt  *ddiag;
473   PetscReal       rs;
474   MatScalar       d;
475 
476   PetscFunctionBegin;
477   /* MatPivotSetUp(): initialize shift context sctx */
478   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
479 
480   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
481     ddiag          = a->diag;
482     sctx.shift_top = info->zeropivot;
483     for (i=0; i<n; i++) {
484       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
485       d  = (aa)[ddiag[i]];
486       rs = -PetscAbsScalar(d) - PetscRealPart(d);
487       v  = aa+ai[i];
488       nz = ai[i+1] - ai[i];
489       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
490       if (rs>sctx.shift_top) sctx.shift_top = rs;
491     }
492     sctx.shift_top *= 1.1;
493     sctx.nshift_max = 5;
494     sctx.shift_lo   = 0.;
495     sctx.shift_hi   = 1.;
496   }
497 
498   PetscCall(ISGetIndices(isrow,&r));
499   PetscCall(ISGetIndices(isicol,&ic));
500   PetscCall(PetscMalloc1(n+1,&rtmp));
501   ics  = ic;
502 
503   do {
504     sctx.newshift = PETSC_FALSE;
505     for (i=0; i<n; i++) {
506       /* zero rtmp */
507       /* L part */
508       nz    = bi[i+1] - bi[i];
509       bjtmp = bj + bi[i];
510       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
511 
512       /* U part */
513       nz    = bdiag[i]-bdiag[i+1];
514       bjtmp = bj + bdiag[i+1]+1;
515       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
516 
517       /* load in initial (unfactored row) */
518       nz    = ai[r[i]+1] - ai[r[i]];
519       ajtmp = aj + ai[r[i]];
520       v     = aa + ai[r[i]];
521       for (j=0; j<nz; j++) {
522         rtmp[ics[ajtmp[j]]] = v[j];
523       }
524       /* ZeropivotApply() */
525       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
526 
527       /* elimination */
528       bjtmp = bj + bi[i];
529       row   = *bjtmp++;
530       nzL   = bi[i+1] - bi[i];
531       for (k=0; k < nzL; k++) {
532         pc = rtmp + row;
533         if (*pc != 0.0) {
534           pv         = b->a + bdiag[row];
535           multiplier = *pc * (*pv);
536           *pc        = multiplier;
537 
538           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
539           pv = b->a + bdiag[row+1]+1;
540           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
541 
542           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
543           PetscCall(PetscLogFlops(1+2.0*nz));
544         }
545         row = *bjtmp++;
546       }
547 
548       /* finished row so stick it into b->a */
549       rs = 0.0;
550       /* L part */
551       pv = b->a + bi[i];
552       pj = b->j + bi[i];
553       nz = bi[i+1] - bi[i];
554       for (j=0; j<nz; j++) {
555         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
556       }
557 
558       /* U part */
559       pv = b->a + bdiag[i+1]+1;
560       pj = b->j + bdiag[i+1]+1;
561       nz = bdiag[i] - bdiag[i+1]-1;
562       for (j=0; j<nz; j++) {
563         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
564       }
565 
566       sctx.rs = rs;
567       sctx.pv = rtmp[i];
568       PetscCall(MatPivotCheck(B,A,info,&sctx,i));
569       if (sctx.newshift) break; /* break for-loop */
570       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
571 
572       /* Mark diagonal and invert diagonal for simpler triangular solves */
573       pv  = b->a + bdiag[i];
574       *pv = 1.0/rtmp[i];
575 
576     } /* endof for (i=0; i<n; i++) { */
577 
578     /* MatPivotRefine() */
579     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
580       /*
581        * if no shift in this attempt & shifting & started shifting & can refine,
582        * then try lower shift
583        */
584       sctx.shift_hi       = sctx.shift_fraction;
585       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
586       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
587       sctx.newshift       = PETSC_TRUE;
588       sctx.nshift++;
589     }
590   } while (sctx.newshift);
591 
592   PetscCall(PetscFree(rtmp));
593   PetscCall(ISRestoreIndices(isicol,&ic));
594   PetscCall(ISRestoreIndices(isrow,&r));
595 
596   PetscCall(ISIdentity(isrow,&row_identity));
597   PetscCall(ISIdentity(isicol,&col_identity));
598   if (b->inode.size) {
599     C->ops->solve = MatSolve_SeqAIJ_Inode;
600   } else if (row_identity && col_identity) {
601     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
602   } else {
603     C->ops->solve = MatSolve_SeqAIJ;
604   }
605   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
606   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
607   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
608   C->ops->matsolve          = MatMatSolve_SeqAIJ;
609   C->assembled              = PETSC_TRUE;
610   C->preallocated           = PETSC_TRUE;
611 
612   PetscCall(PetscLogFlops(C->cmap->n));
613 
614   /* MatShiftView(A,info,&sctx) */
615   if (sctx.nshift) {
616     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
617       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
618     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
619       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
620     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
621       PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount));
622     }
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
628 {
629   Mat             C     =B;
630   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
631   IS              isrow = b->row,isicol = b->icol;
632   const PetscInt  *r,*ic,*ics;
633   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
634   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
635   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
636   MatScalar       *pv,*rtmp,*pc,multiplier,d;
637   const MatScalar *v,*aa=a->a;
638   PetscReal       rs=0.0;
639   FactorShiftCtx  sctx;
640   const PetscInt  *ddiag;
641   PetscBool       row_identity, col_identity;
642 
643   PetscFunctionBegin;
644   /* MatPivotSetUp(): initialize shift context sctx */
645   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
646 
647   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
648     ddiag          = a->diag;
649     sctx.shift_top = info->zeropivot;
650     for (i=0; i<n; i++) {
651       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
652       d  = (aa)[ddiag[i]];
653       rs = -PetscAbsScalar(d) - PetscRealPart(d);
654       v  = aa+ai[i];
655       nz = ai[i+1] - ai[i];
656       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
657       if (rs>sctx.shift_top) sctx.shift_top = rs;
658     }
659     sctx.shift_top *= 1.1;
660     sctx.nshift_max = 5;
661     sctx.shift_lo   = 0.;
662     sctx.shift_hi   = 1.;
663   }
664 
665   PetscCall(ISGetIndices(isrow,&r));
666   PetscCall(ISGetIndices(isicol,&ic));
667   PetscCall(PetscMalloc1(n+1,&rtmp));
668   ics  = ic;
669 
670   do {
671     sctx.newshift = PETSC_FALSE;
672     for (i=0; i<n; i++) {
673       nz    = bi[i+1] - bi[i];
674       bjtmp = bj + bi[i];
675       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
676 
677       /* load in initial (unfactored row) */
678       nz    = ai[r[i]+1] - ai[r[i]];
679       ajtmp = aj + ai[r[i]];
680       v     = aa + ai[r[i]];
681       for (j=0; j<nz; j++) {
682         rtmp[ics[ajtmp[j]]] = v[j];
683       }
684       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
685 
686       row = *bjtmp++;
687       while  (row < i) {
688         pc = rtmp + row;
689         if (*pc != 0.0) {
690           pv         = b->a + diag_offset[row];
691           pj         = b->j + diag_offset[row] + 1;
692           multiplier = *pc / *pv++;
693           *pc        = multiplier;
694           nz         = bi[row+1] - diag_offset[row] - 1;
695           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
696           PetscCall(PetscLogFlops(1+2.0*nz));
697         }
698         row = *bjtmp++;
699       }
700       /* finished row so stick it into b->a */
701       pv   = b->a + bi[i];
702       pj   = b->j + bi[i];
703       nz   = bi[i+1] - bi[i];
704       diag = diag_offset[i] - bi[i];
705       rs   = 0.0;
706       for (j=0; j<nz; j++) {
707         pv[j] = rtmp[pj[j]];
708         rs   += PetscAbsScalar(pv[j]);
709       }
710       rs -= PetscAbsScalar(pv[diag]);
711 
712       sctx.rs = rs;
713       sctx.pv = pv[diag];
714       PetscCall(MatPivotCheck(B,A,info,&sctx,i));
715       if (sctx.newshift) break;
716       pv[diag] = sctx.pv;
717     }
718 
719     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
720       /*
721        * if no shift in this attempt & shifting & started shifting & can refine,
722        * then try lower shift
723        */
724       sctx.shift_hi       = sctx.shift_fraction;
725       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
726       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
727       sctx.newshift       = PETSC_TRUE;
728       sctx.nshift++;
729     }
730   } while (sctx.newshift);
731 
732   /* invert diagonal entries for simpler triangular solves */
733   for (i=0; i<n; i++) {
734     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
735   }
736   PetscCall(PetscFree(rtmp));
737   PetscCall(ISRestoreIndices(isicol,&ic));
738   PetscCall(ISRestoreIndices(isrow,&r));
739 
740   PetscCall(ISIdentity(isrow,&row_identity));
741   PetscCall(ISIdentity(isicol,&col_identity));
742   if (row_identity && col_identity) {
743     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
744   } else {
745     C->ops->solve = MatSolve_SeqAIJ_inplace;
746   }
747   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
748   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
749   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
750   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
751 
752   C->assembled    = PETSC_TRUE;
753   C->preallocated = PETSC_TRUE;
754 
755   PetscCall(PetscLogFlops(C->cmap->n));
756   if (sctx.nshift) {
757     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
758       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
759     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
760       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
761     }
762   }
763   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
764   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
765 
766   PetscCall(MatSeqAIJCheckInode(C));
767   PetscFunctionReturn(0);
768 }
769 
770 /*
771    This routine implements inplace ILU(0) with row or/and column permutations.
772    Input:
773      A - original matrix
774    Output;
775      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
776          a->j (col index) is permuted by the inverse of colperm, then sorted
777          a->a reordered accordingly with a->j
778          a->diag (ptr to diagonal elements) is updated.
779 */
780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
781 {
782   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
783   IS              isrow = a->row,isicol = a->icol;
784   const PetscInt  *r,*ic,*ics;
785   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
786   PetscInt        *ajtmp,nz,row;
787   PetscInt        *diag = a->diag,nbdiag,*pj;
788   PetscScalar     *rtmp,*pc,multiplier,d;
789   MatScalar       *pv,*v;
790   PetscReal       rs;
791   FactorShiftCtx  sctx;
792   const MatScalar *aa=a->a,*vtmp;
793 
794   PetscFunctionBegin;
795   PetscCheck(A == B,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
796 
797   /* MatPivotSetUp(): initialize shift context sctx */
798   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
799 
800   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
801     const PetscInt *ddiag = a->diag;
802     sctx.shift_top = info->zeropivot;
803     for (i=0; i<n; i++) {
804       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
805       d    = (aa)[ddiag[i]];
806       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
807       vtmp = aa+ai[i];
808       nz   = ai[i+1] - ai[i];
809       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
810       if (rs>sctx.shift_top) sctx.shift_top = rs;
811     }
812     sctx.shift_top *= 1.1;
813     sctx.nshift_max = 5;
814     sctx.shift_lo   = 0.;
815     sctx.shift_hi   = 1.;
816   }
817 
818   PetscCall(ISGetIndices(isrow,&r));
819   PetscCall(ISGetIndices(isicol,&ic));
820   PetscCall(PetscMalloc1(n+1,&rtmp));
821   PetscCall(PetscArrayzero(rtmp,n+1));
822   ics  = ic;
823 
824 #if defined(MV)
825   sctx.shift_top      = 0.;
826   sctx.nshift_max     = 0;
827   sctx.shift_lo       = 0.;
828   sctx.shift_hi       = 0.;
829   sctx.shift_fraction = 0.;
830 
831   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
832     sctx.shift_top = 0.;
833     for (i=0; i<n; i++) {
834       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
835       d  = (a->a)[diag[i]];
836       rs = -PetscAbsScalar(d) - PetscRealPart(d);
837       v  = a->a+ai[i];
838       nz = ai[i+1] - ai[i];
839       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
840       if (rs>sctx.shift_top) sctx.shift_top = rs;
841     }
842     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
843     sctx.shift_top *= 1.1;
844     sctx.nshift_max = 5;
845     sctx.shift_lo   = 0.;
846     sctx.shift_hi   = 1.;
847   }
848 
849   sctx.shift_amount = 0.;
850   sctx.nshift       = 0;
851 #endif
852 
853   do {
854     sctx.newshift = PETSC_FALSE;
855     for (i=0; i<n; i++) {
856       /* load in initial unfactored row */
857       nz    = ai[r[i]+1] - ai[r[i]];
858       ajtmp = aj + ai[r[i]];
859       v     = a->a + ai[r[i]];
860       /* sort permuted ajtmp and values v accordingly */
861       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
862       PetscCall(PetscSortIntWithScalarArray(nz,ajtmp,v));
863 
864       diag[r[i]] = ai[r[i]];
865       for (j=0; j<nz; j++) {
866         rtmp[ajtmp[j]] = v[j];
867         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
868       }
869       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
870 
871       row = *ajtmp++;
872       while  (row < i) {
873         pc = rtmp + row;
874         if (*pc != 0.0) {
875           pv = a->a + diag[r[row]];
876           pj = aj + diag[r[row]] + 1;
877 
878           multiplier = *pc / *pv++;
879           *pc        = multiplier;
880           nz         = ai[r[row]+1] - diag[r[row]] - 1;
881           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
882           PetscCall(PetscLogFlops(1+2.0*nz));
883         }
884         row = *ajtmp++;
885       }
886       /* finished row so overwrite it onto a->a */
887       pv     = a->a + ai[r[i]];
888       pj     = aj + ai[r[i]];
889       nz     = ai[r[i]+1] - ai[r[i]];
890       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
891 
892       rs = 0.0;
893       for (j=0; j<nz; j++) {
894         pv[j] = rtmp[pj[j]];
895         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
896       }
897 
898       sctx.rs = rs;
899       sctx.pv = pv[nbdiag];
900       PetscCall(MatPivotCheck(B,A,info,&sctx,i));
901       if (sctx.newshift) break;
902       pv[nbdiag] = sctx.pv;
903     }
904 
905     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
906       /*
907        * if no shift in this attempt & shifting & started shifting & can refine,
908        * then try lower shift
909        */
910       sctx.shift_hi       = sctx.shift_fraction;
911       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
912       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
913       sctx.newshift       = PETSC_TRUE;
914       sctx.nshift++;
915     }
916   } while (sctx.newshift);
917 
918   /* invert diagonal entries for simpler triangular solves */
919   for (i=0; i<n; i++) {
920     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
921   }
922 
923   PetscCall(PetscFree(rtmp));
924   PetscCall(ISRestoreIndices(isicol,&ic));
925   PetscCall(ISRestoreIndices(isrow,&r));
926 
927   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
928   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
929   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
930   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
931 
932   A->assembled    = PETSC_TRUE;
933   A->preallocated = PETSC_TRUE;
934 
935   PetscCall(PetscLogFlops(A->cmap->n));
936   if (sctx.nshift) {
937     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
938       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
939     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
940       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
941     }
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 /* ----------------------------------------------------------- */
947 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
948 {
949   Mat            C;
950 
951   PetscFunctionBegin;
952   PetscCall(MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C));
953   PetscCall(MatLUFactorSymbolic(C,A,row,col,info));
954   PetscCall(MatLUFactorNumeric(C,A,info));
955 
956   A->ops->solve          = C->ops->solve;
957   A->ops->solvetranspose = C->ops->solvetranspose;
958 
959   PetscCall(MatHeaderMerge(A,&C));
960   PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol));
961   PetscFunctionReturn(0);
962 }
963 /* ----------------------------------------------------------- */
964 
965 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
966 {
967   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
968   IS                iscol = a->col,isrow = a->row;
969   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
970   PetscInt          nz;
971   const PetscInt    *rout,*cout,*r,*c;
972   PetscScalar       *x,*tmp,*tmps,sum;
973   const PetscScalar *b;
974   const MatScalar   *aa = a->a,*v;
975 
976   PetscFunctionBegin;
977   if (!n) PetscFunctionReturn(0);
978 
979   PetscCall(VecGetArrayRead(bb,&b));
980   PetscCall(VecGetArrayWrite(xx,&x));
981   tmp  = a->solve_work;
982 
983   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
984   PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1);
985 
986   /* forward solve the lower triangular */
987   tmp[0] = b[*r++];
988   tmps   = tmp;
989   for (i=1; i<n; i++) {
990     v   = aa + ai[i];
991     vi  = aj + ai[i];
992     nz  = a->diag[i] - ai[i];
993     sum = b[*r++];
994     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
995     tmp[i] = sum;
996   }
997 
998   /* backward solve the upper triangular */
999   for (i=n-1; i>=0; i--) {
1000     v   = aa + a->diag[i] + 1;
1001     vi  = aj + a->diag[i] + 1;
1002     nz  = ai[i+1] - a->diag[i] - 1;
1003     sum = tmp[i];
1004     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1005     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1006   }
1007 
1008   PetscCall(ISRestoreIndices(isrow,&rout));
1009   PetscCall(ISRestoreIndices(iscol,&cout));
1010   PetscCall(VecRestoreArrayRead(bb,&b));
1011   PetscCall(VecRestoreArrayWrite(xx,&x));
1012   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1017 {
1018   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1019   IS                iscol = a->col,isrow = a->row;
1020   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1021   PetscInt          nz,neq,ldb,ldx;
1022   const PetscInt    *rout,*cout,*r,*c;
1023   PetscScalar       *x,*tmp = a->solve_work,*tmps,sum;
1024   const PetscScalar *b,*aa = a->a,*v;
1025   PetscBool         isdense;
1026 
1027   PetscFunctionBegin;
1028   if (!n) PetscFunctionReturn(0);
1029   PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense));
1030   PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1031   if (X != B) {
1032     PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense));
1033     PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1034   }
1035   PetscCall(MatDenseGetArrayRead(B,&b));
1036   PetscCall(MatDenseGetLDA(B,&ldb));
1037   PetscCall(MatDenseGetArray(X,&x));
1038   PetscCall(MatDenseGetLDA(X,&ldx));
1039   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1040   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1041   for (neq=0; neq<B->cmap->n; neq++) {
1042     /* forward solve the lower triangular */
1043     tmp[0] = b[r[0]];
1044     tmps   = tmp;
1045     for (i=1; i<n; i++) {
1046       v   = aa + ai[i];
1047       vi  = aj + ai[i];
1048       nz  = a->diag[i] - ai[i];
1049       sum = b[r[i]];
1050       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1051       tmp[i] = sum;
1052     }
1053     /* backward solve the upper triangular */
1054     for (i=n-1; i>=0; i--) {
1055       v   = aa + a->diag[i] + 1;
1056       vi  = aj + a->diag[i] + 1;
1057       nz  = ai[i+1] - a->diag[i] - 1;
1058       sum = tmp[i];
1059       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1060       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1061     }
1062     b += ldb;
1063     x += ldx;
1064   }
1065   PetscCall(ISRestoreIndices(isrow,&rout));
1066   PetscCall(ISRestoreIndices(iscol,&cout));
1067   PetscCall(MatDenseRestoreArrayRead(B,&b));
1068   PetscCall(MatDenseRestoreArray(X,&x));
1069   PetscCall(PetscLogFlops(B->cmap->n*(2.0*a->nz - n)));
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1074 {
1075   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1076   IS                iscol = a->col,isrow = a->row;
1077   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1078   PetscInt          nz,neq,ldb,ldx;
1079   const PetscInt    *rout,*cout,*r,*c;
1080   PetscScalar       *x,*tmp = a->solve_work,sum;
1081   const PetscScalar *b,*aa = a->a,*v;
1082   PetscBool         isdense;
1083 
1084   PetscFunctionBegin;
1085   if (!n) PetscFunctionReturn(0);
1086   PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense));
1087   PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1088   if (X != B) {
1089     PetscCall(PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense));
1090     PetscCheck(isdense,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1091   }
1092   PetscCall(MatDenseGetArrayRead(B,&b));
1093   PetscCall(MatDenseGetLDA(B,&ldb));
1094   PetscCall(MatDenseGetArray(X,&x));
1095   PetscCall(MatDenseGetLDA(X,&ldx));
1096   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1097   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1098   for (neq=0; neq<B->cmap->n; neq++) {
1099     /* forward solve the lower triangular */
1100     tmp[0] = b[r[0]];
1101     v      = aa;
1102     vi     = aj;
1103     for (i=1; i<n; i++) {
1104       nz  = ai[i+1] - ai[i];
1105       sum = b[r[i]];
1106       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1107       tmp[i] = sum;
1108       v     += nz; vi += nz;
1109     }
1110     /* backward solve the upper triangular */
1111     for (i=n-1; i>=0; i--) {
1112       v   = aa + adiag[i+1]+1;
1113       vi  = aj + adiag[i+1]+1;
1114       nz  = adiag[i]-adiag[i+1]-1;
1115       sum = tmp[i];
1116       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1117       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1118     }
1119     b += ldb;
1120     x += ldx;
1121   }
1122   PetscCall(ISRestoreIndices(isrow,&rout));
1123   PetscCall(ISRestoreIndices(iscol,&cout));
1124   PetscCall(MatDenseRestoreArrayRead(B,&b));
1125   PetscCall(MatDenseRestoreArray(X,&x));
1126   PetscCall(PetscLogFlops(B->cmap->n*(2.0*a->nz - n)));
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1131 {
1132   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1133   IS                iscol = a->col,isrow = a->row;
1134   const PetscInt    *r,*c,*rout,*cout;
1135   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1136   PetscInt          nz,row;
1137   PetscScalar       *x,*tmp,*tmps,sum;
1138   const PetscScalar *b;
1139   const MatScalar   *aa = a->a,*v;
1140 
1141   PetscFunctionBegin;
1142   if (!n) PetscFunctionReturn(0);
1143 
1144   PetscCall(VecGetArrayRead(bb,&b));
1145   PetscCall(VecGetArrayWrite(xx,&x));
1146   tmp  = a->solve_work;
1147 
1148   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1149   PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1);
1150 
1151   /* forward solve the lower triangular */
1152   tmp[0] = b[*r++];
1153   tmps   = tmp;
1154   for (row=1; row<n; row++) {
1155     i   = rout[row]; /* permuted row */
1156     v   = aa + ai[i];
1157     vi  = aj + ai[i];
1158     nz  = a->diag[i] - ai[i];
1159     sum = b[*r++];
1160     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1161     tmp[row] = sum;
1162   }
1163 
1164   /* backward solve the upper triangular */
1165   for (row=n-1; row>=0; row--) {
1166     i   = rout[row]; /* permuted row */
1167     v   = aa + a->diag[i] + 1;
1168     vi  = aj + a->diag[i] + 1;
1169     nz  = ai[i+1] - a->diag[i] - 1;
1170     sum = tmp[row];
1171     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1172     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1173   }
1174 
1175   PetscCall(ISRestoreIndices(isrow,&rout));
1176   PetscCall(ISRestoreIndices(iscol,&cout));
1177   PetscCall(VecRestoreArrayRead(bb,&b));
1178   PetscCall(VecRestoreArrayWrite(xx,&x));
1179   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 /* ----------------------------------------------------------- */
1184 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1185 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1186 {
1187   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1188   PetscInt          n   = A->rmap->n;
1189   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1190   PetscScalar       *x;
1191   const PetscScalar *b;
1192   const MatScalar   *aa = a->a;
1193 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1194   PetscInt        adiag_i,i,nz,ai_i;
1195   const PetscInt  *vi;
1196   const MatScalar *v;
1197   PetscScalar     sum;
1198 #endif
1199 
1200   PetscFunctionBegin;
1201   if (!n) PetscFunctionReturn(0);
1202 
1203   PetscCall(VecGetArrayRead(bb,&b));
1204   PetscCall(VecGetArrayWrite(xx,&x));
1205 
1206 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1207   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1208 #else
1209   /* forward solve the lower triangular */
1210   x[0] = b[0];
1211   for (i=1; i<n; i++) {
1212     ai_i = ai[i];
1213     v    = aa + ai_i;
1214     vi   = aj + ai_i;
1215     nz   = adiag[i] - ai_i;
1216     sum  = b[i];
1217     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1218     x[i] = sum;
1219   }
1220 
1221   /* backward solve the upper triangular */
1222   for (i=n-1; i>=0; i--) {
1223     adiag_i = adiag[i];
1224     v       = aa + adiag_i + 1;
1225     vi      = aj + adiag_i + 1;
1226     nz      = ai[i+1] - adiag_i - 1;
1227     sum     = x[i];
1228     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1229     x[i] = sum*aa[adiag_i];
1230   }
1231 #endif
1232   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
1233   PetscCall(VecRestoreArrayRead(bb,&b));
1234   PetscCall(VecRestoreArrayWrite(xx,&x));
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1239 {
1240   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1241   IS                iscol = a->col,isrow = a->row;
1242   PetscInt          i, n = A->rmap->n,j;
1243   PetscInt          nz;
1244   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1245   PetscScalar       *x,*tmp,sum;
1246   const PetscScalar *b;
1247   const MatScalar   *aa = a->a,*v;
1248 
1249   PetscFunctionBegin;
1250   if (yy != xx) PetscCall(VecCopy(yy,xx));
1251 
1252   PetscCall(VecGetArrayRead(bb,&b));
1253   PetscCall(VecGetArray(xx,&x));
1254   tmp  = a->solve_work;
1255 
1256   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1257   PetscCall(ISGetIndices(iscol,&cout)); c = cout + (n-1);
1258 
1259   /* forward solve the lower triangular */
1260   tmp[0] = b[*r++];
1261   for (i=1; i<n; i++) {
1262     v   = aa + ai[i];
1263     vi  = aj + ai[i];
1264     nz  = a->diag[i] - ai[i];
1265     sum = b[*r++];
1266     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1267     tmp[i] = sum;
1268   }
1269 
1270   /* backward solve the upper triangular */
1271   for (i=n-1; i>=0; i--) {
1272     v   = aa + a->diag[i] + 1;
1273     vi  = aj + a->diag[i] + 1;
1274     nz  = ai[i+1] - a->diag[i] - 1;
1275     sum = tmp[i];
1276     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1277     tmp[i]   = sum*aa[a->diag[i]];
1278     x[*c--] += tmp[i];
1279   }
1280 
1281   PetscCall(ISRestoreIndices(isrow,&rout));
1282   PetscCall(ISRestoreIndices(iscol,&cout));
1283   PetscCall(VecRestoreArrayRead(bb,&b));
1284   PetscCall(VecRestoreArray(xx,&x));
1285   PetscCall(PetscLogFlops(2.0*a->nz));
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1290 {
1291   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1292   IS                iscol = a->col,isrow = a->row;
1293   PetscInt          i, n = A->rmap->n,j;
1294   PetscInt          nz;
1295   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1296   PetscScalar       *x,*tmp,sum;
1297   const PetscScalar *b;
1298   const MatScalar   *aa = a->a,*v;
1299 
1300   PetscFunctionBegin;
1301   if (yy != xx) PetscCall(VecCopy(yy,xx));
1302 
1303   PetscCall(VecGetArrayRead(bb,&b));
1304   PetscCall(VecGetArray(xx,&x));
1305   tmp  = a->solve_work;
1306 
1307   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1308   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1309 
1310   /* forward solve the lower triangular */
1311   tmp[0] = b[r[0]];
1312   v      = aa;
1313   vi     = aj;
1314   for (i=1; i<n; i++) {
1315     nz  = ai[i+1] - ai[i];
1316     sum = b[r[i]];
1317     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1318     tmp[i] = sum;
1319     v     += nz;
1320     vi    += nz;
1321   }
1322 
1323   /* backward solve the upper triangular */
1324   v  = aa + adiag[n-1];
1325   vi = aj + adiag[n-1];
1326   for (i=n-1; i>=0; i--) {
1327     nz  = adiag[i] - adiag[i+1] - 1;
1328     sum = tmp[i];
1329     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1330     tmp[i]   = sum*v[nz];
1331     x[c[i]] += tmp[i];
1332     v       += nz+1; vi += nz+1;
1333   }
1334 
1335   PetscCall(ISRestoreIndices(isrow,&rout));
1336   PetscCall(ISRestoreIndices(iscol,&cout));
1337   PetscCall(VecRestoreArrayRead(bb,&b));
1338   PetscCall(VecRestoreArray(xx,&x));
1339   PetscCall(PetscLogFlops(2.0*a->nz));
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1344 {
1345   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1346   IS                iscol = a->col,isrow = a->row;
1347   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1348   PetscInt          i,n = A->rmap->n,j;
1349   PetscInt          nz;
1350   PetscScalar       *x,*tmp,s1;
1351   const MatScalar   *aa = a->a,*v;
1352   const PetscScalar *b;
1353 
1354   PetscFunctionBegin;
1355   PetscCall(VecGetArrayRead(bb,&b));
1356   PetscCall(VecGetArrayWrite(xx,&x));
1357   tmp  = a->solve_work;
1358 
1359   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1360   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1361 
1362   /* copy the b into temp work space according to permutation */
1363   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1364 
1365   /* forward solve the U^T */
1366   for (i=0; i<n; i++) {
1367     v   = aa + diag[i];
1368     vi  = aj + diag[i] + 1;
1369     nz  = ai[i+1] - diag[i] - 1;
1370     s1  = tmp[i];
1371     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1372     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1373     tmp[i] = s1;
1374   }
1375 
1376   /* backward solve the L^T */
1377   for (i=n-1; i>=0; i--) {
1378     v  = aa + diag[i] - 1;
1379     vi = aj + diag[i] - 1;
1380     nz = diag[i] - ai[i];
1381     s1 = tmp[i];
1382     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1383   }
1384 
1385   /* copy tmp into x according to permutation */
1386   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1387 
1388   PetscCall(ISRestoreIndices(isrow,&rout));
1389   PetscCall(ISRestoreIndices(iscol,&cout));
1390   PetscCall(VecRestoreArrayRead(bb,&b));
1391   PetscCall(VecRestoreArrayWrite(xx,&x));
1392 
1393   PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n));
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1398 {
1399   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1400   IS                iscol = a->col,isrow = a->row;
1401   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1402   PetscInt          i,n = A->rmap->n,j;
1403   PetscInt          nz;
1404   PetscScalar       *x,*tmp,s1;
1405   const MatScalar   *aa = a->a,*v;
1406   const PetscScalar *b;
1407 
1408   PetscFunctionBegin;
1409   PetscCall(VecGetArrayRead(bb,&b));
1410   PetscCall(VecGetArrayWrite(xx,&x));
1411   tmp  = a->solve_work;
1412 
1413   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1414   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1415 
1416   /* copy the b into temp work space according to permutation */
1417   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1418 
1419   /* forward solve the U^T */
1420   for (i=0; i<n; i++) {
1421     v   = aa + adiag[i+1] + 1;
1422     vi  = aj + adiag[i+1] + 1;
1423     nz  = adiag[i] - adiag[i+1] - 1;
1424     s1  = tmp[i];
1425     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1426     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1427     tmp[i] = s1;
1428   }
1429 
1430   /* backward solve the L^T */
1431   for (i=n-1; i>=0; i--) {
1432     v  = aa + ai[i];
1433     vi = aj + ai[i];
1434     nz = ai[i+1] - ai[i];
1435     s1 = tmp[i];
1436     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1437   }
1438 
1439   /* copy tmp into x according to permutation */
1440   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1441 
1442   PetscCall(ISRestoreIndices(isrow,&rout));
1443   PetscCall(ISRestoreIndices(iscol,&cout));
1444   PetscCall(VecRestoreArrayRead(bb,&b));
1445   PetscCall(VecRestoreArrayWrite(xx,&x));
1446 
1447   PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n));
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1452 {
1453   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1454   IS                iscol = a->col,isrow = a->row;
1455   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1456   PetscInt          i,n = A->rmap->n,j;
1457   PetscInt          nz;
1458   PetscScalar       *x,*tmp,s1;
1459   const MatScalar   *aa = a->a,*v;
1460   const PetscScalar *b;
1461 
1462   PetscFunctionBegin;
1463   if (zz != xx) PetscCall(VecCopy(zz,xx));
1464   PetscCall(VecGetArrayRead(bb,&b));
1465   PetscCall(VecGetArray(xx,&x));
1466   tmp  = a->solve_work;
1467 
1468   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1469   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1470 
1471   /* copy the b into temp work space according to permutation */
1472   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1473 
1474   /* forward solve the U^T */
1475   for (i=0; i<n; i++) {
1476     v   = aa + diag[i];
1477     vi  = aj + diag[i] + 1;
1478     nz  = ai[i+1] - diag[i] - 1;
1479     s1  = tmp[i];
1480     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1481     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1482     tmp[i] = s1;
1483   }
1484 
1485   /* backward solve the L^T */
1486   for (i=n-1; i>=0; i--) {
1487     v  = aa + diag[i] - 1;
1488     vi = aj + diag[i] - 1;
1489     nz = diag[i] - ai[i];
1490     s1 = tmp[i];
1491     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1492   }
1493 
1494   /* copy tmp into x according to permutation */
1495   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1496 
1497   PetscCall(ISRestoreIndices(isrow,&rout));
1498   PetscCall(ISRestoreIndices(iscol,&cout));
1499   PetscCall(VecRestoreArrayRead(bb,&b));
1500   PetscCall(VecRestoreArray(xx,&x));
1501 
1502   PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n));
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1507 {
1508   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1509   IS                iscol = a->col,isrow = a->row;
1510   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1511   PetscInt          i,n = A->rmap->n,j;
1512   PetscInt          nz;
1513   PetscScalar       *x,*tmp,s1;
1514   const MatScalar   *aa = a->a,*v;
1515   const PetscScalar *b;
1516 
1517   PetscFunctionBegin;
1518   if (zz != xx) PetscCall(VecCopy(zz,xx));
1519   PetscCall(VecGetArrayRead(bb,&b));
1520   PetscCall(VecGetArray(xx,&x));
1521   tmp  = a->solve_work;
1522 
1523   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
1524   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
1525 
1526   /* copy the b into temp work space according to permutation */
1527   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1528 
1529   /* forward solve the U^T */
1530   for (i=0; i<n; i++) {
1531     v   = aa + adiag[i+1] + 1;
1532     vi  = aj + adiag[i+1] + 1;
1533     nz  = adiag[i] - adiag[i+1] - 1;
1534     s1  = tmp[i];
1535     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1536     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1537     tmp[i] = s1;
1538   }
1539 
1540   /* backward solve the L^T */
1541   for (i=n-1; i>=0; i--) {
1542     v  = aa + ai[i];
1543     vi = aj + ai[i];
1544     nz = ai[i+1] - ai[i];
1545     s1 = tmp[i];
1546     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1547   }
1548 
1549   /* copy tmp into x according to permutation */
1550   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1551 
1552   PetscCall(ISRestoreIndices(isrow,&rout));
1553   PetscCall(ISRestoreIndices(iscol,&cout));
1554   PetscCall(VecRestoreArrayRead(bb,&b));
1555   PetscCall(VecRestoreArray(xx,&x));
1556 
1557   PetscCall(PetscLogFlops(2.0*a->nz-A->cmap->n));
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 /* ----------------------------------------------------------------*/
1562 
1563 /*
1564    ilu() under revised new data structure.
1565    Factored arrays bj and ba are stored as
1566      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1567 
1568    bi=fact->i is an array of size n+1, in which
1569      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1570      bi[n]:  points to L(n-1,n-1)+1
1571 
1572   bdiag=fact->diag is an array of size n+1,in which
1573      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1574      bdiag[n]: points to entry of U(n-1,0)-1
1575 
1576    U(i,:) contains bdiag[i] as its last entry, i.e.,
1577     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1578 */
1579 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1580 {
1581   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1582   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1583   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1584   IS             isicol;
1585 
1586   PetscFunctionBegin;
1587   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
1588   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE));
1589   b    = (Mat_SeqAIJ*)(fact)->data;
1590 
1591   /* allocate matrix arrays for new data structure */
1592   PetscCall(PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i));
1593   PetscCall(PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt)));
1594 
1595   b->singlemalloc = PETSC_TRUE;
1596   if (!b->diag) {
1597     PetscCall(PetscMalloc1(n+1,&b->diag));
1598     PetscCall(PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt)));
1599   }
1600   bdiag = b->diag;
1601 
1602   if (n > 0) {
1603     PetscCall(PetscArrayzero(b->a,ai[n]));
1604   }
1605 
1606   /* set bi and bj with new data structure */
1607   bi = b->i;
1608   bj = b->j;
1609 
1610   /* L part */
1611   bi[0] = 0;
1612   for (i=0; i<n; i++) {
1613     nz      = adiag[i] - ai[i];
1614     bi[i+1] = bi[i] + nz;
1615     aj      = a->j + ai[i];
1616     for (j=0; j<nz; j++) {
1617       bj[k++] = aj[j];
1618     }
1619   }
1620 
1621   /* U part */
1622   bdiag[n] = bi[n]-1;
1623   for (i=n-1; i>=0; i--) {
1624     nz = ai[i+1] - adiag[i] - 1;
1625     aj = a->j + adiag[i] + 1;
1626     for (j=0; j<nz; j++) {
1627       bj[k++] = aj[j];
1628     }
1629     /* diag[i] */
1630     bj[k++]  = i;
1631     bdiag[i] = bdiag[i+1] + nz + 1;
1632   }
1633 
1634   fact->factortype             = MAT_FACTOR_ILU;
1635   fact->info.factor_mallocs    = 0;
1636   fact->info.fill_ratio_given  = info->fill;
1637   fact->info.fill_ratio_needed = 1.0;
1638   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1639   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1640 
1641   b       = (Mat_SeqAIJ*)(fact)->data;
1642   b->row  = isrow;
1643   b->col  = iscol;
1644   b->icol = isicol;
1645   PetscCall(PetscMalloc1(fact->rmap->n+1,&b->solve_work));
1646   PetscCall(PetscObjectReference((PetscObject)isrow));
1647   PetscCall(PetscObjectReference((PetscObject)iscol));
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1652 {
1653   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1654   IS                 isicol;
1655   const PetscInt     *r,*ic;
1656   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1657   PetscInt           *bi,*cols,nnz,*cols_lvl;
1658   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1659   PetscInt           i,levels,diagonal_fill;
1660   PetscBool          col_identity,row_identity,missing;
1661   PetscReal          f;
1662   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1663   PetscBT            lnkbt;
1664   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1665   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1666   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1667 
1668   PetscFunctionBegin;
1669   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
1670   PetscCall(MatMissingDiagonal(A,&missing,&i));
1671   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
1672 
1673   levels = (PetscInt)info->levels;
1674   PetscCall(ISIdentity(isrow,&row_identity));
1675   PetscCall(ISIdentity(iscol,&col_identity));
1676   if (!levels && row_identity && col_identity) {
1677     /* special case: ilu(0) with natural ordering */
1678     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info));
1679     if (a->inode.size) {
1680       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1681     }
1682     PetscFunctionReturn(0);
1683   }
1684 
1685   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
1686   PetscCall(ISGetIndices(isrow,&r));
1687   PetscCall(ISGetIndices(isicol,&ic));
1688 
1689   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1690   PetscCall(PetscMalloc1(n+1,&bi));
1691   PetscCall(PetscMalloc1(n+1,&bdiag));
1692   bi[0] = bdiag[0] = 0;
1693   PetscCall(PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr));
1694 
1695   /* create a linked list for storing column indices of the active row */
1696   nlnk = n + 1;
1697   PetscCall(PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt));
1698 
1699   /* initial FreeSpace size is f*(ai[n]+1) */
1700   f                 = info->fill;
1701   diagonal_fill     = (PetscInt)info->diagonal_fill;
1702   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
1703   current_space     = free_space;
1704   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl));
1705   current_space_lvl = free_space_lvl;
1706   for (i=0; i<n; i++) {
1707     nzi = 0;
1708     /* copy current row into linked list */
1709     nnz = ai[r[i]+1] - ai[r[i]];
1710     PetscCheck(nnz,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i);
1711     cols   = aj + ai[r[i]];
1712     lnk[i] = -1; /* marker to indicate if diagonal exists */
1713     PetscCall(PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt));
1714     nzi   += nlnk;
1715 
1716     /* make sure diagonal entry is included */
1717     if (diagonal_fill && lnk[i] == -1) {
1718       fm = n;
1719       while (lnk[fm] < i) fm = lnk[fm];
1720       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1721       lnk[fm]    = i;
1722       lnk_lvl[i] = 0;
1723       nzi++; dcount++;
1724     }
1725 
1726     /* add pivot rows into the active row */
1727     nzbd = 0;
1728     prow = lnk[n];
1729     while (prow < i) {
1730       nnz      = bdiag[prow];
1731       cols     = bj_ptr[prow] + nnz + 1;
1732       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1733       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1734       PetscCall(PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow));
1735       nzi     += nlnk;
1736       prow     = lnk[prow];
1737       nzbd++;
1738     }
1739     bdiag[i] = nzbd;
1740     bi[i+1]  = bi[i] + nzi;
1741     /* if free space is not available, make more free space */
1742     if (current_space->local_remaining<nzi) {
1743       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1744       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
1745       PetscCall(PetscFreeSpaceGet(nnz,&current_space_lvl));
1746       reallocs++;
1747     }
1748 
1749     /* copy data into free_space and free_space_lvl, then initialize lnk */
1750     PetscCall(PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt));
1751     bj_ptr[i]    = current_space->array;
1752     bjlvl_ptr[i] = current_space_lvl->array;
1753 
1754     /* make sure the active row i has diagonal entry */
1755     PetscCheck(*(bj_ptr[i]+bdiag[i]) == i,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1756 
1757     current_space->array               += nzi;
1758     current_space->local_used          += nzi;
1759     current_space->local_remaining     -= nzi;
1760     current_space_lvl->array           += nzi;
1761     current_space_lvl->local_used      += nzi;
1762     current_space_lvl->local_remaining -= nzi;
1763   }
1764 
1765   PetscCall(ISRestoreIndices(isrow,&r));
1766   PetscCall(ISRestoreIndices(isicol,&ic));
1767   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1768   PetscCall(PetscMalloc1(bi[n]+1,&bj));
1769   PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag));
1770 
1771   PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt));
1772   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1773   PetscCall(PetscFree2(bj_ptr,bjlvl_ptr));
1774 
1775 #if defined(PETSC_USE_INFO)
1776   {
1777     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1778     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
1779     PetscCall(PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af));
1780     PetscCall(PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af));
1781     PetscCall(PetscInfo(A,"for best performance.\n"));
1782     if (diagonal_fill) {
1783       PetscCall(PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount));
1784     }
1785   }
1786 #endif
1787   /* put together the new matrix */
1788   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL));
1789   PetscCall(PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol));
1790   b    = (Mat_SeqAIJ*)(fact)->data;
1791 
1792   b->free_a       = PETSC_TRUE;
1793   b->free_ij      = PETSC_TRUE;
1794   b->singlemalloc = PETSC_FALSE;
1795 
1796   PetscCall(PetscMalloc1(bdiag[0]+1,&b->a));
1797 
1798   b->j    = bj;
1799   b->i    = bi;
1800   b->diag = bdiag;
1801   b->ilen = NULL;
1802   b->imax = NULL;
1803   b->row  = isrow;
1804   b->col  = iscol;
1805   PetscCall(PetscObjectReference((PetscObject)isrow));
1806   PetscCall(PetscObjectReference((PetscObject)iscol));
1807   b->icol = isicol;
1808 
1809   PetscCall(PetscMalloc1(n+1,&b->solve_work));
1810   /* In b structure:  Free imax, ilen, old a, old j.
1811      Allocate bdiag, solve_work, new a, new j */
1812   PetscCall(PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar))));
1813   b->maxnz = b->nz = bdiag[0]+1;
1814 
1815   (fact)->info.factor_mallocs    = reallocs;
1816   (fact)->info.fill_ratio_given  = f;
1817   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1818   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1819   if (a->inode.size) {
1820     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1821   }
1822   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1827 {
1828   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1829   IS                 isicol;
1830   const PetscInt     *r,*ic;
1831   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1832   PetscInt           *bi,*cols,nnz,*cols_lvl;
1833   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1834   PetscInt           i,levels,diagonal_fill;
1835   PetscBool          col_identity,row_identity;
1836   PetscReal          f;
1837   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1838   PetscBT            lnkbt;
1839   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1840   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1841   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1842   PetscBool          missing;
1843 
1844   PetscFunctionBegin;
1845   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
1846   PetscCall(MatMissingDiagonal(A,&missing,&i));
1847   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
1848 
1849   f             = info->fill;
1850   levels        = (PetscInt)info->levels;
1851   diagonal_fill = (PetscInt)info->diagonal_fill;
1852 
1853   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
1854 
1855   PetscCall(ISIdentity(isrow,&row_identity));
1856   PetscCall(ISIdentity(iscol,&col_identity));
1857   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1858     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE));
1859 
1860     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1861     if (a->inode.size) {
1862       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1863     }
1864     fact->factortype               = MAT_FACTOR_ILU;
1865     (fact)->info.factor_mallocs    = 0;
1866     (fact)->info.fill_ratio_given  = info->fill;
1867     (fact)->info.fill_ratio_needed = 1.0;
1868 
1869     b    = (Mat_SeqAIJ*)(fact)->data;
1870     b->row  = isrow;
1871     b->col  = iscol;
1872     b->icol = isicol;
1873     PetscCall(PetscMalloc1((fact)->rmap->n+1,&b->solve_work));
1874     PetscCall(PetscObjectReference((PetscObject)isrow));
1875     PetscCall(PetscObjectReference((PetscObject)iscol));
1876     PetscFunctionReturn(0);
1877   }
1878 
1879   PetscCall(ISGetIndices(isrow,&r));
1880   PetscCall(ISGetIndices(isicol,&ic));
1881 
1882   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1883   PetscCall(PetscMalloc1(n+1,&bi));
1884   PetscCall(PetscMalloc1(n+1,&bdiag));
1885   bi[0] = bdiag[0] = 0;
1886 
1887   PetscCall(PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr));
1888 
1889   /* create a linked list for storing column indices of the active row */
1890   nlnk = n + 1;
1891   PetscCall(PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt));
1892 
1893   /* initial FreeSpace size is f*(ai[n]+1) */
1894   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
1895   current_space     = free_space;
1896   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl));
1897   current_space_lvl = free_space_lvl;
1898 
1899   for (i=0; i<n; i++) {
1900     nzi = 0;
1901     /* copy current row into linked list */
1902     nnz = ai[r[i]+1] - ai[r[i]];
1903     PetscCheck(nnz,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i);
1904     cols   = aj + ai[r[i]];
1905     lnk[i] = -1; /* marker to indicate if diagonal exists */
1906     PetscCall(PetscIncompleteLLInit(nnz,cols,n,ic,&nlnk,lnk,lnk_lvl,lnkbt));
1907     nzi   += nlnk;
1908 
1909     /* make sure diagonal entry is included */
1910     if (diagonal_fill && lnk[i] == -1) {
1911       fm = n;
1912       while (lnk[fm] < i) fm = lnk[fm];
1913       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1914       lnk[fm]    = i;
1915       lnk_lvl[i] = 0;
1916       nzi++; dcount++;
1917     }
1918 
1919     /* add pivot rows into the active row */
1920     nzbd = 0;
1921     prow = lnk[n];
1922     while (prow < i) {
1923       nnz      = bdiag[prow];
1924       cols     = bj_ptr[prow] + nnz + 1;
1925       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1926       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1927       PetscCall(PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,&nlnk,lnk,lnk_lvl,lnkbt,prow));
1928       nzi     += nlnk;
1929       prow     = lnk[prow];
1930       nzbd++;
1931     }
1932     bdiag[i] = nzbd;
1933     bi[i+1]  = bi[i] + nzi;
1934 
1935     /* if free space is not available, make more free space */
1936     if (current_space->local_remaining<nzi) {
1937       nnz  = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1938       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
1939       PetscCall(PetscFreeSpaceGet(nnz,&current_space_lvl));
1940       reallocs++;
1941     }
1942 
1943     /* copy data into free_space and free_space_lvl, then initialize lnk */
1944     PetscCall(PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt));
1945     bj_ptr[i]    = current_space->array;
1946     bjlvl_ptr[i] = current_space_lvl->array;
1947 
1948     /* make sure the active row i has diagonal entry */
1949     PetscCheck(*(bj_ptr[i]+bdiag[i]) == i,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %" PetscInt_FMT " has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1950 
1951     current_space->array               += nzi;
1952     current_space->local_used          += nzi;
1953     current_space->local_remaining     -= nzi;
1954     current_space_lvl->array           += nzi;
1955     current_space_lvl->local_used      += nzi;
1956     current_space_lvl->local_remaining -= nzi;
1957   }
1958 
1959   PetscCall(ISRestoreIndices(isrow,&r));
1960   PetscCall(ISRestoreIndices(isicol,&ic));
1961 
1962   /* destroy list of free space and other temporary arrays */
1963   PetscCall(PetscMalloc1(bi[n]+1,&bj));
1964   PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); /* copy free_space -> bj */
1965   PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt));
1966   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1967   PetscCall(PetscFree2(bj_ptr,bjlvl_ptr));
1968 
1969 #if defined(PETSC_USE_INFO)
1970   {
1971     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1972     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
1973     PetscCall(PetscInfo(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af));
1974     PetscCall(PetscInfo(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af));
1975     PetscCall(PetscInfo(A,"for best performance.\n"));
1976     if (diagonal_fill) {
1977       PetscCall(PetscInfo(A,"Detected and replaced %" PetscInt_FMT " missing diagonals\n",dcount));
1978     }
1979   }
1980 #endif
1981 
1982   /* put together the new matrix */
1983   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL));
1984   PetscCall(PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol));
1985   b    = (Mat_SeqAIJ*)(fact)->data;
1986 
1987   b->free_a       = PETSC_TRUE;
1988   b->free_ij      = PETSC_TRUE;
1989   b->singlemalloc = PETSC_FALSE;
1990 
1991   PetscCall(PetscMalloc1(bi[n],&b->a));
1992   b->j = bj;
1993   b->i = bi;
1994   for (i=0; i<n; i++) bdiag[i] += bi[i];
1995   b->diag = bdiag;
1996   b->ilen = NULL;
1997   b->imax = NULL;
1998   b->row  = isrow;
1999   b->col  = iscol;
2000   PetscCall(PetscObjectReference((PetscObject)isrow));
2001   PetscCall(PetscObjectReference((PetscObject)iscol));
2002   b->icol = isicol;
2003   PetscCall(PetscMalloc1(n+1,&b->solve_work));
2004   /* In b structure:  Free imax, ilen, old a, old j.
2005      Allocate bdiag, solve_work, new a, new j */
2006   PetscCall(PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar))));
2007   b->maxnz = b->nz = bi[n];
2008 
2009   (fact)->info.factor_mallocs    = reallocs;
2010   (fact)->info.fill_ratio_given  = f;
2011   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2012   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2013   if (a->inode.size) {
2014     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2020 {
2021   Mat            C = B;
2022   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2023   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2024   IS             ip=b->row,iip = b->icol;
2025   const PetscInt *rip,*riip;
2026   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2027   PetscInt       *ai=a->i,*aj=a->j;
2028   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2029   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2030   PetscBool      perm_identity;
2031   FactorShiftCtx sctx;
2032   PetscReal      rs;
2033   MatScalar      d,*v;
2034 
2035   PetscFunctionBegin;
2036   /* MatPivotSetUp(): initialize shift context sctx */
2037   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
2038 
2039   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2040     sctx.shift_top = info->zeropivot;
2041     for (i=0; i<mbs; i++) {
2042       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2043       d  = (aa)[a->diag[i]];
2044       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2045       v  = aa+ai[i];
2046       nz = ai[i+1] - ai[i];
2047       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2048       if (rs>sctx.shift_top) sctx.shift_top = rs;
2049     }
2050     sctx.shift_top *= 1.1;
2051     sctx.nshift_max = 5;
2052     sctx.shift_lo   = 0.;
2053     sctx.shift_hi   = 1.;
2054   }
2055 
2056   PetscCall(ISGetIndices(ip,&rip));
2057   PetscCall(ISGetIndices(iip,&riip));
2058 
2059   /* allocate working arrays
2060      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2061      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2062   */
2063   PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r));
2064 
2065   do {
2066     sctx.newshift = PETSC_FALSE;
2067 
2068     for (i=0; i<mbs; i++) c2r[i] = mbs;
2069     if (mbs) il[0] = 0;
2070 
2071     for (k = 0; k<mbs; k++) {
2072       /* zero rtmp */
2073       nz    = bi[k+1] - bi[k];
2074       bjtmp = bj + bi[k];
2075       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2076 
2077       /* load in initial unfactored row */
2078       bval = ba + bi[k];
2079       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2080       for (j = jmin; j < jmax; j++) {
2081         col = riip[aj[j]];
2082         if (col >= k) { /* only take upper triangular entry */
2083           rtmp[col] = aa[j];
2084           *bval++   = 0.0; /* for in-place factorization */
2085         }
2086       }
2087       /* shift the diagonal of the matrix: ZeropivotApply() */
2088       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2089 
2090       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2091       dk = rtmp[k];
2092       i  = c2r[k]; /* first row to be added to k_th row  */
2093 
2094       while (i < k) {
2095         nexti = c2r[i]; /* next row to be added to k_th row */
2096 
2097         /* compute multiplier, update diag(k) and U(i,k) */
2098         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2099         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2100         dk     += uikdi*ba[ili]; /* update diag[k] */
2101         ba[ili] = uikdi; /* -U(i,k) */
2102 
2103         /* add multiple of row i to k-th row */
2104         jmin = ili + 1; jmax = bi[i+1];
2105         if (jmin < jmax) {
2106           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2107           /* update il and c2r for row i */
2108           il[i] = jmin;
2109           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2110         }
2111         i = nexti;
2112       }
2113 
2114       /* copy data into U(k,:) */
2115       rs   = 0.0;
2116       jmin = bi[k]; jmax = bi[k+1]-1;
2117       if (jmin < jmax) {
2118         for (j=jmin; j<jmax; j++) {
2119           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2120         }
2121         /* add the k-th row into il and c2r */
2122         il[k] = jmin;
2123         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2124       }
2125 
2126       /* MatPivotCheck() */
2127       sctx.rs = rs;
2128       sctx.pv = dk;
2129       PetscCall(MatPivotCheck(B,A,info,&sctx,i));
2130       if (sctx.newshift) break;
2131       dk = sctx.pv;
2132 
2133       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2134     }
2135   } while (sctx.newshift);
2136 
2137   PetscCall(PetscFree3(rtmp,il,c2r));
2138   PetscCall(ISRestoreIndices(ip,&rip));
2139   PetscCall(ISRestoreIndices(iip,&riip));
2140 
2141   PetscCall(ISIdentity(ip,&perm_identity));
2142   if (perm_identity) {
2143     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2144     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2145     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2146     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2147   } else {
2148     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2149     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2150     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2151     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2152   }
2153 
2154   C->assembled    = PETSC_TRUE;
2155   C->preallocated = PETSC_TRUE;
2156 
2157   PetscCall(PetscLogFlops(C->rmap->n));
2158 
2159   /* MatPivotView() */
2160   if (sctx.nshift) {
2161     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2162       PetscCall(PetscInfo(A,"number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top));
2163     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2164       PetscCall(PetscInfo(A,"number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
2165     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2166       PetscCall(PetscInfo(A,"number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n",sctx.nshift,(double)info->shiftamount));
2167     }
2168   }
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2173 {
2174   Mat            C = B;
2175   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2176   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2177   IS             ip=b->row,iip = b->icol;
2178   const PetscInt *rip,*riip;
2179   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2180   PetscInt       *ai=a->i,*aj=a->j;
2181   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2182   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2183   PetscBool      perm_identity;
2184   FactorShiftCtx sctx;
2185   PetscReal      rs;
2186   MatScalar      d,*v;
2187 
2188   PetscFunctionBegin;
2189   /* MatPivotSetUp(): initialize shift context sctx */
2190   PetscCall(PetscMemzero(&sctx,sizeof(FactorShiftCtx)));
2191 
2192   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2193     sctx.shift_top = info->zeropivot;
2194     for (i=0; i<mbs; i++) {
2195       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2196       d  = (aa)[a->diag[i]];
2197       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2198       v  = aa+ai[i];
2199       nz = ai[i+1] - ai[i];
2200       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2201       if (rs>sctx.shift_top) sctx.shift_top = rs;
2202     }
2203     sctx.shift_top *= 1.1;
2204     sctx.nshift_max = 5;
2205     sctx.shift_lo   = 0.;
2206     sctx.shift_hi   = 1.;
2207   }
2208 
2209   PetscCall(ISGetIndices(ip,&rip));
2210   PetscCall(ISGetIndices(iip,&riip));
2211 
2212   /* initialization */
2213   PetscCall(PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl));
2214 
2215   do {
2216     sctx.newshift = PETSC_FALSE;
2217 
2218     for (i=0; i<mbs; i++) jl[i] = mbs;
2219     il[0] = 0;
2220 
2221     for (k = 0; k<mbs; k++) {
2222       /* zero rtmp */
2223       nz    = bi[k+1] - bi[k];
2224       bjtmp = bj + bi[k];
2225       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2226 
2227       bval = ba + bi[k];
2228       /* initialize k-th row by the perm[k]-th row of A */
2229       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2230       for (j = jmin; j < jmax; j++) {
2231         col = riip[aj[j]];
2232         if (col >= k) { /* only take upper triangular entry */
2233           rtmp[col] = aa[j];
2234           *bval++   = 0.0; /* for in-place factorization */
2235         }
2236       }
2237       /* shift the diagonal of the matrix */
2238       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2239 
2240       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2241       dk = rtmp[k];
2242       i  = jl[k]; /* first row to be added to k_th row  */
2243 
2244       while (i < k) {
2245         nexti = jl[i]; /* next row to be added to k_th row */
2246 
2247         /* compute multiplier, update diag(k) and U(i,k) */
2248         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2249         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2250         dk     += uikdi*ba[ili];
2251         ba[ili] = uikdi; /* -U(i,k) */
2252 
2253         /* add multiple of row i to k-th row */
2254         jmin = ili + 1; jmax = bi[i+1];
2255         if (jmin < jmax) {
2256           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2257           /* update il and jl for row i */
2258           il[i] = jmin;
2259           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2260         }
2261         i = nexti;
2262       }
2263 
2264       /* shift the diagonals when zero pivot is detected */
2265       /* compute rs=sum of abs(off-diagonal) */
2266       rs   = 0.0;
2267       jmin = bi[k]+1;
2268       nz   = bi[k+1] - jmin;
2269       bcol = bj + jmin;
2270       for (j=0; j<nz; j++) {
2271         rs += PetscAbsScalar(rtmp[bcol[j]]);
2272       }
2273 
2274       sctx.rs = rs;
2275       sctx.pv = dk;
2276       PetscCall(MatPivotCheck(B,A,info,&sctx,k));
2277       if (sctx.newshift) break;
2278       dk = sctx.pv;
2279 
2280       /* copy data into U(k,:) */
2281       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2282       jmin      = bi[k]+1; jmax = bi[k+1];
2283       if (jmin < jmax) {
2284         for (j=jmin; j<jmax; j++) {
2285           col = bj[j]; ba[j] = rtmp[col];
2286         }
2287         /* add the k-th row into il and jl */
2288         il[k] = jmin;
2289         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2290       }
2291     }
2292   } while (sctx.newshift);
2293 
2294   PetscCall(PetscFree3(rtmp,il,jl));
2295   PetscCall(ISRestoreIndices(ip,&rip));
2296   PetscCall(ISRestoreIndices(iip,&riip));
2297 
2298   PetscCall(ISIdentity(ip,&perm_identity));
2299   if (perm_identity) {
2300     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2301     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2302     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2303     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2304   } else {
2305     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2306     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2307     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2308     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2309   }
2310 
2311   C->assembled    = PETSC_TRUE;
2312   C->preallocated = PETSC_TRUE;
2313 
2314   PetscCall(PetscLogFlops(C->rmap->n));
2315   if (sctx.nshift) {
2316     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2317       PetscCall(PetscInfo(A,"number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
2318     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2319       PetscCall(PetscInfo(A,"number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount));
2320     }
2321   }
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 /*
2326    icc() under revised new data structure.
2327    Factored arrays bj and ba are stored as
2328      U(0,:),...,U(i,:),U(n-1,:)
2329 
2330    ui=fact->i is an array of size n+1, in which
2331    ui+
2332      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2333      ui[n]:  points to U(n-1,n-1)+1
2334 
2335   udiag=fact->diag is an array of size n,in which
2336      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2337 
2338    U(i,:) contains udiag[i] as its last entry, i.e.,
2339     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2340 */
2341 
2342 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2343 {
2344   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2345   Mat_SeqSBAIJ       *b;
2346   PetscBool          perm_identity,missing;
2347   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2348   const PetscInt     *rip,*riip;
2349   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2350   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2351   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2352   PetscReal          fill          =info->fill,levels=info->levels;
2353   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2354   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2355   PetscBT            lnkbt;
2356   IS                 iperm;
2357 
2358   PetscFunctionBegin;
2359   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
2360   PetscCall(MatMissingDiagonal(A,&missing,&d));
2361   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d);
2362   PetscCall(ISIdentity(perm,&perm_identity));
2363   PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm));
2364 
2365   PetscCall(PetscMalloc1(am+1,&ui));
2366   PetscCall(PetscMalloc1(am+1,&udiag));
2367   ui[0] = 0;
2368 
2369   /* ICC(0) without matrix ordering: simply rearrange column indices */
2370   if (!levels && perm_identity) {
2371     for (i=0; i<am; i++) {
2372       ncols    = ai[i+1] - a->diag[i];
2373       ui[i+1]  = ui[i] + ncols;
2374       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2375     }
2376     PetscCall(PetscMalloc1(ui[am]+1,&uj));
2377     cols = uj;
2378     for (i=0; i<am; i++) {
2379       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2380       ncols = ai[i+1] - a->diag[i] -1;
2381       for (j=0; j<ncols; j++) *cols++ = aj[j];
2382       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2383     }
2384   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2385     PetscCall(ISGetIndices(iperm,&riip));
2386     PetscCall(ISGetIndices(perm,&rip));
2387 
2388     /* initialization */
2389     PetscCall(PetscMalloc1(am+1,&ajtmp));
2390 
2391     /* jl: linked list for storing indices of the pivot rows
2392        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2393     PetscCall(PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il));
2394     for (i=0; i<am; i++) {
2395       jl[i] = am; il[i] = 0;
2396     }
2397 
2398     /* create and initialize a linked list for storing column indices of the active row k */
2399     nlnk = am + 1;
2400     PetscCall(PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt));
2401 
2402     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2403     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space));
2404     current_space     = free_space;
2405     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl));
2406     current_space_lvl = free_space_lvl;
2407 
2408     for (k=0; k<am; k++) {  /* for each active row k */
2409       /* initialize lnk by the column indices of row rip[k] of A */
2410       nzk   = 0;
2411       ncols = ai[rip[k]+1] - ai[rip[k]];
2412       PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k);
2413       ncols_upper = 0;
2414       for (j=0; j<ncols; j++) {
2415         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2416         if (riip[i] >= k) { /* only take upper triangular entry */
2417           ajtmp[ncols_upper] = i;
2418           ncols_upper++;
2419         }
2420       }
2421       PetscCall(PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt));
2422       nzk += nlnk;
2423 
2424       /* update lnk by computing fill-in for each pivot row to be merged in */
2425       prow = jl[k]; /* 1st pivot row */
2426 
2427       while (prow < k) {
2428         nextprow = jl[prow];
2429 
2430         /* merge prow into k-th row */
2431         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2432         jmax  = ui[prow+1];
2433         ncols = jmax-jmin;
2434         i     = jmin - ui[prow];
2435         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2436         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2437         j     = *(uj - 1);
2438         PetscCall(PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j));
2439         nzk  += nlnk;
2440 
2441         /* update il and jl for prow */
2442         if (jmin < jmax) {
2443           il[prow] = jmin;
2444           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2445         }
2446         prow = nextprow;
2447       }
2448 
2449       /* if free space is not available, make more free space */
2450       if (current_space->local_remaining<nzk) {
2451         i    = am - k + 1; /* num of unfactored rows */
2452         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2453         PetscCall(PetscFreeSpaceGet(i,&current_space));
2454         PetscCall(PetscFreeSpaceGet(i,&current_space_lvl));
2455         reallocs++;
2456       }
2457 
2458       /* copy data into free_space and free_space_lvl, then initialize lnk */
2459       PetscCheck(nzk != 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %" PetscInt_FMT " in ICC matrix factor",k);
2460       PetscCall(PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt));
2461 
2462       /* add the k-th row into il and jl */
2463       if (nzk > 1) {
2464         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2465         jl[k] = jl[i]; jl[i] = k;
2466         il[k] = ui[k] + 1;
2467       }
2468       uj_ptr[k]     = current_space->array;
2469       uj_lvl_ptr[k] = current_space_lvl->array;
2470 
2471       current_space->array           += nzk;
2472       current_space->local_used      += nzk;
2473       current_space->local_remaining -= nzk;
2474 
2475       current_space_lvl->array           += nzk;
2476       current_space_lvl->local_used      += nzk;
2477       current_space_lvl->local_remaining -= nzk;
2478 
2479       ui[k+1] = ui[k] + nzk;
2480     }
2481 
2482     PetscCall(ISRestoreIndices(perm,&rip));
2483     PetscCall(ISRestoreIndices(iperm,&riip));
2484     PetscCall(PetscFree4(uj_ptr,uj_lvl_ptr,jl,il));
2485     PetscCall(PetscFree(ajtmp));
2486 
2487     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2488     PetscCall(PetscMalloc1(ui[am]+1,&uj));
2489     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag)); /* store matrix factor  */
2490     PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt));
2491     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2492 
2493   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2494 
2495   /* put together the new matrix in MATSEQSBAIJ format */
2496   b               = (Mat_SeqSBAIJ*)(fact)->data;
2497   b->singlemalloc = PETSC_FALSE;
2498 
2499   PetscCall(PetscMalloc1(ui[am]+1,&b->a));
2500 
2501   b->j             = uj;
2502   b->i             = ui;
2503   b->diag          = udiag;
2504   b->free_diag     = PETSC_TRUE;
2505   b->ilen          = NULL;
2506   b->imax          = NULL;
2507   b->row           = perm;
2508   b->col           = perm;
2509   PetscCall(PetscObjectReference((PetscObject)perm));
2510   PetscCall(PetscObjectReference((PetscObject)perm));
2511   b->icol          = iperm;
2512   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2513 
2514   PetscCall(PetscMalloc1(am+1,&b->solve_work));
2515   PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar))));
2516 
2517   b->maxnz   = b->nz = ui[am];
2518   b->free_a  = PETSC_TRUE;
2519   b->free_ij = PETSC_TRUE;
2520 
2521   fact->info.factor_mallocs   = reallocs;
2522   fact->info.fill_ratio_given = fill;
2523   if (ai[am] != 0) {
2524     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2525     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2526   } else {
2527     fact->info.fill_ratio_needed = 0.0;
2528   }
2529 #if defined(PETSC_USE_INFO)
2530   if (ai[am] != 0) {
2531     PetscReal af = fact->info.fill_ratio_needed;
2532     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
2533     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
2534     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
2535   } else {
2536     PetscCall(PetscInfo(A,"Empty matrix\n"));
2537   }
2538 #endif
2539   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2544 {
2545   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2546   Mat_SeqSBAIJ       *b;
2547   PetscBool          perm_identity,missing;
2548   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2549   const PetscInt     *rip,*riip;
2550   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2551   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2552   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2553   PetscReal          fill          =info->fill,levels=info->levels;
2554   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2555   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2556   PetscBT            lnkbt;
2557   IS                 iperm;
2558 
2559   PetscFunctionBegin;
2560   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
2561   PetscCall(MatMissingDiagonal(A,&missing,&d));
2562   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,d);
2563   PetscCall(ISIdentity(perm,&perm_identity));
2564   PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm));
2565 
2566   PetscCall(PetscMalloc1(am+1,&ui));
2567   PetscCall(PetscMalloc1(am+1,&udiag));
2568   ui[0] = 0;
2569 
2570   /* ICC(0) without matrix ordering: simply copies fill pattern */
2571   if (!levels && perm_identity) {
2572 
2573     for (i=0; i<am; i++) {
2574       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2575       udiag[i] = ui[i];
2576     }
2577     PetscCall(PetscMalloc1(ui[am]+1,&uj));
2578     cols = uj;
2579     for (i=0; i<am; i++) {
2580       aj    = a->j + a->diag[i];
2581       ncols = ui[i+1] - ui[i];
2582       for (j=0; j<ncols; j++) *cols++ = *aj++;
2583     }
2584   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2585     PetscCall(ISGetIndices(iperm,&riip));
2586     PetscCall(ISGetIndices(perm,&rip));
2587 
2588     /* initialization */
2589     PetscCall(PetscMalloc1(am+1,&ajtmp));
2590 
2591     /* jl: linked list for storing indices of the pivot rows
2592        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2593     PetscCall(PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il));
2594     for (i=0; i<am; i++) {
2595       jl[i] = am; il[i] = 0;
2596     }
2597 
2598     /* create and initialize a linked list for storing column indices of the active row k */
2599     nlnk = am + 1;
2600     PetscCall(PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt));
2601 
2602     /* initial FreeSpace size is fill*(ai[am]+1) */
2603     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space));
2604     current_space     = free_space;
2605     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl));
2606     current_space_lvl = free_space_lvl;
2607 
2608     for (k=0; k<am; k++) {  /* for each active row k */
2609       /* initialize lnk by the column indices of row rip[k] of A */
2610       nzk   = 0;
2611       ncols = ai[rip[k]+1] - ai[rip[k]];
2612       PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k);
2613       ncols_upper = 0;
2614       for (j=0; j<ncols; j++) {
2615         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2616         if (riip[i] >= k) { /* only take upper triangular entry */
2617           ajtmp[ncols_upper] = i;
2618           ncols_upper++;
2619         }
2620       }
2621       PetscCall(PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,&nlnk,lnk,lnk_lvl,lnkbt));
2622       nzk += nlnk;
2623 
2624       /* update lnk by computing fill-in for each pivot row to be merged in */
2625       prow = jl[k]; /* 1st pivot row */
2626 
2627       while (prow < k) {
2628         nextprow = jl[prow];
2629 
2630         /* merge prow into k-th row */
2631         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2632         jmax  = ui[prow+1];
2633         ncols = jmax-jmin;
2634         i     = jmin - ui[prow];
2635         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2636         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2637         j     = *(uj - 1);
2638         PetscCall(PetscICCLLAddSorted(ncols,cols,levels,uj,am,&nlnk,lnk,lnk_lvl,lnkbt,j));
2639         nzk  += nlnk;
2640 
2641         /* update il and jl for prow */
2642         if (jmin < jmax) {
2643           il[prow] = jmin;
2644           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2645         }
2646         prow = nextprow;
2647       }
2648 
2649       /* if free space is not available, make more free space */
2650       if (current_space->local_remaining<nzk) {
2651         i    = am - k + 1; /* num of unfactored rows */
2652         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2653         PetscCall(PetscFreeSpaceGet(i,&current_space));
2654         PetscCall(PetscFreeSpaceGet(i,&current_space_lvl));
2655         reallocs++;
2656       }
2657 
2658       /* copy data into free_space and free_space_lvl, then initialize lnk */
2659       PetscCheck(nzk,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %" PetscInt_FMT " in ICC matrix factor",k);
2660       PetscCall(PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt));
2661 
2662       /* add the k-th row into il and jl */
2663       if (nzk > 1) {
2664         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2665         jl[k] = jl[i]; jl[i] = k;
2666         il[k] = ui[k] + 1;
2667       }
2668       uj_ptr[k]     = current_space->array;
2669       uj_lvl_ptr[k] = current_space_lvl->array;
2670 
2671       current_space->array           += nzk;
2672       current_space->local_used      += nzk;
2673       current_space->local_remaining -= nzk;
2674 
2675       current_space_lvl->array           += nzk;
2676       current_space_lvl->local_used      += nzk;
2677       current_space_lvl->local_remaining -= nzk;
2678 
2679       ui[k+1] = ui[k] + nzk;
2680     }
2681 
2682 #if defined(PETSC_USE_INFO)
2683     if (ai[am] != 0) {
2684       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2685       PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
2686       PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
2687       PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
2688     } else {
2689       PetscCall(PetscInfo(A,"Empty matrix\n"));
2690     }
2691 #endif
2692 
2693     PetscCall(ISRestoreIndices(perm,&rip));
2694     PetscCall(ISRestoreIndices(iperm,&riip));
2695     PetscCall(PetscFree4(uj_ptr,uj_lvl_ptr,jl,il));
2696     PetscCall(PetscFree(ajtmp));
2697 
2698     /* destroy list of free space and other temporary array(s) */
2699     PetscCall(PetscMalloc1(ui[am]+1,&uj));
2700     PetscCall(PetscFreeSpaceContiguous(&free_space,uj));
2701     PetscCall(PetscIncompleteLLDestroy(lnk,lnkbt));
2702     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2703 
2704   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2705 
2706   /* put together the new matrix in MATSEQSBAIJ format */
2707 
2708   b               = (Mat_SeqSBAIJ*)fact->data;
2709   b->singlemalloc = PETSC_FALSE;
2710 
2711   PetscCall(PetscMalloc1(ui[am]+1,&b->a));
2712 
2713   b->j         = uj;
2714   b->i         = ui;
2715   b->diag      = udiag;
2716   b->free_diag = PETSC_TRUE;
2717   b->ilen      = NULL;
2718   b->imax      = NULL;
2719   b->row       = perm;
2720   b->col       = perm;
2721 
2722   PetscCall(PetscObjectReference((PetscObject)perm));
2723   PetscCall(PetscObjectReference((PetscObject)perm));
2724 
2725   b->icol          = iperm;
2726   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2727   PetscCall(PetscMalloc1(am+1,&b->solve_work));
2728   PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))));
2729   b->maxnz         = b->nz = ui[am];
2730   b->free_a        = PETSC_TRUE;
2731   b->free_ij       = PETSC_TRUE;
2732 
2733   fact->info.factor_mallocs   = reallocs;
2734   fact->info.fill_ratio_given = fill;
2735   if (ai[am] != 0) {
2736     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2737   } else {
2738     fact->info.fill_ratio_needed = 0.0;
2739   }
2740   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2745 {
2746   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2747   Mat_SeqSBAIJ       *b;
2748   PetscBool          perm_identity,missing;
2749   PetscReal          fill = info->fill;
2750   const PetscInt     *rip,*riip;
2751   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2752   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2753   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2754   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2755   PetscBT            lnkbt;
2756   IS                 iperm;
2757 
2758   PetscFunctionBegin;
2759   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
2760   PetscCall(MatMissingDiagonal(A,&missing,&i));
2761   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
2762 
2763   /* check whether perm is the identity mapping */
2764   PetscCall(ISIdentity(perm,&perm_identity));
2765   PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm));
2766   PetscCall(ISGetIndices(iperm,&riip));
2767   PetscCall(ISGetIndices(perm,&rip));
2768 
2769   /* initialization */
2770   PetscCall(PetscMalloc1(am+1,&ui));
2771   PetscCall(PetscMalloc1(am+1,&udiag));
2772   ui[0] = 0;
2773 
2774   /* jl: linked list for storing indices of the pivot rows
2775      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2776   PetscCall(PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols));
2777   for (i=0; i<am; i++) {
2778     jl[i] = am; il[i] = 0;
2779   }
2780 
2781   /* create and initialize a linked list for storing column indices of the active row k */
2782   nlnk = am + 1;
2783   PetscCall(PetscLLCreate(am,am,nlnk,lnk,lnkbt));
2784 
2785   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2786   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space));
2787   current_space = free_space;
2788 
2789   for (k=0; k<am; k++) {  /* for each active row k */
2790     /* initialize lnk by the column indices of row rip[k] of A */
2791     nzk   = 0;
2792     ncols = ai[rip[k]+1] - ai[rip[k]];
2793     PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k);
2794     ncols_upper = 0;
2795     for (j=0; j<ncols; j++) {
2796       i = riip[*(aj + ai[rip[k]] + j)];
2797       if (i >= k) { /* only take upper triangular entry */
2798         cols[ncols_upper] = i;
2799         ncols_upper++;
2800       }
2801     }
2802     PetscCall(PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt));
2803     nzk += nlnk;
2804 
2805     /* update lnk by computing fill-in for each pivot row to be merged in */
2806     prow = jl[k]; /* 1st pivot row */
2807 
2808     while (prow < k) {
2809       nextprow = jl[prow];
2810       /* merge prow into k-th row */
2811       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2812       jmax   = ui[prow+1];
2813       ncols  = jmax-jmin;
2814       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2815       PetscCall(PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt));
2816       nzk   += nlnk;
2817 
2818       /* update il and jl for prow */
2819       if (jmin < jmax) {
2820         il[prow] = jmin;
2821         j        = *uj_ptr;
2822         jl[prow] = jl[j];
2823         jl[j]    = prow;
2824       }
2825       prow = nextprow;
2826     }
2827 
2828     /* if free space is not available, make more free space */
2829     if (current_space->local_remaining<nzk) {
2830       i    = am - k + 1; /* num of unfactored rows */
2831       i    = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2832       PetscCall(PetscFreeSpaceGet(i,&current_space));
2833       reallocs++;
2834     }
2835 
2836     /* copy data into free space, then initialize lnk */
2837     PetscCall(PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt));
2838 
2839     /* add the k-th row into il and jl */
2840     if (nzk > 1) {
2841       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2842       jl[k] = jl[i]; jl[i] = k;
2843       il[k] = ui[k] + 1;
2844     }
2845     ui_ptr[k] = current_space->array;
2846 
2847     current_space->array           += nzk;
2848     current_space->local_used      += nzk;
2849     current_space->local_remaining -= nzk;
2850 
2851     ui[k+1] = ui[k] + nzk;
2852   }
2853 
2854   PetscCall(ISRestoreIndices(perm,&rip));
2855   PetscCall(ISRestoreIndices(iperm,&riip));
2856   PetscCall(PetscFree4(ui_ptr,jl,il,cols));
2857 
2858   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2859   PetscCall(PetscMalloc1(ui[am]+1,&uj));
2860   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag)); /* store matrix factor */
2861   PetscCall(PetscLLDestroy(lnk,lnkbt));
2862 
2863   /* put together the new matrix in MATSEQSBAIJ format */
2864 
2865   b               = (Mat_SeqSBAIJ*)fact->data;
2866   b->singlemalloc = PETSC_FALSE;
2867   b->free_a       = PETSC_TRUE;
2868   b->free_ij      = PETSC_TRUE;
2869 
2870   PetscCall(PetscMalloc1(ui[am]+1,&b->a));
2871 
2872   b->j         = uj;
2873   b->i         = ui;
2874   b->diag      = udiag;
2875   b->free_diag = PETSC_TRUE;
2876   b->ilen      = NULL;
2877   b->imax      = NULL;
2878   b->row       = perm;
2879   b->col       = perm;
2880 
2881   PetscCall(PetscObjectReference((PetscObject)perm));
2882   PetscCall(PetscObjectReference((PetscObject)perm));
2883 
2884   b->icol          = iperm;
2885   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2886 
2887   PetscCall(PetscMalloc1(am+1,&b->solve_work));
2888   PetscCall(PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar))));
2889 
2890   b->maxnz = b->nz = ui[am];
2891 
2892   fact->info.factor_mallocs   = reallocs;
2893   fact->info.fill_ratio_given = fill;
2894   if (ai[am] != 0) {
2895     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2896     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2897   } else {
2898     fact->info.fill_ratio_needed = 0.0;
2899   }
2900 #if defined(PETSC_USE_INFO)
2901   if (ai[am] != 0) {
2902     PetscReal af = fact->info.fill_ratio_needed;
2903     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
2904     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
2905     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
2906   } else {
2907     PetscCall(PetscInfo(A,"Empty matrix\n"));
2908   }
2909 #endif
2910   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2915 {
2916   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2917   Mat_SeqSBAIJ       *b;
2918   PetscBool          perm_identity,missing;
2919   PetscReal          fill = info->fill;
2920   const PetscInt     *rip,*riip;
2921   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2922   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2923   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2924   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2925   PetscBT            lnkbt;
2926   IS                 iperm;
2927 
2928   PetscFunctionBegin;
2929   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT,A->rmap->n,A->cmap->n);
2930   PetscCall(MatMissingDiagonal(A,&missing,&i));
2931   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
2932 
2933   /* check whether perm is the identity mapping */
2934   PetscCall(ISIdentity(perm,&perm_identity));
2935   PetscCall(ISInvertPermutation(perm,PETSC_DECIDE,&iperm));
2936   PetscCall(ISGetIndices(iperm,&riip));
2937   PetscCall(ISGetIndices(perm,&rip));
2938 
2939   /* initialization */
2940   PetscCall(PetscMalloc1(am+1,&ui));
2941   ui[0] = 0;
2942 
2943   /* jl: linked list for storing indices of the pivot rows
2944      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2945   PetscCall(PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols));
2946   for (i=0; i<am; i++) {
2947     jl[i] = am; il[i] = 0;
2948   }
2949 
2950   /* create and initialize a linked list for storing column indices of the active row k */
2951   nlnk = am + 1;
2952   PetscCall(PetscLLCreate(am,am,nlnk,lnk,lnkbt));
2953 
2954   /* initial FreeSpace size is fill*(ai[am]+1) */
2955   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space));
2956   current_space = free_space;
2957 
2958   for (k=0; k<am; k++) {  /* for each active row k */
2959     /* initialize lnk by the column indices of row rip[k] of A */
2960     nzk   = 0;
2961     ncols = ai[rip[k]+1] - ai[rip[k]];
2962     PetscCheck(ncols,PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,rip[k],k);
2963     ncols_upper = 0;
2964     for (j=0; j<ncols; j++) {
2965       i = riip[*(aj + ai[rip[k]] + j)];
2966       if (i >= k) { /* only take upper triangular entry */
2967         cols[ncols_upper] = i;
2968         ncols_upper++;
2969       }
2970     }
2971     PetscCall(PetscLLAdd(ncols_upper,cols,am,&nlnk,lnk,lnkbt));
2972     nzk += nlnk;
2973 
2974     /* update lnk by computing fill-in for each pivot row to be merged in */
2975     prow = jl[k]; /* 1st pivot row */
2976 
2977     while (prow < k) {
2978       nextprow = jl[prow];
2979       /* merge prow into k-th row */
2980       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2981       jmax   = ui[prow+1];
2982       ncols  = jmax-jmin;
2983       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2984       PetscCall(PetscLLAddSorted(ncols,uj_ptr,am,&nlnk,lnk,lnkbt));
2985       nzk   += nlnk;
2986 
2987       /* update il and jl for prow */
2988       if (jmin < jmax) {
2989         il[prow] = jmin;
2990         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2991       }
2992       prow = nextprow;
2993     }
2994 
2995     /* if free space is not available, make more free space */
2996     if (current_space->local_remaining<nzk) {
2997       i    = am - k + 1; /* num of unfactored rows */
2998       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2999       PetscCall(PetscFreeSpaceGet(i,&current_space));
3000       reallocs++;
3001     }
3002 
3003     /* copy data into free space, then initialize lnk */
3004     PetscCall(PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt));
3005 
3006     /* add the k-th row into il and jl */
3007     if (nzk-1 > 0) {
3008       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3009       jl[k] = jl[i]; jl[i] = k;
3010       il[k] = ui[k] + 1;
3011     }
3012     ui_ptr[k] = current_space->array;
3013 
3014     current_space->array           += nzk;
3015     current_space->local_used      += nzk;
3016     current_space->local_remaining -= nzk;
3017 
3018     ui[k+1] = ui[k] + nzk;
3019   }
3020 
3021 #if defined(PETSC_USE_INFO)
3022   if (ai[am] != 0) {
3023     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3024     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af));
3025     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
3026     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af));
3027   } else {
3028     PetscCall(PetscInfo(A,"Empty matrix\n"));
3029   }
3030 #endif
3031 
3032   PetscCall(ISRestoreIndices(perm,&rip));
3033   PetscCall(ISRestoreIndices(iperm,&riip));
3034   PetscCall(PetscFree4(ui_ptr,jl,il,cols));
3035 
3036   /* destroy list of free space and other temporary array(s) */
3037   PetscCall(PetscMalloc1(ui[am]+1,&uj));
3038   PetscCall(PetscFreeSpaceContiguous(&free_space,uj));
3039   PetscCall(PetscLLDestroy(lnk,lnkbt));
3040 
3041   /* put together the new matrix in MATSEQSBAIJ format */
3042 
3043   b               = (Mat_SeqSBAIJ*)fact->data;
3044   b->singlemalloc = PETSC_FALSE;
3045   b->free_a       = PETSC_TRUE;
3046   b->free_ij      = PETSC_TRUE;
3047 
3048   PetscCall(PetscMalloc1(ui[am]+1,&b->a));
3049 
3050   b->j    = uj;
3051   b->i    = ui;
3052   b->diag = NULL;
3053   b->ilen = NULL;
3054   b->imax = NULL;
3055   b->row  = perm;
3056   b->col  = perm;
3057 
3058   PetscCall(PetscObjectReference((PetscObject)perm));
3059   PetscCall(PetscObjectReference((PetscObject)perm));
3060 
3061   b->icol          = iperm;
3062   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3063 
3064   PetscCall(PetscMalloc1(am+1,&b->solve_work));
3065   PetscCall(PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar))));
3066   b->maxnz = b->nz = ui[am];
3067 
3068   fact->info.factor_mallocs   = reallocs;
3069   fact->info.fill_ratio_given = fill;
3070   if (ai[am] != 0) {
3071     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3072   } else {
3073     fact->info.fill_ratio_needed = 0.0;
3074   }
3075   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3080 {
3081   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3082   PetscInt          n   = A->rmap->n;
3083   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3084   PetscScalar       *x,sum;
3085   const PetscScalar *b;
3086   const MatScalar   *aa = a->a,*v;
3087   PetscInt          i,nz;
3088 
3089   PetscFunctionBegin;
3090   if (!n) PetscFunctionReturn(0);
3091 
3092   PetscCall(VecGetArrayRead(bb,&b));
3093   PetscCall(VecGetArrayWrite(xx,&x));
3094 
3095   /* forward solve the lower triangular */
3096   x[0] = b[0];
3097   v    = aa;
3098   vi   = aj;
3099   for (i=1; i<n; i++) {
3100     nz  = ai[i+1] - ai[i];
3101     sum = b[i];
3102     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3103     v   += nz;
3104     vi  += nz;
3105     x[i] = sum;
3106   }
3107 
3108   /* backward solve the upper triangular */
3109   for (i=n-1; i>=0; i--) {
3110     v   = aa + adiag[i+1] + 1;
3111     vi  = aj + adiag[i+1] + 1;
3112     nz  = adiag[i] - adiag[i+1]-1;
3113     sum = x[i];
3114     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3115     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3116   }
3117 
3118   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
3119   PetscCall(VecRestoreArrayRead(bb,&b));
3120   PetscCall(VecRestoreArrayWrite(xx,&x));
3121   PetscFunctionReturn(0);
3122 }
3123 
3124 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3125 {
3126   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3127   IS                iscol = a->col,isrow = a->row;
3128   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3129   const PetscInt    *rout,*cout,*r,*c;
3130   PetscScalar       *x,*tmp,sum;
3131   const PetscScalar *b;
3132   const MatScalar   *aa = a->a,*v;
3133 
3134   PetscFunctionBegin;
3135   if (!n) PetscFunctionReturn(0);
3136 
3137   PetscCall(VecGetArrayRead(bb,&b));
3138   PetscCall(VecGetArrayWrite(xx,&x));
3139   tmp  = a->solve_work;
3140 
3141   PetscCall(ISGetIndices(isrow,&rout)); r = rout;
3142   PetscCall(ISGetIndices(iscol,&cout)); c = cout;
3143 
3144   /* forward solve the lower triangular */
3145   tmp[0] = b[r[0]];
3146   v      = aa;
3147   vi     = aj;
3148   for (i=1; i<n; i++) {
3149     nz  = ai[i+1] - ai[i];
3150     sum = b[r[i]];
3151     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3152     tmp[i] = sum;
3153     v     += nz; vi += nz;
3154   }
3155 
3156   /* backward solve the upper triangular */
3157   for (i=n-1; i>=0; i--) {
3158     v   = aa + adiag[i+1]+1;
3159     vi  = aj + adiag[i+1]+1;
3160     nz  = adiag[i]-adiag[i+1]-1;
3161     sum = tmp[i];
3162     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3163     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3164   }
3165 
3166   PetscCall(ISRestoreIndices(isrow,&rout));
3167   PetscCall(ISRestoreIndices(iscol,&cout));
3168   PetscCall(VecRestoreArrayRead(bb,&b));
3169   PetscCall(VecRestoreArrayWrite(xx,&x));
3170   PetscCall(PetscLogFlops(2.0*a->nz - A->cmap->n));
3171   PetscFunctionReturn(0);
3172 }
3173 
3174 /*
3175     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3176 */
3177 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3178 {
3179   Mat            B = *fact;
3180   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3181   IS             isicol;
3182   const PetscInt *r,*ic;
3183   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3184   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3185   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3186   PetscInt       nlnk,*lnk;
3187   PetscBT        lnkbt;
3188   PetscBool      row_identity,icol_identity;
3189   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3190   const PetscInt *ics;
3191   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3192   PetscReal      dt     =info->dt,shift=info->shiftamount;
3193   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3194   PetscBool      missing;
3195 
3196   PetscFunctionBegin;
3197   if (dt      == PETSC_DEFAULT) dt = 0.005;
3198   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3199 
3200   /* ------- symbolic factorization, can be reused ---------*/
3201   PetscCall(MatMissingDiagonal(A,&missing,&i));
3202   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
3203   adiag=a->diag;
3204 
3205   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
3206 
3207   /* bdiag is location of diagonal in factor */
3208   PetscCall(PetscMalloc1(n+1,&bdiag));     /* becomes b->diag */
3209   PetscCall(PetscMalloc1(n+1,&bdiag_rev)); /* temporary */
3210 
3211   /* allocate row pointers bi */
3212   PetscCall(PetscMalloc1(2*n+2,&bi));
3213 
3214   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3215   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3216   nnz_max = ai[n]+2*n*dtcount+2;
3217 
3218   PetscCall(PetscMalloc1(nnz_max+1,&bj));
3219   PetscCall(PetscMalloc1(nnz_max+1,&ba));
3220 
3221   /* put together the new matrix */
3222   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL));
3223   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol));
3224   b    = (Mat_SeqAIJ*)B->data;
3225 
3226   b->free_a       = PETSC_TRUE;
3227   b->free_ij      = PETSC_TRUE;
3228   b->singlemalloc = PETSC_FALSE;
3229 
3230   b->a    = ba;
3231   b->j    = bj;
3232   b->i    = bi;
3233   b->diag = bdiag;
3234   b->ilen = NULL;
3235   b->imax = NULL;
3236   b->row  = isrow;
3237   b->col  = iscol;
3238   PetscCall(PetscObjectReference((PetscObject)isrow));
3239   PetscCall(PetscObjectReference((PetscObject)iscol));
3240   b->icol = isicol;
3241 
3242   PetscCall(PetscMalloc1(n+1,&b->solve_work));
3243   PetscCall(PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar))));
3244   b->maxnz = nnz_max;
3245 
3246   B->factortype            = MAT_FACTOR_ILUDT;
3247   B->info.factor_mallocs   = 0;
3248   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3249   /* ------- end of symbolic factorization ---------*/
3250 
3251   PetscCall(ISGetIndices(isrow,&r));
3252   PetscCall(ISGetIndices(isicol,&ic));
3253   ics  = ic;
3254 
3255   /* linked list for storing column indices of the active row */
3256   nlnk = n + 1;
3257   PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt));
3258 
3259   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3260   PetscCall(PetscMalloc2(n,&im,n,&jtmp));
3261   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3262   PetscCall(PetscMalloc2(n,&rtmp,n,&vtmp));
3263   PetscCall(PetscArrayzero(rtmp,n));
3264 
3265   bi[0]        = 0;
3266   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3267   bdiag_rev[n] = bdiag[0];
3268   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3269   for (i=0; i<n; i++) {
3270     /* copy initial fill into linked list */
3271     nzi = ai[r[i]+1] - ai[r[i]];
3272     PetscCheck(nzi,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT,r[i],i);
3273     nzi_al = adiag[r[i]] - ai[r[i]];
3274     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3275     ajtmp  = aj + ai[r[i]];
3276     PetscCall(PetscLLAddPerm(nzi,ajtmp,ic,n,&nlnk,lnk,lnkbt));
3277 
3278     /* load in initial (unfactored row) */
3279     aatmp = a->a + ai[r[i]];
3280     for (j=0; j<nzi; j++) {
3281       rtmp[ics[*ajtmp++]] = *aatmp++;
3282     }
3283 
3284     /* add pivot rows into linked list */
3285     row = lnk[n];
3286     while (row < i) {
3287       nzi_bl = bi[row+1] - bi[row] + 1;
3288       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3289       PetscCall(PetscLLAddSortedLU(bjtmp,row,&nlnk,lnk,lnkbt,i,nzi_bl,im));
3290       nzi   += nlnk;
3291       row    = lnk[row];
3292     }
3293 
3294     /* copy data from lnk into jtmp, then initialize lnk */
3295     PetscCall(PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt));
3296 
3297     /* numerical factorization */
3298     bjtmp = jtmp;
3299     row   = *bjtmp++; /* 1st pivot row */
3300     while (row < i) {
3301       pc         = rtmp + row;
3302       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3303       multiplier = (*pc) * (*pv);
3304       *pc        = multiplier;
3305       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3306         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3307         pv = ba + bdiag[row+1] + 1;
3308         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3309         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3310         PetscCall(PetscLogFlops(1+2.0*nz));
3311       }
3312       row = *bjtmp++;
3313     }
3314 
3315     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3316     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3317     nzi_bl   = 0; j = 0;
3318     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3319       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3320       nzi_bl++; j++;
3321     }
3322     nzi_bu = nzi - nzi_bl -1;
3323     while (j < nzi) {
3324       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3325       j++;
3326     }
3327 
3328     bjtmp = bj + bi[i];
3329     batmp = ba + bi[i];
3330     /* apply level dropping rule to L part */
3331     ncut = nzi_al + dtcount;
3332     if (ncut < nzi_bl) {
3333       PetscCall(PetscSortSplit(ncut,nzi_bl,vtmp,jtmp));
3334       PetscCall(PetscSortIntWithScalarArray(ncut,jtmp,vtmp));
3335     } else {
3336       ncut = nzi_bl;
3337     }
3338     for (j=0; j<ncut; j++) {
3339       bjtmp[j] = jtmp[j];
3340       batmp[j] = vtmp[j];
3341     }
3342     bi[i+1] = bi[i] + ncut;
3343     nzi     = ncut + 1;
3344 
3345     /* apply level dropping rule to U part */
3346     ncut = nzi_au + dtcount;
3347     if (ncut < nzi_bu) {
3348       PetscCall(PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1));
3349       PetscCall(PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1));
3350     } else {
3351       ncut = nzi_bu;
3352     }
3353     nzi += ncut;
3354 
3355     /* mark bdiagonal */
3356     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3357     bdiag_rev[n-i-1] = bdiag[i+1];
3358     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3359     bjtmp            = bj + bdiag[i];
3360     batmp            = ba + bdiag[i];
3361     *bjtmp           = i;
3362     *batmp           = diag_tmp; /* rtmp[i]; */
3363     if (*batmp == 0.0) {
3364       *batmp = dt+shift;
3365     }
3366     *batmp = 1.0/(*batmp); /* invert diagonal entries for simpler triangular solves */
3367 
3368     bjtmp = bj + bdiag[i+1]+1;
3369     batmp = ba + bdiag[i+1]+1;
3370     for (k=0; k<ncut; k++) {
3371       bjtmp[k] = jtmp[nzi_bl+1+k];
3372       batmp[k] = vtmp[nzi_bl+1+k];
3373     }
3374 
3375     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3376   } /* for (i=0; i<n; i++) */
3377   PetscCheck(bi[n] < bdiag[n],PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT,bi[n],bdiag[n]);
3378 
3379   PetscCall(ISRestoreIndices(isrow,&r));
3380   PetscCall(ISRestoreIndices(isicol,&ic));
3381 
3382   PetscCall(PetscLLDestroy(lnk,lnkbt));
3383   PetscCall(PetscFree2(im,jtmp));
3384   PetscCall(PetscFree2(rtmp,vtmp));
3385   PetscCall(PetscFree(bdiag_rev));
3386 
3387   PetscCall(PetscLogFlops(B->cmap->n));
3388   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3389 
3390   PetscCall(ISIdentity(isrow,&row_identity));
3391   PetscCall(ISIdentity(isicol,&icol_identity));
3392   if (row_identity && icol_identity) {
3393     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3394   } else {
3395     B->ops->solve = MatSolve_SeqAIJ;
3396   }
3397 
3398   B->ops->solveadd          = NULL;
3399   B->ops->solvetranspose    = NULL;
3400   B->ops->solvetransposeadd = NULL;
3401   B->ops->matsolve          = NULL;
3402   B->assembled              = PETSC_TRUE;
3403   B->preallocated           = PETSC_TRUE;
3404   PetscFunctionReturn(0);
3405 }
3406 
3407 /* a wraper of MatILUDTFactor_SeqAIJ() */
3408 /*
3409     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3410 */
3411 
3412 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3413 {
3414   PetscFunctionBegin;
3415   PetscCall(MatILUDTFactor_SeqAIJ(A,row,col,info,&fact));
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 /*
3420    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3421    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3422 */
3423 /*
3424     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3425 */
3426 
3427 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3428 {
3429   Mat            C     =fact;
3430   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3431   IS             isrow = b->row,isicol = b->icol;
3432   const PetscInt *r,*ic,*ics;
3433   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3434   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3435   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3436   PetscReal      dt=info->dt,shift=info->shiftamount;
3437   PetscBool      row_identity, col_identity;
3438 
3439   PetscFunctionBegin;
3440   PetscCall(ISGetIndices(isrow,&r));
3441   PetscCall(ISGetIndices(isicol,&ic));
3442   PetscCall(PetscMalloc1(n+1,&rtmp));
3443   ics  = ic;
3444 
3445   for (i=0; i<n; i++) {
3446     /* initialize rtmp array */
3447     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3448     bjtmp = bj + bi[i];
3449     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3450     rtmp[i] = 0.0;
3451     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3452     bjtmp   = bj + bdiag[i+1] + 1;
3453     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3454 
3455     /* load in initial unfactored row of A */
3456     nz    = ai[r[i]+1] - ai[r[i]];
3457     ajtmp = aj + ai[r[i]];
3458     v     = aa + ai[r[i]];
3459     for (j=0; j<nz; j++) {
3460       rtmp[ics[*ajtmp++]] = v[j];
3461     }
3462 
3463     /* numerical factorization */
3464     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3465     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3466     k     = 0;
3467     while (k < nzl) {
3468       row        = *bjtmp++;
3469       pc         = rtmp + row;
3470       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3471       multiplier = (*pc) * (*pv);
3472       *pc        = multiplier;
3473       if (PetscAbsScalar(multiplier) > dt) {
3474         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3475         pv = b->a + bdiag[row+1] + 1;
3476         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3477         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3478         PetscCall(PetscLogFlops(1+2.0*nz));
3479       }
3480       k++;
3481     }
3482 
3483     /* finished row so stick it into b->a */
3484     /* L-part */
3485     pv  = b->a + bi[i];
3486     pj  = bj + bi[i];
3487     nzl = bi[i+1] - bi[i];
3488     for (j=0; j<nzl; j++) {
3489       pv[j] = rtmp[pj[j]];
3490     }
3491 
3492     /* diagonal: invert diagonal entries for simpler triangular solves */
3493     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3494     b->a[bdiag[i]] = 1.0/rtmp[i];
3495 
3496     /* U-part */
3497     pv  = b->a + bdiag[i+1] + 1;
3498     pj  = bj + bdiag[i+1] + 1;
3499     nzu = bdiag[i] - bdiag[i+1] - 1;
3500     for (j=0; j<nzu; j++) {
3501       pv[j] = rtmp[pj[j]];
3502     }
3503   }
3504 
3505   PetscCall(PetscFree(rtmp));
3506   PetscCall(ISRestoreIndices(isicol,&ic));
3507   PetscCall(ISRestoreIndices(isrow,&r));
3508 
3509   PetscCall(ISIdentity(isrow,&row_identity));
3510   PetscCall(ISIdentity(isicol,&col_identity));
3511   if (row_identity && col_identity) {
3512     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3513   } else {
3514     C->ops->solve = MatSolve_SeqAIJ;
3515   }
3516   C->ops->solveadd          = NULL;
3517   C->ops->solvetranspose    = NULL;
3518   C->ops->solvetransposeadd = NULL;
3519   C->ops->matsolve          = NULL;
3520   C->assembled              = PETSC_TRUE;
3521   C->preallocated           = PETSC_TRUE;
3522 
3523   PetscCall(PetscLogFlops(C->cmap->n));
3524   PetscFunctionReturn(0);
3525 }
3526