xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision f2ed2dc71a2ab9ffda85eae8afa0cbea9ed570de)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 /*
8       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9 
10       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11 */
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
15   PetscErrorCode    ierr;
16   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
17   const PetscInt    *ai = a->i, *aj = a->j;
18   const PetscScalar *aa = a->a;
19   PetscBool         *done;
20   PetscReal         best,past = 0,future;
21 
22   PetscFunctionBegin;
23   /* pick initial row */
24   best = -1;
25   for (i=0; i<n; i++) {
26     future = 0.0;
27     for (j=ai[i]; j<ai[i+1]; j++) {
28       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
29       else              past  = PetscAbsScalar(aa[j]);
30     }
31     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
32     if (past/future > best) {
33       best    = past/future;
34       current = i;
35     }
36   }
37 
38   ierr     = PetscMalloc1(n,&done);CHKERRQ(ierr);
39   ierr     = PetscArrayzero(done,n);CHKERRQ(ierr);
40   ierr     = PetscMalloc1(n,&order);CHKERRQ(ierr);
41   order[0] = current;
42   for (i=0; i<n-1; i++) {
43     done[current] = PETSC_TRUE;
44     best          = -1;
45     /* loop over all neighbors of current pivot */
46     for (j=ai[current]; j<ai[current+1]; j++) {
47       jj = aj[j];
48       if (done[jj]) continue;
49       /* loop over columns of potential next row computing weights for below and above diagonal */
50       past = future = 0.0;
51       for (k=ai[jj]; k<ai[jj+1]; k++) {
52         kk = aj[k];
53         if (done[kk]) past += PetscAbsScalar(aa[k]);
54         else if (kk != jj) future += PetscAbsScalar(aa[k]);
55       }
56       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
57       if (past/future > best) {
58         best       = past/future;
59         newcurrent = jj;
60       }
61     }
62     if (best == -1) { /* no neighbors to select from so select best of all that remain */
63       best = -1;
64       for (k=0; k<n; k++) {
65         if (done[k]) continue;
66         future = 0.0;
67         past   = 0.0;
68         for (j=ai[k]; j<ai[k+1]; j++) {
69           kk = aj[j];
70           if (done[kk])       past += PetscAbsScalar(aa[j]);
71           else if (kk != k) future += PetscAbsScalar(aa[j]);
72         }
73         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
74         if (past/future > best) {
75           best       = past/future;
76           newcurrent = k;
77         }
78       }
79     }
80     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
81     current    = newcurrent;
82     order[i+1] = current;
83   }
84   ierr  = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
85   *icol = *irow;
86   ierr  = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
87   ierr  = PetscFree(done);CHKERRQ(ierr);
88   ierr  = PetscFree(order);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
93 {
94   PetscInt       n = A->rmap->n;
95   PetscErrorCode ierr;
96 
97   PetscFunctionBegin;
98 #if defined(PETSC_USE_COMPLEX)
99   if (A->hermitian && !A->symmetric && (ftype == MAT_FACTOR_CHOLESKY||ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported");
100 #endif
101   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
102   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
103   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
104     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
105 
106     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
107     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
108 
109     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
110   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
111     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
112     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
113 
114     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
115     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
116   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
117   (*B)->factortype = ftype;
118 
119   ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr);
120   ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr);
121   (*B)->useordering = PETSC_TRUE;
122   PetscFunctionReturn(0);
123 }
124 
125 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
126 {
127   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
128   IS                 isicol;
129   PetscErrorCode     ierr;
130   const PetscInt     *r,*ic;
131   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
132   PetscInt           *bi,*bj,*ajtmp;
133   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
134   PetscReal          f;
135   PetscInt           nlnk,*lnk,k,**bi_ptr;
136   PetscFreeSpaceList free_space=NULL,current_space=NULL;
137   PetscBT            lnkbt;
138   PetscBool          missing;
139 
140   PetscFunctionBegin;
141   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
142   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
143   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
144 
145   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
146   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
147   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
148 
149   /* get new row pointers */
150   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
151   bi[0] = 0;
152 
153   /* bdiag is location of diagonal in factor */
154   ierr     = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
155   bdiag[0] = 0;
156 
157   /* linked list for storing column indices of the active row */
158   nlnk = n + 1;
159   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
160 
161   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
162 
163   /* initial FreeSpace size is f*(ai[n]+1) */
164   f             = info->fill;
165   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
166   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
167   current_space = free_space;
168 
169   for (i=0; i<n; i++) {
170     /* copy previous fill into linked list */
171     nzi = 0;
172     nnz = ai[r[i]+1] - ai[r[i]];
173     ajtmp = aj + ai[r[i]];
174     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
175     nzi  += nlnk;
176 
177     /* add pivot rows into linked list */
178     row = lnk[n];
179     while (row < i) {
180       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
181       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
182       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
183       nzi  += nlnk;
184       row   = lnk[row];
185     }
186     bi[i+1] = bi[i] + nzi;
187     im[i]   = nzi;
188 
189     /* mark bdiag */
190     nzbd = 0;
191     nnz  = nzi;
192     k    = lnk[n];
193     while (nnz-- && k < i) {
194       nzbd++;
195       k = lnk[k];
196     }
197     bdiag[i] = bi[i] + nzbd;
198 
199     /* if free space is not available, make more free space */
200     if (current_space->local_remaining<nzi) {
201       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
202       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
203       reallocs++;
204     }
205 
206     /* copy data into free space, then initialize lnk */
207     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
208 
209     bi_ptr[i]                       = current_space->array;
210     current_space->array           += nzi;
211     current_space->local_used      += nzi;
212     current_space->local_remaining -= nzi;
213   }
214 #if defined(PETSC_USE_INFO)
215   if (ai[n] != 0) {
216     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
217     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
218     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
219     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
220     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
221   } else {
222     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
223   }
224 #endif
225 
226   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
227   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
228 
229   /* destroy list of free space and other temporary array(s) */
230   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
231   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
232   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
233   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
234 
235   /* put together the new matrix */
236   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
237   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
238   b    = (Mat_SeqAIJ*)(B)->data;
239 
240   b->free_a       = PETSC_TRUE;
241   b->free_ij      = PETSC_TRUE;
242   b->singlemalloc = PETSC_FALSE;
243 
244   ierr    = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr);
245   b->j    = bj;
246   b->i    = bi;
247   b->diag = bdiag;
248   b->ilen = 0;
249   b->imax = 0;
250   b->row  = isrow;
251   b->col  = iscol;
252   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
253   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
254   b->icol = isicol;
255   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
256 
257   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
258   ierr     = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
259   b->maxnz = b->nz = bi[n];
260 
261   (B)->factortype            = MAT_FACTOR_LU;
262   (B)->info.factor_mallocs   = reallocs;
263   (B)->info.fill_ratio_given = f;
264 
265   if (ai[n]) {
266     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
267   } else {
268     (B)->info.fill_ratio_needed = 0.0;
269   }
270   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
271   if (a->inode.size) {
272     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
273   }
274   PetscFunctionReturn(0);
275 }
276 
277 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
278 {
279   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
280   IS                 isicol;
281   PetscErrorCode     ierr;
282   const PetscInt     *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
283   PetscInt           i,n=A->rmap->n;
284   PetscInt           *bi,*bj;
285   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
286   PetscReal          f;
287   PetscInt           nlnk,*lnk,k,**bi_ptr;
288   PetscFreeSpaceList free_space=NULL,current_space=NULL;
289   PetscBT            lnkbt;
290   PetscBool          missing;
291 
292   PetscFunctionBegin;
293   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
294   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
295   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
296 
297   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
298   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
299   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
300 
301   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
302   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
303   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
304   bi[0] = bdiag[0] = 0;
305 
306   /* linked list for storing column indices of the active row */
307   nlnk = n + 1;
308   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
309 
310   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
311 
312   /* initial FreeSpace size is f*(ai[n]+1) */
313   f             = info->fill;
314   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
315   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
316   current_space = free_space;
317 
318   for (i=0; i<n; i++) {
319     /* copy previous fill into linked list */
320     nzi = 0;
321     nnz = ai[r[i]+1] - ai[r[i]];
322     ajtmp = aj + ai[r[i]];
323     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
324     nzi  += nlnk;
325 
326     /* add pivot rows into linked list */
327     row = lnk[n];
328     while (row < i) {
329       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
330       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
331       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
332       nzi  += nlnk;
333       row   = lnk[row];
334     }
335     bi[i+1] = bi[i] + nzi;
336     im[i]   = nzi;
337 
338     /* mark bdiag */
339     nzbd = 0;
340     nnz  = nzi;
341     k    = lnk[n];
342     while (nnz-- && k < i) {
343       nzbd++;
344       k = lnk[k];
345     }
346     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
347 
348     /* if free space is not available, make more free space */
349     if (current_space->local_remaining<nzi) {
350       /* estimated additional space needed */
351       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
352       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
353       reallocs++;
354     }
355 
356     /* copy data into free space, then initialize lnk */
357     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
358 
359     bi_ptr[i]                       = current_space->array;
360     current_space->array           += nzi;
361     current_space->local_used      += nzi;
362     current_space->local_remaining -= nzi;
363   }
364 
365   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
366   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
367 
368   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
369   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
370   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
371   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
372   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
373 
374   /* put together the new matrix */
375   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
376   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
377   b    = (Mat_SeqAIJ*)(B)->data;
378 
379   b->free_a       = PETSC_TRUE;
380   b->free_ij      = PETSC_TRUE;
381   b->singlemalloc = PETSC_FALSE;
382 
383   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
384 
385   b->j    = bj;
386   b->i    = bi;
387   b->diag = bdiag;
388   b->ilen = 0;
389   b->imax = 0;
390   b->row  = isrow;
391   b->col  = iscol;
392   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
393   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
394   b->icol = isicol;
395   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
396 
397   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
398   ierr     = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
399   b->maxnz = b->nz = bdiag[0]+1;
400 
401   B->factortype            = MAT_FACTOR_LU;
402   B->info.factor_mallocs   = reallocs;
403   B->info.fill_ratio_given = f;
404 
405   if (ai[n]) {
406     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
407   } else {
408     B->info.fill_ratio_needed = 0.0;
409   }
410 #if defined(PETSC_USE_INFO)
411   if (ai[n] != 0) {
412     PetscReal af = B->info.fill_ratio_needed;
413     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
414     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
415     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
416     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
417   } else {
418     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
419   }
420 #endif
421   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
422   if (a->inode.size) {
423     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
424   }
425   ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 /*
430     Trouble in factorization, should we dump the original matrix?
431 */
432 PetscErrorCode MatFactorDumpMatrix(Mat A)
433 {
434   PetscErrorCode ierr;
435   PetscBool      flg = PETSC_FALSE;
436 
437   PetscFunctionBegin;
438   ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr);
439   if (flg) {
440     PetscViewer viewer;
441     char        filename[PETSC_MAX_PATH_LEN];
442 
443     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
444     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
445     ierr = MatView(A,viewer);CHKERRQ(ierr);
446     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
447   }
448   PetscFunctionReturn(0);
449 }
450 
451 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
452 {
453   Mat             C     =B;
454   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
455   IS              isrow = b->row,isicol = b->icol;
456   PetscErrorCode  ierr;
457   const PetscInt  *r,*ic,*ics;
458   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
459   PetscInt        i,j,k,nz,nzL,row,*pj;
460   const PetscInt  *ajtmp,*bjtmp;
461   MatScalar       *rtmp,*pc,multiplier,*pv;
462   const MatScalar *aa=a->a,*v;
463   PetscBool       row_identity,col_identity;
464   FactorShiftCtx  sctx;
465   const PetscInt  *ddiag;
466   PetscReal       rs;
467   MatScalar       d;
468 
469   PetscFunctionBegin;
470   /* MatPivotSetUp(): initialize shift context sctx */
471   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
472 
473   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
474     ddiag          = a->diag;
475     sctx.shift_top = info->zeropivot;
476     for (i=0; i<n; i++) {
477       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
478       d  = (aa)[ddiag[i]];
479       rs = -PetscAbsScalar(d) - PetscRealPart(d);
480       v  = aa+ai[i];
481       nz = ai[i+1] - ai[i];
482       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
483       if (rs>sctx.shift_top) sctx.shift_top = rs;
484     }
485     sctx.shift_top *= 1.1;
486     sctx.nshift_max = 5;
487     sctx.shift_lo   = 0.;
488     sctx.shift_hi   = 1.;
489   }
490 
491   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
492   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
493   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
494   ics  = ic;
495 
496   do {
497     sctx.newshift = PETSC_FALSE;
498     for (i=0; i<n; i++) {
499       /* zero rtmp */
500       /* L part */
501       nz    = bi[i+1] - bi[i];
502       bjtmp = bj + bi[i];
503       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
504 
505       /* U part */
506       nz    = bdiag[i]-bdiag[i+1];
507       bjtmp = bj + bdiag[i+1]+1;
508       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
509 
510       /* load in initial (unfactored row) */
511       nz    = ai[r[i]+1] - ai[r[i]];
512       ajtmp = aj + ai[r[i]];
513       v     = aa + ai[r[i]];
514       for (j=0; j<nz; j++) {
515         rtmp[ics[ajtmp[j]]] = v[j];
516       }
517       /* ZeropivotApply() */
518       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
519 
520       /* elimination */
521       bjtmp = bj + bi[i];
522       row   = *bjtmp++;
523       nzL   = bi[i+1] - bi[i];
524       for (k=0; k < nzL; k++) {
525         pc = rtmp + row;
526         if (*pc != 0.0) {
527           pv         = b->a + bdiag[row];
528           multiplier = *pc * (*pv);
529           *pc        = multiplier;
530 
531           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
532           pv = b->a + bdiag[row+1]+1;
533           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
534 
535           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
536           ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
537         }
538         row = *bjtmp++;
539       }
540 
541       /* finished row so stick it into b->a */
542       rs = 0.0;
543       /* L part */
544       pv = b->a + bi[i];
545       pj = b->j + bi[i];
546       nz = bi[i+1] - bi[i];
547       for (j=0; j<nz; j++) {
548         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
549       }
550 
551       /* U part */
552       pv = b->a + bdiag[i+1]+1;
553       pj = b->j + bdiag[i+1]+1;
554       nz = bdiag[i] - bdiag[i+1]-1;
555       for (j=0; j<nz; j++) {
556         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
557       }
558 
559       sctx.rs = rs;
560       sctx.pv = rtmp[i];
561       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
562       if (sctx.newshift) break; /* break for-loop */
563       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
564 
565       /* Mark diagonal and invert diagonal for simplier triangular solves */
566       pv  = b->a + bdiag[i];
567       *pv = 1.0/rtmp[i];
568 
569     } /* endof for (i=0; i<n; i++) { */
570 
571     /* MatPivotRefine() */
572     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
573       /*
574        * if no shift in this attempt & shifting & started shifting & can refine,
575        * then try lower shift
576        */
577       sctx.shift_hi       = sctx.shift_fraction;
578       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
579       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
580       sctx.newshift       = PETSC_TRUE;
581       sctx.nshift++;
582     }
583   } while (sctx.newshift);
584 
585   ierr = PetscFree(rtmp);CHKERRQ(ierr);
586   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
587   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
588 
589   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
590   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
591   if (b->inode.size) {
592     C->ops->solve = MatSolve_SeqAIJ_Inode;
593   } else if (row_identity && col_identity) {
594     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
595   } else {
596     C->ops->solve = MatSolve_SeqAIJ;
597   }
598   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
599   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
600   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
601   C->ops->matsolve          = MatMatSolve_SeqAIJ;
602   C->assembled              = PETSC_TRUE;
603   C->preallocated           = PETSC_TRUE;
604 
605   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
606 
607   /* MatShiftView(A,info,&sctx) */
608   if (sctx.nshift) {
609     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
610       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
611     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
612       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
613     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
614       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
615     }
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
621 {
622   Mat             C     =B;
623   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
624   IS              isrow = b->row,isicol = b->icol;
625   PetscErrorCode  ierr;
626   const PetscInt  *r,*ic,*ics;
627   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
628   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
629   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
630   MatScalar       *pv,*rtmp,*pc,multiplier,d;
631   const MatScalar *v,*aa=a->a;
632   PetscReal       rs=0.0;
633   FactorShiftCtx  sctx;
634   const PetscInt  *ddiag;
635   PetscBool       row_identity, col_identity;
636 
637   PetscFunctionBegin;
638   /* MatPivotSetUp(): initialize shift context sctx */
639   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
640 
641   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
642     ddiag          = a->diag;
643     sctx.shift_top = info->zeropivot;
644     for (i=0; i<n; i++) {
645       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
646       d  = (aa)[ddiag[i]];
647       rs = -PetscAbsScalar(d) - PetscRealPart(d);
648       v  = aa+ai[i];
649       nz = ai[i+1] - ai[i];
650       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
651       if (rs>sctx.shift_top) sctx.shift_top = rs;
652     }
653     sctx.shift_top *= 1.1;
654     sctx.nshift_max = 5;
655     sctx.shift_lo   = 0.;
656     sctx.shift_hi   = 1.;
657   }
658 
659   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
660   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
661   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
662   ics  = ic;
663 
664   do {
665     sctx.newshift = PETSC_FALSE;
666     for (i=0; i<n; i++) {
667       nz    = bi[i+1] - bi[i];
668       bjtmp = bj + bi[i];
669       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
670 
671       /* load in initial (unfactored row) */
672       nz    = ai[r[i]+1] - ai[r[i]];
673       ajtmp = aj + ai[r[i]];
674       v     = aa + ai[r[i]];
675       for (j=0; j<nz; j++) {
676         rtmp[ics[ajtmp[j]]] = v[j];
677       }
678       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
679 
680       row = *bjtmp++;
681       while  (row < i) {
682         pc = rtmp + row;
683         if (*pc != 0.0) {
684           pv         = b->a + diag_offset[row];
685           pj         = b->j + diag_offset[row] + 1;
686           multiplier = *pc / *pv++;
687           *pc        = multiplier;
688           nz         = bi[row+1] - diag_offset[row] - 1;
689           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
690           ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
691         }
692         row = *bjtmp++;
693       }
694       /* finished row so stick it into b->a */
695       pv   = b->a + bi[i];
696       pj   = b->j + bi[i];
697       nz   = bi[i+1] - bi[i];
698       diag = diag_offset[i] - bi[i];
699       rs   = 0.0;
700       for (j=0; j<nz; j++) {
701         pv[j] = rtmp[pj[j]];
702         rs   += PetscAbsScalar(pv[j]);
703       }
704       rs -= PetscAbsScalar(pv[diag]);
705 
706       sctx.rs = rs;
707       sctx.pv = pv[diag];
708       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
709       if (sctx.newshift) break;
710       pv[diag] = sctx.pv;
711     }
712 
713     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
714       /*
715        * if no shift in this attempt & shifting & started shifting & can refine,
716        * then try lower shift
717        */
718       sctx.shift_hi       = sctx.shift_fraction;
719       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
720       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
721       sctx.newshift       = PETSC_TRUE;
722       sctx.nshift++;
723     }
724   } while (sctx.newshift);
725 
726   /* invert diagonal entries for simplier triangular solves */
727   for (i=0; i<n; i++) {
728     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
729   }
730   ierr = PetscFree(rtmp);CHKERRQ(ierr);
731   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
732   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
733 
734   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
735   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
736   if (row_identity && col_identity) {
737     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
738   } else {
739     C->ops->solve = MatSolve_SeqAIJ_inplace;
740   }
741   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
742   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
743   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
744   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
745 
746   C->assembled    = PETSC_TRUE;
747   C->preallocated = PETSC_TRUE;
748 
749   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
750   if (sctx.nshift) {
751     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
752       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
753     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
754       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
755     }
756   }
757   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
758   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
759 
760   ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr);
761   PetscFunctionReturn(0);
762 }
763 
764 /*
765    This routine implements inplace ILU(0) with row or/and column permutations.
766    Input:
767      A - original matrix
768    Output;
769      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
770          a->j (col index) is permuted by the inverse of colperm, then sorted
771          a->a reordered accordingly with a->j
772          a->diag (ptr to diagonal elements) is updated.
773 */
774 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
775 {
776   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
777   IS              isrow = a->row,isicol = a->icol;
778   PetscErrorCode  ierr;
779   const PetscInt  *r,*ic,*ics;
780   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
781   PetscInt        *ajtmp,nz,row;
782   PetscInt        *diag = a->diag,nbdiag,*pj;
783   PetscScalar     *rtmp,*pc,multiplier,d;
784   MatScalar       *pv,*v;
785   PetscReal       rs;
786   FactorShiftCtx  sctx;
787   const MatScalar *aa=a->a,*vtmp;
788 
789   PetscFunctionBegin;
790   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
791 
792   /* MatPivotSetUp(): initialize shift context sctx */
793   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
794 
795   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
796     const PetscInt *ddiag = a->diag;
797     sctx.shift_top = info->zeropivot;
798     for (i=0; i<n; i++) {
799       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
800       d    = (aa)[ddiag[i]];
801       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
802       vtmp = aa+ai[i];
803       nz   = ai[i+1] - ai[i];
804       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
805       if (rs>sctx.shift_top) sctx.shift_top = rs;
806     }
807     sctx.shift_top *= 1.1;
808     sctx.nshift_max = 5;
809     sctx.shift_lo   = 0.;
810     sctx.shift_hi   = 1.;
811   }
812 
813   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
814   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
815   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
816   ierr = PetscArrayzero(rtmp,n+1);CHKERRQ(ierr);
817   ics  = ic;
818 
819 #if defined(MV)
820   sctx.shift_top      = 0.;
821   sctx.nshift_max     = 0;
822   sctx.shift_lo       = 0.;
823   sctx.shift_hi       = 0.;
824   sctx.shift_fraction = 0.;
825 
826   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
827     sctx.shift_top = 0.;
828     for (i=0; i<n; i++) {
829       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
830       d  = (a->a)[diag[i]];
831       rs = -PetscAbsScalar(d) - PetscRealPart(d);
832       v  = a->a+ai[i];
833       nz = ai[i+1] - ai[i];
834       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
835       if (rs>sctx.shift_top) sctx.shift_top = rs;
836     }
837     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
838     sctx.shift_top *= 1.1;
839     sctx.nshift_max = 5;
840     sctx.shift_lo   = 0.;
841     sctx.shift_hi   = 1.;
842   }
843 
844   sctx.shift_amount = 0.;
845   sctx.nshift       = 0;
846 #endif
847 
848   do {
849     sctx.newshift = PETSC_FALSE;
850     for (i=0; i<n; i++) {
851       /* load in initial unfactored row */
852       nz    = ai[r[i]+1] - ai[r[i]];
853       ajtmp = aj + ai[r[i]];
854       v     = a->a + ai[r[i]];
855       /* sort permuted ajtmp and values v accordingly */
856       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
857       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
858 
859       diag[r[i]] = ai[r[i]];
860       for (j=0; j<nz; j++) {
861         rtmp[ajtmp[j]] = v[j];
862         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
863       }
864       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
865 
866       row = *ajtmp++;
867       while  (row < i) {
868         pc = rtmp + row;
869         if (*pc != 0.0) {
870           pv = a->a + diag[r[row]];
871           pj = aj + diag[r[row]] + 1;
872 
873           multiplier = *pc / *pv++;
874           *pc        = multiplier;
875           nz         = ai[r[row]+1] - diag[r[row]] - 1;
876           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
877           ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
878         }
879         row = *ajtmp++;
880       }
881       /* finished row so overwrite it onto a->a */
882       pv     = a->a + ai[r[i]];
883       pj     = aj + ai[r[i]];
884       nz     = ai[r[i]+1] - ai[r[i]];
885       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
886 
887       rs = 0.0;
888       for (j=0; j<nz; j++) {
889         pv[j] = rtmp[pj[j]];
890         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
891       }
892 
893       sctx.rs = rs;
894       sctx.pv = pv[nbdiag];
895       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
896       if (sctx.newshift) break;
897       pv[nbdiag] = sctx.pv;
898     }
899 
900     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
901       /*
902        * if no shift in this attempt & shifting & started shifting & can refine,
903        * then try lower shift
904        */
905       sctx.shift_hi       = sctx.shift_fraction;
906       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
907       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
908       sctx.newshift       = PETSC_TRUE;
909       sctx.nshift++;
910     }
911   } while (sctx.newshift);
912 
913   /* invert diagonal entries for simplier triangular solves */
914   for (i=0; i<n; i++) {
915     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
916   }
917 
918   ierr = PetscFree(rtmp);CHKERRQ(ierr);
919   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
920   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
921 
922   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
923   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
924   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
925   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
926 
927   A->assembled    = PETSC_TRUE;
928   A->preallocated = PETSC_TRUE;
929 
930   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
931   if (sctx.nshift) {
932     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
933       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
934     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
935       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
936     }
937   }
938   PetscFunctionReturn(0);
939 }
940 
941 /* ----------------------------------------------------------- */
942 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
943 {
944   PetscErrorCode ierr;
945   Mat            C;
946 
947   PetscFunctionBegin;
948   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
949   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
950   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
951 
952   A->ops->solve          = C->ops->solve;
953   A->ops->solvetranspose = C->ops->solvetranspose;
954 
955   ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
956   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 /* ----------------------------------------------------------- */
960 
961 
962 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
963 {
964   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
965   IS                iscol = a->col,isrow = a->row;
966   PetscErrorCode    ierr;
967   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
968   PetscInt          nz;
969   const PetscInt    *rout,*cout,*r,*c;
970   PetscScalar       *x,*tmp,*tmps,sum;
971   const PetscScalar *b;
972   const MatScalar   *aa = a->a,*v;
973 
974   PetscFunctionBegin;
975   if (!n) PetscFunctionReturn(0);
976 
977   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
978   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
979   tmp  = a->solve_work;
980 
981   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
982   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
983 
984   /* forward solve the lower triangular */
985   tmp[0] = b[*r++];
986   tmps   = tmp;
987   for (i=1; i<n; i++) {
988     v   = aa + ai[i];
989     vi  = aj + ai[i];
990     nz  = a->diag[i] - ai[i];
991     sum = b[*r++];
992     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
993     tmp[i] = sum;
994   }
995 
996   /* backward solve the upper triangular */
997   for (i=n-1; i>=0; i--) {
998     v   = aa + a->diag[i] + 1;
999     vi  = aj + a->diag[i] + 1;
1000     nz  = ai[i+1] - a->diag[i] - 1;
1001     sum = tmp[i];
1002     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1003     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1004   }
1005 
1006   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1007   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1008   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1009   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1010   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1015 {
1016   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1017   IS                iscol = a->col,isrow = a->row;
1018   PetscErrorCode    ierr;
1019   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1020   PetscInt          nz,neq,ldb,ldx;
1021   const PetscInt    *rout,*cout,*r,*c;
1022   PetscScalar       *x,*tmp = a->solve_work,*tmps,sum;
1023   const PetscScalar *b,*aa = a->a,*v;
1024   PetscBool         isdense;
1025 
1026   PetscFunctionBegin;
1027   if (!n) PetscFunctionReturn(0);
1028   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1029   if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1030   if (X != B) {
1031     ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1032     if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1033   }
1034   ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr);
1035   ierr = MatDenseGetLDA(B,&ldb);CHKERRQ(ierr);
1036   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1037   ierr = MatDenseGetLDA(X,&ldx);CHKERRQ(ierr);
1038   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1039   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1040   for (neq=0; neq<B->cmap->n; neq++) {
1041     /* forward solve the lower triangular */
1042     tmp[0] = b[r[0]];
1043     tmps   = tmp;
1044     for (i=1; i<n; i++) {
1045       v   = aa + ai[i];
1046       vi  = aj + ai[i];
1047       nz  = a->diag[i] - ai[i];
1048       sum = b[r[i]];
1049       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1050       tmp[i] = sum;
1051     }
1052     /* backward solve the upper triangular */
1053     for (i=n-1; i>=0; i--) {
1054       v   = aa + a->diag[i] + 1;
1055       vi  = aj + a->diag[i] + 1;
1056       nz  = ai[i+1] - a->diag[i] - 1;
1057       sum = tmp[i];
1058       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1059       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1060     }
1061     b += ldb;
1062     x += ldx;
1063   }
1064   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1065   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1066   ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr);
1067   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1068   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1073 {
1074   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1075   IS                iscol = a->col,isrow = a->row;
1076   PetscErrorCode    ierr;
1077   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1078   PetscInt          nz,neq,ldb,ldx;
1079   const PetscInt    *rout,*cout,*r,*c;
1080   PetscScalar       *x,*tmp = a->solve_work,sum;
1081   const PetscScalar *b,*aa = a->a,*v;
1082   PetscBool         isdense;
1083 
1084   PetscFunctionBegin;
1085   if (!n) PetscFunctionReturn(0);
1086   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1087   if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1088   if (X != B) {
1089     ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1090     if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1091   }
1092   ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr);
1093   ierr = MatDenseGetLDA(B,&ldb);CHKERRQ(ierr);
1094   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1095   ierr = MatDenseGetLDA(X,&ldx);CHKERRQ(ierr);
1096   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1097   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1098   for (neq=0; neq<B->cmap->n; neq++) {
1099     /* forward solve the lower triangular */
1100     tmp[0] = b[r[0]];
1101     v      = aa;
1102     vi     = aj;
1103     for (i=1; i<n; i++) {
1104       nz  = ai[i+1] - ai[i];
1105       sum = b[r[i]];
1106       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1107       tmp[i] = sum;
1108       v     += nz; vi += nz;
1109     }
1110     /* backward solve the upper triangular */
1111     for (i=n-1; i>=0; i--) {
1112       v   = aa + adiag[i+1]+1;
1113       vi  = aj + adiag[i+1]+1;
1114       nz  = adiag[i]-adiag[i+1]-1;
1115       sum = tmp[i];
1116       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1117       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1118     }
1119     b += ldb;
1120     x += ldx;
1121   }
1122   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1123   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1124   ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr);
1125   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1126   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1131 {
1132   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1133   IS                iscol = a->col,isrow = a->row;
1134   PetscErrorCode    ierr;
1135   const PetscInt    *r,*c,*rout,*cout;
1136   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1137   PetscInt          nz,row;
1138   PetscScalar       *x,*tmp,*tmps,sum;
1139   const PetscScalar *b;
1140   const MatScalar   *aa = a->a,*v;
1141 
1142   PetscFunctionBegin;
1143   if (!n) PetscFunctionReturn(0);
1144 
1145   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1146   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
1147   tmp  = a->solve_work;
1148 
1149   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1150   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1151 
1152   /* forward solve the lower triangular */
1153   tmp[0] = b[*r++];
1154   tmps   = tmp;
1155   for (row=1; row<n; row++) {
1156     i   = rout[row]; /* permuted row */
1157     v   = aa + ai[i];
1158     vi  = aj + ai[i];
1159     nz  = a->diag[i] - ai[i];
1160     sum = b[*r++];
1161     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1162     tmp[row] = sum;
1163   }
1164 
1165   /* backward solve the upper triangular */
1166   for (row=n-1; row>=0; row--) {
1167     i   = rout[row]; /* permuted row */
1168     v   = aa + a->diag[i] + 1;
1169     vi  = aj + a->diag[i] + 1;
1170     nz  = ai[i+1] - a->diag[i] - 1;
1171     sum = tmp[row];
1172     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1173     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1174   }
1175 
1176   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1177   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1178   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1179   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1180   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 /* ----------------------------------------------------------- */
1185 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1186 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1187 {
1188   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1189   PetscErrorCode    ierr;
1190   PetscInt          n   = A->rmap->n;
1191   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1192   PetscScalar       *x;
1193   const PetscScalar *b;
1194   const MatScalar   *aa = a->a;
1195 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1196   PetscInt        adiag_i,i,nz,ai_i;
1197   const PetscInt  *vi;
1198   const MatScalar *v;
1199   PetscScalar     sum;
1200 #endif
1201 
1202   PetscFunctionBegin;
1203   if (!n) PetscFunctionReturn(0);
1204 
1205   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1206   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
1207 
1208 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1209   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1210 #else
1211   /* forward solve the lower triangular */
1212   x[0] = b[0];
1213   for (i=1; i<n; i++) {
1214     ai_i = ai[i];
1215     v    = aa + ai_i;
1216     vi   = aj + ai_i;
1217     nz   = adiag[i] - ai_i;
1218     sum  = b[i];
1219     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1220     x[i] = sum;
1221   }
1222 
1223   /* backward solve the upper triangular */
1224   for (i=n-1; i>=0; i--) {
1225     adiag_i = adiag[i];
1226     v       = aa + adiag_i + 1;
1227     vi      = aj + adiag_i + 1;
1228     nz      = ai[i+1] - adiag_i - 1;
1229     sum     = x[i];
1230     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1231     x[i] = sum*aa[adiag_i];
1232   }
1233 #endif
1234   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1235   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1236   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1241 {
1242   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1243   IS                iscol = a->col,isrow = a->row;
1244   PetscErrorCode    ierr;
1245   PetscInt          i, n = A->rmap->n,j;
1246   PetscInt          nz;
1247   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1248   PetscScalar       *x,*tmp,sum;
1249   const PetscScalar *b;
1250   const MatScalar   *aa = a->a,*v;
1251 
1252   PetscFunctionBegin;
1253   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1254 
1255   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1256   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1257   tmp  = a->solve_work;
1258 
1259   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1260   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1261 
1262   /* forward solve the lower triangular */
1263   tmp[0] = b[*r++];
1264   for (i=1; i<n; i++) {
1265     v   = aa + ai[i];
1266     vi  = aj + ai[i];
1267     nz  = a->diag[i] - ai[i];
1268     sum = b[*r++];
1269     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1270     tmp[i] = sum;
1271   }
1272 
1273   /* backward solve the upper triangular */
1274   for (i=n-1; i>=0; i--) {
1275     v   = aa + a->diag[i] + 1;
1276     vi  = aj + a->diag[i] + 1;
1277     nz  = ai[i+1] - a->diag[i] - 1;
1278     sum = tmp[i];
1279     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1280     tmp[i]   = sum*aa[a->diag[i]];
1281     x[*c--] += tmp[i];
1282   }
1283 
1284   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1285   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1286   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1287   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1288   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1293 {
1294   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1295   IS                iscol = a->col,isrow = a->row;
1296   PetscErrorCode    ierr;
1297   PetscInt          i, n = A->rmap->n,j;
1298   PetscInt          nz;
1299   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1300   PetscScalar       *x,*tmp,sum;
1301   const PetscScalar *b;
1302   const MatScalar   *aa = a->a,*v;
1303 
1304   PetscFunctionBegin;
1305   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1306 
1307   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1308   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1309   tmp  = a->solve_work;
1310 
1311   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1312   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1313 
1314   /* forward solve the lower triangular */
1315   tmp[0] = b[r[0]];
1316   v      = aa;
1317   vi     = aj;
1318   for (i=1; i<n; i++) {
1319     nz  = ai[i+1] - ai[i];
1320     sum = b[r[i]];
1321     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1322     tmp[i] = sum;
1323     v     += nz;
1324     vi    += nz;
1325   }
1326 
1327   /* backward solve the upper triangular */
1328   v  = aa + adiag[n-1];
1329   vi = aj + adiag[n-1];
1330   for (i=n-1; i>=0; i--) {
1331     nz  = adiag[i] - adiag[i+1] - 1;
1332     sum = tmp[i];
1333     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1334     tmp[i]   = sum*v[nz];
1335     x[c[i]] += tmp[i];
1336     v       += nz+1; vi += nz+1;
1337   }
1338 
1339   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1340   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1341   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1342   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1343   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1348 {
1349   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1350   IS                iscol = a->col,isrow = a->row;
1351   PetscErrorCode    ierr;
1352   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1353   PetscInt          i,n = A->rmap->n,j;
1354   PetscInt          nz;
1355   PetscScalar       *x,*tmp,s1;
1356   const MatScalar   *aa = a->a,*v;
1357   const PetscScalar *b;
1358 
1359   PetscFunctionBegin;
1360   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1361   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
1362   tmp  = a->solve_work;
1363 
1364   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1365   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1366 
1367   /* copy the b into temp work space according to permutation */
1368   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1369 
1370   /* forward solve the U^T */
1371   for (i=0; i<n; i++) {
1372     v   = aa + diag[i];
1373     vi  = aj + diag[i] + 1;
1374     nz  = ai[i+1] - diag[i] - 1;
1375     s1  = tmp[i];
1376     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1377     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1378     tmp[i] = s1;
1379   }
1380 
1381   /* backward solve the L^T */
1382   for (i=n-1; i>=0; i--) {
1383     v  = aa + diag[i] - 1;
1384     vi = aj + diag[i] - 1;
1385     nz = diag[i] - ai[i];
1386     s1 = tmp[i];
1387     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1388   }
1389 
1390   /* copy tmp into x according to permutation */
1391   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1392 
1393   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1394   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1395   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1396   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1397 
1398   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1403 {
1404   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1405   IS                iscol = a->col,isrow = a->row;
1406   PetscErrorCode    ierr;
1407   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1408   PetscInt          i,n = A->rmap->n,j;
1409   PetscInt          nz;
1410   PetscScalar       *x,*tmp,s1;
1411   const MatScalar   *aa = a->a,*v;
1412   const PetscScalar *b;
1413 
1414   PetscFunctionBegin;
1415   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1416   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
1417   tmp  = a->solve_work;
1418 
1419   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1420   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1421 
1422   /* copy the b into temp work space according to permutation */
1423   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1424 
1425   /* forward solve the U^T */
1426   for (i=0; i<n; i++) {
1427     v   = aa + adiag[i+1] + 1;
1428     vi  = aj + adiag[i+1] + 1;
1429     nz  = adiag[i] - adiag[i+1] - 1;
1430     s1  = tmp[i];
1431     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1432     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1433     tmp[i] = s1;
1434   }
1435 
1436   /* backward solve the L^T */
1437   for (i=n-1; i>=0; i--) {
1438     v  = aa + ai[i];
1439     vi = aj + ai[i];
1440     nz = ai[i+1] - ai[i];
1441     s1 = tmp[i];
1442     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1443   }
1444 
1445   /* copy tmp into x according to permutation */
1446   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1447 
1448   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1449   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1450   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1451   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1452 
1453   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1458 {
1459   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1460   IS                iscol = a->col,isrow = a->row;
1461   PetscErrorCode    ierr;
1462   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1463   PetscInt          i,n = A->rmap->n,j;
1464   PetscInt          nz;
1465   PetscScalar       *x,*tmp,s1;
1466   const MatScalar   *aa = a->a,*v;
1467   const PetscScalar *b;
1468 
1469   PetscFunctionBegin;
1470   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1471   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1472   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1473   tmp  = a->solve_work;
1474 
1475   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1476   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1477 
1478   /* copy the b into temp work space according to permutation */
1479   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1480 
1481   /* forward solve the U^T */
1482   for (i=0; i<n; i++) {
1483     v   = aa + diag[i];
1484     vi  = aj + diag[i] + 1;
1485     nz  = ai[i+1] - diag[i] - 1;
1486     s1  = tmp[i];
1487     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1488     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1489     tmp[i] = s1;
1490   }
1491 
1492   /* backward solve the L^T */
1493   for (i=n-1; i>=0; i--) {
1494     v  = aa + diag[i] - 1;
1495     vi = aj + diag[i] - 1;
1496     nz = diag[i] - ai[i];
1497     s1 = tmp[i];
1498     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1499   }
1500 
1501   /* copy tmp into x according to permutation */
1502   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1503 
1504   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1505   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1506   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1507   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1508 
1509   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1514 {
1515   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1516   IS                iscol = a->col,isrow = a->row;
1517   PetscErrorCode    ierr;
1518   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1519   PetscInt          i,n = A->rmap->n,j;
1520   PetscInt          nz;
1521   PetscScalar       *x,*tmp,s1;
1522   const MatScalar   *aa = a->a,*v;
1523   const PetscScalar *b;
1524 
1525   PetscFunctionBegin;
1526   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1527   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1528   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1529   tmp  = a->solve_work;
1530 
1531   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1532   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1533 
1534   /* copy the b into temp work space according to permutation */
1535   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1536 
1537   /* forward solve the U^T */
1538   for (i=0; i<n; i++) {
1539     v   = aa + adiag[i+1] + 1;
1540     vi  = aj + adiag[i+1] + 1;
1541     nz  = adiag[i] - adiag[i+1] - 1;
1542     s1  = tmp[i];
1543     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1544     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1545     tmp[i] = s1;
1546   }
1547 
1548 
1549   /* backward solve the L^T */
1550   for (i=n-1; i>=0; i--) {
1551     v  = aa + ai[i];
1552     vi = aj + ai[i];
1553     nz = ai[i+1] - ai[i];
1554     s1 = tmp[i];
1555     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1556   }
1557 
1558   /* copy tmp into x according to permutation */
1559   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1560 
1561   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1562   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1563   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1564   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1565 
1566   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 /* ----------------------------------------------------------------*/
1571 
1572 /*
1573    ilu() under revised new data structure.
1574    Factored arrays bj and ba are stored as
1575      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1576 
1577    bi=fact->i is an array of size n+1, in which
1578    bi+
1579      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1580      bi[n]:  points to L(n-1,n-1)+1
1581 
1582   bdiag=fact->diag is an array of size n+1,in which
1583      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1584      bdiag[n]: points to entry of U(n-1,0)-1
1585 
1586    U(i,:) contains bdiag[i] as its last entry, i.e.,
1587     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1588 */
1589 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1590 {
1591   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1592   PetscErrorCode ierr;
1593   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1594   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1595   IS             isicol;
1596 
1597   PetscFunctionBegin;
1598   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1599   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1600   b    = (Mat_SeqAIJ*)(fact)->data;
1601 
1602   /* allocate matrix arrays for new data structure */
1603   ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr);
1604   ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1605 
1606   b->singlemalloc = PETSC_TRUE;
1607   if (!b->diag) {
1608     ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr);
1609     ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1610   }
1611   bdiag = b->diag;
1612 
1613   if (n > 0) {
1614     ierr = PetscArrayzero(b->a,ai[n]);CHKERRQ(ierr);
1615   }
1616 
1617   /* set bi and bj with new data structure */
1618   bi = b->i;
1619   bj = b->j;
1620 
1621   /* L part */
1622   bi[0] = 0;
1623   for (i=0; i<n; i++) {
1624     nz      = adiag[i] - ai[i];
1625     bi[i+1] = bi[i] + nz;
1626     aj      = a->j + ai[i];
1627     for (j=0; j<nz; j++) {
1628       /*   *bj = aj[j]; bj++; */
1629       bj[k++] = aj[j];
1630     }
1631   }
1632 
1633   /* U part */
1634   bdiag[n] = bi[n]-1;
1635   for (i=n-1; i>=0; i--) {
1636     nz = ai[i+1] - adiag[i] - 1;
1637     aj = a->j + adiag[i] + 1;
1638     for (j=0; j<nz; j++) {
1639       /*      *bj = aj[j]; bj++; */
1640       bj[k++] = aj[j];
1641     }
1642     /* diag[i] */
1643     /*    *bj = i; bj++; */
1644     bj[k++]  = i;
1645     bdiag[i] = bdiag[i+1] + nz + 1;
1646   }
1647 
1648   fact->factortype             = MAT_FACTOR_ILU;
1649   fact->info.factor_mallocs    = 0;
1650   fact->info.fill_ratio_given  = info->fill;
1651   fact->info.fill_ratio_needed = 1.0;
1652   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1653   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1654 
1655   b       = (Mat_SeqAIJ*)(fact)->data;
1656   b->row  = isrow;
1657   b->col  = iscol;
1658   b->icol = isicol;
1659   ierr    = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1660   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1661   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1666 {
1667   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1668   IS                 isicol;
1669   PetscErrorCode     ierr;
1670   const PetscInt     *r,*ic;
1671   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1672   PetscInt           *bi,*cols,nnz,*cols_lvl;
1673   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1674   PetscInt           i,levels,diagonal_fill;
1675   PetscBool          col_identity,row_identity,missing;
1676   PetscReal          f;
1677   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1678   PetscBT            lnkbt;
1679   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1680   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1681   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1682 
1683   PetscFunctionBegin;
1684   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1685   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1686   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1687 
1688   levels = (PetscInt)info->levels;
1689   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1690   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1691   if (!levels && row_identity && col_identity) {
1692     /* special case: ilu(0) with natural ordering */
1693     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1694     if (a->inode.size) {
1695       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1696     }
1697     PetscFunctionReturn(0);
1698   }
1699 
1700   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1701   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1702   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1703 
1704   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1705   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1706   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1707   bi[0] = bdiag[0] = 0;
1708   ierr  = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1709 
1710   /* create a linked list for storing column indices of the active row */
1711   nlnk = n + 1;
1712   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1713 
1714   /* initial FreeSpace size is f*(ai[n]+1) */
1715   f                 = info->fill;
1716   diagonal_fill     = (PetscInt)info->diagonal_fill;
1717   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
1718   current_space     = free_space;
1719   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr);
1720   current_space_lvl = free_space_lvl;
1721   for (i=0; i<n; i++) {
1722     nzi = 0;
1723     /* copy current row into linked list */
1724     nnz = ai[r[i]+1] - ai[r[i]];
1725     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1726     cols   = aj + ai[r[i]];
1727     lnk[i] = -1; /* marker to indicate if diagonal exists */
1728     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1729     nzi   += nlnk;
1730 
1731     /* make sure diagonal entry is included */
1732     if (diagonal_fill && lnk[i] == -1) {
1733       fm = n;
1734       while (lnk[fm] < i) fm = lnk[fm];
1735       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1736       lnk[fm]    = i;
1737       lnk_lvl[i] = 0;
1738       nzi++; dcount++;
1739     }
1740 
1741     /* add pivot rows into the active row */
1742     nzbd = 0;
1743     prow = lnk[n];
1744     while (prow < i) {
1745       nnz      = bdiag[prow];
1746       cols     = bj_ptr[prow] + nnz + 1;
1747       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1748       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1749       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1750       nzi     += nlnk;
1751       prow     = lnk[prow];
1752       nzbd++;
1753     }
1754     bdiag[i] = nzbd;
1755     bi[i+1]  = bi[i] + nzi;
1756     /* if free space is not available, make more free space */
1757     if (current_space->local_remaining<nzi) {
1758       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1759       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1760       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1761       reallocs++;
1762     }
1763 
1764     /* copy data into free_space and free_space_lvl, then initialize lnk */
1765     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1766     bj_ptr[i]    = current_space->array;
1767     bjlvl_ptr[i] = current_space_lvl->array;
1768 
1769     /* make sure the active row i has diagonal entry */
1770     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1771 
1772     current_space->array               += nzi;
1773     current_space->local_used          += nzi;
1774     current_space->local_remaining     -= nzi;
1775     current_space_lvl->array           += nzi;
1776     current_space_lvl->local_used      += nzi;
1777     current_space_lvl->local_remaining -= nzi;
1778   }
1779 
1780   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1781   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1782   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1783   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
1784   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1785 
1786   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1787   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1788   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1789 
1790 #if defined(PETSC_USE_INFO)
1791   {
1792     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1793     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
1794     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
1795     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
1796     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1797     if (diagonal_fill) {
1798       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
1799     }
1800   }
1801 #endif
1802   /* put together the new matrix */
1803   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
1804   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
1805   b    = (Mat_SeqAIJ*)(fact)->data;
1806 
1807   b->free_a       = PETSC_TRUE;
1808   b->free_ij      = PETSC_TRUE;
1809   b->singlemalloc = PETSC_FALSE;
1810 
1811   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
1812 
1813   b->j    = bj;
1814   b->i    = bi;
1815   b->diag = bdiag;
1816   b->ilen = 0;
1817   b->imax = 0;
1818   b->row  = isrow;
1819   b->col  = iscol;
1820   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1821   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1822   b->icol = isicol;
1823 
1824   ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
1825   /* In b structure:  Free imax, ilen, old a, old j.
1826      Allocate bdiag, solve_work, new a, new j */
1827   ierr     = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1828   b->maxnz = b->nz = bdiag[0]+1;
1829 
1830   (fact)->info.factor_mallocs    = reallocs;
1831   (fact)->info.fill_ratio_given  = f;
1832   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1833   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1834   if (a->inode.size) {
1835     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1836   }
1837   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1842 {
1843   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1844   IS                 isicol;
1845   PetscErrorCode     ierr;
1846   const PetscInt     *r,*ic;
1847   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1848   PetscInt           *bi,*cols,nnz,*cols_lvl;
1849   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1850   PetscInt           i,levels,diagonal_fill;
1851   PetscBool          col_identity,row_identity;
1852   PetscReal          f;
1853   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1854   PetscBT            lnkbt;
1855   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1856   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1857   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1858   PetscBool          missing;
1859 
1860   PetscFunctionBegin;
1861   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1862   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1863   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1864 
1865   f             = info->fill;
1866   levels        = (PetscInt)info->levels;
1867   diagonal_fill = (PetscInt)info->diagonal_fill;
1868 
1869   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1870 
1871   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1872   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1873   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1874     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1875 
1876     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1877     if (a->inode.size) {
1878       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1879     }
1880     fact->factortype               = MAT_FACTOR_ILU;
1881     (fact)->info.factor_mallocs    = 0;
1882     (fact)->info.fill_ratio_given  = info->fill;
1883     (fact)->info.fill_ratio_needed = 1.0;
1884 
1885     b    = (Mat_SeqAIJ*)(fact)->data;
1886     b->row  = isrow;
1887     b->col  = iscol;
1888     b->icol = isicol;
1889     ierr    = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1890     ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1891     ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1892     PetscFunctionReturn(0);
1893   }
1894 
1895   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1896   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1897 
1898   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1899   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1900   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1901   bi[0] = bdiag[0] = 0;
1902 
1903   ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1904 
1905   /* create a linked list for storing column indices of the active row */
1906   nlnk = n + 1;
1907   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1908 
1909   /* initial FreeSpace size is f*(ai[n]+1) */
1910   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
1911   current_space     = free_space;
1912   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr);
1913   current_space_lvl = free_space_lvl;
1914 
1915   for (i=0; i<n; i++) {
1916     nzi = 0;
1917     /* copy current row into linked list */
1918     nnz = ai[r[i]+1] - ai[r[i]];
1919     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1920     cols   = aj + ai[r[i]];
1921     lnk[i] = -1; /* marker to indicate if diagonal exists */
1922     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1923     nzi   += nlnk;
1924 
1925     /* make sure diagonal entry is included */
1926     if (diagonal_fill && lnk[i] == -1) {
1927       fm = n;
1928       while (lnk[fm] < i) fm = lnk[fm];
1929       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1930       lnk[fm]    = i;
1931       lnk_lvl[i] = 0;
1932       nzi++; dcount++;
1933     }
1934 
1935     /* add pivot rows into the active row */
1936     nzbd = 0;
1937     prow = lnk[n];
1938     while (prow < i) {
1939       nnz      = bdiag[prow];
1940       cols     = bj_ptr[prow] + nnz + 1;
1941       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1942       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1943       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1944       nzi     += nlnk;
1945       prow     = lnk[prow];
1946       nzbd++;
1947     }
1948     bdiag[i] = nzbd;
1949     bi[i+1]  = bi[i] + nzi;
1950 
1951     /* if free space is not available, make more free space */
1952     if (current_space->local_remaining<nzi) {
1953       nnz  = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1954       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1955       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1956       reallocs++;
1957     }
1958 
1959     /* copy data into free_space and free_space_lvl, then initialize lnk */
1960     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1961     bj_ptr[i]    = current_space->array;
1962     bjlvl_ptr[i] = current_space_lvl->array;
1963 
1964     /* make sure the active row i has diagonal entry */
1965     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1966 
1967     current_space->array               += nzi;
1968     current_space->local_used          += nzi;
1969     current_space->local_remaining     -= nzi;
1970     current_space_lvl->array           += nzi;
1971     current_space_lvl->local_used      += nzi;
1972     current_space_lvl->local_remaining -= nzi;
1973   }
1974 
1975   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1976   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1977 
1978   /* destroy list of free space and other temporary arrays */
1979   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
1980   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
1981   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1982   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1983   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1984 
1985 #if defined(PETSC_USE_INFO)
1986   {
1987     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1988     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
1989     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
1990     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
1991     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1992     if (diagonal_fill) {
1993       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
1994     }
1995   }
1996 #endif
1997 
1998   /* put together the new matrix */
1999   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
2000   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
2001   b    = (Mat_SeqAIJ*)(fact)->data;
2002 
2003   b->free_a       = PETSC_TRUE;
2004   b->free_ij      = PETSC_TRUE;
2005   b->singlemalloc = PETSC_FALSE;
2006 
2007   ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr);
2008   b->j = bj;
2009   b->i = bi;
2010   for (i=0; i<n; i++) bdiag[i] += bi[i];
2011   b->diag = bdiag;
2012   b->ilen = 0;
2013   b->imax = 0;
2014   b->row  = isrow;
2015   b->col  = iscol;
2016   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2017   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2018   b->icol = isicol;
2019   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
2020   /* In b structure:  Free imax, ilen, old a, old j.
2021      Allocate bdiag, solve_work, new a, new j */
2022   ierr     = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2023   b->maxnz = b->nz = bi[n];
2024 
2025   (fact)->info.factor_mallocs    = reallocs;
2026   (fact)->info.fill_ratio_given  = f;
2027   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2028   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2029   if (a->inode.size) {
2030     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2031   }
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2036 {
2037   Mat            C = B;
2038   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2039   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2040   IS             ip=b->row,iip = b->icol;
2041   PetscErrorCode ierr;
2042   const PetscInt *rip,*riip;
2043   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2044   PetscInt       *ai=a->i,*aj=a->j;
2045   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2046   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2047   PetscBool      perm_identity;
2048   FactorShiftCtx sctx;
2049   PetscReal      rs;
2050   MatScalar      d,*v;
2051 
2052   PetscFunctionBegin;
2053   /* MatPivotSetUp(): initialize shift context sctx */
2054   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2055 
2056   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2057     sctx.shift_top = info->zeropivot;
2058     for (i=0; i<mbs; i++) {
2059       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2060       d  = (aa)[a->diag[i]];
2061       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2062       v  = aa+ai[i];
2063       nz = ai[i+1] - ai[i];
2064       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2065       if (rs>sctx.shift_top) sctx.shift_top = rs;
2066     }
2067     sctx.shift_top *= 1.1;
2068     sctx.nshift_max = 5;
2069     sctx.shift_lo   = 0.;
2070     sctx.shift_hi   = 1.;
2071   }
2072 
2073   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2074   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2075 
2076   /* allocate working arrays
2077      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2078      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2079   */
2080   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr);
2081 
2082   do {
2083     sctx.newshift = PETSC_FALSE;
2084 
2085     for (i=0; i<mbs; i++) c2r[i] = mbs;
2086     if (mbs) il[0] = 0;
2087 
2088     for (k = 0; k<mbs; k++) {
2089       /* zero rtmp */
2090       nz    = bi[k+1] - bi[k];
2091       bjtmp = bj + bi[k];
2092       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2093 
2094       /* load in initial unfactored row */
2095       bval = ba + bi[k];
2096       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2097       for (j = jmin; j < jmax; j++) {
2098         col = riip[aj[j]];
2099         if (col >= k) { /* only take upper triangular entry */
2100           rtmp[col] = aa[j];
2101           *bval++   = 0.0; /* for in-place factorization */
2102         }
2103       }
2104       /* shift the diagonal of the matrix: ZeropivotApply() */
2105       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2106 
2107       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2108       dk = rtmp[k];
2109       i  = c2r[k]; /* first row to be added to k_th row  */
2110 
2111       while (i < k) {
2112         nexti = c2r[i]; /* next row to be added to k_th row */
2113 
2114         /* compute multiplier, update diag(k) and U(i,k) */
2115         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2116         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2117         dk     += uikdi*ba[ili]; /* update diag[k] */
2118         ba[ili] = uikdi; /* -U(i,k) */
2119 
2120         /* add multiple of row i to k-th row */
2121         jmin = ili + 1; jmax = bi[i+1];
2122         if (jmin < jmax) {
2123           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2124           /* update il and c2r for row i */
2125           il[i] = jmin;
2126           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2127         }
2128         i = nexti;
2129       }
2130 
2131       /* copy data into U(k,:) */
2132       rs   = 0.0;
2133       jmin = bi[k]; jmax = bi[k+1]-1;
2134       if (jmin < jmax) {
2135         for (j=jmin; j<jmax; j++) {
2136           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2137         }
2138         /* add the k-th row into il and c2r */
2139         il[k] = jmin;
2140         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2141       }
2142 
2143       /* MatPivotCheck() */
2144       sctx.rs = rs;
2145       sctx.pv = dk;
2146       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
2147       if (sctx.newshift) break;
2148       dk = sctx.pv;
2149 
2150       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2151     }
2152   } while (sctx.newshift);
2153 
2154   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2155   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2156   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2157 
2158   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2159   if (perm_identity) {
2160     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2161     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2162     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2163     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2164   } else {
2165     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2166     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2167     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2168     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2169   }
2170 
2171   C->assembled    = PETSC_TRUE;
2172   C->preallocated = PETSC_TRUE;
2173 
2174   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2175 
2176   /* MatPivotView() */
2177   if (sctx.nshift) {
2178     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2179       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
2180     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2181       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2182     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2183       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
2184     }
2185   }
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2190 {
2191   Mat            C = B;
2192   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2193   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2194   IS             ip=b->row,iip = b->icol;
2195   PetscErrorCode ierr;
2196   const PetscInt *rip,*riip;
2197   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2198   PetscInt       *ai=a->i,*aj=a->j;
2199   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2200   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2201   PetscBool      perm_identity;
2202   FactorShiftCtx sctx;
2203   PetscReal      rs;
2204   MatScalar      d,*v;
2205 
2206   PetscFunctionBegin;
2207   /* MatPivotSetUp(): initialize shift context sctx */
2208   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2209 
2210   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2211     sctx.shift_top = info->zeropivot;
2212     for (i=0; i<mbs; i++) {
2213       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2214       d  = (aa)[a->diag[i]];
2215       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2216       v  = aa+ai[i];
2217       nz = ai[i+1] - ai[i];
2218       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2219       if (rs>sctx.shift_top) sctx.shift_top = rs;
2220     }
2221     sctx.shift_top *= 1.1;
2222     sctx.nshift_max = 5;
2223     sctx.shift_lo   = 0.;
2224     sctx.shift_hi   = 1.;
2225   }
2226 
2227   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2228   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2229 
2230   /* initialization */
2231   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr);
2232 
2233   do {
2234     sctx.newshift = PETSC_FALSE;
2235 
2236     for (i=0; i<mbs; i++) jl[i] = mbs;
2237     il[0] = 0;
2238 
2239     for (k = 0; k<mbs; k++) {
2240       /* zero rtmp */
2241       nz    = bi[k+1] - bi[k];
2242       bjtmp = bj + bi[k];
2243       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2244 
2245       bval = ba + bi[k];
2246       /* initialize k-th row by the perm[k]-th row of A */
2247       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2248       for (j = jmin; j < jmax; j++) {
2249         col = riip[aj[j]];
2250         if (col >= k) { /* only take upper triangular entry */
2251           rtmp[col] = aa[j];
2252           *bval++   = 0.0; /* for in-place factorization */
2253         }
2254       }
2255       /* shift the diagonal of the matrix */
2256       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2257 
2258       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2259       dk = rtmp[k];
2260       i  = jl[k]; /* first row to be added to k_th row  */
2261 
2262       while (i < k) {
2263         nexti = jl[i]; /* next row to be added to k_th row */
2264 
2265         /* compute multiplier, update diag(k) and U(i,k) */
2266         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2267         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2268         dk     += uikdi*ba[ili];
2269         ba[ili] = uikdi; /* -U(i,k) */
2270 
2271         /* add multiple of row i to k-th row */
2272         jmin = ili + 1; jmax = bi[i+1];
2273         if (jmin < jmax) {
2274           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2275           /* update il and jl for row i */
2276           il[i] = jmin;
2277           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2278         }
2279         i = nexti;
2280       }
2281 
2282       /* shift the diagonals when zero pivot is detected */
2283       /* compute rs=sum of abs(off-diagonal) */
2284       rs   = 0.0;
2285       jmin = bi[k]+1;
2286       nz   = bi[k+1] - jmin;
2287       bcol = bj + jmin;
2288       for (j=0; j<nz; j++) {
2289         rs += PetscAbsScalar(rtmp[bcol[j]]);
2290       }
2291 
2292       sctx.rs = rs;
2293       sctx.pv = dk;
2294       ierr    = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr);
2295       if (sctx.newshift) break;
2296       dk = sctx.pv;
2297 
2298       /* copy data into U(k,:) */
2299       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2300       jmin      = bi[k]+1; jmax = bi[k+1];
2301       if (jmin < jmax) {
2302         for (j=jmin; j<jmax; j++) {
2303           col = bj[j]; ba[j] = rtmp[col];
2304         }
2305         /* add the k-th row into il and jl */
2306         il[k] = jmin;
2307         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2308       }
2309     }
2310   } while (sctx.newshift);
2311 
2312   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2313   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2314   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2315 
2316   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2317   if (perm_identity) {
2318     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2319     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2320     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2321     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2322   } else {
2323     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2324     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2325     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2326     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2327   }
2328 
2329   C->assembled    = PETSC_TRUE;
2330   C->preallocated = PETSC_TRUE;
2331 
2332   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2333   if (sctx.nshift) {
2334     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2335       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2336     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2337       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2338     }
2339   }
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 /*
2344    icc() under revised new data structure.
2345    Factored arrays bj and ba are stored as
2346      U(0,:),...,U(i,:),U(n-1,:)
2347 
2348    ui=fact->i is an array of size n+1, in which
2349    ui+
2350      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2351      ui[n]:  points to U(n-1,n-1)+1
2352 
2353   udiag=fact->diag is an array of size n,in which
2354      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2355 
2356    U(i,:) contains udiag[i] as its last entry, i.e.,
2357     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2358 */
2359 
2360 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2361 {
2362   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2363   Mat_SeqSBAIJ       *b;
2364   PetscErrorCode     ierr;
2365   PetscBool          perm_identity,missing;
2366   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2367   const PetscInt     *rip,*riip;
2368   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2369   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2370   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2371   PetscReal          fill          =info->fill,levels=info->levels;
2372   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2373   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2374   PetscBT            lnkbt;
2375   IS                 iperm;
2376 
2377   PetscFunctionBegin;
2378   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2379   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2380   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2381   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2382   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2383 
2384   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2385   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2386   ui[0] = 0;
2387 
2388   /* ICC(0) without matrix ordering: simply rearrange column indices */
2389   if (!levels && perm_identity) {
2390     for (i=0; i<am; i++) {
2391       ncols    = ai[i+1] - a->diag[i];
2392       ui[i+1]  = ui[i] + ncols;
2393       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2394     }
2395     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2396     cols = uj;
2397     for (i=0; i<am; i++) {
2398       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2399       ncols = ai[i+1] - a->diag[i] -1;
2400       for (j=0; j<ncols; j++) *cols++ = aj[j];
2401       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2402     }
2403   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2404     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2405     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2406 
2407     /* initialization */
2408     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2409 
2410     /* jl: linked list for storing indices of the pivot rows
2411        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2412     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2413     for (i=0; i<am; i++) {
2414       jl[i] = am; il[i] = 0;
2415     }
2416 
2417     /* create and initialize a linked list for storing column indices of the active row k */
2418     nlnk = am + 1;
2419     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2420 
2421     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2422     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2423     current_space     = free_space;
2424     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2425     current_space_lvl = free_space_lvl;
2426 
2427     for (k=0; k<am; k++) {  /* for each active row k */
2428       /* initialize lnk by the column indices of row rip[k] of A */
2429       nzk   = 0;
2430       ncols = ai[rip[k]+1] - ai[rip[k]];
2431       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2432       ncols_upper = 0;
2433       for (j=0; j<ncols; j++) {
2434         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2435         if (riip[i] >= k) { /* only take upper triangular entry */
2436           ajtmp[ncols_upper] = i;
2437           ncols_upper++;
2438         }
2439       }
2440       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2441       nzk += nlnk;
2442 
2443       /* update lnk by computing fill-in for each pivot row to be merged in */
2444       prow = jl[k]; /* 1st pivot row */
2445 
2446       while (prow < k) {
2447         nextprow = jl[prow];
2448 
2449         /* merge prow into k-th row */
2450         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2451         jmax  = ui[prow+1];
2452         ncols = jmax-jmin;
2453         i     = jmin - ui[prow];
2454         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2455         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2456         j     = *(uj - 1);
2457         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2458         nzk  += nlnk;
2459 
2460         /* update il and jl for prow */
2461         if (jmin < jmax) {
2462           il[prow] = jmin;
2463           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2464         }
2465         prow = nextprow;
2466       }
2467 
2468       /* if free space is not available, make more free space */
2469       if (current_space->local_remaining<nzk) {
2470         i    = am - k + 1; /* num of unfactored rows */
2471         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2472         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2473         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2474         reallocs++;
2475       }
2476 
2477       /* copy data into free_space and free_space_lvl, then initialize lnk */
2478       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2479       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2480 
2481       /* add the k-th row into il and jl */
2482       if (nzk > 1) {
2483         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2484         jl[k] = jl[i]; jl[i] = k;
2485         il[k] = ui[k] + 1;
2486       }
2487       uj_ptr[k]     = current_space->array;
2488       uj_lvl_ptr[k] = current_space_lvl->array;
2489 
2490       current_space->array           += nzk;
2491       current_space->local_used      += nzk;
2492       current_space->local_remaining -= nzk;
2493 
2494       current_space_lvl->array           += nzk;
2495       current_space_lvl->local_used      += nzk;
2496       current_space_lvl->local_remaining -= nzk;
2497 
2498       ui[k+1] = ui[k] + nzk;
2499     }
2500 
2501     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2502     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2503     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2504     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2505 
2506     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2507     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2508     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2509     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2510     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2511 
2512   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2513 
2514   /* put together the new matrix in MATSEQSBAIJ format */
2515   b               = (Mat_SeqSBAIJ*)(fact)->data;
2516   b->singlemalloc = PETSC_FALSE;
2517 
2518   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2519 
2520   b->j             = uj;
2521   b->i             = ui;
2522   b->diag          = udiag;
2523   b->free_diag     = PETSC_TRUE;
2524   b->ilen          = 0;
2525   b->imax          = 0;
2526   b->row           = perm;
2527   b->col           = perm;
2528   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2529   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2530   b->icol          = iperm;
2531   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2532 
2533   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2534   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2535 
2536   b->maxnz   = b->nz = ui[am];
2537   b->free_a  = PETSC_TRUE;
2538   b->free_ij = PETSC_TRUE;
2539 
2540   fact->info.factor_mallocs   = reallocs;
2541   fact->info.fill_ratio_given = fill;
2542   if (ai[am] != 0) {
2543     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2544     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2545   } else {
2546     fact->info.fill_ratio_needed = 0.0;
2547   }
2548 #if defined(PETSC_USE_INFO)
2549   if (ai[am] != 0) {
2550     PetscReal af = fact->info.fill_ratio_needed;
2551     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2552     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2553     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2554   } else {
2555     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
2556   }
2557 #endif
2558   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2563 {
2564   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2565   Mat_SeqSBAIJ       *b;
2566   PetscErrorCode     ierr;
2567   PetscBool          perm_identity,missing;
2568   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2569   const PetscInt     *rip,*riip;
2570   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2571   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2572   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2573   PetscReal          fill          =info->fill,levels=info->levels;
2574   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2575   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2576   PetscBT            lnkbt;
2577   IS                 iperm;
2578 
2579   PetscFunctionBegin;
2580   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2581   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2582   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2583   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2584   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2585 
2586   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2587   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2588   ui[0] = 0;
2589 
2590   /* ICC(0) without matrix ordering: simply copies fill pattern */
2591   if (!levels && perm_identity) {
2592 
2593     for (i=0; i<am; i++) {
2594       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2595       udiag[i] = ui[i];
2596     }
2597     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2598     cols = uj;
2599     for (i=0; i<am; i++) {
2600       aj    = a->j + a->diag[i];
2601       ncols = ui[i+1] - ui[i];
2602       for (j=0; j<ncols; j++) *cols++ = *aj++;
2603     }
2604   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2605     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2606     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2607 
2608     /* initialization */
2609     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2610 
2611     /* jl: linked list for storing indices of the pivot rows
2612        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2613     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2614     for (i=0; i<am; i++) {
2615       jl[i] = am; il[i] = 0;
2616     }
2617 
2618     /* create and initialize a linked list for storing column indices of the active row k */
2619     nlnk = am + 1;
2620     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2621 
2622     /* initial FreeSpace size is fill*(ai[am]+1) */
2623     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
2624     current_space     = free_space;
2625     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr);
2626     current_space_lvl = free_space_lvl;
2627 
2628     for (k=0; k<am; k++) {  /* for each active row k */
2629       /* initialize lnk by the column indices of row rip[k] of A */
2630       nzk   = 0;
2631       ncols = ai[rip[k]+1] - ai[rip[k]];
2632       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2633       ncols_upper = 0;
2634       for (j=0; j<ncols; j++) {
2635         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2636         if (riip[i] >= k) { /* only take upper triangular entry */
2637           ajtmp[ncols_upper] = i;
2638           ncols_upper++;
2639         }
2640       }
2641       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2642       nzk += nlnk;
2643 
2644       /* update lnk by computing fill-in for each pivot row to be merged in */
2645       prow = jl[k]; /* 1st pivot row */
2646 
2647       while (prow < k) {
2648         nextprow = jl[prow];
2649 
2650         /* merge prow into k-th row */
2651         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2652         jmax  = ui[prow+1];
2653         ncols = jmax-jmin;
2654         i     = jmin - ui[prow];
2655         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2656         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2657         j     = *(uj - 1);
2658         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2659         nzk  += nlnk;
2660 
2661         /* update il and jl for prow */
2662         if (jmin < jmax) {
2663           il[prow] = jmin;
2664           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2665         }
2666         prow = nextprow;
2667       }
2668 
2669       /* if free space is not available, make more free space */
2670       if (current_space->local_remaining<nzk) {
2671         i    = am - k + 1; /* num of unfactored rows */
2672         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2673         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2674         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2675         reallocs++;
2676       }
2677 
2678       /* copy data into free_space and free_space_lvl, then initialize lnk */
2679       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2680       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2681 
2682       /* add the k-th row into il and jl */
2683       if (nzk > 1) {
2684         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2685         jl[k] = jl[i]; jl[i] = k;
2686         il[k] = ui[k] + 1;
2687       }
2688       uj_ptr[k]     = current_space->array;
2689       uj_lvl_ptr[k] = current_space_lvl->array;
2690 
2691       current_space->array           += nzk;
2692       current_space->local_used      += nzk;
2693       current_space->local_remaining -= nzk;
2694 
2695       current_space_lvl->array           += nzk;
2696       current_space_lvl->local_used      += nzk;
2697       current_space_lvl->local_remaining -= nzk;
2698 
2699       ui[k+1] = ui[k] + nzk;
2700     }
2701 
2702 #if defined(PETSC_USE_INFO)
2703     if (ai[am] != 0) {
2704       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2705       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2706       ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2707       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2708     } else {
2709       ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
2710     }
2711 #endif
2712 
2713     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2714     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2715     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2716     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2717 
2718     /* destroy list of free space and other temporary array(s) */
2719     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2720     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2721     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2722     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2723 
2724   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2725 
2726   /* put together the new matrix in MATSEQSBAIJ format */
2727 
2728   b               = (Mat_SeqSBAIJ*)fact->data;
2729   b->singlemalloc = PETSC_FALSE;
2730 
2731   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2732 
2733   b->j         = uj;
2734   b->i         = ui;
2735   b->diag      = udiag;
2736   b->free_diag = PETSC_TRUE;
2737   b->ilen      = 0;
2738   b->imax      = 0;
2739   b->row       = perm;
2740   b->col       = perm;
2741 
2742   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2743   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2744 
2745   b->icol          = iperm;
2746   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2747   ierr             = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2748   ierr             = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2749   b->maxnz         = b->nz = ui[am];
2750   b->free_a        = PETSC_TRUE;
2751   b->free_ij       = PETSC_TRUE;
2752 
2753   fact->info.factor_mallocs   = reallocs;
2754   fact->info.fill_ratio_given = fill;
2755   if (ai[am] != 0) {
2756     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2757   } else {
2758     fact->info.fill_ratio_needed = 0.0;
2759   }
2760   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2765 {
2766   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2767   Mat_SeqSBAIJ       *b;
2768   PetscErrorCode     ierr;
2769   PetscBool          perm_identity,missing;
2770   PetscReal          fill = info->fill;
2771   const PetscInt     *rip,*riip;
2772   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2773   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2774   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2775   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2776   PetscBT            lnkbt;
2777   IS                 iperm;
2778 
2779   PetscFunctionBegin;
2780   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2781   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
2782   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2783 
2784   /* check whether perm is the identity mapping */
2785   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2786   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2787   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2788   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2789 
2790   /* initialization */
2791   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2792   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2793   ui[0] = 0;
2794 
2795   /* jl: linked list for storing indices of the pivot rows
2796      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2797   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
2798   for (i=0; i<am; i++) {
2799     jl[i] = am; il[i] = 0;
2800   }
2801 
2802   /* create and initialize a linked list for storing column indices of the active row k */
2803   nlnk = am + 1;
2804   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2805 
2806   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2807   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2808   current_space = free_space;
2809 
2810   for (k=0; k<am; k++) {  /* for each active row k */
2811     /* initialize lnk by the column indices of row rip[k] of A */
2812     nzk   = 0;
2813     ncols = ai[rip[k]+1] - ai[rip[k]];
2814     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2815     ncols_upper = 0;
2816     for (j=0; j<ncols; j++) {
2817       i = riip[*(aj + ai[rip[k]] + j)];
2818       if (i >= k) { /* only take upper triangular entry */
2819         cols[ncols_upper] = i;
2820         ncols_upper++;
2821       }
2822     }
2823     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2824     nzk += nlnk;
2825 
2826     /* update lnk by computing fill-in for each pivot row to be merged in */
2827     prow = jl[k]; /* 1st pivot row */
2828 
2829     while (prow < k) {
2830       nextprow = jl[prow];
2831       /* merge prow into k-th row */
2832       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2833       jmax   = ui[prow+1];
2834       ncols  = jmax-jmin;
2835       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2836       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2837       nzk   += nlnk;
2838 
2839       /* update il and jl for prow */
2840       if (jmin < jmax) {
2841         il[prow] = jmin;
2842         j        = *uj_ptr;
2843         jl[prow] = jl[j];
2844         jl[j]    = prow;
2845       }
2846       prow = nextprow;
2847     }
2848 
2849     /* if free space is not available, make more free space */
2850     if (current_space->local_remaining<nzk) {
2851       i    = am - k + 1; /* num of unfactored rows */
2852       i    = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2853       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2854       reallocs++;
2855     }
2856 
2857     /* copy data into free space, then initialize lnk */
2858     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2859 
2860     /* add the k-th row into il and jl */
2861     if (nzk > 1) {
2862       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2863       jl[k] = jl[i]; jl[i] = k;
2864       il[k] = ui[k] + 1;
2865     }
2866     ui_ptr[k] = current_space->array;
2867 
2868     current_space->array           += nzk;
2869     current_space->local_used      += nzk;
2870     current_space->local_remaining -= nzk;
2871 
2872     ui[k+1] = ui[k] + nzk;
2873   }
2874 
2875   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2876   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2877   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2878 
2879   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2880   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2881   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2882   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2883 
2884   /* put together the new matrix in MATSEQSBAIJ format */
2885 
2886   b               = (Mat_SeqSBAIJ*)fact->data;
2887   b->singlemalloc = PETSC_FALSE;
2888   b->free_a       = PETSC_TRUE;
2889   b->free_ij      = PETSC_TRUE;
2890 
2891   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2892 
2893   b->j         = uj;
2894   b->i         = ui;
2895   b->diag      = udiag;
2896   b->free_diag = PETSC_TRUE;
2897   b->ilen      = 0;
2898   b->imax      = 0;
2899   b->row       = perm;
2900   b->col       = perm;
2901 
2902   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2903   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2904 
2905   b->icol          = iperm;
2906   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2907 
2908   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2909   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2910 
2911   b->maxnz = b->nz = ui[am];
2912 
2913   fact->info.factor_mallocs   = reallocs;
2914   fact->info.fill_ratio_given = fill;
2915   if (ai[am] != 0) {
2916     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2917     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2918   } else {
2919     fact->info.fill_ratio_needed = 0.0;
2920   }
2921 #if defined(PETSC_USE_INFO)
2922   if (ai[am] != 0) {
2923     PetscReal af = fact->info.fill_ratio_needed;
2924     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2925     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2926     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2927   } else {
2928     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
2929   }
2930 #endif
2931   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2936 {
2937   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2938   Mat_SeqSBAIJ       *b;
2939   PetscErrorCode     ierr;
2940   PetscBool          perm_identity,missing;
2941   PetscReal          fill = info->fill;
2942   const PetscInt     *rip,*riip;
2943   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2944   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2945   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2946   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2947   PetscBT            lnkbt;
2948   IS                 iperm;
2949 
2950   PetscFunctionBegin;
2951   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2952   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
2953   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2954 
2955   /* check whether perm is the identity mapping */
2956   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2957   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2958   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2959   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2960 
2961   /* initialization */
2962   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2963   ui[0] = 0;
2964 
2965   /* jl: linked list for storing indices of the pivot rows
2966      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2967   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
2968   for (i=0; i<am; i++) {
2969     jl[i] = am; il[i] = 0;
2970   }
2971 
2972   /* create and initialize a linked list for storing column indices of the active row k */
2973   nlnk = am + 1;
2974   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2975 
2976   /* initial FreeSpace size is fill*(ai[am]+1) */
2977   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
2978   current_space = free_space;
2979 
2980   for (k=0; k<am; k++) {  /* for each active row k */
2981     /* initialize lnk by the column indices of row rip[k] of A */
2982     nzk   = 0;
2983     ncols = ai[rip[k]+1] - ai[rip[k]];
2984     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2985     ncols_upper = 0;
2986     for (j=0; j<ncols; j++) {
2987       i = riip[*(aj + ai[rip[k]] + j)];
2988       if (i >= k) { /* only take upper triangular entry */
2989         cols[ncols_upper] = i;
2990         ncols_upper++;
2991       }
2992     }
2993     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2994     nzk += nlnk;
2995 
2996     /* update lnk by computing fill-in for each pivot row to be merged in */
2997     prow = jl[k]; /* 1st pivot row */
2998 
2999     while (prow < k) {
3000       nextprow = jl[prow];
3001       /* merge prow into k-th row */
3002       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3003       jmax   = ui[prow+1];
3004       ncols  = jmax-jmin;
3005       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3006       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3007       nzk   += nlnk;
3008 
3009       /* update il and jl for prow */
3010       if (jmin < jmax) {
3011         il[prow] = jmin;
3012         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3013       }
3014       prow = nextprow;
3015     }
3016 
3017     /* if free space is not available, make more free space */
3018     if (current_space->local_remaining<nzk) {
3019       i    = am - k + 1; /* num of unfactored rows */
3020       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3021       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3022       reallocs++;
3023     }
3024 
3025     /* copy data into free space, then initialize lnk */
3026     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3027 
3028     /* add the k-th row into il and jl */
3029     if (nzk-1 > 0) {
3030       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3031       jl[k] = jl[i]; jl[i] = k;
3032       il[k] = ui[k] + 1;
3033     }
3034     ui_ptr[k] = current_space->array;
3035 
3036     current_space->array           += nzk;
3037     current_space->local_used      += nzk;
3038     current_space->local_remaining -= nzk;
3039 
3040     ui[k+1] = ui[k] + nzk;
3041   }
3042 
3043 #if defined(PETSC_USE_INFO)
3044   if (ai[am] != 0) {
3045     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3046     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
3047     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
3048     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
3049   } else {
3050     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
3051   }
3052 #endif
3053 
3054   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3055   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3056   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3057 
3058   /* destroy list of free space and other temporary array(s) */
3059   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
3060   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3061   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3062 
3063   /* put together the new matrix in MATSEQSBAIJ format */
3064 
3065   b               = (Mat_SeqSBAIJ*)fact->data;
3066   b->singlemalloc = PETSC_FALSE;
3067   b->free_a       = PETSC_TRUE;
3068   b->free_ij      = PETSC_TRUE;
3069 
3070   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
3071 
3072   b->j    = uj;
3073   b->i    = ui;
3074   b->diag = 0;
3075   b->ilen = 0;
3076   b->imax = 0;
3077   b->row  = perm;
3078   b->col  = perm;
3079 
3080   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3081   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3082 
3083   b->icol          = iperm;
3084   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3085 
3086   ierr     = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
3087   ierr     = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3088   b->maxnz = b->nz = ui[am];
3089 
3090   fact->info.factor_mallocs   = reallocs;
3091   fact->info.fill_ratio_given = fill;
3092   if (ai[am] != 0) {
3093     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3094   } else {
3095     fact->info.fill_ratio_needed = 0.0;
3096   }
3097   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3098   PetscFunctionReturn(0);
3099 }
3100 
3101 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3102 {
3103   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3104   PetscErrorCode    ierr;
3105   PetscInt          n   = A->rmap->n;
3106   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3107   PetscScalar       *x,sum;
3108   const PetscScalar *b;
3109   const MatScalar   *aa = a->a,*v;
3110   PetscInt          i,nz;
3111 
3112   PetscFunctionBegin;
3113   if (!n) PetscFunctionReturn(0);
3114 
3115   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3116   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
3117 
3118   /* forward solve the lower triangular */
3119   x[0] = b[0];
3120   v    = aa;
3121   vi   = aj;
3122   for (i=1; i<n; i++) {
3123     nz  = ai[i+1] - ai[i];
3124     sum = b[i];
3125     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3126     v   += nz;
3127     vi  += nz;
3128     x[i] = sum;
3129   }
3130 
3131   /* backward solve the upper triangular */
3132   for (i=n-1; i>=0; i--) {
3133     v   = aa + adiag[i+1] + 1;
3134     vi  = aj + adiag[i+1] + 1;
3135     nz  = adiag[i] - adiag[i+1]-1;
3136     sum = x[i];
3137     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3138     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3139   }
3140 
3141   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3142   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3143   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
3144   PetscFunctionReturn(0);
3145 }
3146 
3147 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3148 {
3149   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3150   IS                iscol = a->col,isrow = a->row;
3151   PetscErrorCode    ierr;
3152   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3153   const PetscInt    *rout,*cout,*r,*c;
3154   PetscScalar       *x,*tmp,sum;
3155   const PetscScalar *b;
3156   const MatScalar   *aa = a->a,*v;
3157 
3158   PetscFunctionBegin;
3159   if (!n) PetscFunctionReturn(0);
3160 
3161   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3162   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
3163   tmp  = a->solve_work;
3164 
3165   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3166   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3167 
3168   /* forward solve the lower triangular */
3169   tmp[0] = b[r[0]];
3170   v      = aa;
3171   vi     = aj;
3172   for (i=1; i<n; i++) {
3173     nz  = ai[i+1] - ai[i];
3174     sum = b[r[i]];
3175     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3176     tmp[i] = sum;
3177     v     += nz; vi += nz;
3178   }
3179 
3180   /* backward solve the upper triangular */
3181   for (i=n-1; i>=0; i--) {
3182     v   = aa + adiag[i+1]+1;
3183     vi  = aj + adiag[i+1]+1;
3184     nz  = adiag[i]-adiag[i+1]-1;
3185     sum = tmp[i];
3186     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3187     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3188   }
3189 
3190   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3191   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3192   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3193   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
3194   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3195   PetscFunctionReturn(0);
3196 }
3197 
3198 /*
3199     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3200 */
3201 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3202 {
3203   Mat            B = *fact;
3204   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3205   IS             isicol;
3206   PetscErrorCode ierr;
3207   const PetscInt *r,*ic;
3208   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3209   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3210   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3211   PetscInt       nlnk,*lnk;
3212   PetscBT        lnkbt;
3213   PetscBool      row_identity,icol_identity;
3214   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3215   const PetscInt *ics;
3216   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3217   PetscReal      dt     =info->dt,shift=info->shiftamount;
3218   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3219   PetscBool      missing;
3220 
3221   PetscFunctionBegin;
3222   if (dt      == PETSC_DEFAULT) dt = 0.005;
3223   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3224 
3225   /* ------- symbolic factorization, can be reused ---------*/
3226   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3227   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3228   adiag=a->diag;
3229 
3230   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3231 
3232   /* bdiag is location of diagonal in factor */
3233   ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3234   ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */
3235 
3236   /* allocate row pointers bi */
3237   ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr);
3238 
3239   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3240   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3241   nnz_max = ai[n]+2*n*dtcount+2;
3242 
3243   ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr);
3244   ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr);
3245 
3246   /* put together the new matrix */
3247   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3248   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
3249   b    = (Mat_SeqAIJ*)B->data;
3250 
3251   b->free_a       = PETSC_TRUE;
3252   b->free_ij      = PETSC_TRUE;
3253   b->singlemalloc = PETSC_FALSE;
3254 
3255   b->a    = ba;
3256   b->j    = bj;
3257   b->i    = bi;
3258   b->diag = bdiag;
3259   b->ilen = 0;
3260   b->imax = 0;
3261   b->row  = isrow;
3262   b->col  = iscol;
3263   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3264   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3265   b->icol = isicol;
3266 
3267   ierr     = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
3268   ierr     = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3269   b->maxnz = nnz_max;
3270 
3271   B->factortype            = MAT_FACTOR_ILUDT;
3272   B->info.factor_mallocs   = 0;
3273   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3274   /* ------- end of symbolic factorization ---------*/
3275 
3276   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3277   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3278   ics  = ic;
3279 
3280   /* linked list for storing column indices of the active row */
3281   nlnk = n + 1;
3282   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3283 
3284   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3285   ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr);
3286   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3287   ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr);
3288   ierr = PetscArrayzero(rtmp,n);CHKERRQ(ierr);
3289 
3290   bi[0]        = 0;
3291   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3292   bdiag_rev[n] = bdiag[0];
3293   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3294   for (i=0; i<n; i++) {
3295     /* copy initial fill into linked list */
3296     nzi = ai[r[i]+1] - ai[r[i]];
3297     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3298     nzi_al = adiag[r[i]] - ai[r[i]];
3299     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3300     ajtmp  = aj + ai[r[i]];
3301     ierr   = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3302 
3303     /* load in initial (unfactored row) */
3304     aatmp = a->a + ai[r[i]];
3305     for (j=0; j<nzi; j++) {
3306       rtmp[ics[*ajtmp++]] = *aatmp++;
3307     }
3308 
3309     /* add pivot rows into linked list */
3310     row = lnk[n];
3311     while (row < i) {
3312       nzi_bl = bi[row+1] - bi[row] + 1;
3313       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3314       ierr   = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3315       nzi   += nlnk;
3316       row    = lnk[row];
3317     }
3318 
3319     /* copy data from lnk into jtmp, then initialize lnk */
3320     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3321 
3322     /* numerical factorization */
3323     bjtmp = jtmp;
3324     row   = *bjtmp++; /* 1st pivot row */
3325     while (row < i) {
3326       pc         = rtmp + row;
3327       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3328       multiplier = (*pc) * (*pv);
3329       *pc        = multiplier;
3330       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3331         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3332         pv = ba + bdiag[row+1] + 1;
3333         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3334         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3335         ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
3336       }
3337       row = *bjtmp++;
3338     }
3339 
3340     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3341     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3342     nzi_bl   = 0; j = 0;
3343     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3344       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3345       nzi_bl++; j++;
3346     }
3347     nzi_bu = nzi - nzi_bl -1;
3348     while (j < nzi) {
3349       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3350       j++;
3351     }
3352 
3353     bjtmp = bj + bi[i];
3354     batmp = ba + bi[i];
3355     /* apply level dropping rule to L part */
3356     ncut = nzi_al + dtcount;
3357     if (ncut < nzi_bl) {
3358       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3359       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3360     } else {
3361       ncut = nzi_bl;
3362     }
3363     for (j=0; j<ncut; j++) {
3364       bjtmp[j] = jtmp[j];
3365       batmp[j] = vtmp[j];
3366     }
3367     bi[i+1] = bi[i] + ncut;
3368     nzi     = ncut + 1;
3369 
3370     /* apply level dropping rule to U part */
3371     ncut = nzi_au + dtcount;
3372     if (ncut < nzi_bu) {
3373       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3374       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3375     } else {
3376       ncut = nzi_bu;
3377     }
3378     nzi += ncut;
3379 
3380     /* mark bdiagonal */
3381     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3382     bdiag_rev[n-i-1] = bdiag[i+1];
3383     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3384     bjtmp            = bj + bdiag[i];
3385     batmp            = ba + bdiag[i];
3386     *bjtmp           = i;
3387     *batmp           = diag_tmp; /* rtmp[i]; */
3388     if (*batmp == 0.0) {
3389       *batmp = dt+shift;
3390     }
3391     *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3392 
3393     bjtmp = bj + bdiag[i+1]+1;
3394     batmp = ba + bdiag[i+1]+1;
3395     for (k=0; k<ncut; k++) {
3396       bjtmp[k] = jtmp[nzi_bl+1+k];
3397       batmp[k] = vtmp[nzi_bl+1+k];
3398     }
3399 
3400     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3401   } /* for (i=0; i<n; i++) */
3402   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3403 
3404   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3405   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3406 
3407   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3408   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3409   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3410   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3411 
3412   ierr     = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3413   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3414 
3415   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3416   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3417   if (row_identity && icol_identity) {
3418     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3419   } else {
3420     B->ops->solve = MatSolve_SeqAIJ;
3421   }
3422 
3423   B->ops->solveadd          = 0;
3424   B->ops->solvetranspose    = 0;
3425   B->ops->solvetransposeadd = 0;
3426   B->ops->matsolve          = 0;
3427   B->assembled              = PETSC_TRUE;
3428   B->preallocated           = PETSC_TRUE;
3429   PetscFunctionReturn(0);
3430 }
3431 
3432 /* a wraper of MatILUDTFactor_SeqAIJ() */
3433 /*
3434     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3435 */
3436 
3437 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3438 {
3439   PetscErrorCode ierr;
3440 
3441   PetscFunctionBegin;
3442   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3443   PetscFunctionReturn(0);
3444 }
3445 
3446 /*
3447    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3448    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3449 */
3450 /*
3451     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3452 */
3453 
3454 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3455 {
3456   Mat            C     =fact;
3457   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3458   IS             isrow = b->row,isicol = b->icol;
3459   PetscErrorCode ierr;
3460   const PetscInt *r,*ic,*ics;
3461   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3462   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3463   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3464   PetscReal      dt=info->dt,shift=info->shiftamount;
3465   PetscBool      row_identity, col_identity;
3466 
3467   PetscFunctionBegin;
3468   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3469   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3470   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
3471   ics  = ic;
3472 
3473   for (i=0; i<n; i++) {
3474     /* initialize rtmp array */
3475     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3476     bjtmp = bj + bi[i];
3477     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3478     rtmp[i] = 0.0;
3479     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3480     bjtmp   = bj + bdiag[i+1] + 1;
3481     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3482 
3483     /* load in initial unfactored row of A */
3484     nz    = ai[r[i]+1] - ai[r[i]];
3485     ajtmp = aj + ai[r[i]];
3486     v     = aa + ai[r[i]];
3487     for (j=0; j<nz; j++) {
3488       rtmp[ics[*ajtmp++]] = v[j];
3489     }
3490 
3491     /* numerical factorization */
3492     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3493     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3494     k     = 0;
3495     while (k < nzl) {
3496       row        = *bjtmp++;
3497       pc         = rtmp + row;
3498       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3499       multiplier = (*pc) * (*pv);
3500       *pc        = multiplier;
3501       if (PetscAbsScalar(multiplier) > dt) {
3502         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3503         pv = b->a + bdiag[row+1] + 1;
3504         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3505         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3506         ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
3507       }
3508       k++;
3509     }
3510 
3511     /* finished row so stick it into b->a */
3512     /* L-part */
3513     pv  = b->a + bi[i];
3514     pj  = bj + bi[i];
3515     nzl = bi[i+1] - bi[i];
3516     for (j=0; j<nzl; j++) {
3517       pv[j] = rtmp[pj[j]];
3518     }
3519 
3520     /* diagonal: invert diagonal entries for simplier triangular solves */
3521     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3522     b->a[bdiag[i]] = 1.0/rtmp[i];
3523 
3524     /* U-part */
3525     pv  = b->a + bdiag[i+1] + 1;
3526     pj  = bj + bdiag[i+1] + 1;
3527     nzu = bdiag[i] - bdiag[i+1] - 1;
3528     for (j=0; j<nzu; j++) {
3529       pv[j] = rtmp[pj[j]];
3530     }
3531   }
3532 
3533   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3534   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3535   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3536 
3537   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3538   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3539   if (row_identity && col_identity) {
3540     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3541   } else {
3542     C->ops->solve = MatSolve_SeqAIJ;
3543   }
3544   C->ops->solveadd          = 0;
3545   C->ops->solvetranspose    = 0;
3546   C->ops->solvetransposeadd = 0;
3547   C->ops->matsolve          = 0;
3548   C->assembled              = PETSC_TRUE;
3549   C->preallocated           = PETSC_TRUE;
3550 
3551   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3552   PetscFunctionReturn(0);
3553 }
3554