xref: /petsc/src/mat/impls/aij/seq/aijfact.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <../src/mat/impls/sbaij/seq/sbaij.h>
4 #include <petscbt.h>
5 #include <../src/mat/utils/freespace.h>
6 
7 /*
8       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9 
10       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
11 */
12 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol)
13 {
14   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)mat->data;
15   PetscErrorCode    ierr;
16   PetscInt          i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
17   const PetscInt    *ai = a->i, *aj = a->j;
18   const PetscScalar *aa = a->a;
19   PetscBool         *done;
20   PetscReal         best,past = 0,future;
21 
22   PetscFunctionBegin;
23   /* pick initial row */
24   best = -1;
25   for (i=0; i<n; i++) {
26     future = 0.0;
27     for (j=ai[i]; j<ai[i+1]; j++) {
28       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
29       else              past  = PetscAbsScalar(aa[j]);
30     }
31     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
32     if (past/future > best) {
33       best    = past/future;
34       current = i;
35     }
36   }
37 
38   ierr     = PetscMalloc1(n,&done);CHKERRQ(ierr);
39   ierr     = PetscArrayzero(done,n);CHKERRQ(ierr);
40   ierr     = PetscMalloc1(n,&order);CHKERRQ(ierr);
41   order[0] = current;
42   for (i=0; i<n-1; i++) {
43     done[current] = PETSC_TRUE;
44     best          = -1;
45     /* loop over all neighbors of current pivot */
46     for (j=ai[current]; j<ai[current+1]; j++) {
47       jj = aj[j];
48       if (done[jj]) continue;
49       /* loop over columns of potential next row computing weights for below and above diagonal */
50       past = future = 0.0;
51       for (k=ai[jj]; k<ai[jj+1]; k++) {
52         kk = aj[k];
53         if (done[kk]) past += PetscAbsScalar(aa[k]);
54         else if (kk != jj) future += PetscAbsScalar(aa[k]);
55       }
56       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
57       if (past/future > best) {
58         best       = past/future;
59         newcurrent = jj;
60       }
61     }
62     if (best == -1) { /* no neighbors to select from so select best of all that remain */
63       best = -1;
64       for (k=0; k<n; k++) {
65         if (done[k]) continue;
66         future = 0.0;
67         past   = 0.0;
68         for (j=ai[k]; j<ai[k+1]; j++) {
69           kk = aj[j];
70           if (done[kk])       past += PetscAbsScalar(aa[j]);
71           else if (kk != k) future += PetscAbsScalar(aa[j]);
72         }
73         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
74         if (past/future > best) {
75           best       = past/future;
76           newcurrent = k;
77         }
78       }
79     }
80     if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current");
81     current    = newcurrent;
82     order[i+1] = current;
83   }
84   ierr  = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr);
85   *icol = *irow;
86   ierr  = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr);
87   ierr  = PetscFree(done);CHKERRQ(ierr);
88   ierr  = PetscFree(order);CHKERRQ(ierr);
89   PetscFunctionReturn(0);
90 }
91 
92 static PetscErrorCode MatFactorGetSolverType_petsc(Mat A,MatSolverType *type)
93 {
94   PetscFunctionBegin;
95   *type = MATSOLVERPETSC;
96   PetscFunctionReturn(0);
97 }
98 
99 PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
100 {
101   PetscInt       n = A->rmap->n;
102   PetscErrorCode ierr;
103 
104   PetscFunctionBegin;
105 #if defined(PETSC_USE_COMPLEX)
106   if (A->hermitian && !A->symmetric && (ftype == MAT_FACTOR_CHOLESKY||ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY or ICC Factor is not supported");
107 #endif
108   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
109   ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr);
110   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
111     ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr);
112 
113     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
114     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;
115 
116     ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr);
117     ierr = PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
118     ierr = PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILU]);CHKERRQ(ierr);
119     ierr = PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ILUDT]);CHKERRQ(ierr);
120   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
121     ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr);
122     ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
123 
124     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
125     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
126     ierr = PetscStrallocpy(MATORDERINGND,(char**)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
127     ierr = PetscStrallocpy(MATORDERINGNATURAL,(char**)&(*B)->preferredordering[MAT_FACTOR_ICC]);CHKERRQ(ierr);
128   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
129   (*B)->factortype = ftype;
130 
131   ierr = PetscFree((*B)->solvertype);CHKERRQ(ierr);
132   ierr = PetscStrallocpy(MATSOLVERPETSC,&(*B)->solvertype);CHKERRQ(ierr);
133   (*B)->canuseordering = PETSC_TRUE;
134   ierr = PetscObjectComposeFunction((PetscObject)*B,"MatFactorGetSolverType_C",MatFactorGetSolverType_petsc);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
139 {
140   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
141   IS                 isicol;
142   PetscErrorCode     ierr;
143   const PetscInt     *r,*ic;
144   PetscInt           i,n=A->rmap->n,*ai=a->i,*aj=a->j;
145   PetscInt           *bi,*bj,*ajtmp;
146   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
147   PetscReal          f;
148   PetscInt           nlnk,*lnk,k,**bi_ptr;
149   PetscFreeSpaceList free_space=NULL,current_space=NULL;
150   PetscBT            lnkbt;
151   PetscBool          missing;
152 
153   PetscFunctionBegin;
154   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
155   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
156   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
157 
158   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
159   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
160   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
161 
162   /* get new row pointers */
163   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
164   bi[0] = 0;
165 
166   /* bdiag is location of diagonal in factor */
167   ierr     = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
168   bdiag[0] = 0;
169 
170   /* linked list for storing column indices of the active row */
171   nlnk = n + 1;
172   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
173 
174   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
175 
176   /* initial FreeSpace size is f*(ai[n]+1) */
177   f             = info->fill;
178   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
179   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
180   current_space = free_space;
181 
182   for (i=0; i<n; i++) {
183     /* copy previous fill into linked list */
184     nzi = 0;
185     nnz = ai[r[i]+1] - ai[r[i]];
186     ajtmp = aj + ai[r[i]];
187     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
188     nzi  += nlnk;
189 
190     /* add pivot rows into linked list */
191     row = lnk[n];
192     while (row < i) {
193       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
194       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
195       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
196       nzi  += nlnk;
197       row   = lnk[row];
198     }
199     bi[i+1] = bi[i] + nzi;
200     im[i]   = nzi;
201 
202     /* mark bdiag */
203     nzbd = 0;
204     nnz  = nzi;
205     k    = lnk[n];
206     while (nnz-- && k < i) {
207       nzbd++;
208       k = lnk[k];
209     }
210     bdiag[i] = bi[i] + nzbd;
211 
212     /* if free space is not available, make more free space */
213     if (current_space->local_remaining<nzi) {
214       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
215       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
216       reallocs++;
217     }
218 
219     /* copy data into free space, then initialize lnk */
220     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
221 
222     bi_ptr[i]                       = current_space->array;
223     current_space->array           += nzi;
224     current_space->local_used      += nzi;
225     current_space->local_remaining -= nzi;
226   }
227 #if defined(PETSC_USE_INFO)
228   if (ai[n] != 0) {
229     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
230     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
231     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
232     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
233     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
234   } else {
235     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
236   }
237 #endif
238 
239   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
240   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
241 
242   /* destroy list of free space and other temporary array(s) */
243   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
244   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
245   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
246   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
247 
248   /* put together the new matrix */
249   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
250   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
251   b    = (Mat_SeqAIJ*)(B)->data;
252 
253   b->free_a       = PETSC_TRUE;
254   b->free_ij      = PETSC_TRUE;
255   b->singlemalloc = PETSC_FALSE;
256 
257   ierr    = PetscMalloc1(bi[n]+1,&b->a);CHKERRQ(ierr);
258   b->j    = bj;
259   b->i    = bi;
260   b->diag = bdiag;
261   b->ilen = NULL;
262   b->imax = NULL;
263   b->row  = isrow;
264   b->col  = iscol;
265   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
266   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
267   b->icol = isicol;
268   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
269 
270   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
271   ierr     = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
272   b->maxnz = b->nz = bi[n];
273 
274   (B)->factortype            = MAT_FACTOR_LU;
275   (B)->info.factor_mallocs   = reallocs;
276   (B)->info.fill_ratio_given = f;
277 
278   if (ai[n]) {
279     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
280   } else {
281     (B)->info.fill_ratio_needed = 0.0;
282   }
283   (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
284   if (a->inode.size) {
285     (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
286   }
287   PetscFunctionReturn(0);
288 }
289 
290 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
291 {
292   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
293   IS                 isicol;
294   PetscErrorCode     ierr;
295   const PetscInt     *r,*ic,*ai=a->i,*aj=a->j,*ajtmp;
296   PetscInt           i,n=A->rmap->n;
297   PetscInt           *bi,*bj;
298   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
299   PetscReal          f;
300   PetscInt           nlnk,*lnk,k,**bi_ptr;
301   PetscFreeSpaceList free_space=NULL,current_space=NULL;
302   PetscBT            lnkbt;
303   PetscBool          missing;
304 
305   PetscFunctionBegin;
306   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
307   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
308   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
309 
310   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
311   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
312   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
313 
314   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
315   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
316   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
317   bi[0] = bdiag[0] = 0;
318 
319   /* linked list for storing column indices of the active row */
320   nlnk = n + 1;
321   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
322 
323   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
324 
325   /* initial FreeSpace size is f*(ai[n]+1) */
326   f             = info->fill;
327   if (n==1)   f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
328   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
329   current_space = free_space;
330 
331   for (i=0; i<n; i++) {
332     /* copy previous fill into linked list */
333     nzi = 0;
334     nnz = ai[r[i]+1] - ai[r[i]];
335     ajtmp = aj + ai[r[i]];
336     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
337     nzi  += nlnk;
338 
339     /* add pivot rows into linked list */
340     row = lnk[n];
341     while (row < i) {
342       nzbd  = bdiag[row] + 1; /* num of entries in the row with column index <= row */
343       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
344       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
345       nzi  += nlnk;
346       row   = lnk[row];
347     }
348     bi[i+1] = bi[i] + nzi;
349     im[i]   = nzi;
350 
351     /* mark bdiag */
352     nzbd = 0;
353     nnz  = nzi;
354     k    = lnk[n];
355     while (nnz-- && k < i) {
356       nzbd++;
357       k = lnk[k];
358     }
359     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
360 
361     /* if free space is not available, make more free space */
362     if (current_space->local_remaining<nzi) {
363       /* estimated additional space needed */
364       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n-1,nzi));
365       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
366       reallocs++;
367     }
368 
369     /* copy data into free space, then initialize lnk */
370     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
371 
372     bi_ptr[i]                       = current_space->array;
373     current_space->array           += nzi;
374     current_space->local_used      += nzi;
375     current_space->local_remaining -= nzi;
376   }
377 
378   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
379   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
380 
381   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
382   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
383   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
384   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
385   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
386 
387   /* put together the new matrix */
388   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
389   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
390   b    = (Mat_SeqAIJ*)(B)->data;
391 
392   b->free_a       = PETSC_TRUE;
393   b->free_ij      = PETSC_TRUE;
394   b->singlemalloc = PETSC_FALSE;
395 
396   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
397 
398   b->j    = bj;
399   b->i    = bi;
400   b->diag = bdiag;
401   b->ilen = NULL;
402   b->imax = NULL;
403   b->row  = isrow;
404   b->col  = iscol;
405   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
406   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
407   b->icol = isicol;
408   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
409 
410   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
411   ierr     = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
412   b->maxnz = b->nz = bdiag[0]+1;
413 
414   B->factortype            = MAT_FACTOR_LU;
415   B->info.factor_mallocs   = reallocs;
416   B->info.fill_ratio_given = f;
417 
418   if (ai[n]) {
419     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
420   } else {
421     B->info.fill_ratio_needed = 0.0;
422   }
423 #if defined(PETSC_USE_INFO)
424   if (ai[n] != 0) {
425     PetscReal af = B->info.fill_ratio_needed;
426     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
427     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
428     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
429     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
430   } else {
431     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
432   }
433 #endif
434   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
435   if (a->inode.size) {
436     B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
437   }
438   ierr = MatSeqAIJCheckInode_FactorLU(B);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 /*
443     Trouble in factorization, should we dump the original matrix?
444 */
445 PetscErrorCode MatFactorDumpMatrix(Mat A)
446 {
447   PetscErrorCode ierr;
448   PetscBool      flg = PETSC_FALSE;
449 
450   PetscFunctionBegin;
451   ierr = PetscOptionsGetBool(((PetscObject)A)->options,NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr);
452   if (flg) {
453     PetscViewer viewer;
454     char        filename[PETSC_MAX_PATH_LEN];
455 
456     ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr);
457     ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
458     ierr = MatView(A,viewer);CHKERRQ(ierr);
459     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
460   }
461   PetscFunctionReturn(0);
462 }
463 
464 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
465 {
466   Mat             C     =B;
467   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
468   IS              isrow = b->row,isicol = b->icol;
469   PetscErrorCode  ierr;
470   const PetscInt  *r,*ic,*ics;
471   const PetscInt  n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
472   PetscInt        i,j,k,nz,nzL,row,*pj;
473   const PetscInt  *ajtmp,*bjtmp;
474   MatScalar       *rtmp,*pc,multiplier,*pv;
475   const MatScalar *aa=a->a,*v;
476   PetscBool       row_identity,col_identity;
477   FactorShiftCtx  sctx;
478   const PetscInt  *ddiag;
479   PetscReal       rs;
480   MatScalar       d;
481 
482   PetscFunctionBegin;
483   /* MatPivotSetUp(): initialize shift context sctx */
484   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
485 
486   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
487     ddiag          = a->diag;
488     sctx.shift_top = info->zeropivot;
489     for (i=0; i<n; i++) {
490       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
491       d  = (aa)[ddiag[i]];
492       rs = -PetscAbsScalar(d) - PetscRealPart(d);
493       v  = aa+ai[i];
494       nz = ai[i+1] - ai[i];
495       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
496       if (rs>sctx.shift_top) sctx.shift_top = rs;
497     }
498     sctx.shift_top *= 1.1;
499     sctx.nshift_max = 5;
500     sctx.shift_lo   = 0.;
501     sctx.shift_hi   = 1.;
502   }
503 
504   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
505   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
506   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
507   ics  = ic;
508 
509   do {
510     sctx.newshift = PETSC_FALSE;
511     for (i=0; i<n; i++) {
512       /* zero rtmp */
513       /* L part */
514       nz    = bi[i+1] - bi[i];
515       bjtmp = bj + bi[i];
516       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
517 
518       /* U part */
519       nz    = bdiag[i]-bdiag[i+1];
520       bjtmp = bj + bdiag[i+1]+1;
521       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
522 
523       /* load in initial (unfactored row) */
524       nz    = ai[r[i]+1] - ai[r[i]];
525       ajtmp = aj + ai[r[i]];
526       v     = aa + ai[r[i]];
527       for (j=0; j<nz; j++) {
528         rtmp[ics[ajtmp[j]]] = v[j];
529       }
530       /* ZeropivotApply() */
531       rtmp[i] += sctx.shift_amount;  /* shift the diagonal of the matrix */
532 
533       /* elimination */
534       bjtmp = bj + bi[i];
535       row   = *bjtmp++;
536       nzL   = bi[i+1] - bi[i];
537       for (k=0; k < nzL; k++) {
538         pc = rtmp + row;
539         if (*pc != 0.0) {
540           pv         = b->a + bdiag[row];
541           multiplier = *pc * (*pv);
542           *pc        = multiplier;
543 
544           pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
545           pv = b->a + bdiag[row+1]+1;
546           nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
547 
548           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
549           ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
550         }
551         row = *bjtmp++;
552       }
553 
554       /* finished row so stick it into b->a */
555       rs = 0.0;
556       /* L part */
557       pv = b->a + bi[i];
558       pj = b->j + bi[i];
559       nz = bi[i+1] - bi[i];
560       for (j=0; j<nz; j++) {
561         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
562       }
563 
564       /* U part */
565       pv = b->a + bdiag[i+1]+1;
566       pj = b->j + bdiag[i+1]+1;
567       nz = bdiag[i] - bdiag[i+1]-1;
568       for (j=0; j<nz; j++) {
569         pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
570       }
571 
572       sctx.rs = rs;
573       sctx.pv = rtmp[i];
574       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
575       if (sctx.newshift) break; /* break for-loop */
576       rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
577 
578       /* Mark diagonal and invert diagonal for simpler triangular solves */
579       pv  = b->a + bdiag[i];
580       *pv = 1.0/rtmp[i];
581 
582     } /* endof for (i=0; i<n; i++) { */
583 
584     /* MatPivotRefine() */
585     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
586       /*
587        * if no shift in this attempt & shifting & started shifting & can refine,
588        * then try lower shift
589        */
590       sctx.shift_hi       = sctx.shift_fraction;
591       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
592       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
593       sctx.newshift       = PETSC_TRUE;
594       sctx.nshift++;
595     }
596   } while (sctx.newshift);
597 
598   ierr = PetscFree(rtmp);CHKERRQ(ierr);
599   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
600   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
601 
602   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
603   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
604   if (b->inode.size) {
605     C->ops->solve = MatSolve_SeqAIJ_Inode;
606   } else if (row_identity && col_identity) {
607     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
608   } else {
609     C->ops->solve = MatSolve_SeqAIJ;
610   }
611   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
612   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
613   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
614   C->ops->matsolve          = MatMatSolve_SeqAIJ;
615   C->assembled              = PETSC_TRUE;
616   C->preallocated           = PETSC_TRUE;
617 
618   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
619 
620   /* MatShiftView(A,info,&sctx) */
621   if (sctx.nshift) {
622     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
623       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
624     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
625       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
626     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
627       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
628     }
629   }
630   PetscFunctionReturn(0);
631 }
632 
633 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
634 {
635   Mat             C     =B;
636   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
637   IS              isrow = b->row,isicol = b->icol;
638   PetscErrorCode  ierr;
639   const PetscInt  *r,*ic,*ics;
640   PetscInt        nz,row,i,j,n=A->rmap->n,diag;
641   const PetscInt  *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
642   const PetscInt  *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
643   MatScalar       *pv,*rtmp,*pc,multiplier,d;
644   const MatScalar *v,*aa=a->a;
645   PetscReal       rs=0.0;
646   FactorShiftCtx  sctx;
647   const PetscInt  *ddiag;
648   PetscBool       row_identity, col_identity;
649 
650   PetscFunctionBegin;
651   /* MatPivotSetUp(): initialize shift context sctx */
652   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
653 
654   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
655     ddiag          = a->diag;
656     sctx.shift_top = info->zeropivot;
657     for (i=0; i<n; i++) {
658       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
659       d  = (aa)[ddiag[i]];
660       rs = -PetscAbsScalar(d) - PetscRealPart(d);
661       v  = aa+ai[i];
662       nz = ai[i+1] - ai[i];
663       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
664       if (rs>sctx.shift_top) sctx.shift_top = rs;
665     }
666     sctx.shift_top *= 1.1;
667     sctx.nshift_max = 5;
668     sctx.shift_lo   = 0.;
669     sctx.shift_hi   = 1.;
670   }
671 
672   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
673   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
674   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
675   ics  = ic;
676 
677   do {
678     sctx.newshift = PETSC_FALSE;
679     for (i=0; i<n; i++) {
680       nz    = bi[i+1] - bi[i];
681       bjtmp = bj + bi[i];
682       for  (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
683 
684       /* load in initial (unfactored row) */
685       nz    = ai[r[i]+1] - ai[r[i]];
686       ajtmp = aj + ai[r[i]];
687       v     = aa + ai[r[i]];
688       for (j=0; j<nz; j++) {
689         rtmp[ics[ajtmp[j]]] = v[j];
690       }
691       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
692 
693       row = *bjtmp++;
694       while  (row < i) {
695         pc = rtmp + row;
696         if (*pc != 0.0) {
697           pv         = b->a + diag_offset[row];
698           pj         = b->j + diag_offset[row] + 1;
699           multiplier = *pc / *pv++;
700           *pc        = multiplier;
701           nz         = bi[row+1] - diag_offset[row] - 1;
702           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
703           ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
704         }
705         row = *bjtmp++;
706       }
707       /* finished row so stick it into b->a */
708       pv   = b->a + bi[i];
709       pj   = b->j + bi[i];
710       nz   = bi[i+1] - bi[i];
711       diag = diag_offset[i] - bi[i];
712       rs   = 0.0;
713       for (j=0; j<nz; j++) {
714         pv[j] = rtmp[pj[j]];
715         rs   += PetscAbsScalar(pv[j]);
716       }
717       rs -= PetscAbsScalar(pv[diag]);
718 
719       sctx.rs = rs;
720       sctx.pv = pv[diag];
721       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
722       if (sctx.newshift) break;
723       pv[diag] = sctx.pv;
724     }
725 
726     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
727       /*
728        * if no shift in this attempt & shifting & started shifting & can refine,
729        * then try lower shift
730        */
731       sctx.shift_hi       = sctx.shift_fraction;
732       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
733       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
734       sctx.newshift       = PETSC_TRUE;
735       sctx.nshift++;
736     }
737   } while (sctx.newshift);
738 
739   /* invert diagonal entries for simpler triangular solves */
740   for (i=0; i<n; i++) {
741     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
742   }
743   ierr = PetscFree(rtmp);CHKERRQ(ierr);
744   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
745   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
746 
747   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
748   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
749   if (row_identity && col_identity) {
750     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
751   } else {
752     C->ops->solve = MatSolve_SeqAIJ_inplace;
753   }
754   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
755   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
756   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
757   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
758 
759   C->assembled    = PETSC_TRUE;
760   C->preallocated = PETSC_TRUE;
761 
762   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
763   if (sctx.nshift) {
764     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
765       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
766     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
767       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
768     }
769   }
770   (C)->ops->solve          = MatSolve_SeqAIJ_inplace;
771   (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
772 
773   ierr = MatSeqAIJCheckInode(C);CHKERRQ(ierr);
774   PetscFunctionReturn(0);
775 }
776 
777 /*
778    This routine implements inplace ILU(0) with row or/and column permutations.
779    Input:
780      A - original matrix
781    Output;
782      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
783          a->j (col index) is permuted by the inverse of colperm, then sorted
784          a->a reordered accordingly with a->j
785          a->diag (ptr to diagonal elements) is updated.
786 */
787 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
788 {
789   Mat_SeqAIJ      *a    =(Mat_SeqAIJ*)A->data;
790   IS              isrow = a->row,isicol = a->icol;
791   PetscErrorCode  ierr;
792   const PetscInt  *r,*ic,*ics;
793   PetscInt        i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
794   PetscInt        *ajtmp,nz,row;
795   PetscInt        *diag = a->diag,nbdiag,*pj;
796   PetscScalar     *rtmp,*pc,multiplier,d;
797   MatScalar       *pv,*v;
798   PetscReal       rs;
799   FactorShiftCtx  sctx;
800   const MatScalar *aa=a->a,*vtmp;
801 
802   PetscFunctionBegin;
803   if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
804 
805   /* MatPivotSetUp(): initialize shift context sctx */
806   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
807 
808   if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
809     const PetscInt *ddiag = a->diag;
810     sctx.shift_top = info->zeropivot;
811     for (i=0; i<n; i++) {
812       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
813       d    = (aa)[ddiag[i]];
814       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
815       vtmp = aa+ai[i];
816       nz   = ai[i+1] - ai[i];
817       for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]);
818       if (rs>sctx.shift_top) sctx.shift_top = rs;
819     }
820     sctx.shift_top *= 1.1;
821     sctx.nshift_max = 5;
822     sctx.shift_lo   = 0.;
823     sctx.shift_hi   = 1.;
824   }
825 
826   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
827   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
828   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
829   ierr = PetscArrayzero(rtmp,n+1);CHKERRQ(ierr);
830   ics  = ic;
831 
832 #if defined(MV)
833   sctx.shift_top      = 0.;
834   sctx.nshift_max     = 0;
835   sctx.shift_lo       = 0.;
836   sctx.shift_hi       = 0.;
837   sctx.shift_fraction = 0.;
838 
839   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
840     sctx.shift_top = 0.;
841     for (i=0; i<n; i++) {
842       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
843       d  = (a->a)[diag[i]];
844       rs = -PetscAbsScalar(d) - PetscRealPart(d);
845       v  = a->a+ai[i];
846       nz = ai[i+1] - ai[i];
847       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
848       if (rs>sctx.shift_top) sctx.shift_top = rs;
849     }
850     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
851     sctx.shift_top *= 1.1;
852     sctx.nshift_max = 5;
853     sctx.shift_lo   = 0.;
854     sctx.shift_hi   = 1.;
855   }
856 
857   sctx.shift_amount = 0.;
858   sctx.nshift       = 0;
859 #endif
860 
861   do {
862     sctx.newshift = PETSC_FALSE;
863     for (i=0; i<n; i++) {
864       /* load in initial unfactored row */
865       nz    = ai[r[i]+1] - ai[r[i]];
866       ajtmp = aj + ai[r[i]];
867       v     = a->a + ai[r[i]];
868       /* sort permuted ajtmp and values v accordingly */
869       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
870       ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr);
871 
872       diag[r[i]] = ai[r[i]];
873       for (j=0; j<nz; j++) {
874         rtmp[ajtmp[j]] = v[j];
875         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
876       }
877       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
878 
879       row = *ajtmp++;
880       while  (row < i) {
881         pc = rtmp + row;
882         if (*pc != 0.0) {
883           pv = a->a + diag[r[row]];
884           pj = aj + diag[r[row]] + 1;
885 
886           multiplier = *pc / *pv++;
887           *pc        = multiplier;
888           nz         = ai[r[row]+1] - diag[r[row]] - 1;
889           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
890           ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
891         }
892         row = *ajtmp++;
893       }
894       /* finished row so overwrite it onto a->a */
895       pv     = a->a + ai[r[i]];
896       pj     = aj + ai[r[i]];
897       nz     = ai[r[i]+1] - ai[r[i]];
898       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
899 
900       rs = 0.0;
901       for (j=0; j<nz; j++) {
902         pv[j] = rtmp[pj[j]];
903         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
904       }
905 
906       sctx.rs = rs;
907       sctx.pv = pv[nbdiag];
908       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
909       if (sctx.newshift) break;
910       pv[nbdiag] = sctx.pv;
911     }
912 
913     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
914       /*
915        * if no shift in this attempt & shifting & started shifting & can refine,
916        * then try lower shift
917        */
918       sctx.shift_hi       = sctx.shift_fraction;
919       sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
920       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
921       sctx.newshift       = PETSC_TRUE;
922       sctx.nshift++;
923     }
924   } while (sctx.newshift);
925 
926   /* invert diagonal entries for simpler triangular solves */
927   for (i=0; i<n; i++) {
928     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
929   }
930 
931   ierr = PetscFree(rtmp);CHKERRQ(ierr);
932   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
933   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
934 
935   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
936   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
937   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
938   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
939 
940   A->assembled    = PETSC_TRUE;
941   A->preallocated = PETSC_TRUE;
942 
943   ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr);
944   if (sctx.nshift) {
945     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
946       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
947     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
948       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
949     }
950   }
951   PetscFunctionReturn(0);
952 }
953 
954 /* ----------------------------------------------------------- */
955 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
956 {
957   PetscErrorCode ierr;
958   Mat            C;
959 
960   PetscFunctionBegin;
961   ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr);
962   ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr);
963   ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr);
964 
965   A->ops->solve          = C->ops->solve;
966   A->ops->solvetranspose = C->ops->solvetranspose;
967 
968   ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
969   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 /* ----------------------------------------------------------- */
973 
974 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
975 {
976   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
977   IS                iscol = a->col,isrow = a->row;
978   PetscErrorCode    ierr;
979   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
980   PetscInt          nz;
981   const PetscInt    *rout,*cout,*r,*c;
982   PetscScalar       *x,*tmp,*tmps,sum;
983   const PetscScalar *b;
984   const MatScalar   *aa = a->a,*v;
985 
986   PetscFunctionBegin;
987   if (!n) PetscFunctionReturn(0);
988 
989   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
990   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
991   tmp  = a->solve_work;
992 
993   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
994   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
995 
996   /* forward solve the lower triangular */
997   tmp[0] = b[*r++];
998   tmps   = tmp;
999   for (i=1; i<n; i++) {
1000     v   = aa + ai[i];
1001     vi  = aj + ai[i];
1002     nz  = a->diag[i] - ai[i];
1003     sum = b[*r++];
1004     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1005     tmp[i] = sum;
1006   }
1007 
1008   /* backward solve the upper triangular */
1009   for (i=n-1; i>=0; i--) {
1010     v   = aa + a->diag[i] + 1;
1011     vi  = aj + a->diag[i] + 1;
1012     nz  = ai[i+1] - a->diag[i] - 1;
1013     sum = tmp[i];
1014     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1015     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1016   }
1017 
1018   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1019   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1020   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1021   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1022   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1027 {
1028   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1029   IS                iscol = a->col,isrow = a->row;
1030   PetscErrorCode    ierr;
1031   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1032   PetscInt          nz,neq,ldb,ldx;
1033   const PetscInt    *rout,*cout,*r,*c;
1034   PetscScalar       *x,*tmp = a->solve_work,*tmps,sum;
1035   const PetscScalar *b,*aa = a->a,*v;
1036   PetscBool         isdense;
1037 
1038   PetscFunctionBegin;
1039   if (!n) PetscFunctionReturn(0);
1040   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1041   if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1042   if (X != B) {
1043     ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1044     if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1045   }
1046   ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr);
1047   ierr = MatDenseGetLDA(B,&ldb);CHKERRQ(ierr);
1048   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1049   ierr = MatDenseGetLDA(X,&ldx);CHKERRQ(ierr);
1050   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1051   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1052   for (neq=0; neq<B->cmap->n; neq++) {
1053     /* forward solve the lower triangular */
1054     tmp[0] = b[r[0]];
1055     tmps   = tmp;
1056     for (i=1; i<n; i++) {
1057       v   = aa + ai[i];
1058       vi  = aj + ai[i];
1059       nz  = a->diag[i] - ai[i];
1060       sum = b[r[i]];
1061       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1062       tmp[i] = sum;
1063     }
1064     /* backward solve the upper triangular */
1065     for (i=n-1; i>=0; i--) {
1066       v   = aa + a->diag[i] + 1;
1067       vi  = aj + a->diag[i] + 1;
1068       nz  = ai[i+1] - a->diag[i] - 1;
1069       sum = tmp[i];
1070       PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1071       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1072     }
1073     b += ldb;
1074     x += ldx;
1075   }
1076   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1077   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1078   ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr);
1079   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1080   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1085 {
1086   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1087   IS                iscol = a->col,isrow = a->row;
1088   PetscErrorCode    ierr;
1089   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1090   PetscInt          nz,neq,ldb,ldx;
1091   const PetscInt    *rout,*cout,*r,*c;
1092   PetscScalar       *x,*tmp = a->solve_work,sum;
1093   const PetscScalar *b,*aa = a->a,*v;
1094   PetscBool         isdense;
1095 
1096   PetscFunctionBegin;
1097   if (!n) PetscFunctionReturn(0);
1098   ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1099   if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1100   if (X != B) {
1101     ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&isdense);CHKERRQ(ierr);
1102     if (!isdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1103   }
1104   ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr);
1105   ierr = MatDenseGetLDA(B,&ldb);CHKERRQ(ierr);
1106   ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
1107   ierr = MatDenseGetLDA(X,&ldx);CHKERRQ(ierr);
1108   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1109   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1110   for (neq=0; neq<B->cmap->n; neq++) {
1111     /* forward solve the lower triangular */
1112     tmp[0] = b[r[0]];
1113     v      = aa;
1114     vi     = aj;
1115     for (i=1; i<n; i++) {
1116       nz  = ai[i+1] - ai[i];
1117       sum = b[r[i]];
1118       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1119       tmp[i] = sum;
1120       v     += nz; vi += nz;
1121     }
1122     /* backward solve the upper triangular */
1123     for (i=n-1; i>=0; i--) {
1124       v   = aa + adiag[i+1]+1;
1125       vi  = aj + adiag[i+1]+1;
1126       nz  = adiag[i]-adiag[i+1]-1;
1127       sum = tmp[i];
1128       PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1129       x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1130     }
1131     b += ldb;
1132     x += ldx;
1133   }
1134   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1135   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1136   ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr);
1137   ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
1138   ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1143 {
1144   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1145   IS                iscol = a->col,isrow = a->row;
1146   PetscErrorCode    ierr;
1147   const PetscInt    *r,*c,*rout,*cout;
1148   PetscInt          i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1149   PetscInt          nz,row;
1150   PetscScalar       *x,*tmp,*tmps,sum;
1151   const PetscScalar *b;
1152   const MatScalar   *aa = a->a,*v;
1153 
1154   PetscFunctionBegin;
1155   if (!n) PetscFunctionReturn(0);
1156 
1157   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1158   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
1159   tmp  = a->solve_work;
1160 
1161   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1162   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1163 
1164   /* forward solve the lower triangular */
1165   tmp[0] = b[*r++];
1166   tmps   = tmp;
1167   for (row=1; row<n; row++) {
1168     i   = rout[row]; /* permuted row */
1169     v   = aa + ai[i];
1170     vi  = aj + ai[i];
1171     nz  = a->diag[i] - ai[i];
1172     sum = b[*r++];
1173     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1174     tmp[row] = sum;
1175   }
1176 
1177   /* backward solve the upper triangular */
1178   for (row=n-1; row>=0; row--) {
1179     i   = rout[row]; /* permuted row */
1180     v   = aa + a->diag[i] + 1;
1181     vi  = aj + a->diag[i] + 1;
1182     nz  = ai[i+1] - a->diag[i] - 1;
1183     sum = tmp[row];
1184     PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1185     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1186   }
1187 
1188   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1189   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1190   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1191   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1192   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 /* ----------------------------------------------------------- */
1197 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1198 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1199 {
1200   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
1201   PetscErrorCode    ierr;
1202   PetscInt          n   = A->rmap->n;
1203   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag;
1204   PetscScalar       *x;
1205   const PetscScalar *b;
1206   const MatScalar   *aa = a->a;
1207 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1208   PetscInt        adiag_i,i,nz,ai_i;
1209   const PetscInt  *vi;
1210   const MatScalar *v;
1211   PetscScalar     sum;
1212 #endif
1213 
1214   PetscFunctionBegin;
1215   if (!n) PetscFunctionReturn(0);
1216 
1217   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1218   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
1219 
1220 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1221   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1222 #else
1223   /* forward solve the lower triangular */
1224   x[0] = b[0];
1225   for (i=1; i<n; i++) {
1226     ai_i = ai[i];
1227     v    = aa + ai_i;
1228     vi   = aj + ai_i;
1229     nz   = adiag[i] - ai_i;
1230     sum  = b[i];
1231     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1232     x[i] = sum;
1233   }
1234 
1235   /* backward solve the upper triangular */
1236   for (i=n-1; i>=0; i--) {
1237     adiag_i = adiag[i];
1238     v       = aa + adiag_i + 1;
1239     vi      = aj + adiag_i + 1;
1240     nz      = ai[i+1] - adiag_i - 1;
1241     sum     = x[i];
1242     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1243     x[i] = sum*aa[adiag_i];
1244   }
1245 #endif
1246   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
1247   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1248   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1253 {
1254   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1255   IS                iscol = a->col,isrow = a->row;
1256   PetscErrorCode    ierr;
1257   PetscInt          i, n = A->rmap->n,j;
1258   PetscInt          nz;
1259   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1260   PetscScalar       *x,*tmp,sum;
1261   const PetscScalar *b;
1262   const MatScalar   *aa = a->a,*v;
1263 
1264   PetscFunctionBegin;
1265   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1266 
1267   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1268   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1269   tmp  = a->solve_work;
1270 
1271   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1272   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1);
1273 
1274   /* forward solve the lower triangular */
1275   tmp[0] = b[*r++];
1276   for (i=1; i<n; i++) {
1277     v   = aa + ai[i];
1278     vi  = aj + ai[i];
1279     nz  = a->diag[i] - ai[i];
1280     sum = b[*r++];
1281     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1282     tmp[i] = sum;
1283   }
1284 
1285   /* backward solve the upper triangular */
1286   for (i=n-1; i>=0; i--) {
1287     v   = aa + a->diag[i] + 1;
1288     vi  = aj + a->diag[i] + 1;
1289     nz  = ai[i+1] - a->diag[i] - 1;
1290     sum = tmp[i];
1291     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1292     tmp[i]   = sum*aa[a->diag[i]];
1293     x[*c--] += tmp[i];
1294   }
1295 
1296   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1297   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1298   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1299   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1300   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1305 {
1306   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1307   IS                iscol = a->col,isrow = a->row;
1308   PetscErrorCode    ierr;
1309   PetscInt          i, n = A->rmap->n,j;
1310   PetscInt          nz;
1311   const PetscInt    *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1312   PetscScalar       *x,*tmp,sum;
1313   const PetscScalar *b;
1314   const MatScalar   *aa = a->a,*v;
1315 
1316   PetscFunctionBegin;
1317   if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);}
1318 
1319   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1320   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1321   tmp  = a->solve_work;
1322 
1323   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1324   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1325 
1326   /* forward solve the lower triangular */
1327   tmp[0] = b[r[0]];
1328   v      = aa;
1329   vi     = aj;
1330   for (i=1; i<n; i++) {
1331     nz  = ai[i+1] - ai[i];
1332     sum = b[r[i]];
1333     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1334     tmp[i] = sum;
1335     v     += nz;
1336     vi    += nz;
1337   }
1338 
1339   /* backward solve the upper triangular */
1340   v  = aa + adiag[n-1];
1341   vi = aj + adiag[n-1];
1342   for (i=n-1; i>=0; i--) {
1343     nz  = adiag[i] - adiag[i+1] - 1;
1344     sum = tmp[i];
1345     for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1346     tmp[i]   = sum*v[nz];
1347     x[c[i]] += tmp[i];
1348     v       += nz+1; vi += nz+1;
1349   }
1350 
1351   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1352   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1353   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1354   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1355   ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1360 {
1361   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1362   IS                iscol = a->col,isrow = a->row;
1363   PetscErrorCode    ierr;
1364   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1365   PetscInt          i,n = A->rmap->n,j;
1366   PetscInt          nz;
1367   PetscScalar       *x,*tmp,s1;
1368   const MatScalar   *aa = a->a,*v;
1369   const PetscScalar *b;
1370 
1371   PetscFunctionBegin;
1372   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1373   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
1374   tmp  = a->solve_work;
1375 
1376   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1377   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1378 
1379   /* copy the b into temp work space according to permutation */
1380   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1381 
1382   /* forward solve the U^T */
1383   for (i=0; i<n; i++) {
1384     v   = aa + diag[i];
1385     vi  = aj + diag[i] + 1;
1386     nz  = ai[i+1] - diag[i] - 1;
1387     s1  = tmp[i];
1388     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1389     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1390     tmp[i] = s1;
1391   }
1392 
1393   /* backward solve the L^T */
1394   for (i=n-1; i>=0; i--) {
1395     v  = aa + diag[i] - 1;
1396     vi = aj + diag[i] - 1;
1397     nz = diag[i] - ai[i];
1398     s1 = tmp[i];
1399     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1400   }
1401 
1402   /* copy tmp into x according to permutation */
1403   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1404 
1405   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1406   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1407   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1408   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1409 
1410   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1415 {
1416   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1417   IS                iscol = a->col,isrow = a->row;
1418   PetscErrorCode    ierr;
1419   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1420   PetscInt          i,n = A->rmap->n,j;
1421   PetscInt          nz;
1422   PetscScalar       *x,*tmp,s1;
1423   const MatScalar   *aa = a->a,*v;
1424   const PetscScalar *b;
1425 
1426   PetscFunctionBegin;
1427   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1428   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
1429   tmp  = a->solve_work;
1430 
1431   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1432   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1433 
1434   /* copy the b into temp work space according to permutation */
1435   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1436 
1437   /* forward solve the U^T */
1438   for (i=0; i<n; i++) {
1439     v   = aa + adiag[i+1] + 1;
1440     vi  = aj + adiag[i+1] + 1;
1441     nz  = adiag[i] - adiag[i+1] - 1;
1442     s1  = tmp[i];
1443     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1444     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1445     tmp[i] = s1;
1446   }
1447 
1448   /* backward solve the L^T */
1449   for (i=n-1; i>=0; i--) {
1450     v  = aa + ai[i];
1451     vi = aj + ai[i];
1452     nz = ai[i+1] - ai[i];
1453     s1 = tmp[i];
1454     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1455   }
1456 
1457   /* copy tmp into x according to permutation */
1458   for (i=0; i<n; i++) x[r[i]] = tmp[i];
1459 
1460   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1461   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1462   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1463   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
1464 
1465   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1470 {
1471   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1472   IS                iscol = a->col,isrow = a->row;
1473   PetscErrorCode    ierr;
1474   const PetscInt    *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1475   PetscInt          i,n = A->rmap->n,j;
1476   PetscInt          nz;
1477   PetscScalar       *x,*tmp,s1;
1478   const MatScalar   *aa = a->a,*v;
1479   const PetscScalar *b;
1480 
1481   PetscFunctionBegin;
1482   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1483   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1484   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1485   tmp  = a->solve_work;
1486 
1487   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1488   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1489 
1490   /* copy the b into temp work space according to permutation */
1491   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1492 
1493   /* forward solve the U^T */
1494   for (i=0; i<n; i++) {
1495     v   = aa + diag[i];
1496     vi  = aj + diag[i] + 1;
1497     nz  = ai[i+1] - diag[i] - 1;
1498     s1  = tmp[i];
1499     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1500     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1501     tmp[i] = s1;
1502   }
1503 
1504   /* backward solve the L^T */
1505   for (i=n-1; i>=0; i--) {
1506     v  = aa + diag[i] - 1;
1507     vi = aj + diag[i] - 1;
1508     nz = diag[i] - ai[i];
1509     s1 = tmp[i];
1510     for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1511   }
1512 
1513   /* copy tmp into x according to permutation */
1514   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1515 
1516   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1517   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1518   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1519   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1520 
1521   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1526 {
1527   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
1528   IS                iscol = a->col,isrow = a->row;
1529   PetscErrorCode    ierr;
1530   const PetscInt    *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1531   PetscInt          i,n = A->rmap->n,j;
1532   PetscInt          nz;
1533   PetscScalar       *x,*tmp,s1;
1534   const MatScalar   *aa = a->a,*v;
1535   const PetscScalar *b;
1536 
1537   PetscFunctionBegin;
1538   if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);}
1539   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1540   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1541   tmp  = a->solve_work;
1542 
1543   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
1544   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
1545 
1546   /* copy the b into temp work space according to permutation */
1547   for (i=0; i<n; i++) tmp[i] = b[c[i]];
1548 
1549   /* forward solve the U^T */
1550   for (i=0; i<n; i++) {
1551     v   = aa + adiag[i+1] + 1;
1552     vi  = aj + adiag[i+1] + 1;
1553     nz  = adiag[i] - adiag[i+1] - 1;
1554     s1  = tmp[i];
1555     s1 *= v[nz];  /* multiply by inverse of diagonal entry */
1556     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1557     tmp[i] = s1;
1558   }
1559 
1560   /* backward solve the L^T */
1561   for (i=n-1; i>=0; i--) {
1562     v  = aa + ai[i];
1563     vi = aj + ai[i];
1564     nz = ai[i+1] - ai[i];
1565     s1 = tmp[i];
1566     for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1567   }
1568 
1569   /* copy tmp into x according to permutation */
1570   for (i=0; i<n; i++) x[r[i]] += tmp[i];
1571 
1572   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
1573   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
1574   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1575   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1576 
1577   ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 /* ----------------------------------------------------------------*/
1582 
1583 /*
1584    ilu() under revised new data structure.
1585    Factored arrays bj and ba are stored as
1586      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1587 
1588    bi=fact->i is an array of size n+1, in which
1589    bi+
1590      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1591      bi[n]:  points to L(n-1,n-1)+1
1592 
1593   bdiag=fact->diag is an array of size n+1,in which
1594      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1595      bdiag[n]: points to entry of U(n-1,0)-1
1596 
1597    U(i,:) contains bdiag[i] as its last entry, i.e.,
1598     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1599 */
1600 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1601 {
1602   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
1603   PetscErrorCode ierr;
1604   const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1605   PetscInt       i,j,k=0,nz,*bi,*bj,*bdiag;
1606   IS             isicol;
1607 
1608   PetscFunctionBegin;
1609   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1610   ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr);
1611   b    = (Mat_SeqAIJ*)(fact)->data;
1612 
1613   /* allocate matrix arrays for new data structure */
1614   ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr);
1615   ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1616 
1617   b->singlemalloc = PETSC_TRUE;
1618   if (!b->diag) {
1619     ierr = PetscMalloc1(n+1,&b->diag);CHKERRQ(ierr);
1620     ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr);
1621   }
1622   bdiag = b->diag;
1623 
1624   if (n > 0) {
1625     ierr = PetscArrayzero(b->a,ai[n]);CHKERRQ(ierr);
1626   }
1627 
1628   /* set bi and bj with new data structure */
1629   bi = b->i;
1630   bj = b->j;
1631 
1632   /* L part */
1633   bi[0] = 0;
1634   for (i=0; i<n; i++) {
1635     nz      = adiag[i] - ai[i];
1636     bi[i+1] = bi[i] + nz;
1637     aj      = a->j + ai[i];
1638     for (j=0; j<nz; j++) {
1639       /*   *bj = aj[j]; bj++; */
1640       bj[k++] = aj[j];
1641     }
1642   }
1643 
1644   /* U part */
1645   bdiag[n] = bi[n]-1;
1646   for (i=n-1; i>=0; i--) {
1647     nz = ai[i+1] - adiag[i] - 1;
1648     aj = a->j + adiag[i] + 1;
1649     for (j=0; j<nz; j++) {
1650       /*      *bj = aj[j]; bj++; */
1651       bj[k++] = aj[j];
1652     }
1653     /* diag[i] */
1654     /*    *bj = i; bj++; */
1655     bj[k++]  = i;
1656     bdiag[i] = bdiag[i+1] + nz + 1;
1657   }
1658 
1659   fact->factortype             = MAT_FACTOR_ILU;
1660   fact->info.factor_mallocs    = 0;
1661   fact->info.fill_ratio_given  = info->fill;
1662   fact->info.fill_ratio_needed = 1.0;
1663   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1664   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1665 
1666   b       = (Mat_SeqAIJ*)(fact)->data;
1667   b->row  = isrow;
1668   b->col  = iscol;
1669   b->icol = isicol;
1670   ierr    = PetscMalloc1(fact->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1671   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1672   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1677 {
1678   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1679   IS                 isicol;
1680   PetscErrorCode     ierr;
1681   const PetscInt     *r,*ic;
1682   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1683   PetscInt           *bi,*cols,nnz,*cols_lvl;
1684   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1685   PetscInt           i,levels,diagonal_fill;
1686   PetscBool          col_identity,row_identity,missing;
1687   PetscReal          f;
1688   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1689   PetscBT            lnkbt;
1690   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1691   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1692   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1693 
1694   PetscFunctionBegin;
1695   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1696   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1697   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1698 
1699   levels = (PetscInt)info->levels;
1700   ierr   = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1701   ierr   = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1702   if (!levels && row_identity && col_identity) {
1703     /* special case: ilu(0) with natural ordering */
1704     ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr);
1705     if (a->inode.size) {
1706       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1707     }
1708     PetscFunctionReturn(0);
1709   }
1710 
1711   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1712   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1713   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1714 
1715   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1716   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1717   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1718   bi[0] = bdiag[0] = 0;
1719   ierr  = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1720 
1721   /* create a linked list for storing column indices of the active row */
1722   nlnk = n + 1;
1723   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1724 
1725   /* initial FreeSpace size is f*(ai[n]+1) */
1726   f                 = info->fill;
1727   diagonal_fill     = (PetscInt)info->diagonal_fill;
1728   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
1729   current_space     = free_space;
1730   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr);
1731   current_space_lvl = free_space_lvl;
1732   for (i=0; i<n; i++) {
1733     nzi = 0;
1734     /* copy current row into linked list */
1735     nnz = ai[r[i]+1] - ai[r[i]];
1736     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1737     cols   = aj + ai[r[i]];
1738     lnk[i] = -1; /* marker to indicate if diagonal exists */
1739     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1740     nzi   += nlnk;
1741 
1742     /* make sure diagonal entry is included */
1743     if (diagonal_fill && lnk[i] == -1) {
1744       fm = n;
1745       while (lnk[fm] < i) fm = lnk[fm];
1746       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1747       lnk[fm]    = i;
1748       lnk_lvl[i] = 0;
1749       nzi++; dcount++;
1750     }
1751 
1752     /* add pivot rows into the active row */
1753     nzbd = 0;
1754     prow = lnk[n];
1755     while (prow < i) {
1756       nnz      = bdiag[prow];
1757       cols     = bj_ptr[prow] + nnz + 1;
1758       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1759       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1760       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1761       nzi     += nlnk;
1762       prow     = lnk[prow];
1763       nzbd++;
1764     }
1765     bdiag[i] = nzbd;
1766     bi[i+1]  = bi[i] + nzi;
1767     /* if free space is not available, make more free space */
1768     if (current_space->local_remaining<nzi) {
1769       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(nzi,n - i)); /* estimated and max additional space needed */
1770       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1771       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1772       reallocs++;
1773     }
1774 
1775     /* copy data into free_space and free_space_lvl, then initialize lnk */
1776     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1777     bj_ptr[i]    = current_space->array;
1778     bjlvl_ptr[i] = current_space_lvl->array;
1779 
1780     /* make sure the active row i has diagonal entry */
1781     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1782 
1783     current_space->array               += nzi;
1784     current_space->local_used          += nzi;
1785     current_space->local_remaining     -= nzi;
1786     current_space_lvl->array           += nzi;
1787     current_space_lvl->local_used      += nzi;
1788     current_space_lvl->local_remaining -= nzi;
1789   }
1790 
1791   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1792   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1793   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1794   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
1795   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
1796 
1797   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1798   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1799   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1800 
1801 #if defined(PETSC_USE_INFO)
1802   {
1803     PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1804     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
1805     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
1806     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
1807     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
1808     if (diagonal_fill) {
1809       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
1810     }
1811   }
1812 #endif
1813   /* put together the new matrix */
1814   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
1815   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
1816   b    = (Mat_SeqAIJ*)(fact)->data;
1817 
1818   b->free_a       = PETSC_TRUE;
1819   b->free_ij      = PETSC_TRUE;
1820   b->singlemalloc = PETSC_FALSE;
1821 
1822   ierr = PetscMalloc1(bdiag[0]+1,&b->a);CHKERRQ(ierr);
1823 
1824   b->j    = bj;
1825   b->i    = bi;
1826   b->diag = bdiag;
1827   b->ilen = NULL;
1828   b->imax = NULL;
1829   b->row  = isrow;
1830   b->col  = iscol;
1831   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1832   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1833   b->icol = isicol;
1834 
1835   ierr = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
1836   /* In b structure:  Free imax, ilen, old a, old j.
1837      Allocate bdiag, solve_work, new a, new j */
1838   ierr     = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
1839   b->maxnz = b->nz = bdiag[0]+1;
1840 
1841   (fact)->info.factor_mallocs    = reallocs;
1842   (fact)->info.fill_ratio_given  = f;
1843   (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1844   (fact)->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1845   if (a->inode.size) {
1846     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1847   }
1848   ierr = MatSeqAIJCheckInode_FactorLU(fact);CHKERRQ(ierr);
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1853 {
1854   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1855   IS                 isicol;
1856   PetscErrorCode     ierr;
1857   const PetscInt     *r,*ic;
1858   PetscInt           n=A->rmap->n,*ai=a->i,*aj=a->j;
1859   PetscInt           *bi,*cols,nnz,*cols_lvl;
1860   PetscInt           *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1861   PetscInt           i,levels,diagonal_fill;
1862   PetscBool          col_identity,row_identity;
1863   PetscReal          f;
1864   PetscInt           nlnk,*lnk,*lnk_lvl=NULL;
1865   PetscBT            lnkbt;
1866   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1867   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
1868   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
1869   PetscBool          missing;
1870 
1871   PetscFunctionBegin;
1872   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1873   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
1874   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1875 
1876   f             = info->fill;
1877   levels        = (PetscInt)info->levels;
1878   diagonal_fill = (PetscInt)info->diagonal_fill;
1879 
1880   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
1881 
1882   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
1883   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
1884   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1885     ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr);
1886 
1887     (fact)->ops->lufactornumeric =  MatLUFactorNumeric_SeqAIJ_inplace;
1888     if (a->inode.size) {
1889       (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1890     }
1891     fact->factortype               = MAT_FACTOR_ILU;
1892     (fact)->info.factor_mallocs    = 0;
1893     (fact)->info.fill_ratio_given  = info->fill;
1894     (fact)->info.fill_ratio_needed = 1.0;
1895 
1896     b    = (Mat_SeqAIJ*)(fact)->data;
1897     b->row  = isrow;
1898     b->col  = iscol;
1899     b->icol = isicol;
1900     ierr    = PetscMalloc1((fact)->rmap->n+1,&b->solve_work);CHKERRQ(ierr);
1901     ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1902     ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1903     PetscFunctionReturn(0);
1904   }
1905 
1906   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
1907   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
1908 
1909   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1910   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
1911   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
1912   bi[0] = bdiag[0] = 0;
1913 
1914   ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr);
1915 
1916   /* create a linked list for storing column indices of the active row */
1917   nlnk = n + 1;
1918   ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1919 
1920   /* initial FreeSpace size is f*(ai[n]+1) */
1921   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
1922   current_space     = free_space;
1923   ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space_lvl);CHKERRQ(ierr);
1924   current_space_lvl = free_space_lvl;
1925 
1926   for (i=0; i<n; i++) {
1927     nzi = 0;
1928     /* copy current row into linked list */
1929     nnz = ai[r[i]+1] - ai[r[i]];
1930     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1931     cols   = aj + ai[r[i]];
1932     lnk[i] = -1; /* marker to indicate if diagonal exists */
1933     ierr   = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
1934     nzi   += nlnk;
1935 
1936     /* make sure diagonal entry is included */
1937     if (diagonal_fill && lnk[i] == -1) {
1938       fm = n;
1939       while (lnk[fm] < i) fm = lnk[fm];
1940       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1941       lnk[fm]    = i;
1942       lnk_lvl[i] = 0;
1943       nzi++; dcount++;
1944     }
1945 
1946     /* add pivot rows into the active row */
1947     nzbd = 0;
1948     prow = lnk[n];
1949     while (prow < i) {
1950       nnz      = bdiag[prow];
1951       cols     = bj_ptr[prow] + nnz + 1;
1952       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1953       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1954       ierr     = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr);
1955       nzi     += nlnk;
1956       prow     = lnk[prow];
1957       nzbd++;
1958     }
1959     bdiag[i] = nzbd;
1960     bi[i+1]  = bi[i] + nzi;
1961 
1962     /* if free space is not available, make more free space */
1963     if (current_space->local_remaining<nzi) {
1964       nnz  = PetscIntMultTruncate(nzi,n - i); /* estimated and max additional space needed */
1965       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
1966       ierr = PetscFreeSpaceGet(nnz,&current_space_lvl);CHKERRQ(ierr);
1967       reallocs++;
1968     }
1969 
1970     /* copy data into free_space and free_space_lvl, then initialize lnk */
1971     ierr         = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
1972     bj_ptr[i]    = current_space->array;
1973     bjlvl_ptr[i] = current_space_lvl->array;
1974 
1975     /* make sure the active row i has diagonal entry */
1976     if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1977 
1978     current_space->array               += nzi;
1979     current_space->local_used          += nzi;
1980     current_space->local_remaining     -= nzi;
1981     current_space_lvl->array           += nzi;
1982     current_space_lvl->local_used      += nzi;
1983     current_space_lvl->local_remaining -= nzi;
1984   }
1985 
1986   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
1987   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
1988 
1989   /* destroy list of free space and other temporary arrays */
1990   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
1991   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */
1992   ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
1993   ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
1994   ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr);
1995 
1996 #if defined(PETSC_USE_INFO)
1997   {
1998     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1999     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
2000     ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2001     ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr);
2002     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
2003     if (diagonal_fill) {
2004       ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals\n",dcount);CHKERRQ(ierr);
2005     }
2006   }
2007 #endif
2008 
2009   /* put together the new matrix */
2010   ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
2011   ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr);
2012   b    = (Mat_SeqAIJ*)(fact)->data;
2013 
2014   b->free_a       = PETSC_TRUE;
2015   b->free_ij      = PETSC_TRUE;
2016   b->singlemalloc = PETSC_FALSE;
2017 
2018   ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr);
2019   b->j = bj;
2020   b->i = bi;
2021   for (i=0; i<n; i++) bdiag[i] += bi[i];
2022   b->diag = bdiag;
2023   b->ilen = NULL;
2024   b->imax = NULL;
2025   b->row  = isrow;
2026   b->col  = iscol;
2027   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
2028   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
2029   b->icol = isicol;
2030   ierr    = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
2031   /* In b structure:  Free imax, ilen, old a, old j.
2032      Allocate bdiag, solve_work, new a, new j */
2033   ierr     = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr);
2034   b->maxnz = b->nz = bi[n];
2035 
2036   (fact)->info.factor_mallocs    = reallocs;
2037   (fact)->info.fill_ratio_given  = f;
2038   (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2039   (fact)->ops->lufactornumeric   =  MatLUFactorNumeric_SeqAIJ_inplace;
2040   if (a->inode.size) {
2041     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2042   }
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2047 {
2048   Mat            C = B;
2049   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2050   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2051   IS             ip=b->row,iip = b->icol;
2052   PetscErrorCode ierr;
2053   const PetscInt *rip,*riip;
2054   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2055   PetscInt       *ai=a->i,*aj=a->j;
2056   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2057   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2058   PetscBool      perm_identity;
2059   FactorShiftCtx sctx;
2060   PetscReal      rs;
2061   MatScalar      d,*v;
2062 
2063   PetscFunctionBegin;
2064   /* MatPivotSetUp(): initialize shift context sctx */
2065   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2066 
2067   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2068     sctx.shift_top = info->zeropivot;
2069     for (i=0; i<mbs; i++) {
2070       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2071       d  = (aa)[a->diag[i]];
2072       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2073       v  = aa+ai[i];
2074       nz = ai[i+1] - ai[i];
2075       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2076       if (rs>sctx.shift_top) sctx.shift_top = rs;
2077     }
2078     sctx.shift_top *= 1.1;
2079     sctx.nshift_max = 5;
2080     sctx.shift_lo   = 0.;
2081     sctx.shift_hi   = 1.;
2082   }
2083 
2084   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2085   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2086 
2087   /* allocate working arrays
2088      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2089      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2090   */
2091   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr);
2092 
2093   do {
2094     sctx.newshift = PETSC_FALSE;
2095 
2096     for (i=0; i<mbs; i++) c2r[i] = mbs;
2097     if (mbs) il[0] = 0;
2098 
2099     for (k = 0; k<mbs; k++) {
2100       /* zero rtmp */
2101       nz    = bi[k+1] - bi[k];
2102       bjtmp = bj + bi[k];
2103       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2104 
2105       /* load in initial unfactored row */
2106       bval = ba + bi[k];
2107       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2108       for (j = jmin; j < jmax; j++) {
2109         col = riip[aj[j]];
2110         if (col >= k) { /* only take upper triangular entry */
2111           rtmp[col] = aa[j];
2112           *bval++   = 0.0; /* for in-place factorization */
2113         }
2114       }
2115       /* shift the diagonal of the matrix: ZeropivotApply() */
2116       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */
2117 
2118       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2119       dk = rtmp[k];
2120       i  = c2r[k]; /* first row to be added to k_th row  */
2121 
2122       while (i < k) {
2123         nexti = c2r[i]; /* next row to be added to k_th row */
2124 
2125         /* compute multiplier, update diag(k) and U(i,k) */
2126         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2127         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2128         dk     += uikdi*ba[ili]; /* update diag[k] */
2129         ba[ili] = uikdi; /* -U(i,k) */
2130 
2131         /* add multiple of row i to k-th row */
2132         jmin = ili + 1; jmax = bi[i+1];
2133         if (jmin < jmax) {
2134           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2135           /* update il and c2r for row i */
2136           il[i] = jmin;
2137           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2138         }
2139         i = nexti;
2140       }
2141 
2142       /* copy data into U(k,:) */
2143       rs   = 0.0;
2144       jmin = bi[k]; jmax = bi[k+1]-1;
2145       if (jmin < jmax) {
2146         for (j=jmin; j<jmax; j++) {
2147           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2148         }
2149         /* add the k-th row into il and c2r */
2150         il[k] = jmin;
2151         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2152       }
2153 
2154       /* MatPivotCheck() */
2155       sctx.rs = rs;
2156       sctx.pv = dk;
2157       ierr    = MatPivotCheck(B,A,info,&sctx,i);CHKERRQ(ierr);
2158       if (sctx.newshift) break;
2159       dk = sctx.pv;
2160 
2161       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2162     }
2163   } while (sctx.newshift);
2164 
2165   ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr);
2166   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2167   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2168 
2169   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2170   if (perm_identity) {
2171     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2172     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2173     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2174     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2175   } else {
2176     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2177     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2178     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2179     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2180   }
2181 
2182   C->assembled    = PETSC_TRUE;
2183   C->preallocated = PETSC_TRUE;
2184 
2185   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2186 
2187   /* MatPivotView() */
2188   if (sctx.nshift) {
2189     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2190       ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr);
2191     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2192       ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2193     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2194       ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr);
2195     }
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2201 {
2202   Mat            C = B;
2203   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
2204   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
2205   IS             ip=b->row,iip = b->icol;
2206   PetscErrorCode ierr;
2207   const PetscInt *rip,*riip;
2208   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2209   PetscInt       *ai=a->i,*aj=a->j;
2210   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2211   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2212   PetscBool      perm_identity;
2213   FactorShiftCtx sctx;
2214   PetscReal      rs;
2215   MatScalar      d,*v;
2216 
2217   PetscFunctionBegin;
2218   /* MatPivotSetUp(): initialize shift context sctx */
2219   ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr);
2220 
2221   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2222     sctx.shift_top = info->zeropivot;
2223     for (i=0; i<mbs; i++) {
2224       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2225       d  = (aa)[a->diag[i]];
2226       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2227       v  = aa+ai[i];
2228       nz = ai[i+1] - ai[i];
2229       for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]);
2230       if (rs>sctx.shift_top) sctx.shift_top = rs;
2231     }
2232     sctx.shift_top *= 1.1;
2233     sctx.nshift_max = 5;
2234     sctx.shift_lo   = 0.;
2235     sctx.shift_hi   = 1.;
2236   }
2237 
2238   ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr);
2239   ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr);
2240 
2241   /* initialization */
2242   ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr);
2243 
2244   do {
2245     sctx.newshift = PETSC_FALSE;
2246 
2247     for (i=0; i<mbs; i++) jl[i] = mbs;
2248     il[0] = 0;
2249 
2250     for (k = 0; k<mbs; k++) {
2251       /* zero rtmp */
2252       nz    = bi[k+1] - bi[k];
2253       bjtmp = bj + bi[k];
2254       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2255 
2256       bval = ba + bi[k];
2257       /* initialize k-th row by the perm[k]-th row of A */
2258       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2259       for (j = jmin; j < jmax; j++) {
2260         col = riip[aj[j]];
2261         if (col >= k) { /* only take upper triangular entry */
2262           rtmp[col] = aa[j];
2263           *bval++   = 0.0; /* for in-place factorization */
2264         }
2265       }
2266       /* shift the diagonal of the matrix */
2267       if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2268 
2269       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2270       dk = rtmp[k];
2271       i  = jl[k]; /* first row to be added to k_th row  */
2272 
2273       while (i < k) {
2274         nexti = jl[i]; /* next row to be added to k_th row */
2275 
2276         /* compute multiplier, update diag(k) and U(i,k) */
2277         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2278         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
2279         dk     += uikdi*ba[ili];
2280         ba[ili] = uikdi; /* -U(i,k) */
2281 
2282         /* add multiple of row i to k-th row */
2283         jmin = ili + 1; jmax = bi[i+1];
2284         if (jmin < jmax) {
2285           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2286           /* update il and jl for row i */
2287           il[i] = jmin;
2288           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2289         }
2290         i = nexti;
2291       }
2292 
2293       /* shift the diagonals when zero pivot is detected */
2294       /* compute rs=sum of abs(off-diagonal) */
2295       rs   = 0.0;
2296       jmin = bi[k]+1;
2297       nz   = bi[k+1] - jmin;
2298       bcol = bj + jmin;
2299       for (j=0; j<nz; j++) {
2300         rs += PetscAbsScalar(rtmp[bcol[j]]);
2301       }
2302 
2303       sctx.rs = rs;
2304       sctx.pv = dk;
2305       ierr    = MatPivotCheck(B,A,info,&sctx,k);CHKERRQ(ierr);
2306       if (sctx.newshift) break;
2307       dk = sctx.pv;
2308 
2309       /* copy data into U(k,:) */
2310       ba[bi[k]] = 1.0/dk; /* U(k,k) */
2311       jmin      = bi[k]+1; jmax = bi[k+1];
2312       if (jmin < jmax) {
2313         for (j=jmin; j<jmax; j++) {
2314           col = bj[j]; ba[j] = rtmp[col];
2315         }
2316         /* add the k-th row into il and jl */
2317         il[k] = jmin;
2318         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2319       }
2320     }
2321   } while (sctx.newshift);
2322 
2323   ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr);
2324   ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr);
2325   ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr);
2326 
2327   ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr);
2328   if (perm_identity) {
2329     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2330     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2331     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2332     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2333   } else {
2334     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2335     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2336     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2337     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2338   }
2339 
2340   C->assembled    = PETSC_TRUE;
2341   C->preallocated = PETSC_TRUE;
2342 
2343   ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr);
2344   if (sctx.nshift) {
2345     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2346       ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2347     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2348       ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr);
2349     }
2350   }
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 /*
2355    icc() under revised new data structure.
2356    Factored arrays bj and ba are stored as
2357      U(0,:),...,U(i,:),U(n-1,:)
2358 
2359    ui=fact->i is an array of size n+1, in which
2360    ui+
2361      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2362      ui[n]:  points to U(n-1,n-1)+1
2363 
2364   udiag=fact->diag is an array of size n,in which
2365      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2366 
2367    U(i,:) contains udiag[i] as its last entry, i.e.,
2368     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2369 */
2370 
2371 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2372 {
2373   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2374   Mat_SeqSBAIJ       *b;
2375   PetscErrorCode     ierr;
2376   PetscBool          perm_identity,missing;
2377   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2378   const PetscInt     *rip,*riip;
2379   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2380   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2381   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2382   PetscReal          fill          =info->fill,levels=info->levels;
2383   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2384   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2385   PetscBT            lnkbt;
2386   IS                 iperm;
2387 
2388   PetscFunctionBegin;
2389   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2390   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2391   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2392   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2393   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2394 
2395   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2396   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2397   ui[0] = 0;
2398 
2399   /* ICC(0) without matrix ordering: simply rearrange column indices */
2400   if (!levels && perm_identity) {
2401     for (i=0; i<am; i++) {
2402       ncols    = ai[i+1] - a->diag[i];
2403       ui[i+1]  = ui[i] + ncols;
2404       udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2405     }
2406     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2407     cols = uj;
2408     for (i=0; i<am; i++) {
2409       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2410       ncols = ai[i+1] - a->diag[i] -1;
2411       for (j=0; j<ncols; j++) *cols++ = aj[j];
2412       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2413     }
2414   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2415     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2416     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2417 
2418     /* initialization */
2419     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2420 
2421     /* jl: linked list for storing indices of the pivot rows
2422        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2423     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2424     for (i=0; i<am; i++) {
2425       jl[i] = am; il[i] = 0;
2426     }
2427 
2428     /* create and initialize a linked list for storing column indices of the active row k */
2429     nlnk = am + 1;
2430     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2431 
2432     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2433     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2434     current_space     = free_space;
2435     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr);
2436     current_space_lvl = free_space_lvl;
2437 
2438     for (k=0; k<am; k++) {  /* for each active row k */
2439       /* initialize lnk by the column indices of row rip[k] of A */
2440       nzk   = 0;
2441       ncols = ai[rip[k]+1] - ai[rip[k]];
2442       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2443       ncols_upper = 0;
2444       for (j=0; j<ncols; j++) {
2445         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2446         if (riip[i] >= k) { /* only take upper triangular entry */
2447           ajtmp[ncols_upper] = i;
2448           ncols_upper++;
2449         }
2450       }
2451       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2452       nzk += nlnk;
2453 
2454       /* update lnk by computing fill-in for each pivot row to be merged in */
2455       prow = jl[k]; /* 1st pivot row */
2456 
2457       while (prow < k) {
2458         nextprow = jl[prow];
2459 
2460         /* merge prow into k-th row */
2461         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2462         jmax  = ui[prow+1];
2463         ncols = jmax-jmin;
2464         i     = jmin - ui[prow];
2465         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2466         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2467         j     = *(uj - 1);
2468         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2469         nzk  += nlnk;
2470 
2471         /* update il and jl for prow */
2472         if (jmin < jmax) {
2473           il[prow] = jmin;
2474           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2475         }
2476         prow = nextprow;
2477       }
2478 
2479       /* if free space is not available, make more free space */
2480       if (current_space->local_remaining<nzk) {
2481         i    = am - k + 1; /* num of unfactored rows */
2482         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2483         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2484         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2485         reallocs++;
2486       }
2487 
2488       /* copy data into free_space and free_space_lvl, then initialize lnk */
2489       if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2490       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2491 
2492       /* add the k-th row into il and jl */
2493       if (nzk > 1) {
2494         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2495         jl[k] = jl[i]; jl[i] = k;
2496         il[k] = ui[k] + 1;
2497       }
2498       uj_ptr[k]     = current_space->array;
2499       uj_lvl_ptr[k] = current_space_lvl->array;
2500 
2501       current_space->array           += nzk;
2502       current_space->local_used      += nzk;
2503       current_space->local_remaining -= nzk;
2504 
2505       current_space_lvl->array           += nzk;
2506       current_space_lvl->local_used      += nzk;
2507       current_space_lvl->local_remaining -= nzk;
2508 
2509       ui[k+1] = ui[k] + nzk;
2510     }
2511 
2512     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2513     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2514     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2515     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2516 
2517     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2518     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2519     ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor  */
2520     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2521     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2522 
2523   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2524 
2525   /* put together the new matrix in MATSEQSBAIJ format */
2526   b               = (Mat_SeqSBAIJ*)(fact)->data;
2527   b->singlemalloc = PETSC_FALSE;
2528 
2529   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2530 
2531   b->j             = uj;
2532   b->i             = ui;
2533   b->diag          = udiag;
2534   b->free_diag     = PETSC_TRUE;
2535   b->ilen          = NULL;
2536   b->imax          = NULL;
2537   b->row           = perm;
2538   b->col           = perm;
2539   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2540   ierr             = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2541   b->icol          = iperm;
2542   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2543 
2544   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2545   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2546 
2547   b->maxnz   = b->nz = ui[am];
2548   b->free_a  = PETSC_TRUE;
2549   b->free_ij = PETSC_TRUE;
2550 
2551   fact->info.factor_mallocs   = reallocs;
2552   fact->info.fill_ratio_given = fill;
2553   if (ai[am] != 0) {
2554     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2555     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2556   } else {
2557     fact->info.fill_ratio_needed = 0.0;
2558   }
2559 #if defined(PETSC_USE_INFO)
2560   if (ai[am] != 0) {
2561     PetscReal af = fact->info.fill_ratio_needed;
2562     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2563     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2564     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2565   } else {
2566     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
2567   }
2568 #endif
2569   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2574 {
2575   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2576   Mat_SeqSBAIJ       *b;
2577   PetscErrorCode     ierr;
2578   PetscBool          perm_identity,missing;
2579   PetscInt           reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2580   const PetscInt     *rip,*riip;
2581   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2582   PetscInt           nlnk,*lnk,*lnk_lvl=NULL,d;
2583   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2584   PetscReal          fill          =info->fill,levels=info->levels;
2585   PetscFreeSpaceList free_space    =NULL,current_space=NULL;
2586   PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL;
2587   PetscBT            lnkbt;
2588   IS                 iperm;
2589 
2590   PetscFunctionBegin;
2591   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2592   ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr);
2593   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2594   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2595   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2596 
2597   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2598   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2599   ui[0] = 0;
2600 
2601   /* ICC(0) without matrix ordering: simply copies fill pattern */
2602   if (!levels && perm_identity) {
2603 
2604     for (i=0; i<am; i++) {
2605       ui[i+1]  = ui[i] + ai[i+1] - a->diag[i];
2606       udiag[i] = ui[i];
2607     }
2608     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2609     cols = uj;
2610     for (i=0; i<am; i++) {
2611       aj    = a->j + a->diag[i];
2612       ncols = ui[i+1] - ui[i];
2613       for (j=0; j<ncols; j++) *cols++ = *aj++;
2614     }
2615   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2616     ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2617     ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2618 
2619     /* initialization */
2620     ierr = PetscMalloc1(am+1,&ajtmp);CHKERRQ(ierr);
2621 
2622     /* jl: linked list for storing indices of the pivot rows
2623        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2624     ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr);
2625     for (i=0; i<am; i++) {
2626       jl[i] = am; il[i] = 0;
2627     }
2628 
2629     /* create and initialize a linked list for storing column indices of the active row k */
2630     nlnk = am + 1;
2631     ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2632 
2633     /* initial FreeSpace size is fill*(ai[am]+1) */
2634     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
2635     current_space     = free_space;
2636     ierr              = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space_lvl);CHKERRQ(ierr);
2637     current_space_lvl = free_space_lvl;
2638 
2639     for (k=0; k<am; k++) {  /* for each active row k */
2640       /* initialize lnk by the column indices of row rip[k] of A */
2641       nzk   = 0;
2642       ncols = ai[rip[k]+1] - ai[rip[k]];
2643       if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2644       ncols_upper = 0;
2645       for (j=0; j<ncols; j++) {
2646         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2647         if (riip[i] >= k) { /* only take upper triangular entry */
2648           ajtmp[ncols_upper] = i;
2649           ncols_upper++;
2650         }
2651       }
2652       ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr);
2653       nzk += nlnk;
2654 
2655       /* update lnk by computing fill-in for each pivot row to be merged in */
2656       prow = jl[k]; /* 1st pivot row */
2657 
2658       while (prow < k) {
2659         nextprow = jl[prow];
2660 
2661         /* merge prow into k-th row */
2662         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2663         jmax  = ui[prow+1];
2664         ncols = jmax-jmin;
2665         i     = jmin - ui[prow];
2666         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2667         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2668         j     = *(uj - 1);
2669         ierr  = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr);
2670         nzk  += nlnk;
2671 
2672         /* update il and jl for prow */
2673         if (jmin < jmax) {
2674           il[prow] = jmin;
2675           j        = *cols; jl[prow] = jl[j]; jl[j] = prow;
2676         }
2677         prow = nextprow;
2678       }
2679 
2680       /* if free space is not available, make more free space */
2681       if (current_space->local_remaining<nzk) {
2682         i    = am - k + 1; /* num of unfactored rows */
2683         i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2684         ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2685         ierr = PetscFreeSpaceGet(i,&current_space_lvl);CHKERRQ(ierr);
2686         reallocs++;
2687       }
2688 
2689       /* copy data into free_space and free_space_lvl, then initialize lnk */
2690       if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2691       ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr);
2692 
2693       /* add the k-th row into il and jl */
2694       if (nzk > 1) {
2695         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2696         jl[k] = jl[i]; jl[i] = k;
2697         il[k] = ui[k] + 1;
2698       }
2699       uj_ptr[k]     = current_space->array;
2700       uj_lvl_ptr[k] = current_space_lvl->array;
2701 
2702       current_space->array           += nzk;
2703       current_space->local_used      += nzk;
2704       current_space->local_remaining -= nzk;
2705 
2706       current_space_lvl->array           += nzk;
2707       current_space_lvl->local_used      += nzk;
2708       current_space_lvl->local_remaining -= nzk;
2709 
2710       ui[k+1] = ui[k] + nzk;
2711     }
2712 
2713 #if defined(PETSC_USE_INFO)
2714     if (ai[am] != 0) {
2715       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2716       ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2717       ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2718       ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2719     } else {
2720       ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
2721     }
2722 #endif
2723 
2724     ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2725     ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2726     ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr);
2727     ierr = PetscFree(ajtmp);CHKERRQ(ierr);
2728 
2729     /* destroy list of free space and other temporary array(s) */
2730     ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2731     ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
2732     ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2733     ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr);
2734 
2735   } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2736 
2737   /* put together the new matrix in MATSEQSBAIJ format */
2738 
2739   b               = (Mat_SeqSBAIJ*)fact->data;
2740   b->singlemalloc = PETSC_FALSE;
2741 
2742   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2743 
2744   b->j         = uj;
2745   b->i         = ui;
2746   b->diag      = udiag;
2747   b->free_diag = PETSC_TRUE;
2748   b->ilen      = NULL;
2749   b->imax      = NULL;
2750   b->row       = perm;
2751   b->col       = perm;
2752 
2753   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2754   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2755 
2756   b->icol          = iperm;
2757   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2758   ierr             = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2759   ierr             = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2760   b->maxnz         = b->nz = ui[am];
2761   b->free_a        = PETSC_TRUE;
2762   b->free_ij       = PETSC_TRUE;
2763 
2764   fact->info.factor_mallocs   = reallocs;
2765   fact->info.fill_ratio_given = fill;
2766   if (ai[am] != 0) {
2767     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2768   } else {
2769     fact->info.fill_ratio_needed = 0.0;
2770   }
2771   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2776 {
2777   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2778   Mat_SeqSBAIJ       *b;
2779   PetscErrorCode     ierr;
2780   PetscBool          perm_identity,missing;
2781   PetscReal          fill = info->fill;
2782   const PetscInt     *rip,*riip;
2783   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2784   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2785   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2786   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2787   PetscBT            lnkbt;
2788   IS                 iperm;
2789 
2790   PetscFunctionBegin;
2791   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2792   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
2793   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2794 
2795   /* check whether perm is the identity mapping */
2796   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2797   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2798   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2799   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2800 
2801   /* initialization */
2802   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2803   ierr  = PetscMalloc1(am+1,&udiag);CHKERRQ(ierr);
2804   ui[0] = 0;
2805 
2806   /* jl: linked list for storing indices of the pivot rows
2807      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2808   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
2809   for (i=0; i<am; i++) {
2810     jl[i] = am; il[i] = 0;
2811   }
2812 
2813   /* create and initialize a linked list for storing column indices of the active row k */
2814   nlnk = am + 1;
2815   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2816 
2817   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2818   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,(ai[am]+am)/2),&free_space);CHKERRQ(ierr);
2819   current_space = free_space;
2820 
2821   for (k=0; k<am; k++) {  /* for each active row k */
2822     /* initialize lnk by the column indices of row rip[k] of A */
2823     nzk   = 0;
2824     ncols = ai[rip[k]+1] - ai[rip[k]];
2825     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2826     ncols_upper = 0;
2827     for (j=0; j<ncols; j++) {
2828       i = riip[*(aj + ai[rip[k]] + j)];
2829       if (i >= k) { /* only take upper triangular entry */
2830         cols[ncols_upper] = i;
2831         ncols_upper++;
2832       }
2833     }
2834     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2835     nzk += nlnk;
2836 
2837     /* update lnk by computing fill-in for each pivot row to be merged in */
2838     prow = jl[k]; /* 1st pivot row */
2839 
2840     while (prow < k) {
2841       nextprow = jl[prow];
2842       /* merge prow into k-th row */
2843       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2844       jmax   = ui[prow+1];
2845       ncols  = jmax-jmin;
2846       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2847       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2848       nzk   += nlnk;
2849 
2850       /* update il and jl for prow */
2851       if (jmin < jmax) {
2852         il[prow] = jmin;
2853         j        = *uj_ptr;
2854         jl[prow] = jl[j];
2855         jl[j]    = prow;
2856       }
2857       prow = nextprow;
2858     }
2859 
2860     /* if free space is not available, make more free space */
2861     if (current_space->local_remaining<nzk) {
2862       i    = am - k + 1; /* num of unfactored rows */
2863       i    = PetscIntMultTruncate(i,PetscMin(nzk,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2864       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
2865       reallocs++;
2866     }
2867 
2868     /* copy data into free space, then initialize lnk */
2869     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
2870 
2871     /* add the k-th row into il and jl */
2872     if (nzk > 1) {
2873       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2874       jl[k] = jl[i]; jl[i] = k;
2875       il[k] = ui[k] + 1;
2876     }
2877     ui_ptr[k] = current_space->array;
2878 
2879     current_space->array           += nzk;
2880     current_space->local_used      += nzk;
2881     current_space->local_remaining -= nzk;
2882 
2883     ui[k+1] = ui[k] + nzk;
2884   }
2885 
2886   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
2887   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
2888   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
2889 
2890   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2891   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
2892   ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */
2893   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
2894 
2895   /* put together the new matrix in MATSEQSBAIJ format */
2896 
2897   b               = (Mat_SeqSBAIJ*)fact->data;
2898   b->singlemalloc = PETSC_FALSE;
2899   b->free_a       = PETSC_TRUE;
2900   b->free_ij      = PETSC_TRUE;
2901 
2902   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
2903 
2904   b->j         = uj;
2905   b->i         = ui;
2906   b->diag      = udiag;
2907   b->free_diag = PETSC_TRUE;
2908   b->ilen      = NULL;
2909   b->imax      = NULL;
2910   b->row       = perm;
2911   b->col       = perm;
2912 
2913   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2914   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
2915 
2916   b->icol          = iperm;
2917   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2918 
2919   ierr = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
2920   ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
2921 
2922   b->maxnz = b->nz = ui[am];
2923 
2924   fact->info.factor_mallocs   = reallocs;
2925   fact->info.fill_ratio_given = fill;
2926   if (ai[am] != 0) {
2927     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2928     fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am);
2929   } else {
2930     fact->info.fill_ratio_needed = 0.0;
2931   }
2932 #if defined(PETSC_USE_INFO)
2933   if (ai[am] != 0) {
2934     PetscReal af = fact->info.fill_ratio_needed;
2935     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
2936     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
2937     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
2938   } else {
2939     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
2940   }
2941 #endif
2942   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2943   PetscFunctionReturn(0);
2944 }
2945 
2946 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2947 {
2948   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
2949   Mat_SeqSBAIJ       *b;
2950   PetscErrorCode     ierr;
2951   PetscBool          perm_identity,missing;
2952   PetscReal          fill = info->fill;
2953   const PetscInt     *rip,*riip;
2954   PetscInt           i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2955   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2956   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2957   PetscFreeSpaceList free_space=NULL,current_space=NULL;
2958   PetscBT            lnkbt;
2959   IS                 iperm;
2960 
2961   PetscFunctionBegin;
2962   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2963   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
2964   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
2965 
2966   /* check whether perm is the identity mapping */
2967   ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr);
2968   ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr);
2969   ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr);
2970   ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr);
2971 
2972   /* initialization */
2973   ierr  = PetscMalloc1(am+1,&ui);CHKERRQ(ierr);
2974   ui[0] = 0;
2975 
2976   /* jl: linked list for storing indices of the pivot rows
2977      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2978   ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr);
2979   for (i=0; i<am; i++) {
2980     jl[i] = am; il[i] = 0;
2981   }
2982 
2983   /* create and initialize a linked list for storing column indices of the active row k */
2984   nlnk = am + 1;
2985   ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
2986 
2987   /* initial FreeSpace size is fill*(ai[am]+1) */
2988   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[am]+1),&free_space);CHKERRQ(ierr);
2989   current_space = free_space;
2990 
2991   for (k=0; k<am; k++) {  /* for each active row k */
2992     /* initialize lnk by the column indices of row rip[k] of A */
2993     nzk   = 0;
2994     ncols = ai[rip[k]+1] - ai[rip[k]];
2995     if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2996     ncols_upper = 0;
2997     for (j=0; j<ncols; j++) {
2998       i = riip[*(aj + ai[rip[k]] + j)];
2999       if (i >= k) { /* only take upper triangular entry */
3000         cols[ncols_upper] = i;
3001         ncols_upper++;
3002       }
3003     }
3004     ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3005     nzk += nlnk;
3006 
3007     /* update lnk by computing fill-in for each pivot row to be merged in */
3008     prow = jl[k]; /* 1st pivot row */
3009 
3010     while (prow < k) {
3011       nextprow = jl[prow];
3012       /* merge prow into k-th row */
3013       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3014       jmax   = ui[prow+1];
3015       ncols  = jmax-jmin;
3016       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3017       ierr   = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3018       nzk   += nlnk;
3019 
3020       /* update il and jl for prow */
3021       if (jmin < jmax) {
3022         il[prow] = jmin;
3023         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3024       }
3025       prow = nextprow;
3026     }
3027 
3028     /* if free space is not available, make more free space */
3029     if (current_space->local_remaining<nzk) {
3030       i    = am - k + 1; /* num of unfactored rows */
3031       i    = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3032       ierr = PetscFreeSpaceGet(i,&current_space);CHKERRQ(ierr);
3033       reallocs++;
3034     }
3035 
3036     /* copy data into free space, then initialize lnk */
3037     ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3038 
3039     /* add the k-th row into il and jl */
3040     if (nzk-1 > 0) {
3041       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3042       jl[k] = jl[i]; jl[i] = k;
3043       il[k] = ui[k] + 1;
3044     }
3045     ui_ptr[k] = current_space->array;
3046 
3047     current_space->array           += nzk;
3048     current_space->local_used      += nzk;
3049     current_space->local_remaining -= nzk;
3050 
3051     ui[k+1] = ui[k] + nzk;
3052   }
3053 
3054 #if defined(PETSC_USE_INFO)
3055   if (ai[am] != 0) {
3056     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3057     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr);
3058     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
3059     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr);
3060   } else {
3061     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
3062   }
3063 #endif
3064 
3065   ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr);
3066   ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr);
3067   ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr);
3068 
3069   /* destroy list of free space and other temporary array(s) */
3070   ierr = PetscMalloc1(ui[am]+1,&uj);CHKERRQ(ierr);
3071   ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr);
3072   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3073 
3074   /* put together the new matrix in MATSEQSBAIJ format */
3075 
3076   b               = (Mat_SeqSBAIJ*)fact->data;
3077   b->singlemalloc = PETSC_FALSE;
3078   b->free_a       = PETSC_TRUE;
3079   b->free_ij      = PETSC_TRUE;
3080 
3081   ierr = PetscMalloc1(ui[am]+1,&b->a);CHKERRQ(ierr);
3082 
3083   b->j    = uj;
3084   b->i    = ui;
3085   b->diag = NULL;
3086   b->ilen = NULL;
3087   b->imax = NULL;
3088   b->row  = perm;
3089   b->col  = perm;
3090 
3091   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3092   ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr);
3093 
3094   b->icol          = iperm;
3095   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3096 
3097   ierr     = PetscMalloc1(am+1,&b->solve_work);CHKERRQ(ierr);
3098   ierr     = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3099   b->maxnz = b->nz = ui[am];
3100 
3101   fact->info.factor_mallocs   = reallocs;
3102   fact->info.fill_ratio_given = fill;
3103   if (ai[am] != 0) {
3104     fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3105   } else {
3106     fact->info.fill_ratio_needed = 0.0;
3107   }
3108   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3109   PetscFunctionReturn(0);
3110 }
3111 
3112 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3113 {
3114   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)A->data;
3115   PetscErrorCode    ierr;
3116   PetscInt          n   = A->rmap->n;
3117   const PetscInt    *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3118   PetscScalar       *x,sum;
3119   const PetscScalar *b;
3120   const MatScalar   *aa = a->a,*v;
3121   PetscInt          i,nz;
3122 
3123   PetscFunctionBegin;
3124   if (!n) PetscFunctionReturn(0);
3125 
3126   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3127   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
3128 
3129   /* forward solve the lower triangular */
3130   x[0] = b[0];
3131   v    = aa;
3132   vi   = aj;
3133   for (i=1; i<n; i++) {
3134     nz  = ai[i+1] - ai[i];
3135     sum = b[i];
3136     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3137     v   += nz;
3138     vi  += nz;
3139     x[i] = sum;
3140   }
3141 
3142   /* backward solve the upper triangular */
3143   for (i=n-1; i>=0; i--) {
3144     v   = aa + adiag[i+1] + 1;
3145     vi  = aj + adiag[i+1] + 1;
3146     nz  = adiag[i] - adiag[i+1]-1;
3147     sum = x[i];
3148     PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3149     x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3150   }
3151 
3152   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3153   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3154   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
3155   PetscFunctionReturn(0);
3156 }
3157 
3158 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3159 {
3160   Mat_SeqAIJ        *a    = (Mat_SeqAIJ*)A->data;
3161   IS                iscol = a->col,isrow = a->row;
3162   PetscErrorCode    ierr;
3163   PetscInt          i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3164   const PetscInt    *rout,*cout,*r,*c;
3165   PetscScalar       *x,*tmp,sum;
3166   const PetscScalar *b;
3167   const MatScalar   *aa = a->a,*v;
3168 
3169   PetscFunctionBegin;
3170   if (!n) PetscFunctionReturn(0);
3171 
3172   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3173   ierr = VecGetArrayWrite(xx,&x);CHKERRQ(ierr);
3174   tmp  = a->solve_work;
3175 
3176   ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout;
3177   ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout;
3178 
3179   /* forward solve the lower triangular */
3180   tmp[0] = b[r[0]];
3181   v      = aa;
3182   vi     = aj;
3183   for (i=1; i<n; i++) {
3184     nz  = ai[i+1] - ai[i];
3185     sum = b[r[i]];
3186     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3187     tmp[i] = sum;
3188     v     += nz; vi += nz;
3189   }
3190 
3191   /* backward solve the upper triangular */
3192   for (i=n-1; i>=0; i--) {
3193     v   = aa + adiag[i+1]+1;
3194     vi  = aj + adiag[i+1]+1;
3195     nz  = adiag[i]-adiag[i+1]-1;
3196     sum = tmp[i];
3197     PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3198     x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3199   }
3200 
3201   ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr);
3202   ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr);
3203   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3204   ierr = VecRestoreArrayWrite(xx,&x);CHKERRQ(ierr);
3205   ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr);
3206   PetscFunctionReturn(0);
3207 }
3208 
3209 /*
3210     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3211 */
3212 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3213 {
3214   Mat            B = *fact;
3215   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b;
3216   IS             isicol;
3217   PetscErrorCode ierr;
3218   const PetscInt *r,*ic;
3219   PetscInt       i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3220   PetscInt       *bi,*bj,*bdiag,*bdiag_rev;
3221   PetscInt       row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3222   PetscInt       nlnk,*lnk;
3223   PetscBT        lnkbt;
3224   PetscBool      row_identity,icol_identity;
3225   MatScalar      *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3226   const PetscInt *ics;
3227   PetscInt       j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3228   PetscReal      dt     =info->dt,shift=info->shiftamount;
3229   PetscInt       dtcount=(PetscInt)info->dtcount,nnz_max;
3230   PetscBool      missing;
3231 
3232   PetscFunctionBegin;
3233   if (dt      == PETSC_DEFAULT) dt = 0.005;
3234   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3235 
3236   /* ------- symbolic factorization, can be reused ---------*/
3237   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
3238   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3239   adiag=a->diag;
3240 
3241   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
3242 
3243   /* bdiag is location of diagonal in factor */
3244   ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);     /* becomes b->diag */
3245   ierr = PetscMalloc1(n+1,&bdiag_rev);CHKERRQ(ierr); /* temporary */
3246 
3247   /* allocate row pointers bi */
3248   ierr = PetscMalloc1(2*n+2,&bi);CHKERRQ(ierr);
3249 
3250   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3251   if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3252   nnz_max = ai[n]+2*n*dtcount+2;
3253 
3254   ierr = PetscMalloc1(nnz_max+1,&bj);CHKERRQ(ierr);
3255   ierr = PetscMalloc1(nnz_max+1,&ba);CHKERRQ(ierr);
3256 
3257   /* put together the new matrix */
3258   ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3259   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
3260   b    = (Mat_SeqAIJ*)B->data;
3261 
3262   b->free_a       = PETSC_TRUE;
3263   b->free_ij      = PETSC_TRUE;
3264   b->singlemalloc = PETSC_FALSE;
3265 
3266   b->a    = ba;
3267   b->j    = bj;
3268   b->i    = bi;
3269   b->diag = bdiag;
3270   b->ilen = NULL;
3271   b->imax = NULL;
3272   b->row  = isrow;
3273   b->col  = iscol;
3274   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
3275   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
3276   b->icol = isicol;
3277 
3278   ierr     = PetscMalloc1(n+1,&b->solve_work);CHKERRQ(ierr);
3279   ierr     = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr);
3280   b->maxnz = nnz_max;
3281 
3282   B->factortype            = MAT_FACTOR_ILUDT;
3283   B->info.factor_mallocs   = 0;
3284   B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3285   /* ------- end of symbolic factorization ---------*/
3286 
3287   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3288   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3289   ics  = ic;
3290 
3291   /* linked list for storing column indices of the active row */
3292   nlnk = n + 1;
3293   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3294 
3295   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3296   ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr);
3297   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3298   ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr);
3299   ierr = PetscArrayzero(rtmp,n);CHKERRQ(ierr);
3300 
3301   bi[0]        = 0;
3302   bdiag[0]     = nnz_max-1; /* location of diag[0] in factor B */
3303   bdiag_rev[n] = bdiag[0];
3304   bi[2*n+1]    = bdiag[0]+1; /* endof bj and ba array */
3305   for (i=0; i<n; i++) {
3306     /* copy initial fill into linked list */
3307     nzi = ai[r[i]+1] - ai[r[i]];
3308     if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
3309     nzi_al = adiag[r[i]] - ai[r[i]];
3310     nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3311     ajtmp  = aj + ai[r[i]];
3312     ierr   = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3313 
3314     /* load in initial (unfactored row) */
3315     aatmp = a->a + ai[r[i]];
3316     for (j=0; j<nzi; j++) {
3317       rtmp[ics[*ajtmp++]] = *aatmp++;
3318     }
3319 
3320     /* add pivot rows into linked list */
3321     row = lnk[n];
3322     while (row < i) {
3323       nzi_bl = bi[row+1] - bi[row] + 1;
3324       bjtmp  = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3325       ierr   = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr);
3326       nzi   += nlnk;
3327       row    = lnk[row];
3328     }
3329 
3330     /* copy data from lnk into jtmp, then initialize lnk */
3331     ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr);
3332 
3333     /* numerical factorization */
3334     bjtmp = jtmp;
3335     row   = *bjtmp++; /* 1st pivot row */
3336     while (row < i) {
3337       pc         = rtmp + row;
3338       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3339       multiplier = (*pc) * (*pv);
3340       *pc        = multiplier;
3341       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3342         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3343         pv = ba + bdiag[row+1] + 1;
3344         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3345         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3346         ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
3347       }
3348       row = *bjtmp++;
3349     }
3350 
3351     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3352     diag_tmp = rtmp[i];  /* save diagonal value - may not needed?? */
3353     nzi_bl   = 0; j = 0;
3354     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3355       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3356       nzi_bl++; j++;
3357     }
3358     nzi_bu = nzi - nzi_bl -1;
3359     while (j < nzi) {
3360       vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3361       j++;
3362     }
3363 
3364     bjtmp = bj + bi[i];
3365     batmp = ba + bi[i];
3366     /* apply level dropping rule to L part */
3367     ncut = nzi_al + dtcount;
3368     if (ncut < nzi_bl) {
3369       ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr);
3370       ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr);
3371     } else {
3372       ncut = nzi_bl;
3373     }
3374     for (j=0; j<ncut; j++) {
3375       bjtmp[j] = jtmp[j];
3376       batmp[j] = vtmp[j];
3377     }
3378     bi[i+1] = bi[i] + ncut;
3379     nzi     = ncut + 1;
3380 
3381     /* apply level dropping rule to U part */
3382     ncut = nzi_au + dtcount;
3383     if (ncut < nzi_bu) {
3384       ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr);
3385       ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr);
3386     } else {
3387       ncut = nzi_bu;
3388     }
3389     nzi += ncut;
3390 
3391     /* mark bdiagonal */
3392     bdiag[i+1]       = bdiag[i] - (ncut + 1);
3393     bdiag_rev[n-i-1] = bdiag[i+1];
3394     bi[2*n - i]      = bi[2*n - i +1] - (ncut + 1);
3395     bjtmp            = bj + bdiag[i];
3396     batmp            = ba + bdiag[i];
3397     *bjtmp           = i;
3398     *batmp           = diag_tmp; /* rtmp[i]; */
3399     if (*batmp == 0.0) {
3400       *batmp = dt+shift;
3401     }
3402     *batmp = 1.0/(*batmp); /* invert diagonal entries for simpler triangular solves */
3403 
3404     bjtmp = bj + bdiag[i+1]+1;
3405     batmp = ba + bdiag[i+1]+1;
3406     for (k=0; k<ncut; k++) {
3407       bjtmp[k] = jtmp[nzi_bl+1+k];
3408       batmp[k] = vtmp[nzi_bl+1+k];
3409     }
3410 
3411     im[i] = nzi;   /* used by PetscLLAddSortedLU() */
3412   } /* for (i=0; i<n; i++) */
3413   if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3414 
3415   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3416   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3417 
3418   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3419   ierr = PetscFree2(im,jtmp);CHKERRQ(ierr);
3420   ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr);
3421   ierr = PetscFree(bdiag_rev);CHKERRQ(ierr);
3422 
3423   ierr     = PetscLogFlops(B->cmap->n);CHKERRQ(ierr);
3424   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3425 
3426   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3427   ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr);
3428   if (row_identity && icol_identity) {
3429     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3430   } else {
3431     B->ops->solve = MatSolve_SeqAIJ;
3432   }
3433 
3434   B->ops->solveadd          = NULL;
3435   B->ops->solvetranspose    = NULL;
3436   B->ops->solvetransposeadd = NULL;
3437   B->ops->matsolve          = NULL;
3438   B->assembled              = PETSC_TRUE;
3439   B->preallocated           = PETSC_TRUE;
3440   PetscFunctionReturn(0);
3441 }
3442 
3443 /* a wraper of MatILUDTFactor_SeqAIJ() */
3444 /*
3445     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3446 */
3447 
3448 PetscErrorCode  MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3449 {
3450   PetscErrorCode ierr;
3451 
3452   PetscFunctionBegin;
3453   ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr);
3454   PetscFunctionReturn(0);
3455 }
3456 
3457 /*
3458    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3459    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3460 */
3461 /*
3462     This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3463 */
3464 
3465 PetscErrorCode  MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3466 {
3467   Mat            C     =fact;
3468   Mat_SeqAIJ     *a    =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data;
3469   IS             isrow = b->row,isicol = b->icol;
3470   PetscErrorCode ierr;
3471   const PetscInt *r,*ic,*ics;
3472   PetscInt       i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3473   PetscInt       *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3474   MatScalar      *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3475   PetscReal      dt=info->dt,shift=info->shiftamount;
3476   PetscBool      row_identity, col_identity;
3477 
3478   PetscFunctionBegin;
3479   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
3480   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
3481   ierr = PetscMalloc1(n+1,&rtmp);CHKERRQ(ierr);
3482   ics  = ic;
3483 
3484   for (i=0; i<n; i++) {
3485     /* initialize rtmp array */
3486     nzl   = bi[i+1] - bi[i];       /* num of nozeros in L(i,:) */
3487     bjtmp = bj + bi[i];
3488     for  (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3489     rtmp[i] = 0.0;
3490     nzu     = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3491     bjtmp   = bj + bdiag[i+1] + 1;
3492     for  (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3493 
3494     /* load in initial unfactored row of A */
3495     nz    = ai[r[i]+1] - ai[r[i]];
3496     ajtmp = aj + ai[r[i]];
3497     v     = aa + ai[r[i]];
3498     for (j=0; j<nz; j++) {
3499       rtmp[ics[*ajtmp++]] = v[j];
3500     }
3501 
3502     /* numerical factorization */
3503     bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3504     nzl   = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3505     k     = 0;
3506     while (k < nzl) {
3507       row        = *bjtmp++;
3508       pc         = rtmp + row;
3509       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3510       multiplier = (*pc) * (*pv);
3511       *pc        = multiplier;
3512       if (PetscAbsScalar(multiplier) > dt) {
3513         pj = bj + bdiag[row+1] + 1;         /* point to 1st entry of U(row,:) */
3514         pv = b->a + bdiag[row+1] + 1;
3515         nz = bdiag[row] - bdiag[row+1] - 1;         /* num of entries in U(row,:), excluding diagonal */
3516         for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3517         ierr = PetscLogFlops(1+2.0*nz);CHKERRQ(ierr);
3518       }
3519       k++;
3520     }
3521 
3522     /* finished row so stick it into b->a */
3523     /* L-part */
3524     pv  = b->a + bi[i];
3525     pj  = bj + bi[i];
3526     nzl = bi[i+1] - bi[i];
3527     for (j=0; j<nzl; j++) {
3528       pv[j] = rtmp[pj[j]];
3529     }
3530 
3531     /* diagonal: invert diagonal entries for simpler triangular solves */
3532     if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3533     b->a[bdiag[i]] = 1.0/rtmp[i];
3534 
3535     /* U-part */
3536     pv  = b->a + bdiag[i+1] + 1;
3537     pj  = bj + bdiag[i+1] + 1;
3538     nzu = bdiag[i] - bdiag[i+1] - 1;
3539     for (j=0; j<nzu; j++) {
3540       pv[j] = rtmp[pj[j]];
3541     }
3542   }
3543 
3544   ierr = PetscFree(rtmp);CHKERRQ(ierr);
3545   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
3546   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
3547 
3548   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
3549   ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr);
3550   if (row_identity && col_identity) {
3551     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3552   } else {
3553     C->ops->solve = MatSolve_SeqAIJ;
3554   }
3555   C->ops->solveadd          = NULL;
3556   C->ops->solvetranspose    = NULL;
3557   C->ops->solvetransposeadd = NULL;
3558   C->ops->matsolve          = NULL;
3559   C->assembled              = PETSC_TRUE;
3560   C->preallocated           = PETSC_TRUE;
3561 
3562   ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr);
3563   PetscFunctionReturn(0);
3564 }
3565