xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 362febeeeb69b91ebadcb4b2dc0a22cb6dfc4097)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Notes:
18     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
19      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
20      we recommend not using this functionality.
21 
22 .seealso: PCFactorSetMatSolverType(), MatSolverType, PCFactorSetLevels(), PCFactorSetDropTolerance()
23 
24 M*/
25 
26 /*
27   spbas_memory_requirement:
28     Calculate the number of bytes needed to store tha matrix
29 */
30 size_t spbas_memory_requirement(spbas_matrix matrix)
31 {
32   size_t memreq = 6 * sizeof(PetscInt)  + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
33                     sizeof(PetscBool)               + /* block_data */
34                     sizeof(PetscScalar**)           + /* values */
35                     sizeof(PetscScalar*)            + /* alloc_val */
36                     2 * sizeof(PetscInt**)          + /* icols, icols0 */
37                     2 * sizeof(PetscInt*)           + /* row_nnz, alloc_icol */
38                     matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */
39                     matrix.nrows * sizeof(PetscInt*); /* icols[*] */
40 
41   /* icol0[*] */
42   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
43 
44   /* icols[*][*] */
45   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
46   else memreq += matrix.nnz * sizeof(PetscInt);
47 
48   if (matrix.values) {
49     memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
50     /* values[*][*] */
51     if (matrix.block_data) memreq += matrix.n_alloc_val  * sizeof(PetscScalar);
52     else memreq += matrix.nnz * sizeof(PetscScalar);
53   }
54   return memreq;
55 }
56 
57 /*
58   spbas_allocate_pattern:
59     allocate the pattern arrays row_nnz, icols and optionally values
60 */
61 PetscErrorCode spbas_allocate_pattern(spbas_matrix * result, PetscBool do_values)
62 {
63   PetscErrorCode ierr;
64   PetscInt       nrows        = result->nrows;
65   PetscInt       col_idx_type = result->col_idx_type;
66 
67   PetscFunctionBegin;
68   /* Allocate sparseness pattern */
69   ierr = PetscMalloc1(nrows,&result->row_nnz);CHKERRQ(ierr);
70   ierr = PetscMalloc1(nrows,&result->icols);CHKERRQ(ierr);
71 
72   /* If offsets are given wrt an array, create array */
73   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
74     ierr = PetscMalloc1(nrows,&result->icol0);CHKERRQ(ierr);
75   } else  {
76     result->icol0 = NULL;
77   }
78 
79   /* If values are given, allocate values array */
80   if (do_values)  {
81     ierr = PetscMalloc1(nrows,&result->values);CHKERRQ(ierr);
82   } else {
83     result->values = NULL;
84   }
85   PetscFunctionReturn(0);
86 }
87 
88 /*
89 spbas_allocate_data:
90    in case of block_data:
91        Allocate the data arrays alloc_icol and optionally alloc_val,
92        set appropriate pointers from icols and values;
93    in case of !block_data:
94        Allocate the arrays icols[i] and optionally values[i]
95 */
96 PetscErrorCode spbas_allocate_data(spbas_matrix * result)
97 {
98   PetscInt       i;
99   PetscInt       nnz   = result->nnz;
100   PetscInt       nrows = result->nrows;
101   PetscInt       r_nnz;
102   PetscErrorCode ierr;
103   PetscBool      do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
104   PetscBool      block_data = result->block_data;
105 
106   PetscFunctionBegin;
107   if (block_data) {
108     /* Allocate the column number array and point to it */
109     result->n_alloc_icol = nnz;
110 
111     ierr = PetscMalloc1(nnz, &result->alloc_icol);CHKERRQ(ierr);
112 
113     result->icols[0] = result->alloc_icol;
114     for (i=1; i<nrows; i++)  {
115       result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
116     }
117 
118     /* Allocate the value array and point to it */
119     if (do_values) {
120       result->n_alloc_val = nnz;
121 
122       ierr = PetscMalloc1(nnz, &result->alloc_val);CHKERRQ(ierr);
123 
124       result->values[0] = result->alloc_val;
125       for (i=1; i<nrows; i++) {
126         result->values[i] = result->values[i-1] + result->row_nnz[i-1];
127       }
128     }
129   } else {
130     for (i=0; i<nrows; i++)   {
131       r_nnz = result->row_nnz[i];
132       ierr  = PetscMalloc1(r_nnz, &result->icols[i]);CHKERRQ(ierr);
133     }
134     if (do_values) {
135       for (i=0; i<nrows; i++)  {
136         r_nnz = result->row_nnz[i];
137         ierr  = PetscMalloc1(r_nnz, &result->values[i]);CHKERRQ(ierr);
138       }
139     }
140   }
141   PetscFunctionReturn(0);
142 }
143 
144 /*
145   spbas_row_order_icol
146      determine if row i1 should come
147        + before row i2 in the sorted rows (return -1),
148        + after (return 1)
149        + is identical (return 0).
150 */
151 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
152 {
153   PetscInt j;
154   PetscInt nnz1    = irow_in[i1+1] - irow_in[i1];
155   PetscInt nnz2    = irow_in[i2+1] - irow_in[i2];
156   PetscInt * icol1 = &icol_in[irow_in[i1]];
157   PetscInt * icol2 = &icol_in[irow_in[i2]];
158 
159   if (nnz1<nnz2) return -1;
160   if (nnz1>nnz2) return 1;
161 
162   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
163     for (j=0; j<nnz1; j++) {
164       if (icol1[j]< icol2[j]) return -1;
165       if (icol1[j]> icol2[j]) return 1;
166     }
167   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
168     for (j=0; j<nnz1; j++) {
169       if (icol1[j]-i1< icol2[j]-i2) return -1;
170       if (icol1[j]-i1> icol2[j]-i2) return 1;
171     }
172   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
173     for (j=1; j<nnz1; j++) {
174       if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) return -1;
175       if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) return 1;
176     }
177   }
178   return 0;
179 }
180 
181 /*
182   spbas_mergesort_icols:
183     return a sorting of the rows in which identical sparseness patterns are
184     next to each other
185 */
186 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
187 {
188   PetscErrorCode ierr;
189   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
190   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
191   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
192   PetscInt       *ialloc;     /* Allocated arrays */
193   PetscInt       *iswap;      /* auxiliary pointers for swapping */
194   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
195   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
196 
197   PetscFunctionBegin;
198   ierr = PetscMalloc1(nrows,&ialloc);CHKERRQ(ierr);
199 
200   ihlp1 = ialloc;
201   ihlp2 = isort;
202 
203   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
204   for (istep=1; istep<nrows; istep*=2) {
205     /*
206       Combine sorted parts
207           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
208       of ihlp2 and vhlp2
209 
210       into one sorted part
211           istart:istart+2*istep-1
212       of ihlp1 and vhlp1
213     */
214     for (istart=0; istart<nrows; istart+=2*istep) {
215       /* Set counters and bound array part endings */
216       i1=istart;        i1end = i1+istep;  if (i1end>nrows) i1end=nrows;
217       i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) i2end=nrows;
218 
219       /* Merge the two array parts */
220       for (i=istart; i<i2end; i++)  {
221         if (i1<i1end && i2<i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
222           ihlp1[i] = ihlp2[i1];
223           i1++;
224         } else if (i2<i2end) {
225           ihlp1[i] = ihlp2[i2];
226           i2++;
227         } else {
228           ihlp1[i] = ihlp2[i1];
229           i1++;
230         }
231       }
232     }
233 
234     /* Swap the two array sets */
235     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
236   }
237 
238   /* Copy one more time in case the sorted arrays are the temporary ones */
239   if (ihlp2 != isort) {
240     for (i=0; i<nrows; i++) isort[i] = ihlp2[i];
241   }
242   ierr = PetscFree(ialloc);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 /*
247   spbas_compress_pattern:
248      calculate a compressed sparseness pattern for a sparseness pattern
249      given in compressed row storage. The compressed sparseness pattern may
250      require (much) less memory.
251 */
252 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
253 {
254   PetscInt        nnz      = irow_in[nrows];
255   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
256   size_t          mem_compressed;
257   PetscErrorCode  ierr;
258   PetscInt        *isort;
259   PetscInt        *icols;
260   PetscInt        row_nnz;
261   PetscInt        *ipoint;
262   PetscBool       *used;
263   PetscInt        ptr;
264   PetscInt        i,j;
265   const PetscBool no_values = PETSC_FALSE;
266 
267   PetscFunctionBegin;
268   /* Allocate the structure of the new matrix */
269   B->nrows        = nrows;
270   B->ncols        = ncols;
271   B->nnz          = nnz;
272   B->col_idx_type = col_idx_type;
273   B->block_data   = PETSC_TRUE;
274 
275   ierr = spbas_allocate_pattern(B, no_values);CHKERRQ(ierr);
276 
277   /* When using an offset array, set it */
278   if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
279     for (i=0; i<nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
280   }
281 
282   /* Allocate the ordering for the rows */
283   ierr = PetscMalloc1(nrows,&isort);CHKERRQ(ierr);
284   ierr = PetscMalloc1(nrows,&ipoint);CHKERRQ(ierr);
285   ierr = PetscCalloc1(nrows,&used);CHKERRQ(ierr);
286 
287   for (i = 0; i<nrows; i++)  {
288     B->row_nnz[i] = irow_in[i+1]-irow_in[i];
289     isort[i]      = i;
290     ipoint[i]     = i;
291   }
292 
293   /* Sort the rows so that identical columns will be next to each other */
294   ierr = spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
295   ierr = PetscInfo(NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
296 
297   /* Replace identical rows with the first one in the list */
298   for (i=1; i<nrows; i++) {
299     if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
300       ipoint[isort[i]] = ipoint[isort[i-1]];
301     }
302   }
303 
304   /* Collect the rows which are used*/
305   for (i=0; i<nrows; i++) used[ipoint[i]] = PETSC_TRUE;
306 
307   /* Calculate needed memory */
308   B->n_alloc_icol = 0;
309   for (i=0; i<nrows; i++)  {
310     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
311   }
312   ierr = PetscMalloc1(B->n_alloc_icol,&B->alloc_icol);CHKERRQ(ierr);
313 
314   /* Fill in the diagonal offsets for the rows which store their own data */
315   ptr = 0;
316   for (i=0; i<B->nrows; i++) {
317     if (used[i]) {
318       B->icols[i] = &B->alloc_icol[ptr];
319       icols = &icol_in[irow_in[i]];
320       row_nnz = B->row_nnz[i];
321       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
322         for (j=0; j<row_nnz; j++) {
323           B->icols[i][j] = icols[j];
324         }
325       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
326         for (j=0; j<row_nnz; j++) {
327           B->icols[i][j] = icols[j]-i;
328         }
329       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
330         for (j=0; j<row_nnz; j++) {
331           B->icols[i][j] = icols[j]-icols[0];
332         }
333       }
334       ptr += B->row_nnz[i];
335     }
336   }
337 
338   /* Point to the right places for all data */
339   for (i=0; i<nrows; i++) {
340     B->icols[i] = B->icols[ipoint[i]];
341   }
342   ierr = PetscInfo(NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
343   ierr = PetscInfo1(NULL,"         (%g nonzeros per row)\n", (double) ((PetscReal) nnz / (PetscReal) nrows));CHKERRQ(ierr);
344 
345   ierr=PetscFree(isort);CHKERRQ(ierr);
346   ierr=PetscFree(used);CHKERRQ(ierr);
347   ierr=PetscFree(ipoint);CHKERRQ(ierr);
348 
349   mem_compressed = spbas_memory_requirement(*B);
350   *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
351   PetscFunctionReturn(0);
352 }
353 
354 /*
355    spbas_incomplete_cholesky
356        Incomplete Cholesky decomposition
357 */
358 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
359 
360 /*
361   spbas_delete : de-allocate the arrays owned by this matrix
362 */
363 PetscErrorCode spbas_delete(spbas_matrix matrix)
364 {
365   PetscInt       i;
366   PetscErrorCode ierr;
367 
368   PetscFunctionBegin;
369   if (matrix.block_data) {
370     ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr);
371     if (matrix.values) {ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
372   } else {
373     for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
374     ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
375     if (matrix.values) {
376       for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
377     }
378   }
379 
380   ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
381   ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
382   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
383   ierr=PetscFree(matrix.values);CHKERRQ(ierr);
384   PetscFunctionReturn(0);
385 }
386 
387 /*
388 spbas_matrix_to_crs:
389    Convert an spbas_matrix to compessed row storage
390 */
391 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
392 {
393   PetscInt       nrows = matrix_A.nrows;
394   PetscInt       nnz   = matrix_A.nnz;
395   PetscInt       i,j,r_nnz,i0;
396   PetscInt       *irow;
397   PetscInt       *icol;
398   PetscInt       *icol_A;
399   MatScalar      *val;
400   PetscScalar    *val_A;
401   PetscInt       col_idx_type = matrix_A.col_idx_type;
402   PetscBool      do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
403   PetscErrorCode ierr;
404 
405   PetscFunctionBegin;
406   ierr      = PetscMalloc1(nrows+1, &irow);CHKERRQ(ierr);
407   ierr      = PetscMalloc1(nnz, &icol);CHKERRQ(ierr);
408   *icol_out = icol;
409   *irow_out = irow;
410   if (do_values) {
411     ierr     = PetscMalloc1(nnz, &val);CHKERRQ(ierr);
412     *val_out = val; *icol_out = icol; *irow_out=irow;
413   }
414 
415   irow[0]=0;
416   for (i=0; i<nrows; i++) {
417     r_nnz     = matrix_A.row_nnz[i];
418     i0        = irow[i];
419     irow[i+1] = i0 + r_nnz;
420     icol_A    = matrix_A.icols[i];
421 
422     if (do_values) {
423       val_A = matrix_A.values[i];
424       for (j=0; j<r_nnz; j++) {
425         icol[i0+j] = icol_A[j];
426         val[i0+j]  = val_A[j];
427       }
428     } else {
429       for (j=0; j<r_nnz; j++) icol[i0+j] = icol_A[j];
430     }
431 
432     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
433       for (j=0; j<r_nnz; j++) icol[i0+j] += i;
434     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
435       i0 = matrix_A.icol0[i];
436       for (j=0; j<r_nnz; j++) icol[i0+j] += i0;
437     }
438   }
439   PetscFunctionReturn(0);
440 }
441 
442 /*
443     spbas_transpose
444        return the transpose of a matrix
445 */
446 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix * result)
447 {
448   PetscInt       col_idx_type = in_matrix.col_idx_type;
449   PetscInt       nnz          = in_matrix.nnz;
450   PetscInt       ncols        = in_matrix.nrows;
451   PetscInt       nrows        = in_matrix.ncols;
452   PetscInt       i,j,k;
453   PetscInt       r_nnz;
454   PetscInt       *irow;
455   PetscInt       icol0 = 0;
456   PetscScalar    * val;
457   PetscErrorCode ierr;
458 
459   PetscFunctionBegin;
460   /* Copy input values */
461   result->nrows        = nrows;
462   result->ncols        = ncols;
463   result->nnz          = nnz;
464   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
465   result->block_data   = PETSC_TRUE;
466 
467   /* Allocate sparseness pattern */
468   ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
469 
470   /*  Count the number of nonzeros in each row */
471   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
472 
473   for (i=0; i<ncols; i++) {
474     r_nnz = in_matrix.row_nnz[i];
475     irow  = in_matrix.icols[i];
476     if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
477       for (j=0; j<r_nnz; j++) result->row_nnz[irow[j]]++;
478     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
479       for (j=0; j<r_nnz; j++) result->row_nnz[i+irow[j]]++;
480     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
481       icol0=in_matrix.icol0[i];
482       for (j=0; j<r_nnz; j++) result->row_nnz[icol0+irow[j]]++;
483     }
484   }
485 
486   /* Set the pointers to the data */
487   ierr = spbas_allocate_data(result);CHKERRQ(ierr);
488 
489   /* Reset the number of nonzeros in each row */
490   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
491 
492   /* Fill the data arrays */
493   if (in_matrix.values) {
494     for (i=0; i<ncols; i++) {
495       r_nnz = in_matrix.row_nnz[i];
496       irow  = in_matrix.icols[i];
497       val   = in_matrix.values[i];
498 
499       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0 = 0;
500       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
501       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0 = in_matrix.icol0[i];
502       for (j=0; j<r_nnz; j++)  {
503         k = icol0 + irow[j];
504         result->icols[k][result->row_nnz[k]]  = i;
505         result->values[k][result->row_nnz[k]] = val[j];
506         result->row_nnz[k]++;
507       }
508     }
509   } else {
510     for (i=0; i<ncols; i++) {
511       r_nnz = in_matrix.row_nnz[i];
512       irow  = in_matrix.icols[i];
513 
514       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0=0;
515       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0=i;
516       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0=in_matrix.icol0[i];
517 
518       for (j=0; j<r_nnz; j++) {
519         k = icol0 + irow[j];
520         result->icols[k][result->row_nnz[k]] = i;
521         result->row_nnz[k]++;
522       }
523     }
524   }
525   PetscFunctionReturn(0);
526 }
527 
528 /*
529    spbas_mergesort
530 
531       mergesort for an array of integers and an array of associated
532       reals
533 
534       on output, icol[0..nnz-1] is increasing;
535                   val[0..nnz-1] has undergone the same permutation as icol
536 
537       NB: val may be NULL: in that case, only the integers are sorted
538 
539 */
540 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
541 {
542   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
543   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
544   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
545   PetscInt       *ialloc;     /* Allocated arrays */
546   PetscScalar    *valloc=NULL;
547   PetscInt       *iswap;      /* auxiliary pointers for swapping */
548   PetscScalar    *vswap;
549   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
550   PetscScalar    *vhlp1=NULL;  /* (arrays under construction) */
551   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
552   PetscScalar    *vhlp2=NULL;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   ierr  = PetscMalloc1(nnz,&ialloc);CHKERRQ(ierr);
557   ihlp1 = ialloc;
558   ihlp2 = icol;
559 
560   if (val) {
561     ierr  = PetscMalloc1(nnz,&valloc);CHKERRQ(ierr);
562     vhlp1 = valloc;
563     vhlp2 = val;
564   }
565 
566   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
567   for (istep=1; istep<nnz; istep*=2) {
568     /*
569       Combine sorted parts
570           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
571       of ihlp2 and vhlp2
572 
573       into one sorted part
574           istart:istart+2*istep-1
575       of ihlp1 and vhlp1
576     */
577     for (istart=0; istart<nnz; istart+=2*istep) {
578       /* Set counters and bound array part endings */
579       i1=istart;        i1end = i1+istep;  if (i1end>nnz) i1end=nnz;
580       i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) i2end=nnz;
581 
582       /* Merge the two array parts */
583       if (val) {
584         for (i=istart; i<i2end; i++) {
585           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
586             ihlp1[i] = ihlp2[i1];
587             vhlp1[i] = vhlp2[i1];
588             i1++;
589           } else if (i2<i2end) {
590             ihlp1[i] = ihlp2[i2];
591             vhlp1[i] = vhlp2[i2];
592             i2++;
593           } else {
594             ihlp1[i] = ihlp2[i1];
595             vhlp1[i] = vhlp2[i1];
596             i1++;
597           }
598         }
599       } else {
600         for (i=istart; i<i2end; i++) {
601           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
602             ihlp1[i] = ihlp2[i1];
603             i1++;
604           } else if (i2<i2end) {
605             ihlp1[i] = ihlp2[i2];
606             i2++;
607           } else {
608             ihlp1[i] = ihlp2[i1];
609             i1++;
610           }
611         }
612       }
613     }
614 
615     /* Swap the two array sets */
616     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
617     vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
618   }
619 
620   /* Copy one more time in case the sorted arrays are the temporary ones */
621   if (ihlp2 != icol) {
622     for (i=0; i<nnz; i++) icol[i] = ihlp2[i];
623     if (val) {
624       for (i=0; i<nnz; i++) val[i] = vhlp2[i];
625     }
626   }
627 
628   ierr = PetscFree(ialloc);CHKERRQ(ierr);
629   if (val) {ierr = PetscFree(valloc);CHKERRQ(ierr);}
630   PetscFunctionReturn(0);
631 }
632 
633 /*
634   spbas_apply_reordering_rows:
635     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
636 */
637 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
638 {
639   PetscInt       i,j,ip;
640   PetscInt       nrows=matrix_A->nrows;
641   PetscInt       * row_nnz;
642   PetscInt       **icols;
643   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
644   PetscScalar    **vals    = NULL;
645   PetscErrorCode ierr;
646 
647   PetscFunctionBegin;
648   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
649 
650   if (do_values) {
651     ierr = PetscMalloc1(nrows, &vals);CHKERRQ(ierr);
652   }
653   ierr = PetscMalloc1(nrows, &row_nnz);CHKERRQ(ierr);
654   ierr = PetscMalloc1(nrows, &icols);CHKERRQ(ierr);
655 
656   for (i=0; i<nrows; i++) {
657     ip = permutation[i];
658     if (do_values) vals[i] = matrix_A->values[ip];
659     icols[i]   = matrix_A->icols[ip];
660     row_nnz[i] = matrix_A->row_nnz[ip];
661     for (j=0; j<row_nnz[i]; j++) icols[i][j] += ip-i;
662   }
663 
664   if (do_values) { ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
665   ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
666   ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
667 
668   if (do_values) matrix_A->values = vals;
669   matrix_A->icols   = icols;
670   matrix_A->row_nnz = row_nnz;
671   PetscFunctionReturn(0);
672 }
673 
674 /*
675   spbas_apply_reordering_cols:
676     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
677 */
678 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A,const PetscInt *permutation)
679 {
680   PetscInt       i,j;
681   PetscInt       nrows=matrix_A->nrows;
682   PetscInt       row_nnz;
683   PetscInt       *icols;
684   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
685   PetscScalar    *vals     = NULL;
686   PetscErrorCode ierr;
687 
688   PetscFunctionBegin;
689   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
690 
691   for (i=0; i<nrows; i++) {
692     icols   = matrix_A->icols[i];
693     row_nnz = matrix_A->row_nnz[i];
694     if (do_values) vals = matrix_A->values[i];
695 
696     for (j=0; j<row_nnz; j++) {
697       icols[j] = permutation[i+icols[j]]-i;
698     }
699     ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
700   }
701   PetscFunctionReturn(0);
702 }
703 
704 /*
705   spbas_apply_reordering:
706     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
707 */
708 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
709 {
710   PetscErrorCode ierr;
711 
712   PetscFunctionBegin;
713   ierr = spbas_apply_reordering_rows(matrix_A, inv_perm);CHKERRQ(ierr);
714   ierr = spbas_apply_reordering_cols(matrix_A, permutation);CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 
718 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
719 {
720   spbas_matrix   retval;
721   PetscInt       i, j, i0, r_nnz;
722   PetscErrorCode ierr;
723 
724   PetscFunctionBegin;
725   /* Copy input values */
726   retval.nrows = nrows;
727   retval.ncols = ncols;
728   retval.nnz   = ai[nrows];
729 
730   retval.block_data   = PETSC_TRUE;
731   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
732 
733   /* Allocate output matrix */
734   ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
735   for (i=0; i<nrows; i++) retval.row_nnz[i] = ai[i+1]-ai[i];
736   ierr = spbas_allocate_data(&retval);CHKERRQ(ierr);
737   /* Copy the structure */
738   for (i = 0; i<retval.nrows; i++)  {
739     i0    = ai[i];
740     r_nnz = ai[i+1]-i0;
741 
742     for (j=0; j<r_nnz; j++) {
743       retval.icols[i][j] = aj[i0+j]-i;
744     }
745   }
746   *result = retval;
747   PetscFunctionReturn(0);
748 }
749 
750 /*
751    spbas_mark_row_power:
752       Mark the columns in row 'row' which are nonzero in
753           matrix^2log(marker).
754 */
755 PetscErrorCode spbas_mark_row_power(PetscInt *iwork,             /* marker-vector */
756                                     PetscInt row,                /* row for which the columns are marked */
757                                     spbas_matrix * in_matrix,    /* matrix for which the power is being  calculated */
758                                     PetscInt marker,             /* marker-value: 2^power */
759                                     PetscInt minmrk,             /* lower bound for marked points */
760                                     PetscInt maxmrk)             /* upper bound for marked points */
761 {
762   PetscErrorCode ierr;
763   PetscInt       i,j, nnz;
764 
765   PetscFunctionBegin;
766   nnz = in_matrix->row_nnz[row];
767 
768   /* For higher powers, call this function recursively */
769   if (marker>1) {
770     for (i=0; i<nnz; i++) {
771       j = row + in_matrix->icols[row][i];
772       if (minmrk<=j && j<maxmrk && iwork[j] < marker) {
773         ierr      = spbas_mark_row_power(iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
774         iwork[j] |= marker;
775       }
776     }
777   } else {
778     /*  Mark the columns reached */
779     for (i=0; i<nnz; i++)  {
780       j = row + in_matrix->icols[row][i];
781       if (minmrk<=j && j<maxmrk) iwork[j] |= 1;
782     }
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 /*
788    spbas_power
789       Calculate sparseness patterns for incomplete Cholesky decompositions
790       of a given order: (almost) all nonzeros of the matrix^(order+1) which
791       are inside the band width are found and stored in the output sparseness
792       pattern.
793 */
794 PetscErrorCode spbas_power(spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
795 {
796   spbas_matrix   retval;
797   PetscInt       nrows = in_matrix.nrows;
798   PetscInt       ncols = in_matrix.ncols;
799   PetscInt       i, j, kend;
800   PetscInt       nnz, inz;
801   PetscInt       *iwork;
802   PetscInt       marker;
803   PetscInt       maxmrk=0;
804   PetscErrorCode ierr;
805 
806   PetscFunctionBegin;
807   if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
808   if (ncols != nrows) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error\n");
809   if (in_matrix.values) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
810   if (power<=0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
811 
812   /* Copy input values*/
813   retval.nrows        = ncols;
814   retval.ncols        = nrows;
815   retval.nnz          = 0;
816   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
817   retval.block_data   = PETSC_FALSE;
818 
819   /* Allocate sparseness pattern */
820   ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
821 
822   /* Allocate marker array: note sure the max needed so use the max of the two */
823   ierr = PetscCalloc1(PetscMax(ncols,nrows), &iwork);CHKERRQ(ierr);
824 
825   /* Calculate marker values */
826   marker = 1; for (i=1; i<power; i++) marker*=2;
827 
828   for (i=0; i<nrows; i++)  {
829     /* Calculate the pattern for each row */
830 
831     nnz  = in_matrix.row_nnz[i];
832     kend = i+in_matrix.icols[i][nnz-1];
833     if (maxmrk<=kend) maxmrk=kend+1;
834     ierr = spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
835 
836     /* Count the columns*/
837     nnz = 0;
838     for (j=i; j<maxmrk; j++) nnz+= (iwork[j]!=0);
839 
840     /* Allocate the column indices */
841     retval.row_nnz[i] = nnz;
842     ierr = PetscMalloc1(nnz,&retval.icols[i]);CHKERRQ(ierr);
843 
844     /* Administrate the column indices */
845     inz = 0;
846     for (j=i; j<maxmrk; j++) {
847       if (iwork[j]) {
848         retval.icols[i][inz] = j-i;
849         inz++;
850         iwork[j] = 0;
851       }
852     }
853     retval.nnz += nnz;
854   };
855   ierr    = PetscFree(iwork);CHKERRQ(ierr);
856   *result = retval;
857   PetscFunctionReturn(0);
858 }
859 
860 /*
861    spbas_keep_upper:
862       remove the lower part of the matrix: keep the upper part
863 */
864 PetscErrorCode spbas_keep_upper(spbas_matrix * inout_matrix)
865 {
866   PetscInt i, j;
867   PetscInt jstart;
868 
869   PetscFunctionBegin;
870   if (inout_matrix->block_data) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
871   for (i=0; i<inout_matrix->nrows; i++)  {
872     for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++) {}
873     if (jstart>0) {
874       for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
875         inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart];
876       }
877 
878       if (inout_matrix->values) {
879         for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
880           inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
881         }
882       }
883 
884       inout_matrix->row_nnz[i] -= jstart;
885 
886       inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
887 
888       if (inout_matrix->values) {
889         inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
890       }
891       inout_matrix->nnz -= jstart;
892     }
893   }
894   PetscFunctionReturn(0);
895 }
896