xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 3923b477fd0dced8a2d147b4fb4519fe3af97d3f)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l>
11 - -pc_factor_drop_tolerance
12 
13   Level: beginner
14 
15    Contributed by: Bas van 't Hof
16 
17 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage, PCFactorSetLevels(), PCFactorSetDropTolerance()
18 
19 M*/
20 
21 /*
22   spbas_memory_requirement:
23     Calculate the number of bytes needed to store tha matrix
24 */
25 #undef __FUNCT__
26 #define __FUNCT__ "spbas_memory_requirement"
27 long int spbas_memory_requirement( spbas_matrix matrix)
28 {
29    long int memreq = 6 * sizeof(PetscInt)         + /* nrows, ncols, nnz, n_alloc_icol,
30 						       n_alloc_val, col_idx_type */
31      sizeof(PetscBool)                 + /* block_data */
32      sizeof(PetscScalar**)        + /* values */
33      sizeof(PetscScalar*)         + /* alloc_val */
34      2 * sizeof(int**)            + /* icols, icols0 */
35      2 * sizeof(PetscInt*)             + /* row_nnz, alloc_icol */
36      matrix.nrows * sizeof(PetscInt)   + /* row_nnz[*] */
37      matrix.nrows * sizeof(PetscInt*);   /* icols[*] */
38 
39    /* icol0[*] */
40    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) { memreq += matrix.nrows * sizeof(PetscInt); }
41 
42    /* icols[*][*] */
43    if (matrix.block_data) { memreq += matrix.n_alloc_icol * sizeof(PetscInt); }
44    else { memreq += matrix.nnz * sizeof(PetscInt); }
45 
46    if (matrix.values) {
47      memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
48      /* values[*][*] */
49        if (matrix.block_data) { memreq += matrix.n_alloc_val  * sizeof(PetscScalar); }
50        else { memreq += matrix.nnz * sizeof(PetscScalar); }
51    }
52    return memreq;
53 }
54 
55 /*
56   spbas_allocate_pattern:
57     allocate the pattern arrays row_nnz, icols and optionally values
58 */
59 #undef __FUNCT__
60 #define __FUNCT__ "spbas_allocate_pattern"
61 PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscBool  do_values)
62 {
63    PetscErrorCode ierr;
64    PetscInt       nrows = result->nrows;
65    PetscInt       col_idx_type = result->col_idx_type;
66 
67    PetscFunctionBegin;
68    /* Allocate sparseness pattern */
69    ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr);
70    ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr);
71 
72    /* If offsets are given wrt an array, create array */
73    if (col_idx_type == SPBAS_OFFSET_ARRAY) {
74       ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr);
75    } else  {
76       result->icol0 = PETSC_NULL;
77    }
78 
79    /* If values are given, allocate values array */
80    if (do_values)  {
81       ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr);
82    } else {
83       result->values = PETSC_NULL;
84    }
85    PetscFunctionReturn(0);
86 }
87 
88 /*
89 spbas_allocate_data:
90    in case of block_data:
91        Allocate the data arrays alloc_icol and optionally alloc_val,
92        set appropriate pointers from icols and values;
93    in case of !block_data:
94        Allocate the arrays icols[i] and optionally values[i]
95 */
96 #undef __FUNCT__
97 #define __FUNCT__ "spbas_allocate_data"
98 PetscErrorCode spbas_allocate_data( spbas_matrix * result)
99 {
100    PetscInt       i;
101    PetscInt       nnz   = result->nnz;
102    PetscInt       nrows = result->nrows;
103    PetscInt       r_nnz;
104    PetscErrorCode ierr;
105    PetscBool      do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE;
106    PetscBool      block_data = result->block_data;
107 
108    PetscFunctionBegin;
109    if (block_data) {
110      /* Allocate the column number array and point to it */
111       result->n_alloc_icol = nnz;
112       ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr);
113       result->icols[0]= result->alloc_icol;
114       for (i=1; i<nrows; i++)  {
115           result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
116       }
117 
118       /* Allocate the value array and point to it */
119       if (do_values)  {
120          result->n_alloc_val= nnz;
121          ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
122          result->values[0]= result->alloc_val;
123          for (i=1; i<nrows; i++) {
124              result->values[i] = result->values[i-1] + result->row_nnz[i-1];
125          }
126       }
127    } else {
128       for (i=0; i<nrows; i++)   {
129          r_nnz = result->row_nnz[i];
130          ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr);
131       }
132       if (do_values) {
133          for (i=0; i<nrows; i++)  {
134             r_nnz = result->row_nnz[i];
135             ierr = PetscMalloc(r_nnz*sizeof(PetscScalar), &result->values[i]);CHKERRQ(ierr);
136          }
137       }
138    }
139    PetscFunctionReturn(0);
140 }
141 
142 /*
143   spbas_row_order_icol
144      determine if row i1 should come
145        + before row i2 in the sorted rows (return -1),
146        + after (return 1)
147        + is identical (return 0).
148 */
149 #undef __FUNCT__
150 #define __FUNCT__ "spbas_row_order_icol"
151 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
152 {
153    PetscInt j;
154    PetscInt nnz1 = irow_in[i1+1] - irow_in[i1];
155    PetscInt nnz2 = irow_in[i2+1] - irow_in[i2];
156    PetscInt * icol1 = &icol_in[irow_in[i1]];
157    PetscInt * icol2 = &icol_in[irow_in[i2]];
158 
159    if (nnz1<nnz2) {return -1;}
160    if (nnz1>nnz2) {return 1;}
161 
162    if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
163       for (j=0; j<nnz1; j++) {
164          if (icol1[j]< icol2[j]) {return -1;}
165          if (icol1[j]> icol2[j]) {return 1;}
166       }
167    } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
168       for (j=0; j<nnz1; j++) {
169          if (icol1[j]-i1< icol2[j]-i2) {return -1;}
170          if (icol1[j]-i1> icol2[j]-i2) {return 1;}
171       }
172    } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
173       for (j=1; j<nnz1; j++) {
174          if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;}
175          if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;}
176       }
177    }
178    return 0;
179 }
180 
181 /*
182   spbas_mergesort_icols:
183     return a sorting of the rows in which identical sparseness patterns are
184     next to each other
185 */
186 #undef __FUNCT__
187 #define __FUNCT__ "spbas_mergesort_icols"
188 PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
189 {
190    PetscErrorCode ierr;
191    PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
192    PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
193    PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
194    PetscInt       *ialloc;     /* Allocated arrays */
195    PetscInt       *iswap;      /* auxiliary pointers for swapping */
196    PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
197    PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
198 
199    PetscFunctionBegin;
200 
201    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
202 
203    ihlp1 = ialloc;
204    ihlp2 = isort;
205 
206    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
207    for (istep=1; istep<nrows; istep*=2) {
208      /*
209        Combine sorted parts
210             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
211        of ihlp2 and vhlp2
212 
213        into one sorted part
214             istart:istart+2*istep-1
215        of ihlp1 and vhlp1
216      */
217       for (istart=0; istart<nrows; istart+=2*istep) {
218 
219 	/* Set counters and bound array part endings */
220          i1=istart;        i1end = i1+istep;  if (i1end>nrows) {i1end=nrows;}
221          i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) {i2end=nrows;}
222 
223          /* Merge the two array parts */
224          for (i=istart; i<i2end; i++)  {
225             if (i1<i1end && i2<i2end && spbas_row_order_icol( ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
226                 ihlp1[i] = ihlp2[i1];
227                 i1++;
228             } else if (i2<i2end ) {
229                 ihlp1[i] = ihlp2[i2];
230                 i2++;
231             } else {
232                 ihlp1[i] = ihlp2[i1];
233                 i1++;
234             }
235          }
236       }
237 
238       /* Swap the two array sets */
239       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
240    }
241 
242    /* Copy one more time in case the sorted arrays are the temporary ones */
243    if (ihlp2 != isort) {
244       for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; }
245    }
246    ierr = PetscFree(ialloc);CHKERRQ(ierr);
247    PetscFunctionReturn(0);
248 }
249 
250 
251 
252 /*
253   spbas_compress_pattern:
254      calculate a compressed sparseness pattern for a sparseness pattern
255      given in compressed row storage. The compressed sparseness pattern may
256      require (much) less memory.
257 */
258 #undef __FUNCT__
259 #define __FUNCT__ "spbas_compress_pattern"
260 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
261 {
262    PetscInt         nnz = irow_in[nrows];
263    long int         mem_orig = (nrows + nnz) * sizeof(PetscInt);
264    long int         mem_compressed;
265    PetscErrorCode   ierr;
266    PetscInt         *isort;
267    PetscInt         *icols;
268    PetscInt         row_nnz;
269    PetscInt         *ipoint;
270    PetscBool        *used;
271    PetscInt         ptr;
272    PetscInt         i,j;
273    const PetscBool  no_values = PETSC_FALSE;
274 
275    PetscFunctionBegin;
276    /* Allocate the structure of the new matrix */
277    B->nrows = nrows;
278    B->ncols = ncols;
279    B->nnz   = nnz;
280    B->col_idx_type= col_idx_type;
281    B->block_data = PETSC_TRUE;
282    ierr = spbas_allocate_pattern( B, no_values);CHKERRQ(ierr);
283 
284    /* When using an offset array, set it */
285    if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
286       for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];}
287    }
288 
289    /* Allocate the ordering for the rows */
290    ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort);CHKERRQ(ierr);
291    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint);CHKERRQ(ierr);
292    ierr = PetscMalloc(nrows*sizeof(PetscBool),&used);CHKERRQ(ierr);
293 
294    /*  Initialize the sorting */
295    ierr = PetscMemzero((void*) used, nrows*sizeof(PetscBool));CHKERRQ(ierr);
296    for (i = 0; i<nrows; i++)  {
297       B->row_nnz[i] = irow_in[i+1]-irow_in[i];
298       isort[i] = i;
299       ipoint[i]= i;
300    }
301 
302    /* Sort the rows so that identical columns will be next to each other */
303    ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
304    ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
305 
306    /* Replace identical rows with the first one in the list */
307    for (i=1; i<nrows; i++) {
308       if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
309          ipoint[isort[i]] = ipoint[isort[i-1]];
310       }
311    }
312 
313    /* Collect the rows which are used*/
314    for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;}
315 
316    /* Calculate needed memory */
317    B->n_alloc_icol = 0;
318    for (i=0; i<nrows; i++)  {
319       if (used[i]) {B->n_alloc_icol += B->row_nnz[i];}
320    }
321    ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);CHKERRQ(ierr);
322 
323    /* Fill in the diagonal offsets for the rows which store their own data */
324    ptr = 0;
325    for (i=0; i<B->nrows; i++) {
326       if (used[i])  {
327          B->icols[i] = &B->alloc_icol[ptr];
328          icols = &icol_in[irow_in[i]];
329          row_nnz = B->row_nnz[i];
330          if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
331             for (j=0; j<row_nnz; j++) {
332                B->icols[i][j] = icols[j];
333             }
334          } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
335             for (j=0; j<row_nnz; j++) {
336                B->icols[i][j] = icols[j]-i;
337             }
338          } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
339             for (j=0; j<row_nnz; j++) {
340                B->icols[i][j] = icols[j]-icols[0];
341             }
342          }
343          ptr += B->row_nnz[i];
344       }
345    }
346 
347    /* Point to the right places for all data */
348    for (i=0; i<nrows; i++) {
349       B->icols[i] = B->icols[ipoint[i]];
350    }
351    ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
352    ierr = PetscInfo1(PETSC_NULL,"         (%G nonzeros per row)\n",  (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr);
353 
354    ierr=PetscFree(isort);CHKERRQ(ierr);
355    ierr=PetscFree(used);CHKERRQ(ierr);
356    ierr=PetscFree(ipoint);CHKERRQ(ierr);
357 
358    mem_compressed = spbas_memory_requirement( *B );
359    *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
360    PetscFunctionReturn(0);
361 }
362 
363 /*
364    spbas_incomplete_cholesky
365        Incomplete Cholesky decomposition
366 */
367 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
368 
369 /*
370   spbas_delete : de-allocate the arrays owned by this matrix
371 */
372 #undef __FUNCT__
373 #define __FUNCT__ "spbas_delete"
374 PetscErrorCode spbas_delete(spbas_matrix matrix)
375 {
376    PetscInt       i;
377    PetscErrorCode ierr;
378 
379    PetscFunctionBegin;
380    if (matrix.block_data) {
381       ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr);
382       if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
383    } else  {
384      for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
385      ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
386      if (matrix.values) {
387          for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
388       }
389    }
390 
391    ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
392    ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
393    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
394    ierr=PetscFree(matrix.values);CHKERRQ(ierr);
395    PetscFunctionReturn(0);
396 }
397 
398 /*
399 spbas_matrix_to_crs:
400    Convert an spbas_matrix to compessed row storage
401 */
402 #undef __FUNCT__
403 #define __FUNCT__ "spbas_matrix_to_crs"
404 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
405 {
406    PetscInt       nrows = matrix_A.nrows;
407    PetscInt       nnz   = matrix_A.nnz;
408    PetscInt       i,j,r_nnz,i0;
409    PetscInt       *irow;
410    PetscInt       *icol;
411    PetscInt       *icol_A;
412    MatScalar      *val;
413    PetscScalar    *val_A;
414    PetscInt       col_idx_type = matrix_A.col_idx_type;
415    PetscBool      do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
416    PetscErrorCode ierr;
417 
418    PetscFunctionBegin;
419    ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow);CHKERRQ(ierr);
420    ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol);CHKERRQ(ierr);
421    *icol_out = icol;
422    *irow_out=irow;
423    if (do_values)  {
424       ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val);CHKERRQ(ierr);
425       *val_out = val; *icol_out = icol; *irow_out=irow;
426    }
427 
428    irow[0]=0;
429    for (i=0; i<nrows; i++) {
430       r_nnz = matrix_A.row_nnz[i];
431       i0 = irow[i];
432       irow[i+1] = i0 + r_nnz;
433       icol_A = matrix_A.icols[i];
434 
435       if (do_values) {
436           val_A = matrix_A.values[i];
437           for (j=0; j<r_nnz; j++) {
438              icol[i0+j] = icol_A[j];
439              val[i0+j]  = val_A[j];
440           }
441       } else {
442           for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; }
443       }
444 
445       if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
446          for (j=0; j<r_nnz; j++) { icol[i0+j] += i; }
447       }
448       else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
449          i0 = matrix_A.icol0[i];
450          for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; }
451       }
452    }
453    PetscFunctionReturn(0);
454 }
455 
456 
457 /*
458     spbas_transpose
459        return the transpose of a matrix
460 */
461 #undef __FUNCT__
462 #define __FUNCT__ "spbas_transpose"
463 PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result)
464 {
465    PetscInt       col_idx_type = in_matrix.col_idx_type;
466    PetscInt       nnz   = in_matrix.nnz;
467    PetscInt       ncols = in_matrix.nrows;
468    PetscInt       nrows = in_matrix.ncols;
469    PetscInt       i,j,k;
470    PetscInt       r_nnz;
471    PetscInt       *irow;
472    PetscInt       icol0 = 0;
473    PetscScalar    * val;
474    PetscErrorCode ierr;
475 
476    PetscFunctionBegin;
477    /* Copy input values */
478    result->nrows        = nrows;
479    result->ncols        = ncols;
480    result->nnz          = nnz;
481    result->col_idx_type = SPBAS_COLUMN_NUMBERS;
482    result->block_data   = PETSC_TRUE;
483 
484    /* Allocate sparseness pattern */
485    ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
486 
487    /*  Count the number of nonzeros in each row */
488    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
489 
490    for (i=0; i<ncols; i++) {
491       r_nnz = in_matrix.row_nnz[i];
492       irow  = in_matrix.icols[i];
493       if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
494          for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; }
495       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
496          for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; }
497       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
498          icol0=in_matrix.icol0[i];
499          for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; }
500       }
501    }
502 
503    /* Set the pointers to the data */
504    ierr = spbas_allocate_data(result);CHKERRQ(ierr);
505 
506    /* Reset the number of nonzeros in each row */
507    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
508 
509    /* Fill the data arrays */
510    if (in_matrix.values) {
511       for (i=0; i<ncols; i++) {
512          r_nnz = in_matrix.row_nnz[i];
513          irow  = in_matrix.icols[i];
514          val   = in_matrix.values[i];
515 
516          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
517          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
518          else if (col_idx_type == SPBAS_OFFSET_ARRAY)     {icol0=in_matrix.icol0[i];}
519          for (j=0; j<r_nnz; j++)  {
520             k = icol0 + irow[j];
521             result->icols[k][result->row_nnz[k]]  = i;
522             result->values[k][result->row_nnz[k]] = val[j];
523             result->row_nnz[k]++;
524          }
525       }
526    }  else {
527       for (i=0; i<ncols; i++) {
528          r_nnz = in_matrix.row_nnz[i];
529          irow  = in_matrix.icols[i];
530 
531          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
532          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
533          else if (col_idx_type == SPBAS_OFFSET_ARRAY)     {icol0=in_matrix.icol0[i];}
534 
535          for (j=0; j<r_nnz; j++) {
536             k = icol0 + irow[j];
537             result->icols[k][result->row_nnz[k]]  = i;
538             result->row_nnz[k]++;
539          }
540       }
541    }
542    PetscFunctionReturn(0);
543 }
544 
545 /*
546    spbas_mergesort
547 
548       mergesort for an array of intergers and an array of associated
549       reals
550 
551       on output, icol[0..nnz-1] is increasing;
552                   val[0..nnz-1] has undergone the same permutation as icol
553 
554       NB: val may be PETSC_NULL: in that case, only the integers are sorted
555 
556 */
557 #undef __FUNCT__
558 #define __FUNCT__ "spbas_mergesort"
559 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
560 {
561   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
562   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
563   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
564   PetscInt       *ialloc;     /* Allocated arrays */
565   PetscScalar    *valloc=PETSC_NULL;
566   PetscInt       *iswap;      /* auxiliary pointers for swapping */
567   PetscScalar    *vswap;
568   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
569   PetscScalar    *vhlp1=PETSC_NULL;  /* (arrays under construction) */
570   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
571   PetscScalar    *vhlp2=PETSC_NULL;
572   PetscErrorCode ierr;
573 
574    ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
575    ihlp1 = ialloc;
576    ihlp2 = icol;
577 
578    if (val) {
579       ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc);CHKERRQ(ierr);
580       vhlp1 = valloc;
581       vhlp2 = val;
582    }
583 
584 
585    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
586    for (istep=1; istep<nnz; istep*=2) {
587      /*
588        Combine sorted parts
589             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
590        of ihlp2 and vhlp2
591 
592        into one sorted part
593             istart:istart+2*istep-1
594        of ihlp1 and vhlp1
595      */
596       for (istart=0; istart<nnz; istart+=2*istep) {
597 
598 	/* Set counters and bound array part endings */
599          i1=istart;        i1end = i1+istep;  if (i1end>nnz) {i1end=nnz;}
600          i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) {i2end=nnz;}
601 
602          /* Merge the two array parts */
603          if (val) {
604             for (i=istart; i<i2end; i++) {
605                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
606                    ihlp1[i] = ihlp2[i1];
607                    vhlp1[i] = vhlp2[i1];
608                    i1++;
609                } else if (i2<i2end ){
610                    ihlp1[i] = ihlp2[i2];
611                    vhlp1[i] = vhlp2[i2];
612                    i2++;
613                } else {
614                    ihlp1[i] = ihlp2[i1];
615                    vhlp1[i] = vhlp2[i1];
616                    i1++;
617                }
618             }
619          } else {
620             for (i=istart; i<i2end; i++) {
621                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
622                    ihlp1[i] = ihlp2[i1];
623                    i1++;
624                } else if (i2<i2end ) {
625                    ihlp1[i] = ihlp2[i2];
626                    i2++;
627                } else {
628                    ihlp1[i] = ihlp2[i1];
629                    i1++;
630                }
631             }
632          }
633       }
634 
635       /* Swap the two array sets */
636       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
637       vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
638    }
639 
640    /* Copy one more time in case the sorted arrays are the temporary ones */
641    if (ihlp2 != icol) {
642       for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; }
643       if (val) {
644          for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; }
645       }
646    }
647 
648    ierr = PetscFree(ialloc);CHKERRQ(ierr);
649    if (val){ierr = PetscFree(valloc);CHKERRQ(ierr);}
650    PetscFunctionReturn(0);
651 }
652 
653 /*
654   spbas_apply_reordering_rows:
655     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
656 */
657 #undef __FUNCT__
658 #define __FUNCT__ "spbas_apply_reordering_rows"
659 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
660 {
661    PetscInt       i,j,ip;
662    PetscInt       nrows=matrix_A->nrows;
663    PetscInt       * row_nnz;
664    PetscInt       **icols;
665    PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
666    PetscScalar    **vals=PETSC_NULL;
667    PetscErrorCode ierr;
668 
669    PetscFunctionBegin;
670    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
671 
672    if (do_values)  {
673      ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals);CHKERRQ(ierr);
674    }
675    ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz);CHKERRQ(ierr);
676    ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols);CHKERRQ(ierr);
677 
678    for (i=0; i<nrows;i++) {
679       ip = permutation[i];
680       if (do_values) {vals[i]    = matrix_A->values[ip];}
681       icols[i]   = matrix_A->icols[ip];
682       row_nnz[i] = matrix_A->row_nnz[ip];
683       for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; }
684    }
685 
686    if (do_values){ ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
687    ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
688    ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
689 
690    if (do_values) { matrix_A->values  = vals; }
691    matrix_A->icols   = icols;
692    matrix_A->row_nnz = row_nnz;
693 
694    PetscFunctionReturn(0);
695 }
696 
697 
698 /*
699   spbas_apply_reordering_cols:
700     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
701 */
702 #undef __FUNCT__
703 #define __FUNCT__ "spbas_apply_reordering_cols"
704 PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation)
705 {
706    PetscInt    i,j;
707    PetscInt    nrows=matrix_A->nrows;
708    PetscInt    row_nnz;
709    PetscInt    *icols;
710    PetscBool   do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
711    PetscScalar *vals=PETSC_NULL;
712    PetscErrorCode ierr;
713 
714    PetscFunctionBegin;
715    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
716 
717    for (i=0; i<nrows;i++) {
718       icols   = matrix_A->icols[i];
719       row_nnz = matrix_A->row_nnz[i];
720       if (do_values) { vals = matrix_A->values[i]; }
721 
722       for (j=0; j<row_nnz; j++)  {
723          icols[j] = permutation[i+icols[j]]-i;
724       }
725       ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
726    }
727    PetscFunctionReturn(0);
728 }
729 
730 /*
731   spbas_apply_reordering:
732     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
733 */
734 #undef __FUNCT__
735 #define __FUNCT__ "spbas_apply_reordering"
736 PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
737 {
738    PetscErrorCode ierr;
739    PetscFunctionBegin;
740    ierr = spbas_apply_reordering_rows( matrix_A, inv_perm);CHKERRQ(ierr);
741    ierr = spbas_apply_reordering_cols( matrix_A, permutation);CHKERRQ(ierr);
742    PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "spbas_pattern_only"
747 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
748 {
749    spbas_matrix   retval;
750    PetscInt       i, j, i0, r_nnz;
751    PetscErrorCode ierr;
752 
753    PetscFunctionBegin;
754 
755    /* Copy input values */
756    retval.nrows = nrows;
757    retval.ncols = ncols;
758    retval.nnz   = ai[nrows];
759 
760    retval.block_data   = PETSC_TRUE;
761    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
762 
763    /* Allocate output matrix */
764    ierr =  spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
765    for (i=0; i<nrows; i++)  {retval.row_nnz[i] = ai[i+1]-ai[i];}
766    ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
767    /* Copy the structure */
768    for (i = 0; i<retval.nrows; i++)  {
769       i0 = ai[i];
770       r_nnz = ai[i+1]-i0;
771 
772       for (j=0; j<r_nnz; j++) {
773          retval.icols[i][j]  = aj[i0+j]-i;
774       }
775    }
776    *result = retval;
777    PetscFunctionReturn(0);
778 }
779 
780 
781 /*
782    spbas_mark_row_power:
783       Mark the columns in row 'row' which are nonzero in
784           matrix^2log(marker).
785 */
786 #undef __FUNCT__
787 #define __FUNCT__ "spbas_mark_row_power"
788 PetscErrorCode spbas_mark_row_power(
789 				    PetscInt *iwork,             /* marker-vector */
790 				    PetscInt row,                /* row for which the columns are marked */
791 				    spbas_matrix * in_matrix, /* matrix for which the power is being  calculated */
792 				    PetscInt marker,             /* marker-value: 2^power */
793 				    PetscInt minmrk,            /* lower bound for marked points */
794 				    PetscInt maxmrk)            /* upper bound for marked points */
795 {
796    PetscErrorCode ierr;
797    PetscInt       i,j, nnz;
798 
799    PetscFunctionBegin;
800    nnz = in_matrix->row_nnz[row];
801 
802    /* For higher powers, call this function recursively */
803    if (marker>1) {
804       for (i=0; i<nnz;i++) {
805          j = row + in_matrix->icols[row][i];
806          if (minmrk<=j && j<maxmrk && iwork[j] < marker ) {
807             ierr =  spbas_mark_row_power( iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
808             iwork[j] |= marker;
809          }
810       }
811    } else  {
812      /*  Mark the columns reached */
813       for (i=0; i<nnz;i++)  {
814          j = row + in_matrix->icols[row][i];
815          if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; }
816       }
817    }
818    PetscFunctionReturn(0);
819 }
820 
821 
822 /*
823    spbas_power
824       Calculate sparseness patterns for incomplete Cholesky decompositions
825       of a given order: (almost) all nonzeros of the matrix^(order+1) which
826       are inside the band width are found and stored in the output sparseness
827       pattern.
828 */
829 #undef __FUNCT__
830 #define __FUNCT__ "spbas_power"
831 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
832 {
833    spbas_matrix   retval;
834    PetscInt       nrows = in_matrix.nrows;
835    PetscInt       ncols = in_matrix.ncols;
836    PetscInt       i, j, kend;
837    PetscInt       nnz, inz;
838    PetscInt       *iwork;
839    PetscInt       marker;
840    PetscInt       maxmrk=0;
841    PetscErrorCode ierr;
842 
843    PetscFunctionBegin;
844    if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
845    if (ncols != nrows) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error\n");
846    if (in_matrix.values) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
847    if (power<=0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
848 
849    /* Copy input values*/
850    retval.nrows = ncols;
851    retval.ncols = nrows;
852    retval.nnz   = 0;
853    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
854    retval.block_data = PETSC_FALSE;
855 
856    /* Allocate sparseness pattern */
857    ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
858 
859    /* Allocate marker array */
860    ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork);CHKERRQ(ierr);
861 
862    /* Erase the pattern for this row */
863    ierr = PetscMemzero( (void *) iwork, retval.nrows*sizeof(PetscInt));CHKERRQ(ierr);
864 
865    /* Calculate marker values */
866    marker = 1; for (i=1; i<power; i++) {marker*=2;}
867 
868    for (i=0; i<nrows; i++)  {
869      /* Calculate the pattern for each row */
870 
871       nnz    = in_matrix.row_nnz[i];
872       kend   = i+in_matrix.icols[i][nnz-1];
873       if (maxmrk<=kend) {maxmrk=kend+1;}
874       ierr =  spbas_mark_row_power( iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
875 
876       /* Count the columns*/
877       nnz = 0;
878       for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);}
879 
880       /* Allocate the column indices */
881       retval.row_nnz[i] = nnz;
882       ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]);CHKERRQ(ierr);
883 
884       /* Administrate the column indices */
885       inz = 0;
886       for (j=i; j<maxmrk; j++) {
887          if (iwork[j])  {
888             retval.icols[i][inz] = j-i;
889             inz++;
890             iwork[j]=0;
891          }
892       }
893       retval.nnz += nnz;
894    };
895    ierr = PetscFree(iwork);CHKERRQ(ierr);
896    *result = retval;
897    PetscFunctionReturn(0);
898 }
899 
900 
901 
902 /*
903    spbas_keep_upper:
904       remove the lower part of the matrix: keep the upper part
905 */
906 #undef __FUNCT__
907 #define __FUNCT__ "spbas_keep_upper"
908 PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix)
909 {
910    PetscInt i, j;
911    PetscInt jstart;
912 
913    PetscFunctionBegin;
914    if (inout_matrix->block_data) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
915    for (i=0; i<inout_matrix->nrows; i++)  {
916        for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++){}
917        if (jstart>0) {
918           for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
919              inout_matrix->icols[i][j] =  inout_matrix->icols[i][j+jstart];
920           }
921 
922           if (inout_matrix->values) {
923              for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
924                 inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
925              }
926           }
927 
928           inout_matrix->row_nnz[i] -= jstart;
929 
930           inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
931 
932           if (inout_matrix->values) {
933              inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
934           }
935           inout_matrix->nnz -= jstart;
936        }
937    }
938    PetscFunctionReturn(0);
939 }
940 
941