xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 4e1ff37a6225575e794f864ad62814fd735489d8)
1 #include "petsc.h"
2 #include "../src/mat/impls/aij/seq/aij.h"
3 #include "spbas.h"
4 
5 
6 /*
7   spbas_memory_requirement:
8     Calculate the number of bytes needed to store tha matrix
9 */
10 #undef __FUNCT__
11 #define __FUNCT__ "spbas_memory_requirement"
12 long int spbas_memory_requirement( spbas_matrix matrix)
13 {
14    long int memreq = 6 * sizeof(PetscInt)         + /* nrows, ncols, nnz, n_alloc_icol,
15 						       n_alloc_val, col_idx_type */
16      sizeof(PetscTruth)                 + /* block_data */
17      sizeof(PetscScalar**)        + /* values */
18      sizeof(PetscScalar*)         + /* alloc_val */
19      2 * sizeof(int**)            + /* icols, icols0 */
20      2 * sizeof(PetscInt*)             + /* row_nnz, alloc_icol */
21      matrix.nrows * sizeof(PetscInt)   + /* row_nnz[*] */
22      matrix.nrows * sizeof(PetscInt*);   /* icols[*] */
23 
24    /* icol0[*] */
25    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) { memreq += matrix.nrows * sizeof(PetscInt); }
26 
27    /* icols[*][*] */
28    if (matrix.block_data) { memreq += matrix.n_alloc_icol * sizeof(PetscInt); }
29    else { memreq += matrix.nnz * sizeof(PetscInt); }
30 
31    if (matrix.values) {
32      memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
33      /* values[*][*] */
34        if (matrix.block_data) { memreq += matrix.n_alloc_val  * sizeof(PetscScalar); }
35        else { memreq += matrix.nnz * sizeof(PetscScalar); }
36    }
37    return memreq;
38 }
39 
40 /*
41   spbas_allocate_pattern:
42     allocate the pattern arrays row_nnz, icols and optionally values
43 */
44 #undef __FUNCT__
45 #define __FUNCT__ "spbas_allocate_pattern"
46 PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values)
47 {
48    PetscErrorCode ierr;
49    PetscInt       nrows = result->nrows;
50    PetscInt       col_idx_type = result->col_idx_type;
51 
52    PetscFunctionBegin;
53    /* Allocate sparseness pattern */
54    ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr);
55    ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr);
56 
57    /* If offsets are given wrt an array, create array */
58    if (col_idx_type == SPBAS_OFFSET_ARRAY) {
59       ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr);
60    } else  {
61       result->icol0 = PETSC_NULL;
62    }
63 
64    /* If values are given, allocate values array */
65    if (do_values)  {
66       ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr);
67    } else {
68       result->values = PETSC_NULL;
69    }
70    PetscFunctionReturn(0);
71 }
72 
73 /*
74 spbas_allocate_data:
75    in case of block_data:
76        Allocate the data arrays alloc_icol and optionally alloc_val,
77        set appropriate pointers from icols and values;
78    in case of !block_data:
79        Allocate the arrays icols[i] and optionally values[i]
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "spbas_allocate_data"
83 PetscErrorCode spbas_allocate_data( spbas_matrix * result)
84 {
85    PetscInt       i;
86    PetscInt       nnz   = result->nnz;
87    PetscInt       nrows = result->nrows;
88    PetscInt       r_nnz;
89    PetscErrorCode ierr;
90    PetscTruth     do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE;
91    PetscTruth     block_data = result->block_data;
92 
93    PetscFunctionBegin;
94    if (block_data) {
95      /* Allocate the column number array and point to it */
96       result->n_alloc_icol = nnz;
97       ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr);
98       result->icols[0]= result->alloc_icol;
99       for (i=1; i<nrows; i++)  {
100           result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
101       }
102 
103       /* Allocate the value array and point to it */
104       if (do_values)  {
105          result->n_alloc_val= nnz;
106          ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
107          result->values[0]= result->alloc_val;
108          for (i=1; i<nrows; i++) {
109              result->values[i] = result->values[i-1] + result->row_nnz[i-1];
110          }
111       }
112    } else {
113       for (i=0; i<nrows; i++)   {
114          r_nnz = result->row_nnz[i];
115          ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr);
116       }
117       if (do_values) {
118          for (i=0; i<nrows; i++)  {
119             r_nnz = result->row_nnz[i];
120             ierr = PetscMalloc(r_nnz*sizeof(PetscScalar), &result->values[i]);CHKERRQ(ierr);
121          }
122       }
123    }
124    PetscFunctionReturn(0);
125 }
126 
127 /*
128   spbas_row_order_icol
129      determine if row i1 should come
130        + before row i2 in the sorted rows (return -1),
131        + after (return 1)
132        + is identical (return 0).
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "spbas_row_order_icol"
136 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
137 {
138    PetscInt j;
139    PetscInt nnz1 = irow_in[i1+1] - irow_in[i1];
140    PetscInt nnz2 = irow_in[i2+1] - irow_in[i2];
141    PetscInt * icol1 = &icol_in[irow_in[i1]];
142    PetscInt * icol2 = &icol_in[irow_in[i2]];
143 
144    if (nnz1<nnz2) {return -1;}
145    if (nnz1>nnz2) {return 1;}
146 
147    if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
148       for (j=0; j<nnz1; j++) {
149          if (icol1[j]< icol2[j]) {return -1;}
150          if (icol1[j]> icol2[j]) {return 1;}
151       }
152    } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
153       for (j=0; j<nnz1; j++) {
154          if (icol1[j]-i1< icol2[j]-i2) {return -1;}
155          if (icol1[j]-i1> icol2[j]-i2) {return 1;}
156       }
157    } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
158       for (j=1; j<nnz1; j++) {
159          if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;}
160          if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;}
161       }
162    }
163    return 0;
164 }
165 
166 /*
167   spbas_mergesort_icols:
168     return a sorting of the rows in which identical sparseness patterns are
169     next to each other
170 */
171 #undef __FUNCT__
172 #define __FUNCT__ "spbas_mergesort_icols"
173 PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
174 {
175    PetscErrorCode ierr;
176    PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
177    PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
178    PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
179    PetscInt       *ialloc;     /* Allocated arrays */
180    PetscInt       *iswap;      /* auxiliary pointers for swapping */
181    PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
182    PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
183 
184    PetscFunctionBegin;
185 
186    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
187 
188    ihlp1 = ialloc;
189    ihlp2 = isort;
190 
191    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
192    for (istep=1; istep<nrows; istep*=2) {
193      /*
194        Combine sorted parts
195             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
196        of ihlp2 and vhlp2
197 
198        into one sorted part
199             istart:istart+2*istep-1
200        of ihlp1 and vhlp1
201      */
202       for (istart=0; istart<nrows; istart+=2*istep) {
203 
204 	/* Set counters and bound array part endings */
205          i1=istart;        i1end = i1+istep;  if (i1end>nrows) {i1end=nrows;}
206          i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) {i2end=nrows;}
207 
208          /* Merge the two array parts */
209          for (i=istart; i<i2end; i++)  {
210             if (i1<i1end && i2<i2end && spbas_row_order_icol( ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
211                 ihlp1[i] = ihlp2[i1];
212                 i1++;
213             } else if (i2<i2end ) {
214                 ihlp1[i] = ihlp2[i2];
215                 i2++;
216             } else {
217                 ihlp1[i] = ihlp2[i1];
218                 i1++;
219             }
220          }
221       }
222 
223       /* Swap the two array sets */
224       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
225    }
226 
227    /* Copy one more time in case the sorted arrays are the temporary ones */
228    if (ihlp2 != isort) {
229       for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; }
230    }
231    ierr = PetscFree(ialloc);CHKERRQ(ierr);
232    PetscFunctionReturn(0);
233 }
234 
235 
236 
237 /*
238   spbas_compress_pattern:
239      calculate a compressed sparseness pattern for a sparseness pattern
240      given in compressed row storage. The compressed sparseness pattern may
241      require (much) less memory.
242 */
243 #undef __FUNCT__
244 #define __FUNCT__ "spbas_compress_pattern"
245 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
246 {
247    PetscInt         nnz = irow_in[nrows];
248    long int         mem_orig = (nrows + nnz) * sizeof(PetscInt);
249    long int         mem_compressed;
250    PetscErrorCode   ierr;
251    PetscInt         *isort;
252    PetscInt         *icols;
253    PetscInt         row_nnz;
254    PetscInt         *ipoint;
255    PetscTruth       *used;
256    PetscInt         ptr;
257    PetscInt         i,j;
258    const PetscTruth no_values = PETSC_FALSE;
259 
260    PetscFunctionBegin;
261    /* Allocate the structure of the new matrix */
262    B->nrows = nrows;
263    B->ncols = ncols;
264    B->nnz   = nnz;
265    B->col_idx_type= col_idx_type;
266    B->block_data = PETSC_TRUE;
267    ierr = spbas_allocate_pattern( B, no_values);CHKERRQ(ierr);
268 
269    /* When using an offset array, set it */
270    if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
271       for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];}
272    }
273 
274    /* Allocate the ordering for the rows */
275    ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort);CHKERRQ(ierr);
276    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint);CHKERRQ(ierr);
277    ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used);CHKERRQ(ierr);
278 
279    /*  Initialize the sorting */
280    ierr = PetscMemzero((void*) used, nrows*sizeof(PetscTruth));CHKERRQ(ierr);
281    for (i = 0; i<nrows; i++)  {
282       B->row_nnz[i] = irow_in[i+1]-irow_in[i];
283       isort[i] = i;
284       ipoint[i]= i;
285    }
286 
287    /* Sort the rows so that identical columns will be next to each other */
288    ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
289    ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
290 
291    /* Replace identical rows with the first one in the list */
292    for (i=1; i<nrows; i++) {
293       if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
294          ipoint[isort[i]] = ipoint[isort[i-1]];
295       }
296    }
297 
298    /* Collect the rows which are used*/
299    for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;}
300 
301    /* Calculate needed memory */
302    B->n_alloc_icol = 0;
303    for (i=0; i<nrows; i++)  {
304       if (used[i]) {B->n_alloc_icol += B->row_nnz[i];}
305    }
306    ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);CHKERRQ(ierr);
307 
308    /* Fill in the diagonal offsets for the rows which store their own data */
309    ptr = 0;
310    for (i=0; i<B->nrows; i++) {
311       if (used[i])  {
312          B->icols[i] = &B->alloc_icol[ptr];
313          icols = &icol_in[irow_in[i]];
314          row_nnz = B->row_nnz[i];
315          if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
316             for (j=0; j<row_nnz; j++) {
317                B->icols[i][j] = icols[j];
318             }
319          } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
320             for (j=0; j<row_nnz; j++) {
321                B->icols[i][j] = icols[j]-i;
322             }
323          } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
324             for (j=0; j<row_nnz; j++) {
325                B->icols[i][j] = icols[j]-icols[0];
326             }
327          }
328          ptr += B->row_nnz[i];
329       }
330    }
331 
332    /* Point to the right places for all data */
333    for (i=0; i<nrows; i++) {
334       B->icols[i] = B->icols[ipoint[i]];
335    }
336    ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
337    ierr = PetscInfo1(PETSC_NULL,"         (%G nonzeros per row)\n",  (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr);
338 
339    ierr=PetscFree(isort);CHKERRQ(ierr);
340    ierr=PetscFree(used);CHKERRQ(ierr);
341    ierr=PetscFree(ipoint);CHKERRQ(ierr);
342 
343    mem_compressed = spbas_memory_requirement( *B );
344    *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
345    PetscFunctionReturn(0);
346 }
347 
348 /*
349    spbas_incomplete_cholesky
350        Incomplete Cholesky decomposition
351 */
352 #include "spbas_cholesky.h"
353 
354 /*
355   spbas_delete : de-allocate the arrays owned by this matrix
356 */
357 #undef __FUNCT__
358 #define __FUNCT__ "spbas_delete"
359 PetscErrorCode spbas_delete(spbas_matrix matrix)
360 {
361    PetscInt       i;
362    PetscErrorCode ierr;
363 
364    PetscFunctionBegin;
365    if (matrix.block_data) {
366       ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr)
367       if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
368    } else  {
369      for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
370      ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
371      if (matrix.values) {
372          for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
373       }
374    }
375 
376    ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
377    ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
378    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
379    if (matrix.values)  {ierr=PetscFree(matrix.values);CHKERRQ(ierr);}
380    PetscFunctionReturn(0);
381 }
382 
383 /*
384 spbas_matrix_to_crs:
385    Convert an spbas_matrix to compessed row storage
386 */
387 #undef __FUNCT__
388 #define __FUNCT__ "spbas_matrix_to_crs"
389 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
390 {
391    PetscInt       nrows = matrix_A.nrows;
392    PetscInt       nnz   = matrix_A.nnz;
393    PetscInt       i,j,r_nnz,i0;
394    PetscInt       *irow;
395    PetscInt       *icol;
396    PetscInt       *icol_A;
397    MatScalar      *val;
398    PetscScalar    *val_A;
399    PetscInt       col_idx_type = matrix_A.col_idx_type;
400    PetscTruth     do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
401    PetscErrorCode ierr;
402 
403    PetscFunctionBegin;
404    ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow);CHKERRQ(ierr);
405    ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol);CHKERRQ(ierr);
406    *icol_out = icol;
407    *irow_out=irow;
408    if (do_values)  {
409       ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val);CHKERRQ(ierr);
410       *val_out = val; *icol_out = icol; *irow_out=irow;
411    }
412 
413    irow[0]=0;
414    for (i=0; i<nrows; i++) {
415       r_nnz = matrix_A.row_nnz[i];
416       i0 = irow[i];
417       irow[i+1] = i0 + r_nnz;
418       icol_A = matrix_A.icols[i];
419 
420       if (do_values) {
421           val_A = matrix_A.values[i];
422           for (j=0; j<r_nnz; j++) {
423              icol[i0+j] = icol_A[j];
424              val[i0+j]  = val_A[j];
425           }
426       } else {
427           for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; }
428       }
429 
430       if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
431          for (j=0; j<r_nnz; j++) { icol[i0+j] += i; }
432       }
433       else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
434          i0 = matrix_A.icol0[i];
435          for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; }
436       }
437    }
438    PetscFunctionReturn(0);
439 }
440 
441 
442 /*
443     spbas_transpose
444        return the transpose of a matrix
445 */
446 #undef __FUNCT__
447 #define __FUNCT__ "spbas_transpose"
448 PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result)
449 {
450    PetscInt       col_idx_type = in_matrix.col_idx_type;
451    PetscInt       nnz   = in_matrix.nnz;
452    PetscInt       ncols = in_matrix.nrows;
453    PetscInt       nrows = in_matrix.ncols;
454    PetscInt       i,j,k;
455    PetscInt       r_nnz;
456    PetscInt       *irow;
457    PetscInt       icol0;
458    PetscScalar    * val;
459    PetscErrorCode ierr;
460 
461    PetscFunctionBegin;
462 
463    /* Copy input values */
464    result->nrows        = nrows;
465    result->ncols        = ncols;
466    result->nnz          = nnz;
467    result->col_idx_type = SPBAS_COLUMN_NUMBERS;
468    result->block_data   = PETSC_TRUE;
469 
470    /* Allocate sparseness pattern */
471    ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
472 
473    /*  Count the number of nonzeros in each row */
474    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
475 
476    for (i=0; i<ncols; i++) {
477       r_nnz = in_matrix.row_nnz[i];
478       irow  = in_matrix.icols[i];
479       if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
480          for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; }
481       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
482          for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; }
483       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
484          icol0=in_matrix.icol0[i];
485          for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; }
486       }
487    }
488 
489    /* Set the pointers to the data */
490    ierr = spbas_allocate_data(result);CHKERRQ(ierr);
491 
492    /* Reset the number of nonzeros in each row */
493    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
494 
495    /* Fill the data arrays */
496    if (in_matrix.values) {
497       for (i=0; i<ncols; i++) {
498          r_nnz = in_matrix.row_nnz[i];
499          irow  = in_matrix.icols[i];
500          val   = in_matrix.values[i];
501 
502          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
503          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
504          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
505                  {icol0=in_matrix.icol0[i];}
506          for (j=0; j<r_nnz; j++)  {
507             k = icol0 + irow[j];
508             result->icols[k][result->row_nnz[k]]  = i;
509             result->values[k][result->row_nnz[k]] = val[j];
510             result->row_nnz[k]++;
511          }
512       }
513    }  else {
514       for (i=0; i<ncols; i++) {
515          r_nnz = in_matrix.row_nnz[i];
516          irow  = in_matrix.icols[i];
517 
518          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
519          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
520          else if (col_idx_type == SPBAS_OFFSET_ARRAY)     {icol0=in_matrix.icol0[i];}
521 
522          for (j=0; j<r_nnz; j++) {
523             k = icol0 + irow[j];
524             result->icols[k][result->row_nnz[k]]  = i;
525             result->row_nnz[k]++;
526          }
527       }
528    }
529    PetscFunctionReturn(0);
530 }
531 
532 /*
533    spbas_mergesort
534 
535       mergesort for an array of intergers and an array of associated
536       reals
537 
538       on output, icol[0..nnz-1] is increasing;
539                   val[0..nnz-1] has undergone the same permutation as icol
540 
541       NB: val may be PETSC_NULL: in that case, only the integers are sorted
542 
543 */
544 #undef __FUNCT__
545 #define __FUNCT__ "spbas_mergesort"
546 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
547 {
548   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
549   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
550   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
551   PetscInt       *ialloc;     /* Allocated arrays */
552   PetscScalar    *valloc=PETSC_NULL;
553   PetscInt       *iswap;      /* auxiliary pointers for swapping */
554   PetscScalar    *vswap;
555   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
556   PetscScalar    *vhlp1=PETSC_NULL;  /* (arrays under construction) */
557   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
558   PetscScalar    *vhlp2=PETSC_NULL;
559   PetscErrorCode ierr;
560 
561    ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
562    ihlp1 = ialloc;
563    ihlp2 = icol;
564 
565    if (val) {
566       ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc);CHKERRQ(ierr);
567       vhlp1 = valloc;
568       vhlp2 = val;
569    }
570 
571 
572    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
573    for (istep=1; istep<nnz; istep*=2) {
574      /*
575        Combine sorted parts
576             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
577        of ihlp2 and vhlp2
578 
579        into one sorted part
580             istart:istart+2*istep-1
581        of ihlp1 and vhlp1
582      */
583       for (istart=0; istart<nnz; istart+=2*istep) {
584 
585 	/* Set counters and bound array part endings */
586          i1=istart;        i1end = i1+istep;  if (i1end>nnz) {i1end=nnz;}
587          i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) {i2end=nnz;}
588 
589          /* Merge the two array parts */
590          if (val) {
591             for (i=istart; i<i2end; i++) {
592                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
593                    ihlp1[i] = ihlp2[i1];
594                    vhlp1[i] = vhlp2[i1];
595                    i1++;
596                } else if (i2<i2end ){
597                    ihlp1[i] = ihlp2[i2];
598                    vhlp1[i] = vhlp2[i2];
599                    i2++;
600                } else {
601                    ihlp1[i] = ihlp2[i1];
602                    vhlp1[i] = vhlp2[i1];
603                    i1++;
604                }
605             }
606          } else {
607             for (i=istart; i<i2end; i++) {
608                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
609                    ihlp1[i] = ihlp2[i1];
610                    i1++;
611                } else if (i2<i2end ) {
612                    ihlp1[i] = ihlp2[i2];
613                    i2++;
614                } else {
615                    ihlp1[i] = ihlp2[i1];
616                    i1++;
617                }
618             }
619          }
620       }
621 
622       /* Swap the two array sets */
623       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
624       vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
625    }
626 
627    /* Copy one more time in case the sorted arrays are the temporary ones */
628    if (ihlp2 != icol) {
629       for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; }
630       if (val) {
631          for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; }
632       }
633    }
634 
635    ierr = PetscFree(ialloc);CHKERRQ(ierr);
636    if(val){ierr = PetscFree(valloc);CHKERRQ(ierr);}
637    PetscFunctionReturn(0);
638 }
639 
640 /*
641   spbas_apply_reordering_rows:
642     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
643 */
644 #undef __FUNCT__
645 #define __FUNCT__ "spbas_apply_reordering_rows"
646 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
647 {
648    PetscInt       i,j,ip;
649    PetscInt       nrows=matrix_A->nrows;
650    PetscInt       * row_nnz;
651    PetscInt       **icols;
652    PetscTruth     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
653    PetscScalar    **vals=PETSC_NULL;
654    PetscErrorCode ierr;
655 
656    PetscFunctionBegin;
657    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
658 
659    if (do_values)  {
660      ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals);CHKERRQ(ierr);
661    }
662    ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz);CHKERRQ(ierr);
663    ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols);CHKERRQ(ierr);
664 
665    for (i=0; i<nrows;i++) {
666       ip = permutation[i];
667       if (do_values) {vals[i]    = matrix_A->values[ip];}
668       icols[i]   = matrix_A->icols[ip];
669       row_nnz[i] = matrix_A->row_nnz[ip];
670       for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; }
671    }
672 
673    if (do_values){ ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
674    ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
675    ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
676 
677    if (do_values) { matrix_A->values  = vals; }
678    matrix_A->icols   = icols;
679    matrix_A->row_nnz = row_nnz;
680 
681    PetscFunctionReturn(0);
682 }
683 
684 
685 /*
686   spbas_apply_reordering_cols:
687     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
688 */
689 #undef __FUNCT__
690 #define __FUNCT__ "spbas_apply_reordering_cols"
691 PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation)
692 {
693    PetscInt    i,j;
694    PetscInt    nrows=matrix_A->nrows;
695    PetscInt    row_nnz;
696    PetscInt    *icols;
697    PetscTruth  do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
698    PetscScalar *vals=PETSC_NULL;
699    PetscErrorCode ierr;
700 
701    PetscFunctionBegin;
702    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
703 
704    for (i=0; i<nrows;i++) {
705       icols   = matrix_A->icols[i];
706       row_nnz = matrix_A->row_nnz[i];
707       if (do_values) { vals = matrix_A->values[i]; }
708 
709       for (j=0; j<row_nnz; j++)  {
710          icols[j] = permutation[i+icols[j]]-i;
711       }
712       ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
713    }
714    PetscFunctionReturn(0);
715 }
716 
717 /*
718   spbas_apply_reordering:
719     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
720 */
721 #undef __FUNCT__
722 #define __FUNCT__ "spbas_apply_reordering"
723 PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
724 {
725    PetscErrorCode ierr;
726    PetscFunctionBegin;
727    ierr = spbas_apply_reordering_rows( matrix_A, inv_perm);CHKERRQ(ierr);
728    ierr = spbas_apply_reordering_cols( matrix_A, permutation);CHKERRQ(ierr);
729    PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "spbas_pattern_only"
734 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
735 {
736    spbas_matrix   retval;
737    PetscInt       i, j, i0, r_nnz;
738    PetscErrorCode ierr;
739 
740    PetscFunctionBegin;
741 
742    /* Copy input values */
743    retval.nrows = nrows;
744    retval.ncols = ncols;
745    retval.nnz   = ai[nrows];
746 
747    retval.block_data   = PETSC_TRUE;
748    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
749 
750    /* Allocate output matrix */
751    ierr =  spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
752    for (i=0; i<nrows; i++)  {retval.row_nnz[i] = ai[i+1]-ai[i];}
753    ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
754    /* Copy the structure */
755    for (i = 0; i<retval.nrows; i++)  {
756       i0 = ai[i];
757       r_nnz = ai[i+1]-i0;
758 
759       for (j=0; j<r_nnz; j++) {
760          retval.icols[i][j]  = aj[i0+j]-i;
761       }
762    }
763    *result = retval;
764    PetscFunctionReturn(0);
765 }
766 
767 
768 /*
769    spbas_mark_row_power:
770       Mark the columns in row 'row' which are nonzero in
771           matrix^2log(marker).
772 */
773 #undef __FUNCT__
774 #define __FUNCT__ "spbas_mark_row_power"
775 PetscErrorCode spbas_mark_row_power(
776 				    PetscInt *iwork,             /* marker-vector */
777 				    PetscInt row,                /* row for which the columns are marked */
778 				    spbas_matrix * in_matrix, /* matrix for which the power is being  calculated */
779 				    PetscInt marker,             /* marker-value: 2^power */
780 				    PetscInt minmrk,            /* lower bound for marked points */
781 				    PetscInt maxmrk)            /* upper bound for marked points */
782 {
783    PetscErrorCode ierr;
784    PetscInt       i,j, nnz;
785 
786    PetscFunctionBegin;
787    nnz = in_matrix->row_nnz[row];
788 
789    /* For higher powers, call this function recursively */
790    if (marker>1) {
791       for (i=0; i<nnz;i++) {
792          j = row + in_matrix->icols[row][i];
793          if (minmrk<=j && j<maxmrk && iwork[j] < marker ) {
794             ierr =  spbas_mark_row_power( iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
795             iwork[j] |= marker;
796          }
797       }
798    } else  {
799      /*  Mark the columns reached */
800       for (i=0; i<nnz;i++)  {
801          j = row + in_matrix->icols[row][i];
802          if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; }
803       }
804    }
805    PetscFunctionReturn(0);
806 }
807 
808 
809 /*
810    spbas_power
811       Calculate sparseness patterns for incomplete Cholesky decompositions
812       of a given order: (almost) all nonzeros of the matrix^(order+1) which
813       are inside the band width are found and stored in the output sparseness
814       pattern.
815 */
816 #undef __FUNCT__
817 #define __FUNCT__ "spbas_power"
818 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
819 {
820    spbas_matrix   retval;
821    PetscInt       nrows = in_matrix.nrows;
822    PetscInt       ncols = in_matrix.ncols;
823    PetscInt       i, j, kend;
824    PetscInt       nnz, inz;
825    PetscInt       *iwork;
826    PetscInt       marker;
827    PetscInt       maxmrk=0;
828    PetscErrorCode ierr;
829 
830    PetscFunctionBegin;
831    if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
832    if (ncols != nrows) SETERRQ( PETSC_ERR_ARG_INCOMP, "Dimension error\n");
833    if (in_matrix.values) SETERRQ( PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
834    if (power<=0) SETERRQ( PETSC_ERR_SUP_SYS, "Power must be 1 or up");
835 
836    /* Copy input values*/
837    retval.nrows = ncols;
838    retval.ncols = nrows;
839    retval.nnz   = 0;
840    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
841    retval.block_data = PETSC_FALSE;
842 
843    /* Allocate sparseness pattern */
844    ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
845 
846    /* Allocate marker array */
847    ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork);CHKERRQ(ierr);
848 
849    /* Erase the pattern for this row */
850    ierr = PetscMemzero( (void *) iwork, retval.nrows*sizeof(PetscInt));CHKERRQ(ierr);
851 
852    /* Calculate marker values */
853    marker = 1; for (i=1; i<power; i++) {marker*=2;}
854 
855    for (i=0; i<nrows; i++)  {
856      /* Calculate the pattern for each row */
857 
858       nnz    = in_matrix.row_nnz[i];
859       kend   = i+in_matrix.icols[i][nnz-1];
860       if (maxmrk<=kend) {maxmrk=kend+1;}
861       ierr =  spbas_mark_row_power( iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
862 
863       /* Count the columns*/
864       nnz = 0;
865       for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);}
866 
867       /* Allocate the column indices */
868       retval.row_nnz[i] = nnz;
869       ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]);CHKERRQ(ierr);
870 
871       /* Administrate the column indices */
872       inz = 0;
873       for (j=i; j<maxmrk; j++) {
874          if (iwork[j])  {
875             retval.icols[i][inz] = j-i;
876             inz++;
877             iwork[j]=0;
878          }
879       }
880       retval.nnz += nnz;
881    };
882    ierr = PetscFree(iwork);CHKERRQ(ierr);
883    *result = retval;
884    PetscFunctionReturn(0);
885 }
886 
887 
888 
889 /*
890    spbas_keep_upper:
891       remove the lower part of the matrix: keep the upper part
892 */
893 #undef __FUNCT__
894 #define __FUNCT__ "spbas_keep_upper"
895 PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix)
896 {
897    PetscInt i, j;
898    PetscInt jstart;
899 
900    PetscFunctionBegin;
901    if (inout_matrix->block_data) SETERRQ( PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
902    for (i=0; i<inout_matrix->nrows; i++)  {
903        for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++){}
904        if (jstart>0) {
905           for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
906              inout_matrix->icols[i][j] =  inout_matrix->icols[i][j+jstart];
907           }
908 
909           if (inout_matrix->values) {
910              for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
911                 inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
912              }
913           }
914 
915           inout_matrix->row_nnz[i] -= jstart;
916 
917           inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
918 
919           if (inout_matrix->values) {
920              inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
921           }
922           inout_matrix->nnz -= jstart;
923        }
924    }
925    PetscFunctionReturn(0);
926 }
927 
928