xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision b7c853c42524e99073a4955bd48eeb29623ae6ef)
1 #include "petsc.h"
2 #include "../src/mat/impls/aij/seq/aij.h"
3 #include "../src/mat/impls/aij/seq/bas/spbas.h"
4 
5 /*MC
6   MAT_SOLVER_BAS -  Provides ICC(k) with drop tolerance
7 
8   Works with MATAIJ  matrices
9 
10   Options Database Keys:
11 + -pc_factor_levels <l>
12 - -pc_factor_drop_tolerance
13 
14   Level: beginner
15 
16    Contributed by: Bas van 't Hof
17 
18 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage, PCFactorSetLevels(), PCFactorSetDropTolerance()
19 
20 M*/
21 
22 /*
23   spbas_memory_requirement:
24     Calculate the number of bytes needed to store tha matrix
25 */
26 #undef __FUNCT__
27 #define __FUNCT__ "spbas_memory_requirement"
28 long int spbas_memory_requirement( spbas_matrix matrix)
29 {
30    long int memreq = 6 * sizeof(PetscInt)         + /* nrows, ncols, nnz, n_alloc_icol,
31 						       n_alloc_val, col_idx_type */
32      sizeof(PetscTruth)                 + /* block_data */
33      sizeof(PetscScalar**)        + /* values */
34      sizeof(PetscScalar*)         + /* alloc_val */
35      2 * sizeof(int**)            + /* icols, icols0 */
36      2 * sizeof(PetscInt*)             + /* row_nnz, alloc_icol */
37      matrix.nrows * sizeof(PetscInt)   + /* row_nnz[*] */
38      matrix.nrows * sizeof(PetscInt*);   /* icols[*] */
39 
40    /* icol0[*] */
41    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) { memreq += matrix.nrows * sizeof(PetscInt); }
42 
43    /* icols[*][*] */
44    if (matrix.block_data) { memreq += matrix.n_alloc_icol * sizeof(PetscInt); }
45    else { memreq += matrix.nnz * sizeof(PetscInt); }
46 
47    if (matrix.values) {
48      memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
49      /* values[*][*] */
50        if (matrix.block_data) { memreq += matrix.n_alloc_val  * sizeof(PetscScalar); }
51        else { memreq += matrix.nnz * sizeof(PetscScalar); }
52    }
53    return memreq;
54 }
55 
56 /*
57   spbas_allocate_pattern:
58     allocate the pattern arrays row_nnz, icols and optionally values
59 */
60 #undef __FUNCT__
61 #define __FUNCT__ "spbas_allocate_pattern"
62 PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values)
63 {
64    PetscErrorCode ierr;
65    PetscInt       nrows = result->nrows;
66    PetscInt       col_idx_type = result->col_idx_type;
67 
68    PetscFunctionBegin;
69    /* Allocate sparseness pattern */
70    ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr);
71    ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr);
72 
73    /* If offsets are given wrt an array, create array */
74    if (col_idx_type == SPBAS_OFFSET_ARRAY) {
75       ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr);
76    } else  {
77       result->icol0 = PETSC_NULL;
78    }
79 
80    /* If values are given, allocate values array */
81    if (do_values)  {
82       ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr);
83    } else {
84       result->values = PETSC_NULL;
85    }
86    PetscFunctionReturn(0);
87 }
88 
89 /*
90 spbas_allocate_data:
91    in case of block_data:
92        Allocate the data arrays alloc_icol and optionally alloc_val,
93        set appropriate pointers from icols and values;
94    in case of !block_data:
95        Allocate the arrays icols[i] and optionally values[i]
96 */
97 #undef __FUNCT__
98 #define __FUNCT__ "spbas_allocate_data"
99 PetscErrorCode spbas_allocate_data( spbas_matrix * result)
100 {
101    PetscInt       i;
102    PetscInt       nnz   = result->nnz;
103    PetscInt       nrows = result->nrows;
104    PetscInt       r_nnz;
105    PetscErrorCode ierr;
106    PetscTruth     do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE;
107    PetscTruth     block_data = result->block_data;
108 
109    PetscFunctionBegin;
110    if (block_data) {
111      /* Allocate the column number array and point to it */
112       result->n_alloc_icol = nnz;
113       ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr);
114       result->icols[0]= result->alloc_icol;
115       for (i=1; i<nrows; i++)  {
116           result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
117       }
118 
119       /* Allocate the value array and point to it */
120       if (do_values)  {
121          result->n_alloc_val= nnz;
122          ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
123          result->values[0]= result->alloc_val;
124          for (i=1; i<nrows; i++) {
125              result->values[i] = result->values[i-1] + result->row_nnz[i-1];
126          }
127       }
128    } else {
129       for (i=0; i<nrows; i++)   {
130          r_nnz = result->row_nnz[i];
131          ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr);
132       }
133       if (do_values) {
134          for (i=0; i<nrows; i++)  {
135             r_nnz = result->row_nnz[i];
136             ierr = PetscMalloc(r_nnz*sizeof(PetscScalar), &result->values[i]);CHKERRQ(ierr);
137          }
138       }
139    }
140    PetscFunctionReturn(0);
141 }
142 
143 /*
144   spbas_row_order_icol
145      determine if row i1 should come
146        + before row i2 in the sorted rows (return -1),
147        + after (return 1)
148        + is identical (return 0).
149 */
150 #undef __FUNCT__
151 #define __FUNCT__ "spbas_row_order_icol"
152 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
153 {
154    PetscInt j;
155    PetscInt nnz1 = irow_in[i1+1] - irow_in[i1];
156    PetscInt nnz2 = irow_in[i2+1] - irow_in[i2];
157    PetscInt * icol1 = &icol_in[irow_in[i1]];
158    PetscInt * icol2 = &icol_in[irow_in[i2]];
159 
160    if (nnz1<nnz2) {return -1;}
161    if (nnz1>nnz2) {return 1;}
162 
163    if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
164       for (j=0; j<nnz1; j++) {
165          if (icol1[j]< icol2[j]) {return -1;}
166          if (icol1[j]> icol2[j]) {return 1;}
167       }
168    } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
169       for (j=0; j<nnz1; j++) {
170          if (icol1[j]-i1< icol2[j]-i2) {return -1;}
171          if (icol1[j]-i1> icol2[j]-i2) {return 1;}
172       }
173    } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
174       for (j=1; j<nnz1; j++) {
175          if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;}
176          if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;}
177       }
178    }
179    return 0;
180 }
181 
182 /*
183   spbas_mergesort_icols:
184     return a sorting of the rows in which identical sparseness patterns are
185     next to each other
186 */
187 #undef __FUNCT__
188 #define __FUNCT__ "spbas_mergesort_icols"
189 PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
190 {
191    PetscErrorCode ierr;
192    PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
193    PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
194    PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
195    PetscInt       *ialloc;     /* Allocated arrays */
196    PetscInt       *iswap;      /* auxiliary pointers for swapping */
197    PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
198    PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
199 
200    PetscFunctionBegin;
201 
202    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
203 
204    ihlp1 = ialloc;
205    ihlp2 = isort;
206 
207    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
208    for (istep=1; istep<nrows; istep*=2) {
209      /*
210        Combine sorted parts
211             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
212        of ihlp2 and vhlp2
213 
214        into one sorted part
215             istart:istart+2*istep-1
216        of ihlp1 and vhlp1
217      */
218       for (istart=0; istart<nrows; istart+=2*istep) {
219 
220 	/* Set counters and bound array part endings */
221          i1=istart;        i1end = i1+istep;  if (i1end>nrows) {i1end=nrows;}
222          i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) {i2end=nrows;}
223 
224          /* Merge the two array parts */
225          for (i=istart; i<i2end; i++)  {
226             if (i1<i1end && i2<i2end && spbas_row_order_icol( ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
227                 ihlp1[i] = ihlp2[i1];
228                 i1++;
229             } else if (i2<i2end ) {
230                 ihlp1[i] = ihlp2[i2];
231                 i2++;
232             } else {
233                 ihlp1[i] = ihlp2[i1];
234                 i1++;
235             }
236          }
237       }
238 
239       /* Swap the two array sets */
240       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
241    }
242 
243    /* Copy one more time in case the sorted arrays are the temporary ones */
244    if (ihlp2 != isort) {
245       for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; }
246    }
247    ierr = PetscFree(ialloc);CHKERRQ(ierr);
248    PetscFunctionReturn(0);
249 }
250 
251 
252 
253 /*
254   spbas_compress_pattern:
255      calculate a compressed sparseness pattern for a sparseness pattern
256      given in compressed row storage. The compressed sparseness pattern may
257      require (much) less memory.
258 */
259 #undef __FUNCT__
260 #define __FUNCT__ "spbas_compress_pattern"
261 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
262 {
263    PetscInt         nnz = irow_in[nrows];
264    long int         mem_orig = (nrows + nnz) * sizeof(PetscInt);
265    long int         mem_compressed;
266    PetscErrorCode   ierr;
267    PetscInt         *isort;
268    PetscInt         *icols;
269    PetscInt         row_nnz;
270    PetscInt         *ipoint;
271    PetscTruth       *used;
272    PetscInt         ptr;
273    PetscInt         i,j;
274    const PetscTruth no_values = PETSC_FALSE;
275 
276    PetscFunctionBegin;
277    /* Allocate the structure of the new matrix */
278    B->nrows = nrows;
279    B->ncols = ncols;
280    B->nnz   = nnz;
281    B->col_idx_type= col_idx_type;
282    B->block_data = PETSC_TRUE;
283    ierr = spbas_allocate_pattern( B, no_values);CHKERRQ(ierr);
284 
285    /* When using an offset array, set it */
286    if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
287       for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];}
288    }
289 
290    /* Allocate the ordering for the rows */
291    ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort);CHKERRQ(ierr);
292    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint);CHKERRQ(ierr);
293    ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used);CHKERRQ(ierr);
294 
295    /*  Initialize the sorting */
296    ierr = PetscMemzero((void*) used, nrows*sizeof(PetscTruth));CHKERRQ(ierr);
297    for (i = 0; i<nrows; i++)  {
298       B->row_nnz[i] = irow_in[i+1]-irow_in[i];
299       isort[i] = i;
300       ipoint[i]= i;
301    }
302 
303    /* Sort the rows so that identical columns will be next to each other */
304    ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
305    ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
306 
307    /* Replace identical rows with the first one in the list */
308    for (i=1; i<nrows; i++) {
309       if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
310          ipoint[isort[i]] = ipoint[isort[i-1]];
311       }
312    }
313 
314    /* Collect the rows which are used*/
315    for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;}
316 
317    /* Calculate needed memory */
318    B->n_alloc_icol = 0;
319    for (i=0; i<nrows; i++)  {
320       if (used[i]) {B->n_alloc_icol += B->row_nnz[i];}
321    }
322    ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);CHKERRQ(ierr);
323 
324    /* Fill in the diagonal offsets for the rows which store their own data */
325    ptr = 0;
326    for (i=0; i<B->nrows; i++) {
327       if (used[i])  {
328          B->icols[i] = &B->alloc_icol[ptr];
329          icols = &icol_in[irow_in[i]];
330          row_nnz = B->row_nnz[i];
331          if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
332             for (j=0; j<row_nnz; j++) {
333                B->icols[i][j] = icols[j];
334             }
335          } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
336             for (j=0; j<row_nnz; j++) {
337                B->icols[i][j] = icols[j]-i;
338             }
339          } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
340             for (j=0; j<row_nnz; j++) {
341                B->icols[i][j] = icols[j]-icols[0];
342             }
343          }
344          ptr += B->row_nnz[i];
345       }
346    }
347 
348    /* Point to the right places for all data */
349    for (i=0; i<nrows; i++) {
350       B->icols[i] = B->icols[ipoint[i]];
351    }
352    ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
353    ierr = PetscInfo1(PETSC_NULL,"         (%G nonzeros per row)\n",  (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr);
354 
355    ierr=PetscFree(isort);CHKERRQ(ierr);
356    ierr=PetscFree(used);CHKERRQ(ierr);
357    ierr=PetscFree(ipoint);CHKERRQ(ierr);
358 
359    mem_compressed = spbas_memory_requirement( *B );
360    *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
361    PetscFunctionReturn(0);
362 }
363 
364 /*
365    spbas_incomplete_cholesky
366        Incomplete Cholesky decomposition
367 */
368 #include "../src/mat/impls/aij/seq/bas/spbas_cholesky.h"
369 
370 /*
371   spbas_delete : de-allocate the arrays owned by this matrix
372 */
373 #undef __FUNCT__
374 #define __FUNCT__ "spbas_delete"
375 PetscErrorCode spbas_delete(spbas_matrix matrix)
376 {
377    PetscInt       i;
378    PetscErrorCode ierr;
379 
380    PetscFunctionBegin;
381    if (matrix.block_data) {
382       ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr)
383       if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
384    } else  {
385      for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
386      ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
387      if (matrix.values) {
388          for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
389       }
390    }
391 
392    ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
393    ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
394    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
395    if (matrix.values)  {ierr=PetscFree(matrix.values);CHKERRQ(ierr);}
396    PetscFunctionReturn(0);
397 }
398 
399 /*
400 spbas_matrix_to_crs:
401    Convert an spbas_matrix to compessed row storage
402 */
403 #undef __FUNCT__
404 #define __FUNCT__ "spbas_matrix_to_crs"
405 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
406 {
407    PetscInt       nrows = matrix_A.nrows;
408    PetscInt       nnz   = matrix_A.nnz;
409    PetscInt       i,j,r_nnz,i0;
410    PetscInt       *irow;
411    PetscInt       *icol;
412    PetscInt       *icol_A;
413    MatScalar      *val;
414    PetscScalar    *val_A;
415    PetscInt       col_idx_type = matrix_A.col_idx_type;
416    PetscTruth     do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
417    PetscErrorCode ierr;
418 
419    PetscFunctionBegin;
420    ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow);CHKERRQ(ierr);
421    ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol);CHKERRQ(ierr);
422    *icol_out = icol;
423    *irow_out=irow;
424    if (do_values)  {
425       ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val);CHKERRQ(ierr);
426       *val_out = val; *icol_out = icol; *irow_out=irow;
427    }
428 
429    irow[0]=0;
430    for (i=0; i<nrows; i++) {
431       r_nnz = matrix_A.row_nnz[i];
432       i0 = irow[i];
433       irow[i+1] = i0 + r_nnz;
434       icol_A = matrix_A.icols[i];
435 
436       if (do_values) {
437           val_A = matrix_A.values[i];
438           for (j=0; j<r_nnz; j++) {
439              icol[i0+j] = icol_A[j];
440              val[i0+j]  = val_A[j];
441           }
442       } else {
443           for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; }
444       }
445 
446       if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
447          for (j=0; j<r_nnz; j++) { icol[i0+j] += i; }
448       }
449       else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
450          i0 = matrix_A.icol0[i];
451          for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; }
452       }
453    }
454    PetscFunctionReturn(0);
455 }
456 
457 
458 /*
459     spbas_transpose
460        return the transpose of a matrix
461 */
462 #undef __FUNCT__
463 #define __FUNCT__ "spbas_transpose"
464 PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result)
465 {
466    PetscInt       col_idx_type = in_matrix.col_idx_type;
467    PetscInt       nnz   = in_matrix.nnz;
468    PetscInt       ncols = in_matrix.nrows;
469    PetscInt       nrows = in_matrix.ncols;
470    PetscInt       i,j,k;
471    PetscInt       r_nnz;
472    PetscInt       *irow;
473    PetscInt       icol0;
474    PetscScalar    * val;
475    PetscErrorCode ierr;
476 
477    PetscFunctionBegin;
478 
479    /* Copy input values */
480    result->nrows        = nrows;
481    result->ncols        = ncols;
482    result->nnz          = nnz;
483    result->col_idx_type = SPBAS_COLUMN_NUMBERS;
484    result->block_data   = PETSC_TRUE;
485 
486    /* Allocate sparseness pattern */
487    ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
488 
489    /*  Count the number of nonzeros in each row */
490    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
491 
492    for (i=0; i<ncols; i++) {
493       r_nnz = in_matrix.row_nnz[i];
494       irow  = in_matrix.icols[i];
495       if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
496          for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; }
497       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
498          for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; }
499       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
500          icol0=in_matrix.icol0[i];
501          for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; }
502       }
503    }
504 
505    /* Set the pointers to the data */
506    ierr = spbas_allocate_data(result);CHKERRQ(ierr);
507 
508    /* Reset the number of nonzeros in each row */
509    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
510 
511    /* Fill the data arrays */
512    if (in_matrix.values) {
513       for (i=0; i<ncols; i++) {
514          r_nnz = in_matrix.row_nnz[i];
515          irow  = in_matrix.icols[i];
516          val   = in_matrix.values[i];
517 
518          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
519          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
520          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
521                  {icol0=in_matrix.icol0[i];}
522          for (j=0; j<r_nnz; j++)  {
523             k = icol0 + irow[j];
524             result->icols[k][result->row_nnz[k]]  = i;
525             result->values[k][result->row_nnz[k]] = val[j];
526             result->row_nnz[k]++;
527          }
528       }
529    }  else {
530       for (i=0; i<ncols; i++) {
531          r_nnz = in_matrix.row_nnz[i];
532          irow  = in_matrix.icols[i];
533 
534          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
535          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
536          else if (col_idx_type == SPBAS_OFFSET_ARRAY)     {icol0=in_matrix.icol0[i];}
537 
538          for (j=0; j<r_nnz; j++) {
539             k = icol0 + irow[j];
540             result->icols[k][result->row_nnz[k]]  = i;
541             result->row_nnz[k]++;
542          }
543       }
544    }
545    PetscFunctionReturn(0);
546 }
547 
548 /*
549    spbas_mergesort
550 
551       mergesort for an array of intergers and an array of associated
552       reals
553 
554       on output, icol[0..nnz-1] is increasing;
555                   val[0..nnz-1] has undergone the same permutation as icol
556 
557       NB: val may be PETSC_NULL: in that case, only the integers are sorted
558 
559 */
560 #undef __FUNCT__
561 #define __FUNCT__ "spbas_mergesort"
562 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
563 {
564   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
565   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
566   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
567   PetscInt       *ialloc;     /* Allocated arrays */
568   PetscScalar    *valloc=PETSC_NULL;
569   PetscInt       *iswap;      /* auxiliary pointers for swapping */
570   PetscScalar    *vswap;
571   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
572   PetscScalar    *vhlp1=PETSC_NULL;  /* (arrays under construction) */
573   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
574   PetscScalar    *vhlp2=PETSC_NULL;
575   PetscErrorCode ierr;
576 
577    ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
578    ihlp1 = ialloc;
579    ihlp2 = icol;
580 
581    if (val) {
582       ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc);CHKERRQ(ierr);
583       vhlp1 = valloc;
584       vhlp2 = val;
585    }
586 
587 
588    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
589    for (istep=1; istep<nnz; istep*=2) {
590      /*
591        Combine sorted parts
592             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
593        of ihlp2 and vhlp2
594 
595        into one sorted part
596             istart:istart+2*istep-1
597        of ihlp1 and vhlp1
598      */
599       for (istart=0; istart<nnz; istart+=2*istep) {
600 
601 	/* Set counters and bound array part endings */
602          i1=istart;        i1end = i1+istep;  if (i1end>nnz) {i1end=nnz;}
603          i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) {i2end=nnz;}
604 
605          /* Merge the two array parts */
606          if (val) {
607             for (i=istart; i<i2end; i++) {
608                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
609                    ihlp1[i] = ihlp2[i1];
610                    vhlp1[i] = vhlp2[i1];
611                    i1++;
612                } else if (i2<i2end ){
613                    ihlp1[i] = ihlp2[i2];
614                    vhlp1[i] = vhlp2[i2];
615                    i2++;
616                } else {
617                    ihlp1[i] = ihlp2[i1];
618                    vhlp1[i] = vhlp2[i1];
619                    i1++;
620                }
621             }
622          } else {
623             for (i=istart; i<i2end; i++) {
624                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
625                    ihlp1[i] = ihlp2[i1];
626                    i1++;
627                } else if (i2<i2end ) {
628                    ihlp1[i] = ihlp2[i2];
629                    i2++;
630                } else {
631                    ihlp1[i] = ihlp2[i1];
632                    i1++;
633                }
634             }
635          }
636       }
637 
638       /* Swap the two array sets */
639       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
640       vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
641    }
642 
643    /* Copy one more time in case the sorted arrays are the temporary ones */
644    if (ihlp2 != icol) {
645       for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; }
646       if (val) {
647          for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; }
648       }
649    }
650 
651    ierr = PetscFree(ialloc);CHKERRQ(ierr);
652    if(val){ierr = PetscFree(valloc);CHKERRQ(ierr);}
653    PetscFunctionReturn(0);
654 }
655 
656 /*
657   spbas_apply_reordering_rows:
658     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
659 */
660 #undef __FUNCT__
661 #define __FUNCT__ "spbas_apply_reordering_rows"
662 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
663 {
664    PetscInt       i,j,ip;
665    PetscInt       nrows=matrix_A->nrows;
666    PetscInt       * row_nnz;
667    PetscInt       **icols;
668    PetscTruth     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
669    PetscScalar    **vals=PETSC_NULL;
670    PetscErrorCode ierr;
671 
672    PetscFunctionBegin;
673    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
674 
675    if (do_values)  {
676      ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals);CHKERRQ(ierr);
677    }
678    ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz);CHKERRQ(ierr);
679    ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols);CHKERRQ(ierr);
680 
681    for (i=0; i<nrows;i++) {
682       ip = permutation[i];
683       if (do_values) {vals[i]    = matrix_A->values[ip];}
684       icols[i]   = matrix_A->icols[ip];
685       row_nnz[i] = matrix_A->row_nnz[ip];
686       for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; }
687    }
688 
689    if (do_values){ ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
690    ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
691    ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
692 
693    if (do_values) { matrix_A->values  = vals; }
694    matrix_A->icols   = icols;
695    matrix_A->row_nnz = row_nnz;
696 
697    PetscFunctionReturn(0);
698 }
699 
700 
701 /*
702   spbas_apply_reordering_cols:
703     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
704 */
705 #undef __FUNCT__
706 #define __FUNCT__ "spbas_apply_reordering_cols"
707 PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation)
708 {
709    PetscInt    i,j;
710    PetscInt    nrows=matrix_A->nrows;
711    PetscInt    row_nnz;
712    PetscInt    *icols;
713    PetscTruth  do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
714    PetscScalar *vals=PETSC_NULL;
715    PetscErrorCode ierr;
716 
717    PetscFunctionBegin;
718    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
719 
720    for (i=0; i<nrows;i++) {
721       icols   = matrix_A->icols[i];
722       row_nnz = matrix_A->row_nnz[i];
723       if (do_values) { vals = matrix_A->values[i]; }
724 
725       for (j=0; j<row_nnz; j++)  {
726          icols[j] = permutation[i+icols[j]]-i;
727       }
728       ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
729    }
730    PetscFunctionReturn(0);
731 }
732 
733 /*
734   spbas_apply_reordering:
735     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
736 */
737 #undef __FUNCT__
738 #define __FUNCT__ "spbas_apply_reordering"
739 PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
740 {
741    PetscErrorCode ierr;
742    PetscFunctionBegin;
743    ierr = spbas_apply_reordering_rows( matrix_A, inv_perm);CHKERRQ(ierr);
744    ierr = spbas_apply_reordering_cols( matrix_A, permutation);CHKERRQ(ierr);
745    PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "spbas_pattern_only"
750 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
751 {
752    spbas_matrix   retval;
753    PetscInt       i, j, i0, r_nnz;
754    PetscErrorCode ierr;
755 
756    PetscFunctionBegin;
757 
758    /* Copy input values */
759    retval.nrows = nrows;
760    retval.ncols = ncols;
761    retval.nnz   = ai[nrows];
762 
763    retval.block_data   = PETSC_TRUE;
764    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
765 
766    /* Allocate output matrix */
767    ierr =  spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
768    for (i=0; i<nrows; i++)  {retval.row_nnz[i] = ai[i+1]-ai[i];}
769    ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
770    /* Copy the structure */
771    for (i = 0; i<retval.nrows; i++)  {
772       i0 = ai[i];
773       r_nnz = ai[i+1]-i0;
774 
775       for (j=0; j<r_nnz; j++) {
776          retval.icols[i][j]  = aj[i0+j]-i;
777       }
778    }
779    *result = retval;
780    PetscFunctionReturn(0);
781 }
782 
783 
784 /*
785    spbas_mark_row_power:
786       Mark the columns in row 'row' which are nonzero in
787           matrix^2log(marker).
788 */
789 #undef __FUNCT__
790 #define __FUNCT__ "spbas_mark_row_power"
791 PetscErrorCode spbas_mark_row_power(
792 				    PetscInt *iwork,             /* marker-vector */
793 				    PetscInt row,                /* row for which the columns are marked */
794 				    spbas_matrix * in_matrix, /* matrix for which the power is being  calculated */
795 				    PetscInt marker,             /* marker-value: 2^power */
796 				    PetscInt minmrk,            /* lower bound for marked points */
797 				    PetscInt maxmrk)            /* upper bound for marked points */
798 {
799    PetscErrorCode ierr;
800    PetscInt       i,j, nnz;
801 
802    PetscFunctionBegin;
803    nnz = in_matrix->row_nnz[row];
804 
805    /* For higher powers, call this function recursively */
806    if (marker>1) {
807       for (i=0; i<nnz;i++) {
808          j = row + in_matrix->icols[row][i];
809          if (minmrk<=j && j<maxmrk && iwork[j] < marker ) {
810             ierr =  spbas_mark_row_power( iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
811             iwork[j] |= marker;
812          }
813       }
814    } else  {
815      /*  Mark the columns reached */
816       for (i=0; i<nnz;i++)  {
817          j = row + in_matrix->icols[row][i];
818          if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; }
819       }
820    }
821    PetscFunctionReturn(0);
822 }
823 
824 
825 /*
826    spbas_power
827       Calculate sparseness patterns for incomplete Cholesky decompositions
828       of a given order: (almost) all nonzeros of the matrix^(order+1) which
829       are inside the band width are found and stored in the output sparseness
830       pattern.
831 */
832 #undef __FUNCT__
833 #define __FUNCT__ "spbas_power"
834 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
835 {
836    spbas_matrix   retval;
837    PetscInt       nrows = in_matrix.nrows;
838    PetscInt       ncols = in_matrix.ncols;
839    PetscInt       i, j, kend;
840    PetscInt       nnz, inz;
841    PetscInt       *iwork;
842    PetscInt       marker;
843    PetscInt       maxmrk=0;
844    PetscErrorCode ierr;
845 
846    PetscFunctionBegin;
847    if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ( PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
848    if (ncols != nrows) SETERRQ( PETSC_ERR_ARG_INCOMP, "Dimension error\n");
849    if (in_matrix.values) SETERRQ( PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
850    if (power<=0) SETERRQ( PETSC_ERR_SUP_SYS, "Power must be 1 or up");
851 
852    /* Copy input values*/
853    retval.nrows = ncols;
854    retval.ncols = nrows;
855    retval.nnz   = 0;
856    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
857    retval.block_data = PETSC_FALSE;
858 
859    /* Allocate sparseness pattern */
860    ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
861 
862    /* Allocate marker array */
863    ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork);CHKERRQ(ierr);
864 
865    /* Erase the pattern for this row */
866    ierr = PetscMemzero( (void *) iwork, retval.nrows*sizeof(PetscInt));CHKERRQ(ierr);
867 
868    /* Calculate marker values */
869    marker = 1; for (i=1; i<power; i++) {marker*=2;}
870 
871    for (i=0; i<nrows; i++)  {
872      /* Calculate the pattern for each row */
873 
874       nnz    = in_matrix.row_nnz[i];
875       kend   = i+in_matrix.icols[i][nnz-1];
876       if (maxmrk<=kend) {maxmrk=kend+1;}
877       ierr =  spbas_mark_row_power( iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
878 
879       /* Count the columns*/
880       nnz = 0;
881       for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);}
882 
883       /* Allocate the column indices */
884       retval.row_nnz[i] = nnz;
885       ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]);CHKERRQ(ierr);
886 
887       /* Administrate the column indices */
888       inz = 0;
889       for (j=i; j<maxmrk; j++) {
890          if (iwork[j])  {
891             retval.icols[i][inz] = j-i;
892             inz++;
893             iwork[j]=0;
894          }
895       }
896       retval.nnz += nnz;
897    };
898    ierr = PetscFree(iwork);CHKERRQ(ierr);
899    *result = retval;
900    PetscFunctionReturn(0);
901 }
902 
903 
904 
905 /*
906    spbas_keep_upper:
907       remove the lower part of the matrix: keep the upper part
908 */
909 #undef __FUNCT__
910 #define __FUNCT__ "spbas_keep_upper"
911 PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix)
912 {
913    PetscInt i, j;
914    PetscInt jstart;
915 
916    PetscFunctionBegin;
917    if (inout_matrix->block_data) SETERRQ( PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
918    for (i=0; i<inout_matrix->nrows; i++)  {
919        for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++){}
920        if (jstart>0) {
921           for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
922              inout_matrix->icols[i][j] =  inout_matrix->icols[i][j+jstart];
923           }
924 
925           if (inout_matrix->values) {
926              for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
927                 inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
928              }
929           }
930 
931           inout_matrix->row_nnz[i] -= jstart;
932 
933           inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
934 
935           if (inout_matrix->values) {
936              inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
937           }
938           inout_matrix->nnz -= jstart;
939        }
940    }
941    PetscFunctionReturn(0);
942 }
943 
944