xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision b6bfaf9a0a178e626c17e7b8aafd5e3bedcd9a6d)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l>
11 - -pc_factor_drop_tolerance
12 
13   Level: beginner
14 
15    Contributed by: Bas van 't Hof
16 
17 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage, PCFactorSetLevels(), PCFactorSetDropTolerance()
18 
19 M*/
20 
21 /*
22   spbas_memory_requirement:
23     Calculate the number of bytes needed to store tha matrix
24 */
25 #undef __FUNCT__
26 #define __FUNCT__ "spbas_memory_requirement"
27 long int spbas_memory_requirement(spbas_matrix matrix)
28 {
29   long int memreq = 6 * sizeof(PetscInt)  + /* nrows, ncols, nnz, n_alloc_icol,
30                                                n_alloc_val, col_idx_type */
31     sizeof(PetscBool)                     + /* block_data */
32     sizeof(PetscScalar**)                 + /* values */
33     sizeof(PetscScalar*)                  + /* alloc_val */
34     2 * sizeof(int**)                     + /* icols, icols0 */
35     2 * sizeof(PetscInt*)                 + /* row_nnz, alloc_icol */
36     matrix.nrows * sizeof(PetscInt)       + /* row_nnz[*] */
37     matrix.nrows * sizeof(PetscInt*);       /* icols[*] */
38 
39   /* icol0[*] */
40   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) { memreq += matrix.nrows * sizeof(PetscInt); }
41 
42   /* icols[*][*] */
43   if (matrix.block_data) { memreq += matrix.n_alloc_icol * sizeof(PetscInt); }
44   else { memreq += matrix.nnz * sizeof(PetscInt); }
45 
46   if (matrix.values) {
47     memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
48     /* values[*][*] */
49       if (matrix.block_data) { memreq += matrix.n_alloc_val  * sizeof(PetscScalar); }
50       else { memreq += matrix.nnz * sizeof(PetscScalar); }
51   }
52   return memreq;
53 }
54 
55 /*
56   spbas_allocate_pattern:
57     allocate the pattern arrays row_nnz, icols and optionally values
58 */
59 #undef __FUNCT__
60 #define __FUNCT__ "spbas_allocate_pattern"
61 PetscErrorCode spbas_allocate_pattern(spbas_matrix * result, PetscBool  do_values)
62 {
63   PetscErrorCode ierr;
64   PetscInt       nrows = result->nrows;
65   PetscInt       col_idx_type = result->col_idx_type;
66 
67   PetscFunctionBegin;
68   /* Allocate sparseness pattern */
69   ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr);
70   ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr);
71 
72   /* If offsets are given wrt an array, create array */
73   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
74     ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr);
75   } else  {
76     result->icol0 = PETSC_NULL;
77   }
78 
79   /* If values are given, allocate values array */
80   if (do_values)  {
81     ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr);
82   } else {
83     result->values = PETSC_NULL;
84   }
85   PetscFunctionReturn(0);
86 }
87 
88 /*
89 spbas_allocate_data:
90    in case of block_data:
91        Allocate the data arrays alloc_icol and optionally alloc_val,
92        set appropriate pointers from icols and values;
93    in case of !block_data:
94        Allocate the arrays icols[i] and optionally values[i]
95 */
96 #undef __FUNCT__
97 #define __FUNCT__ "spbas_allocate_data"
98 PetscErrorCode spbas_allocate_data(spbas_matrix * result)
99 {
100   PetscInt       i;
101   PetscInt       nnz   = result->nnz;
102   PetscInt       nrows = result->nrows;
103   PetscInt       r_nnz;
104   PetscErrorCode ierr;
105   PetscBool      do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE;
106   PetscBool      block_data = result->block_data;
107 
108   PetscFunctionBegin;
109   if (block_data) {
110     /* Allocate the column number array and point to it */
111     result->n_alloc_icol = nnz;
112     ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr);
113     result->icols[0]= result->alloc_icol;
114     for (i=1; i<nrows; i++)  {
115       result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
116     }
117 
118     /* Allocate the value array and point to it */
119     if (do_values)  {
120       result->n_alloc_val= nnz;
121       ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
122       result->values[0]= result->alloc_val;
123       for (i=1; i<nrows; i++) {
124         result->values[i] = result->values[i-1] + result->row_nnz[i-1];
125       }
126     }
127   } else {
128     for (i=0; i<nrows; i++)   {
129       r_nnz = result->row_nnz[i];
130       ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr);
131     }
132     if (do_values) {
133       for (i=0; i<nrows; i++)  {
134         r_nnz = result->row_nnz[i];
135         ierr = PetscMalloc(r_nnz*sizeof(PetscScalar), &result->values[i]);CHKERRQ(ierr);
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 /*
143   spbas_row_order_icol
144      determine if row i1 should come
145        + before row i2 in the sorted rows (return -1),
146        + after (return 1)
147        + is identical (return 0).
148 */
149 #undef __FUNCT__
150 #define __FUNCT__ "spbas_row_order_icol"
151 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
152 {
153   PetscInt j;
154   PetscInt nnz1 = irow_in[i1+1] - irow_in[i1];
155   PetscInt nnz2 = irow_in[i2+1] - irow_in[i2];
156   PetscInt * icol1 = &icol_in[irow_in[i1]];
157   PetscInt * icol2 = &icol_in[irow_in[i2]];
158 
159   if (nnz1<nnz2) {return -1;}
160   if (nnz1>nnz2) {return 1;}
161 
162   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
163     for (j=0; j<nnz1; j++) {
164       if (icol1[j]< icol2[j]) {return -1;}
165       if (icol1[j]> icol2[j]) {return 1;}
166     }
167   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
168     for (j=0; j<nnz1; j++) {
169       if (icol1[j]-i1< icol2[j]-i2) {return -1;}
170       if (icol1[j]-i1> icol2[j]-i2) {return 1;}
171     }
172   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
173     for (j=1; j<nnz1; j++) {
174       if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;}
175       if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;}
176     }
177   }
178   return 0;
179 }
180 
181 /*
182   spbas_mergesort_icols:
183     return a sorting of the rows in which identical sparseness patterns are
184     next to each other
185 */
186 #undef __FUNCT__
187 #define __FUNCT__ "spbas_mergesort_icols"
188 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
189 {
190   PetscErrorCode ierr;
191   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
192   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
193   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
194   PetscInt       *ialloc;     /* Allocated arrays */
195   PetscInt       *iswap;      /* auxiliary pointers for swapping */
196   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
197   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
198 
199   PetscFunctionBegin;
200   ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
201 
202   ihlp1 = ialloc;
203   ihlp2 = isort;
204 
205   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
206   for (istep=1; istep<nrows; istep*=2) {
207     /*
208       Combine sorted parts
209           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
210       of ihlp2 and vhlp2
211 
212       into one sorted part
213           istart:istart+2*istep-1
214       of ihlp1 and vhlp1
215     */
216     for (istart=0; istart<nrows; istart+=2*istep) {
217       /* Set counters and bound array part endings */
218       i1=istart;        i1end = i1+istep;  if (i1end>nrows) {i1end=nrows;}
219       i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) {i2end=nrows;}
220 
221       /* Merge the two array parts */
222       for (i=istart; i<i2end; i++)  {
223         if (i1<i1end && i2<i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
224           ihlp1[i] = ihlp2[i1];
225           i1++;
226         } else if (i2<i2end) {
227           ihlp1[i] = ihlp2[i2];
228           i2++;
229         } else {
230           ihlp1[i] = ihlp2[i1];
231           i1++;
232         }
233       }
234     }
235 
236     /* Swap the two array sets */
237     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
238   }
239 
240   /* Copy one more time in case the sorted arrays are the temporary ones */
241   if (ihlp2 != isort) {
242     for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; }
243   }
244   ierr = PetscFree(ialloc);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 
249 
250 /*
251   spbas_compress_pattern:
252      calculate a compressed sparseness pattern for a sparseness pattern
253      given in compressed row storage. The compressed sparseness pattern may
254      require (much) less memory.
255 */
256 #undef __FUNCT__
257 #define __FUNCT__ "spbas_compress_pattern"
258 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
259 {
260   PetscInt         nnz = irow_in[nrows];
261   long int         mem_orig = (nrows + nnz) * sizeof(PetscInt);
262   long int         mem_compressed;
263   PetscErrorCode   ierr;
264   PetscInt         *isort;
265   PetscInt         *icols;
266   PetscInt         row_nnz;
267   PetscInt         *ipoint;
268   PetscBool        *used;
269   PetscInt         ptr;
270   PetscInt         i,j;
271   const PetscBool  no_values = PETSC_FALSE;
272 
273   PetscFunctionBegin;
274   /* Allocate the structure of the new matrix */
275   B->nrows = nrows;
276   B->ncols = ncols;
277   B->nnz   = nnz;
278   B->col_idx_type= col_idx_type;
279   B->block_data = PETSC_TRUE;
280   ierr = spbas_allocate_pattern(B, no_values);CHKERRQ(ierr);
281 
282   /* When using an offset array, set it */
283   if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
284     for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];}
285   }
286 
287   /* Allocate the ordering for the rows */
288   ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort);CHKERRQ(ierr);
289   ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint);CHKERRQ(ierr);
290   ierr = PetscMalloc(nrows*sizeof(PetscBool),&used);CHKERRQ(ierr);
291 
292   /*  Initialize the sorting */
293   ierr = PetscMemzero((void*) used, nrows*sizeof(PetscBool));CHKERRQ(ierr);
294   for (i = 0; i<nrows; i++)  {
295     B->row_nnz[i] = irow_in[i+1]-irow_in[i];
296     isort[i] = i;
297     ipoint[i]= i;
298   }
299 
300   /* Sort the rows so that identical columns will be next to each other */
301   ierr = spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
302   ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
303 
304   /* Replace identical rows with the first one in the list */
305   for (i=1; i<nrows; i++) {
306     if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
307       ipoint[isort[i]] = ipoint[isort[i-1]];
308     }
309   }
310 
311   /* Collect the rows which are used*/
312   for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;}
313 
314   /* Calculate needed memory */
315   B->n_alloc_icol = 0;
316   for (i=0; i<nrows; i++)  {
317     if (used[i]) {B->n_alloc_icol += B->row_nnz[i];}
318   }
319   ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);CHKERRQ(ierr);
320 
321   /* Fill in the diagonal offsets for the rows which store their own data */
322   ptr = 0;
323   for (i=0; i<B->nrows; i++) {
324     if (used[i])  {
325       B->icols[i] = &B->alloc_icol[ptr];
326       icols = &icol_in[irow_in[i]];
327       row_nnz = B->row_nnz[i];
328       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
329         for (j=0; j<row_nnz; j++) {
330           B->icols[i][j] = icols[j];
331         }
332       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
333         for (j=0; j<row_nnz; j++) {
334           B->icols[i][j] = icols[j]-i;
335         }
336       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
337         for (j=0; j<row_nnz; j++) {
338           B->icols[i][j] = icols[j]-icols[0];
339         }
340       }
341       ptr += B->row_nnz[i];
342     }
343   }
344 
345   /* Point to the right places for all data */
346   for (i=0; i<nrows; i++) {
347     B->icols[i] = B->icols[ipoint[i]];
348   }
349   ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
350   ierr = PetscInfo1(PETSC_NULL,"         (%G nonzeros per row)\n",  (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr);
351 
352   ierr=PetscFree(isort);CHKERRQ(ierr);
353   ierr=PetscFree(used);CHKERRQ(ierr);
354   ierr=PetscFree(ipoint);CHKERRQ(ierr);
355 
356   mem_compressed = spbas_memory_requirement(*B);
357   *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
358   PetscFunctionReturn(0);
359 }
360 
361 /*
362    spbas_incomplete_cholesky
363        Incomplete Cholesky decomposition
364 */
365 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
366 
367 /*
368   spbas_delete : de-allocate the arrays owned by this matrix
369 */
370 #undef __FUNCT__
371 #define __FUNCT__ "spbas_delete"
372 PetscErrorCode spbas_delete(spbas_matrix matrix)
373 {
374   PetscInt       i;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   if (matrix.block_data) {
379     ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr);
380     if (matrix.values) {ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
381   } else  {
382     for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
383     ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
384     if (matrix.values) {
385       for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
386     }
387   }
388 
389   ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
390   ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
391   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
392   ierr=PetscFree(matrix.values);CHKERRQ(ierr);
393   PetscFunctionReturn(0);
394 }
395 
396 /*
397 spbas_matrix_to_crs:
398    Convert an spbas_matrix to compessed row storage
399 */
400 #undef __FUNCT__
401 #define __FUNCT__ "spbas_matrix_to_crs"
402 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
403 {
404   PetscInt       nrows = matrix_A.nrows;
405   PetscInt       nnz   = matrix_A.nnz;
406   PetscInt       i,j,r_nnz,i0;
407   PetscInt       *irow;
408   PetscInt       *icol;
409   PetscInt       *icol_A;
410   MatScalar      *val;
411   PetscScalar    *val_A;
412   PetscInt       col_idx_type = matrix_A.col_idx_type;
413   PetscBool      do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = PetscMalloc(sizeof(PetscInt) * (nrows+1), &irow);CHKERRQ(ierr);
418   ierr = PetscMalloc(sizeof(PetscInt) * nnz, &icol);CHKERRQ(ierr);
419   *icol_out = icol;
420   *irow_out=irow;
421   if (do_values)  {
422   ierr = PetscMalloc(sizeof(MatScalar) * nnz, &val);CHKERRQ(ierr);
423   *val_out = val; *icol_out = icol; *irow_out=irow;
424   }
425 
426   irow[0]=0;
427   for (i=0; i<nrows; i++) {
428     r_nnz = matrix_A.row_nnz[i];
429     i0 = irow[i];
430     irow[i+1] = i0 + r_nnz;
431     icol_A = matrix_A.icols[i];
432 
433     if (do_values) {
434       val_A = matrix_A.values[i];
435       for (j=0; j<r_nnz; j++) {
436           icol[i0+j] = icol_A[j];
437           val[i0+j]  = val_A[j];
438       }
439     } else {
440       for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; }
441     }
442 
443     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
444       for (j=0; j<r_nnz; j++) { icol[i0+j] += i; }
445     }
446     else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
447       i0 = matrix_A.icol0[i];
448       for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; }
449     }
450   }
451   PetscFunctionReturn(0);
452 }
453 
454 
455 /*
456     spbas_transpose
457        return the transpose of a matrix
458 */
459 #undef __FUNCT__
460 #define __FUNCT__ "spbas_transpose"
461 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix * result)
462 {
463   PetscInt       col_idx_type = in_matrix.col_idx_type;
464   PetscInt       nnz   = in_matrix.nnz;
465   PetscInt       ncols = in_matrix.nrows;
466   PetscInt       nrows = in_matrix.ncols;
467   PetscInt       i,j,k;
468   PetscInt       r_nnz;
469   PetscInt       *irow;
470   PetscInt       icol0 = 0;
471   PetscScalar    * val;
472   PetscErrorCode ierr;
473 
474   PetscFunctionBegin;
475   /* Copy input values */
476   result->nrows        = nrows;
477   result->ncols        = ncols;
478   result->nnz          = nnz;
479   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
480   result->block_data   = PETSC_TRUE;
481 
482   /* Allocate sparseness pattern */
483   ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
484 
485   /*  Count the number of nonzeros in each row */
486   for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
487 
488   for (i=0; i<ncols; i++) {
489     r_nnz = in_matrix.row_nnz[i];
490     irow  = in_matrix.icols[i];
491     if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
492       for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; }
493     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
494       for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; }
495     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
496       icol0=in_matrix.icol0[i];
497       for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; }
498     }
499   }
500 
501   /* Set the pointers to the data */
502   ierr = spbas_allocate_data(result);CHKERRQ(ierr);
503 
504   /* Reset the number of nonzeros in each row */
505   for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
506 
507   /* Fill the data arrays */
508   if (in_matrix.values) {
509     for (i=0; i<ncols; i++) {
510       r_nnz = in_matrix.row_nnz[i];
511       irow  = in_matrix.icols[i];
512       val   = in_matrix.values[i];
513 
514       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
515       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
516       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     {icol0=in_matrix.icol0[i];}
517       for (j=0; j<r_nnz; j++)  {
518         k = icol0 + irow[j];
519         result->icols[k][result->row_nnz[k]]  = i;
520         result->values[k][result->row_nnz[k]] = val[j];
521         result->row_nnz[k]++;
522       }
523     }
524   }  else {
525     for (i=0; i<ncols; i++) {
526       r_nnz = in_matrix.row_nnz[i];
527       irow  = in_matrix.icols[i];
528 
529       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
530       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
531       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     {icol0=in_matrix.icol0[i];}
532 
533       for (j=0; j<r_nnz; j++) {
534         k = icol0 + irow[j];
535         result->icols[k][result->row_nnz[k]]  = i;
536         result->row_nnz[k]++;
537       }
538     }
539   }
540   PetscFunctionReturn(0);
541 }
542 
543 /*
544    spbas_mergesort
545 
546       mergesort for an array of intergers and an array of associated
547       reals
548 
549       on output, icol[0..nnz-1] is increasing;
550                   val[0..nnz-1] has undergone the same permutation as icol
551 
552       NB: val may be PETSC_NULL: in that case, only the integers are sorted
553 
554 */
555 #undef __FUNCT__
556 #define __FUNCT__ "spbas_mergesort"
557 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
558 {
559   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
560   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
561   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
562   PetscInt       *ialloc;     /* Allocated arrays */
563   PetscScalar    *valloc=PETSC_NULL;
564   PetscInt       *iswap;      /* auxiliary pointers for swapping */
565   PetscScalar    *vswap;
566   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
567   PetscScalar    *vhlp1=PETSC_NULL;  /* (arrays under construction) */
568   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
569   PetscScalar    *vhlp2=PETSC_NULL;
570   PetscErrorCode ierr;
571 
572   ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
573   ihlp1 = ialloc;
574   ihlp2 = icol;
575 
576   if (val) {
577     ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc);CHKERRQ(ierr);
578     vhlp1 = valloc;
579     vhlp2 = val;
580   }
581 
582 
583   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
584   for (istep=1; istep<nnz; istep*=2) {
585     /*
586       Combine sorted parts
587           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
588       of ihlp2 and vhlp2
589 
590       into one sorted part
591           istart:istart+2*istep-1
592       of ihlp1 and vhlp1
593     */
594     for (istart=0; istart<nnz; istart+=2*istep) {
595       /* Set counters and bound array part endings */
596       i1=istart;        i1end = i1+istep;  if (i1end>nnz) {i1end=nnz;}
597       i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) {i2end=nnz;}
598 
599       /* Merge the two array parts */
600       if (val) {
601         for (i=istart; i<i2end; i++) {
602           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
603               ihlp1[i] = ihlp2[i1];
604               vhlp1[i] = vhlp2[i1];
605               i1++;
606           } else if (i2<i2end) {
607               ihlp1[i] = ihlp2[i2];
608               vhlp1[i] = vhlp2[i2];
609               i2++;
610           } else {
611               ihlp1[i] = ihlp2[i1];
612               vhlp1[i] = vhlp2[i1];
613               i1++;
614           }
615         }
616       } else {
617         for (i=istart; i<i2end; i++) {
618           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
619               ihlp1[i] = ihlp2[i1];
620               i1++;
621           } else if (i2<i2end) {
622               ihlp1[i] = ihlp2[i2];
623               i2++;
624           } else {
625               ihlp1[i] = ihlp2[i1];
626               i1++;
627           }
628         }
629       }
630     }
631 
632     /* Swap the two array sets */
633     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
634     vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
635   }
636 
637   /* Copy one more time in case the sorted arrays are the temporary ones */
638   if (ihlp2 != icol) {
639     for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; }
640     if (val) {
641       for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; }
642     }
643   }
644 
645   ierr = PetscFree(ialloc);CHKERRQ(ierr);
646   if (val) {ierr = PetscFree(valloc);CHKERRQ(ierr);}
647   PetscFunctionReturn(0);
648 }
649 
650 /*
651   spbas_apply_reordering_rows:
652     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
653 */
654 #undef __FUNCT__
655 #define __FUNCT__ "spbas_apply_reordering_rows"
656 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
657 {
658   PetscInt       i,j,ip;
659   PetscInt       nrows=matrix_A->nrows;
660   PetscInt       * row_nnz;
661   PetscInt       **icols;
662   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
663   PetscScalar    **vals=PETSC_NULL;
664   PetscErrorCode ierr;
665 
666   PetscFunctionBegin;
667   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
668 
669   if (do_values)  {
670     ierr = PetscMalloc(sizeof(PetscScalar*)*nrows, &vals);CHKERRQ(ierr);
671   }
672   ierr = PetscMalloc(sizeof(PetscInt)*nrows, &row_nnz);CHKERRQ(ierr);
673   ierr = PetscMalloc(sizeof(PetscInt*)*nrows, &icols);CHKERRQ(ierr);
674 
675   for (i=0; i<nrows;i++) {
676     ip = permutation[i];
677     if (do_values) {vals[i]    = matrix_A->values[ip];}
678     icols[i]   = matrix_A->icols[ip];
679     row_nnz[i] = matrix_A->row_nnz[ip];
680     for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; }
681   }
682 
683   if (do_values) { ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
684   ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
685   ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
686 
687   if (do_values) { matrix_A->values  = vals; }
688   matrix_A->icols   = icols;
689   matrix_A->row_nnz = row_nnz;
690   PetscFunctionReturn(0);
691 }
692 
693 
694 /*
695   spbas_apply_reordering_cols:
696     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
697 */
698 #undef __FUNCT__
699 #define __FUNCT__ "spbas_apply_reordering_cols"
700 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A,const PetscInt *permutation)
701 {
702   PetscInt    i,j;
703   PetscInt    nrows=matrix_A->nrows;
704   PetscInt    row_nnz;
705   PetscInt    *icols;
706   PetscBool   do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
707   PetscScalar *vals=PETSC_NULL;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
712 
713   for (i=0; i<nrows;i++) {
714     icols   = matrix_A->icols[i];
715     row_nnz = matrix_A->row_nnz[i];
716     if (do_values) { vals = matrix_A->values[i]; }
717 
718     for (j=0; j<row_nnz; j++)  {
719         icols[j] = permutation[i+icols[j]]-i;
720     }
721     ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
722   }
723   PetscFunctionReturn(0);
724 }
725 
726 /*
727   spbas_apply_reordering:
728     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
729 */
730 #undef __FUNCT__
731 #define __FUNCT__ "spbas_apply_reordering"
732 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
733 {
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   ierr = spbas_apply_reordering_rows(matrix_A, inv_perm);CHKERRQ(ierr);
738   ierr = spbas_apply_reordering_cols(matrix_A, permutation);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "spbas_pattern_only"
744 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
745 {
746   spbas_matrix   retval;
747   PetscInt       i, j, i0, r_nnz;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   /* Copy input values */
752   retval.nrows = nrows;
753   retval.ncols = ncols;
754   retval.nnz   = ai[nrows];
755 
756   retval.block_data   = PETSC_TRUE;
757   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
758 
759   /* Allocate output matrix */
760   ierr =  spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
761   for (i=0; i<nrows; i++)  {retval.row_nnz[i] = ai[i+1]-ai[i];}
762   ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
763   /* Copy the structure */
764   for (i = 0; i<retval.nrows; i++)  {
765     i0 = ai[i];
766     r_nnz = ai[i+1]-i0;
767 
768     for (j=0; j<r_nnz; j++) {
769       retval.icols[i][j]  = aj[i0+j]-i;
770     }
771   }
772   *result = retval;
773   PetscFunctionReturn(0);
774 }
775 
776 
777 /*
778    spbas_mark_row_power:
779       Mark the columns in row 'row' which are nonzero in
780           matrix^2log(marker).
781 */
782 #undef __FUNCT__
783 #define __FUNCT__ "spbas_mark_row_power"
784 PetscErrorCode spbas_mark_row_power(PetscInt *iwork,             /* marker-vector */
785                                     PetscInt row,                /* row for which the columns are marked */
786                                     spbas_matrix * in_matrix,    /* matrix for which the power is being  calculated */
787                                     PetscInt marker,             /* marker-value: 2^power */
788                                     PetscInt minmrk,             /* lower bound for marked points */
789                                     PetscInt maxmrk)             /* upper bound for marked points */
790 {
791   PetscErrorCode ierr;
792   PetscInt       i,j, nnz;
793 
794   PetscFunctionBegin;
795   nnz = in_matrix->row_nnz[row];
796 
797   /* For higher powers, call this function recursively */
798   if (marker>1) {
799     for (i=0; i<nnz;i++) {
800       j = row + in_matrix->icols[row][i];
801       if (minmrk<=j && j<maxmrk && iwork[j] < marker) {
802         ierr =  spbas_mark_row_power(iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
803         iwork[j] |= marker;
804       }
805     }
806   } else  {
807     /*  Mark the columns reached */
808     for (i=0; i<nnz;i++)  {
809       j = row + in_matrix->icols[row][i];
810       if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; }
811     }
812   }
813   PetscFunctionReturn(0);
814 }
815 
816 
817 /*
818    spbas_power
819       Calculate sparseness patterns for incomplete Cholesky decompositions
820       of a given order: (almost) all nonzeros of the matrix^(order+1) which
821       are inside the band width are found and stored in the output sparseness
822       pattern.
823 */
824 #undef __FUNCT__
825 #define __FUNCT__ "spbas_power"
826 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
827 {
828   spbas_matrix   retval;
829   PetscInt       nrows = in_matrix.nrows;
830   PetscInt       ncols = in_matrix.ncols;
831   PetscInt       i, j, kend;
832   PetscInt       nnz, inz;
833   PetscInt       *iwork;
834   PetscInt       marker;
835   PetscInt       maxmrk=0;
836   PetscErrorCode ierr;
837 
838   PetscFunctionBegin;
839   if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
840   if (ncols != nrows) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error\n");
841   if (in_matrix.values) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
842   if (power<=0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
843 
844   /* Copy input values*/
845   retval.nrows = ncols;
846   retval.ncols = nrows;
847   retval.nnz   = 0;
848   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
849   retval.block_data = PETSC_FALSE;
850 
851   /* Allocate sparseness pattern */
852   ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
853 
854   /* Allocate marker array */
855   ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork);CHKERRQ(ierr);
856 
857   /* Erase the pattern for this row */
858   ierr = PetscMemzero((void *) iwork, retval.nrows*sizeof(PetscInt));CHKERRQ(ierr);
859 
860   /* Calculate marker values */
861   marker = 1; for (i=1; i<power; i++) {marker*=2;}
862 
863   for (i=0; i<nrows; i++)  {
864     /* Calculate the pattern for each row */
865 
866     nnz    = in_matrix.row_nnz[i];
867     kend   = i+in_matrix.icols[i][nnz-1];
868     if (maxmrk<=kend) {maxmrk=kend+1;}
869     ierr =  spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
870 
871     /* Count the columns*/
872     nnz = 0;
873     for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);}
874 
875     /* Allocate the column indices */
876     retval.row_nnz[i] = nnz;
877     ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]);CHKERRQ(ierr);
878 
879     /* Administrate the column indices */
880     inz = 0;
881     for (j=i; j<maxmrk; j++) {
882       if (iwork[j])  {
883         retval.icols[i][inz] = j-i;
884         inz++;
885         iwork[j]=0;
886       }
887     }
888     retval.nnz += nnz;
889   };
890   ierr = PetscFree(iwork);CHKERRQ(ierr);
891   *result = retval;
892   PetscFunctionReturn(0);
893 }
894 
895 
896 
897 /*
898    spbas_keep_upper:
899       remove the lower part of the matrix: keep the upper part
900 */
901 #undef __FUNCT__
902 #define __FUNCT__ "spbas_keep_upper"
903 PetscErrorCode spbas_keep_upper(spbas_matrix * inout_matrix)
904 {
905   PetscInt i, j;
906   PetscInt jstart;
907 
908   PetscFunctionBegin;
909   if (inout_matrix->block_data) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
910   for (i=0; i<inout_matrix->nrows; i++)  {
911     for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++) {}
912     if (jstart>0) {
913       for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
914         inout_matrix->icols[i][j] =  inout_matrix->icols[i][j+jstart];
915       }
916 
917       if (inout_matrix->values) {
918         for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
919           inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
920         }
921       }
922 
923       inout_matrix->row_nnz[i] -= jstart;
924 
925       inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
926 
927       if (inout_matrix->values) {
928         inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
929       }
930       inout_matrix->nnz -= jstart;
931     }
932   }
933   PetscFunctionReturn(0);
934 }
935 
936