xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 4fc747eaadbeca11629f314a99edccbc2ed7b3d3)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Notes: Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
18      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
19      we recommend not using this functionality.
20 
21 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage, PCFactorSetLevels(), PCFactorSetDropTolerance()
22 
23 M*/
24 
25 /*
26   spbas_memory_requirement:
27     Calculate the number of bytes needed to store tha matrix
28 */
29 #undef __FUNCT__
30 #define __FUNCT__ "spbas_memory_requirement"
31 size_t spbas_memory_requirement(spbas_matrix matrix)
32 {
33   size_t memreq = 6 * sizeof(PetscInt)  + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
34                     sizeof(PetscBool)               + /* block_data */
35                     sizeof(PetscScalar**)           + /* values */
36                     sizeof(PetscScalar*)            + /* alloc_val */
37                     2 * sizeof(PetscInt**)          + /* icols, icols0 */
38                     2 * sizeof(PetscInt*)           + /* row_nnz, alloc_icol */
39                     matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */
40                     matrix.nrows * sizeof(PetscInt*); /* icols[*] */
41 
42   /* icol0[*] */
43   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
44 
45   /* icols[*][*] */
46   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
47   else memreq += matrix.nnz * sizeof(PetscInt);
48 
49   if (matrix.values) {
50     memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
51     /* values[*][*] */
52     if (matrix.block_data) memreq += matrix.n_alloc_val  * sizeof(PetscScalar);
53     else memreq += matrix.nnz * sizeof(PetscScalar);
54   }
55   return memreq;
56 }
57 
58 /*
59   spbas_allocate_pattern:
60     allocate the pattern arrays row_nnz, icols and optionally values
61 */
62 #undef __FUNCT__
63 #define __FUNCT__ "spbas_allocate_pattern"
64 PetscErrorCode spbas_allocate_pattern(spbas_matrix * result, PetscBool do_values)
65 {
66   PetscErrorCode ierr;
67   PetscInt       nrows        = result->nrows;
68   PetscInt       col_idx_type = result->col_idx_type;
69 
70   PetscFunctionBegin;
71   /* Allocate sparseness pattern */
72   ierr = PetscMalloc1(nrows,&result->row_nnz);CHKERRQ(ierr);
73   ierr = PetscMalloc1(nrows,&result->icols);CHKERRQ(ierr);
74 
75   /* If offsets are given wrt an array, create array */
76   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
77     ierr = PetscMalloc1(nrows,&result->icol0);CHKERRQ(ierr);
78   } else  {
79     result->icol0 = NULL;
80   }
81 
82   /* If values are given, allocate values array */
83   if (do_values)  {
84     ierr = PetscMalloc1(nrows,&result->values);CHKERRQ(ierr);
85   } else {
86     result->values = NULL;
87   }
88   PetscFunctionReturn(0);
89 }
90 
91 /*
92 spbas_allocate_data:
93    in case of block_data:
94        Allocate the data arrays alloc_icol and optionally alloc_val,
95        set appropriate pointers from icols and values;
96    in case of !block_data:
97        Allocate the arrays icols[i] and optionally values[i]
98 */
99 #undef __FUNCT__
100 #define __FUNCT__ "spbas_allocate_data"
101 PetscErrorCode spbas_allocate_data(spbas_matrix * result)
102 {
103   PetscInt       i;
104   PetscInt       nnz   = result->nnz;
105   PetscInt       nrows = result->nrows;
106   PetscInt       r_nnz;
107   PetscErrorCode ierr;
108   PetscBool      do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
109   PetscBool      block_data = result->block_data;
110 
111   PetscFunctionBegin;
112   if (block_data) {
113     /* Allocate the column number array and point to it */
114     result->n_alloc_icol = nnz;
115 
116     ierr = PetscMalloc1(nnz, &result->alloc_icol);CHKERRQ(ierr);
117 
118     result->icols[0] = result->alloc_icol;
119     for (i=1; i<nrows; i++)  {
120       result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
121     }
122 
123     /* Allocate the value array and point to it */
124     if (do_values) {
125       result->n_alloc_val = nnz;
126 
127       ierr = PetscMalloc1(nnz, &result->alloc_val);CHKERRQ(ierr);
128 
129       result->values[0] = result->alloc_val;
130       for (i=1; i<nrows; i++) {
131         result->values[i] = result->values[i-1] + result->row_nnz[i-1];
132       }
133     }
134   } else {
135     for (i=0; i<nrows; i++)   {
136       r_nnz = result->row_nnz[i];
137       ierr  = PetscMalloc1(r_nnz, &result->icols[i]);CHKERRQ(ierr);
138     }
139     if (do_values) {
140       for (i=0; i<nrows; i++)  {
141         r_nnz = result->row_nnz[i];
142         ierr  = PetscMalloc1(r_nnz, &result->values[i]);CHKERRQ(ierr);
143       }
144     }
145   }
146   PetscFunctionReturn(0);
147 }
148 
149 /*
150   spbas_row_order_icol
151      determine if row i1 should come
152        + before row i2 in the sorted rows (return -1),
153        + after (return 1)
154        + is identical (return 0).
155 */
156 #undef __FUNCT__
157 #define __FUNCT__ "spbas_row_order_icol"
158 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
159 {
160   PetscInt j;
161   PetscInt nnz1    = irow_in[i1+1] - irow_in[i1];
162   PetscInt nnz2    = irow_in[i2+1] - irow_in[i2];
163   PetscInt * icol1 = &icol_in[irow_in[i1]];
164   PetscInt * icol2 = &icol_in[irow_in[i2]];
165 
166   if (nnz1<nnz2) return -1;
167   if (nnz1>nnz2) return 1;
168 
169   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
170     for (j=0; j<nnz1; j++) {
171       if (icol1[j]< icol2[j]) return -1;
172       if (icol1[j]> icol2[j]) return 1;
173     }
174   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
175     for (j=0; j<nnz1; j++) {
176       if (icol1[j]-i1< icol2[j]-i2) return -1;
177       if (icol1[j]-i1> icol2[j]-i2) return 1;
178     }
179   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
180     for (j=1; j<nnz1; j++) {
181       if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) return -1;
182       if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) return 1;
183     }
184   }
185   return 0;
186 }
187 
188 /*
189   spbas_mergesort_icols:
190     return a sorting of the rows in which identical sparseness patterns are
191     next to each other
192 */
193 #undef __FUNCT__
194 #define __FUNCT__ "spbas_mergesort_icols"
195 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
196 {
197   PetscErrorCode ierr;
198   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
199   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
200   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
201   PetscInt       *ialloc;     /* Allocated arrays */
202   PetscInt       *iswap;      /* auxiliary pointers for swapping */
203   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
204   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
205 
206   PetscFunctionBegin;
207   ierr = PetscMalloc1(nrows,&ialloc);CHKERRQ(ierr);
208 
209   ihlp1 = ialloc;
210   ihlp2 = isort;
211 
212   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
213   for (istep=1; istep<nrows; istep*=2) {
214     /*
215       Combine sorted parts
216           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
217       of ihlp2 and vhlp2
218 
219       into one sorted part
220           istart:istart+2*istep-1
221       of ihlp1 and vhlp1
222     */
223     for (istart=0; istart<nrows; istart+=2*istep) {
224       /* Set counters and bound array part endings */
225       i1=istart;        i1end = i1+istep;  if (i1end>nrows) i1end=nrows;
226       i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) i2end=nrows;
227 
228       /* Merge the two array parts */
229       for (i=istart; i<i2end; i++)  {
230         if (i1<i1end && i2<i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
231           ihlp1[i] = ihlp2[i1];
232           i1++;
233         } else if (i2<i2end) {
234           ihlp1[i] = ihlp2[i2];
235           i2++;
236         } else {
237           ihlp1[i] = ihlp2[i1];
238           i1++;
239         }
240       }
241     }
242 
243     /* Swap the two array sets */
244     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
245   }
246 
247   /* Copy one more time in case the sorted arrays are the temporary ones */
248   if (ihlp2 != isort) {
249     for (i=0; i<nrows; i++) isort[i] = ihlp2[i];
250   }
251   ierr = PetscFree(ialloc);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 
256 
257 /*
258   spbas_compress_pattern:
259      calculate a compressed sparseness pattern for a sparseness pattern
260      given in compressed row storage. The compressed sparseness pattern may
261      require (much) less memory.
262 */
263 #undef __FUNCT__
264 #define __FUNCT__ "spbas_compress_pattern"
265 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
266 {
267   PetscInt        nnz      = irow_in[nrows];
268   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
269   size_t          mem_compressed;
270   PetscErrorCode  ierr;
271   PetscInt        *isort;
272   PetscInt        *icols;
273   PetscInt        row_nnz;
274   PetscInt        *ipoint;
275   PetscBool       *used;
276   PetscInt        ptr;
277   PetscInt        i,j;
278   const PetscBool no_values = PETSC_FALSE;
279 
280   PetscFunctionBegin;
281   /* Allocate the structure of the new matrix */
282   B->nrows        = nrows;
283   B->ncols        = ncols;
284   B->nnz          = nnz;
285   B->col_idx_type = col_idx_type;
286   B->block_data   = PETSC_TRUE;
287 
288   ierr = spbas_allocate_pattern(B, no_values);CHKERRQ(ierr);
289 
290   /* When using an offset array, set it */
291   if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
292     for (i=0; i<nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
293   }
294 
295   /* Allocate the ordering for the rows */
296   ierr = PetscMalloc1(nrows,&isort);CHKERRQ(ierr);
297   ierr = PetscMalloc1(nrows,&ipoint);CHKERRQ(ierr);
298   ierr = PetscMalloc1(nrows,&used);CHKERRQ(ierr);
299 
300   /*  Initialize the sorting */
301   ierr = PetscMemzero((void*) used, nrows*sizeof(PetscBool));CHKERRQ(ierr);
302   for (i = 0; i<nrows; i++)  {
303     B->row_nnz[i] = irow_in[i+1]-irow_in[i];
304     isort[i]      = i;
305     ipoint[i]     = i;
306   }
307 
308   /* Sort the rows so that identical columns will be next to each other */
309   ierr = spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
310   ierr = PetscInfo(NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
311 
312   /* Replace identical rows with the first one in the list */
313   for (i=1; i<nrows; i++) {
314     if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
315       ipoint[isort[i]] = ipoint[isort[i-1]];
316     }
317   }
318 
319   /* Collect the rows which are used*/
320   for (i=0; i<nrows; i++) used[ipoint[i]] = PETSC_TRUE;
321 
322   /* Calculate needed memory */
323   B->n_alloc_icol = 0;
324   for (i=0; i<nrows; i++)  {
325     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
326   }
327   ierr = PetscMalloc1(B->n_alloc_icol,&B->alloc_icol);CHKERRQ(ierr);
328 
329   /* Fill in the diagonal offsets for the rows which store their own data */
330   ptr = 0;
331   for (i=0; i<B->nrows; i++) {
332     if (used[i]) {
333       B->icols[i] = &B->alloc_icol[ptr];
334       icols = &icol_in[irow_in[i]];
335       row_nnz = B->row_nnz[i];
336       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
337         for (j=0; j<row_nnz; j++) {
338           B->icols[i][j] = icols[j];
339         }
340       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
341         for (j=0; j<row_nnz; j++) {
342           B->icols[i][j] = icols[j]-i;
343         }
344       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
345         for (j=0; j<row_nnz; j++) {
346           B->icols[i][j] = icols[j]-icols[0];
347         }
348       }
349       ptr += B->row_nnz[i];
350     }
351   }
352 
353   /* Point to the right places for all data */
354   for (i=0; i<nrows; i++) {
355     B->icols[i] = B->icols[ipoint[i]];
356   }
357   ierr = PetscInfo(NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
358   ierr = PetscInfo1(NULL,"         (%g nonzeros per row)\n", (double) ((PetscReal) nnz / (PetscReal) nrows));CHKERRQ(ierr);
359 
360   ierr=PetscFree(isort);CHKERRQ(ierr);
361   ierr=PetscFree(used);CHKERRQ(ierr);
362   ierr=PetscFree(ipoint);CHKERRQ(ierr);
363 
364   mem_compressed = spbas_memory_requirement(*B);
365   *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
366   PetscFunctionReturn(0);
367 }
368 
369 /*
370    spbas_incomplete_cholesky
371        Incomplete Cholesky decomposition
372 */
373 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
374 
375 /*
376   spbas_delete : de-allocate the arrays owned by this matrix
377 */
378 #undef __FUNCT__
379 #define __FUNCT__ "spbas_delete"
380 PetscErrorCode spbas_delete(spbas_matrix matrix)
381 {
382   PetscInt       i;
383   PetscErrorCode ierr;
384 
385   PetscFunctionBegin;
386   if (matrix.block_data) {
387     ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr);
388     if (matrix.values) {ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
389   } else {
390     for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
391     ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
392     if (matrix.values) {
393       for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
394     }
395   }
396 
397   ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
398   ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
399   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
400   ierr=PetscFree(matrix.values);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 /*
405 spbas_matrix_to_crs:
406    Convert an spbas_matrix to compessed row storage
407 */
408 #undef __FUNCT__
409 #define __FUNCT__ "spbas_matrix_to_crs"
410 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
411 {
412   PetscInt       nrows = matrix_A.nrows;
413   PetscInt       nnz   = matrix_A.nnz;
414   PetscInt       i,j,r_nnz,i0;
415   PetscInt       *irow;
416   PetscInt       *icol;
417   PetscInt       *icol_A;
418   MatScalar      *val;
419   PetscScalar    *val_A;
420   PetscInt       col_idx_type = matrix_A.col_idx_type;
421   PetscBool      do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
422   PetscErrorCode ierr;
423 
424   PetscFunctionBegin;
425   ierr      = PetscMalloc1(nrows+1, &irow);CHKERRQ(ierr);
426   ierr      = PetscMalloc1(nnz, &icol);CHKERRQ(ierr);
427   *icol_out = icol;
428   *irow_out = irow;
429   if (do_values) {
430     ierr     = PetscMalloc1(nnz, &val);CHKERRQ(ierr);
431     *val_out = val; *icol_out = icol; *irow_out=irow;
432   }
433 
434   irow[0]=0;
435   for (i=0; i<nrows; i++) {
436     r_nnz     = matrix_A.row_nnz[i];
437     i0        = irow[i];
438     irow[i+1] = i0 + r_nnz;
439     icol_A    = matrix_A.icols[i];
440 
441     if (do_values) {
442       val_A = matrix_A.values[i];
443       for (j=0; j<r_nnz; j++) {
444         icol[i0+j] = icol_A[j];
445         val[i0+j]  = val_A[j];
446       }
447     } else {
448       for (j=0; j<r_nnz; j++) icol[i0+j] = icol_A[j];
449     }
450 
451     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
452       for (j=0; j<r_nnz; j++) icol[i0+j] += i;
453     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
454       i0 = matrix_A.icol0[i];
455       for (j=0; j<r_nnz; j++) icol[i0+j] += i0;
456     }
457   }
458   PetscFunctionReturn(0);
459 }
460 
461 
462 /*
463     spbas_transpose
464        return the transpose of a matrix
465 */
466 #undef __FUNCT__
467 #define __FUNCT__ "spbas_transpose"
468 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix * result)
469 {
470   PetscInt       col_idx_type = in_matrix.col_idx_type;
471   PetscInt       nnz          = in_matrix.nnz;
472   PetscInt       ncols        = in_matrix.nrows;
473   PetscInt       nrows        = in_matrix.ncols;
474   PetscInt       i,j,k;
475   PetscInt       r_nnz;
476   PetscInt       *irow;
477   PetscInt       icol0 = 0;
478   PetscScalar    * val;
479   PetscErrorCode ierr;
480 
481   PetscFunctionBegin;
482   /* Copy input values */
483   result->nrows        = nrows;
484   result->ncols        = ncols;
485   result->nnz          = nnz;
486   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
487   result->block_data   = PETSC_TRUE;
488 
489   /* Allocate sparseness pattern */
490   ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
491 
492   /*  Count the number of nonzeros in each row */
493   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
494 
495   for (i=0; i<ncols; i++) {
496     r_nnz = in_matrix.row_nnz[i];
497     irow  = in_matrix.icols[i];
498     if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
499       for (j=0; j<r_nnz; j++) result->row_nnz[irow[j]]++;
500     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
501       for (j=0; j<r_nnz; j++) result->row_nnz[i+irow[j]]++;
502     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
503       icol0=in_matrix.icol0[i];
504       for (j=0; j<r_nnz; j++) result->row_nnz[icol0+irow[j]]++;
505     }
506   }
507 
508   /* Set the pointers to the data */
509   ierr = spbas_allocate_data(result);CHKERRQ(ierr);
510 
511   /* Reset the number of nonzeros in each row */
512   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
513 
514   /* Fill the data arrays */
515   if (in_matrix.values) {
516     for (i=0; i<ncols; i++) {
517       r_nnz = in_matrix.row_nnz[i];
518       irow  = in_matrix.icols[i];
519       val   = in_matrix.values[i];
520 
521       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0 = 0;
522       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
523       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0 = in_matrix.icol0[i];
524       for (j=0; j<r_nnz; j++)  {
525         k = icol0 + irow[j];
526         result->icols[k][result->row_nnz[k]]  = i;
527         result->values[k][result->row_nnz[k]] = val[j];
528         result->row_nnz[k]++;
529       }
530     }
531   } else {
532     for (i=0; i<ncols; i++) {
533       r_nnz = in_matrix.row_nnz[i];
534       irow  = in_matrix.icols[i];
535 
536       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0=0;
537       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0=i;
538       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0=in_matrix.icol0[i];
539 
540       for (j=0; j<r_nnz; j++) {
541         k = icol0 + irow[j];
542         result->icols[k][result->row_nnz[k]] = i;
543         result->row_nnz[k]++;
544       }
545     }
546   }
547   PetscFunctionReturn(0);
548 }
549 
550 /*
551    spbas_mergesort
552 
553       mergesort for an array of intergers and an array of associated
554       reals
555 
556       on output, icol[0..nnz-1] is increasing;
557                   val[0..nnz-1] has undergone the same permutation as icol
558 
559       NB: val may be NULL: in that case, only the integers are sorted
560 
561 */
562 #undef __FUNCT__
563 #define __FUNCT__ "spbas_mergesort"
564 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
565 {
566   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
567   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
568   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
569   PetscInt       *ialloc;     /* Allocated arrays */
570   PetscScalar    *valloc=NULL;
571   PetscInt       *iswap;      /* auxiliary pointers for swapping */
572   PetscScalar    *vswap;
573   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
574   PetscScalar    *vhlp1=NULL;  /* (arrays under construction) */
575   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
576   PetscScalar    *vhlp2=NULL;
577   PetscErrorCode ierr;
578 
579   ierr  = PetscMalloc1(nnz,&ialloc);CHKERRQ(ierr);
580   ihlp1 = ialloc;
581   ihlp2 = icol;
582 
583   if (val) {
584     ierr  = PetscMalloc1(nnz,&valloc);CHKERRQ(ierr);
585     vhlp1 = valloc;
586     vhlp2 = val;
587   }
588 
589 
590   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
591   for (istep=1; istep<nnz; istep*=2) {
592     /*
593       Combine sorted parts
594           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
595       of ihlp2 and vhlp2
596 
597       into one sorted part
598           istart:istart+2*istep-1
599       of ihlp1 and vhlp1
600     */
601     for (istart=0; istart<nnz; istart+=2*istep) {
602       /* Set counters and bound array part endings */
603       i1=istart;        i1end = i1+istep;  if (i1end>nnz) i1end=nnz;
604       i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) i2end=nnz;
605 
606       /* Merge the two array parts */
607       if (val) {
608         for (i=istart; i<i2end; i++) {
609           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
610             ihlp1[i] = ihlp2[i1];
611             vhlp1[i] = vhlp2[i1];
612             i1++;
613           } else if (i2<i2end) {
614             ihlp1[i] = ihlp2[i2];
615             vhlp1[i] = vhlp2[i2];
616             i2++;
617           } else {
618             ihlp1[i] = ihlp2[i1];
619             vhlp1[i] = vhlp2[i1];
620             i1++;
621           }
622         }
623       } else {
624         for (i=istart; i<i2end; i++) {
625           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
626             ihlp1[i] = ihlp2[i1];
627             i1++;
628           } else if (i2<i2end) {
629             ihlp1[i] = ihlp2[i2];
630             i2++;
631           } else {
632             ihlp1[i] = ihlp2[i1];
633             i1++;
634           }
635         }
636       }
637     }
638 
639     /* Swap the two array sets */
640     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
641     vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
642   }
643 
644   /* Copy one more time in case the sorted arrays are the temporary ones */
645   if (ihlp2 != icol) {
646     for (i=0; i<nnz; i++) icol[i] = ihlp2[i];
647     if (val) {
648       for (i=0; i<nnz; i++) val[i] = vhlp2[i];
649     }
650   }
651 
652   ierr = PetscFree(ialloc);CHKERRQ(ierr);
653   if (val) {ierr = PetscFree(valloc);CHKERRQ(ierr);}
654   PetscFunctionReturn(0);
655 }
656 
657 /*
658   spbas_apply_reordering_rows:
659     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
660 */
661 #undef __FUNCT__
662 #define __FUNCT__ "spbas_apply_reordering_rows"
663 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
664 {
665   PetscInt       i,j,ip;
666   PetscInt       nrows=matrix_A->nrows;
667   PetscInt       * row_nnz;
668   PetscInt       **icols;
669   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
670   PetscScalar    **vals    = NULL;
671   PetscErrorCode ierr;
672 
673   PetscFunctionBegin;
674   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
675 
676   if (do_values) {
677     ierr = PetscMalloc1(nrows, &vals);CHKERRQ(ierr);
678   }
679   ierr = PetscMalloc1(nrows, &row_nnz);CHKERRQ(ierr);
680   ierr = PetscMalloc1(nrows, &icols);CHKERRQ(ierr);
681 
682   for (i=0; i<nrows; i++) {
683     ip = permutation[i];
684     if (do_values) vals[i] = matrix_A->values[ip];
685     icols[i]   = matrix_A->icols[ip];
686     row_nnz[i] = matrix_A->row_nnz[ip];
687     for (j=0; j<row_nnz[i]; j++) icols[i][j] += ip-i;
688   }
689 
690   if (do_values) { ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
691   ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
692   ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
693 
694   if (do_values) matrix_A->values = vals;
695   matrix_A->icols   = icols;
696   matrix_A->row_nnz = row_nnz;
697   PetscFunctionReturn(0);
698 }
699 
700 
701 /*
702   spbas_apply_reordering_cols:
703     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
704 */
705 #undef __FUNCT__
706 #define __FUNCT__ "spbas_apply_reordering_cols"
707 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A,const PetscInt *permutation)
708 {
709   PetscInt       i,j;
710   PetscInt       nrows=matrix_A->nrows;
711   PetscInt       row_nnz;
712   PetscInt       *icols;
713   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
714   PetscScalar    *vals     = NULL;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
719 
720   for (i=0; i<nrows; i++) {
721     icols   = matrix_A->icols[i];
722     row_nnz = matrix_A->row_nnz[i];
723     if (do_values) vals = matrix_A->values[i];
724 
725     for (j=0; j<row_nnz; j++) {
726       icols[j] = permutation[i+icols[j]]-i;
727     }
728     ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
729   }
730   PetscFunctionReturn(0);
731 }
732 
733 /*
734   spbas_apply_reordering:
735     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
736 */
737 #undef __FUNCT__
738 #define __FUNCT__ "spbas_apply_reordering"
739 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
740 {
741   PetscErrorCode ierr;
742 
743   PetscFunctionBegin;
744   ierr = spbas_apply_reordering_rows(matrix_A, inv_perm);CHKERRQ(ierr);
745   ierr = spbas_apply_reordering_cols(matrix_A, permutation);CHKERRQ(ierr);
746   PetscFunctionReturn(0);
747 }
748 
749 #undef __FUNCT__
750 #define __FUNCT__ "spbas_pattern_only"
751 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
752 {
753   spbas_matrix   retval;
754   PetscInt       i, j, i0, r_nnz;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   /* Copy input values */
759   retval.nrows = nrows;
760   retval.ncols = ncols;
761   retval.nnz   = ai[nrows];
762 
763   retval.block_data   = PETSC_TRUE;
764   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
765 
766   /* Allocate output matrix */
767   ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
768   for (i=0; i<nrows; i++) retval.row_nnz[i] = ai[i+1]-ai[i];
769   ierr = spbas_allocate_data(&retval);CHKERRQ(ierr);
770   /* Copy the structure */
771   for (i = 0; i<retval.nrows; i++)  {
772     i0    = ai[i];
773     r_nnz = ai[i+1]-i0;
774 
775     for (j=0; j<r_nnz; j++) {
776       retval.icols[i][j] = aj[i0+j]-i;
777     }
778   }
779   *result = retval;
780   PetscFunctionReturn(0);
781 }
782 
783 
784 /*
785    spbas_mark_row_power:
786       Mark the columns in row 'row' which are nonzero in
787           matrix^2log(marker).
788 */
789 #undef __FUNCT__
790 #define __FUNCT__ "spbas_mark_row_power"
791 PetscErrorCode spbas_mark_row_power(PetscInt *iwork,             /* marker-vector */
792                                     PetscInt row,                /* row for which the columns are marked */
793                                     spbas_matrix * in_matrix,    /* matrix for which the power is being  calculated */
794                                     PetscInt marker,             /* marker-value: 2^power */
795                                     PetscInt minmrk,             /* lower bound for marked points */
796                                     PetscInt maxmrk)             /* upper bound for marked points */
797 {
798   PetscErrorCode ierr;
799   PetscInt       i,j, nnz;
800 
801   PetscFunctionBegin;
802   nnz = in_matrix->row_nnz[row];
803 
804   /* For higher powers, call this function recursively */
805   if (marker>1) {
806     for (i=0; i<nnz; i++) {
807       j = row + in_matrix->icols[row][i];
808       if (minmrk<=j && j<maxmrk && iwork[j] < marker) {
809         ierr      = spbas_mark_row_power(iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
810         iwork[j] |= marker;
811       }
812     }
813   } else {
814     /*  Mark the columns reached */
815     for (i=0; i<nnz; i++)  {
816       j = row + in_matrix->icols[row][i];
817       if (minmrk<=j && j<maxmrk) iwork[j] |= 1;
818     }
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 
824 /*
825    spbas_power
826       Calculate sparseness patterns for incomplete Cholesky decompositions
827       of a given order: (almost) all nonzeros of the matrix^(order+1) which
828       are inside the band width are found and stored in the output sparseness
829       pattern.
830 */
831 #undef __FUNCT__
832 #define __FUNCT__ "spbas_power"
833 PetscErrorCode spbas_power(spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
834 {
835   spbas_matrix   retval;
836   PetscInt       nrows = in_matrix.nrows;
837   PetscInt       ncols = in_matrix.ncols;
838   PetscInt       i, j, kend;
839   PetscInt       nnz, inz;
840   PetscInt       *iwork;
841   PetscInt       marker;
842   PetscInt       maxmrk=0;
843   PetscErrorCode ierr;
844 
845   PetscFunctionBegin;
846   if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
847   if (ncols != nrows) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error\n");
848   if (in_matrix.values) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
849   if (power<=0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
850 
851   /* Copy input values*/
852   retval.nrows        = ncols;
853   retval.ncols        = nrows;
854   retval.nnz          = 0;
855   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
856   retval.block_data   = PETSC_FALSE;
857 
858   /* Allocate sparseness pattern */
859   ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
860 
861   /* Allocate marker array */
862   ierr = PetscMalloc1(nrows, &iwork);CHKERRQ(ierr);
863 
864   /* Erase the pattern for this row */
865   ierr = PetscMemzero((void*) iwork, retval.nrows*sizeof(PetscInt));CHKERRQ(ierr);
866 
867   /* Calculate marker values */
868   marker = 1; for (i=1; i<power; i++) marker*=2;
869 
870   for (i=0; i<nrows; i++)  {
871     /* Calculate the pattern for each row */
872 
873     nnz  = in_matrix.row_nnz[i];
874     kend = i+in_matrix.icols[i][nnz-1];
875     if (maxmrk<=kend) maxmrk=kend+1;
876     ierr = spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
877 
878     /* Count the columns*/
879     nnz = 0;
880     for (j=i; j<maxmrk; j++) nnz+= (iwork[j]!=0);
881 
882     /* Allocate the column indices */
883     retval.row_nnz[i] = nnz;
884     ierr = PetscMalloc1(nnz,&retval.icols[i]);CHKERRQ(ierr);
885 
886     /* Administrate the column indices */
887     inz = 0;
888     for (j=i; j<maxmrk; j++) {
889       if (iwork[j]) {
890         retval.icols[i][inz] = j-i;
891         inz++;
892         iwork[j]=0;
893       }
894     }
895     retval.nnz += nnz;
896   };
897   ierr    = PetscFree(iwork);CHKERRQ(ierr);
898   *result = retval;
899   PetscFunctionReturn(0);
900 }
901 
902 
903 
904 /*
905    spbas_keep_upper:
906       remove the lower part of the matrix: keep the upper part
907 */
908 #undef __FUNCT__
909 #define __FUNCT__ "spbas_keep_upper"
910 PetscErrorCode spbas_keep_upper(spbas_matrix * inout_matrix)
911 {
912   PetscInt i, j;
913   PetscInt jstart;
914 
915   PetscFunctionBegin;
916   if (inout_matrix->block_data) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
917   for (i=0; i<inout_matrix->nrows; i++)  {
918     for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++) {}
919     if (jstart>0) {
920       for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
921         inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart];
922       }
923 
924       if (inout_matrix->values) {
925         for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
926           inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
927         }
928       }
929 
930       inout_matrix->row_nnz[i] -= jstart;
931 
932       inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
933 
934       if (inout_matrix->values) {
935         inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
936       }
937       inout_matrix->nnz -= jstart;
938     }
939   }
940   PetscFunctionReturn(0);
941 }
942 
943