xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Notes:
18     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
19      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
20      we recommend not using this functionality.
21 
22 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()`
23 
24 M*/
25 
26 /*
27   spbas_memory_requirement:
28     Calculate the number of bytes needed to store tha matrix
29 */
30 size_t spbas_memory_requirement(spbas_matrix matrix) {
31   size_t memreq = 6 * sizeof(PetscInt) +             /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
32                   sizeof(PetscBool) +                /* block_data */
33                   sizeof(PetscScalar **) +           /* values */
34                   sizeof(PetscScalar *) +            /* alloc_val */
35                   2 * sizeof(PetscInt **) +          /* icols, icols0 */
36                   2 * sizeof(PetscInt *) +           /* row_nnz, alloc_icol */
37                   matrix.nrows * sizeof(PetscInt) +  /* row_nnz[*] */
38                   matrix.nrows * sizeof(PetscInt *); /* icols[*] */
39 
40   /* icol0[*] */
41   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
42 
43   /* icols[*][*] */
44   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
45   else memreq += matrix.nnz * sizeof(PetscInt);
46 
47   if (matrix.values) {
48     memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */
49     /* values[*][*] */
50     if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar);
51     else memreq += matrix.nnz * sizeof(PetscScalar);
52   }
53   return memreq;
54 }
55 
56 /*
57   spbas_allocate_pattern:
58     allocate the pattern arrays row_nnz, icols and optionally values
59 */
60 PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values) {
61   PetscInt nrows        = result->nrows;
62   PetscInt col_idx_type = result->col_idx_type;
63 
64   PetscFunctionBegin;
65   /* Allocate sparseness pattern */
66   PetscCall(PetscMalloc1(nrows, &result->row_nnz));
67   PetscCall(PetscMalloc1(nrows, &result->icols));
68 
69   /* If offsets are given wrt an array, create array */
70   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
71     PetscCall(PetscMalloc1(nrows, &result->icol0));
72   } else {
73     result->icol0 = NULL;
74   }
75 
76   /* If values are given, allocate values array */
77   if (do_values) {
78     PetscCall(PetscMalloc1(nrows, &result->values));
79   } else {
80     result->values = NULL;
81   }
82   PetscFunctionReturn(0);
83 }
84 
85 /*
86 spbas_allocate_data:
87    in case of block_data:
88        Allocate the data arrays alloc_icol and optionally alloc_val,
89        set appropriate pointers from icols and values;
90    in case of !block_data:
91        Allocate the arrays icols[i] and optionally values[i]
92 */
93 PetscErrorCode spbas_allocate_data(spbas_matrix *result) {
94   PetscInt  i;
95   PetscInt  nnz   = result->nnz;
96   PetscInt  nrows = result->nrows;
97   PetscInt  r_nnz;
98   PetscBool do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
99   PetscBool block_data = result->block_data;
100 
101   PetscFunctionBegin;
102   if (block_data) {
103     /* Allocate the column number array and point to it */
104     result->n_alloc_icol = nnz;
105 
106     PetscCall(PetscMalloc1(nnz, &result->alloc_icol));
107 
108     result->icols[0] = result->alloc_icol;
109     for (i = 1; i < nrows; i++) result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1];
110 
111     /* Allocate the value array and point to it */
112     if (do_values) {
113       result->n_alloc_val = nnz;
114 
115       PetscCall(PetscMalloc1(nnz, &result->alloc_val));
116 
117       result->values[0] = result->alloc_val;
118       for (i = 1; i < nrows; i++) result->values[i] = result->values[i - 1] + result->row_nnz[i - 1];
119     }
120   } else {
121     for (i = 0; i < nrows; i++) {
122       r_nnz = result->row_nnz[i];
123       PetscCall(PetscMalloc1(r_nnz, &result->icols[i]));
124     }
125     if (do_values) {
126       for (i = 0; i < nrows; i++) {
127         r_nnz = result->row_nnz[i];
128         PetscCall(PetscMalloc1(r_nnz, &result->values[i]));
129       }
130     }
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 /*
136   spbas_row_order_icol
137      determine if row i1 should come
138        + before row i2 in the sorted rows (return -1),
139        + after (return 1)
140        + is identical (return 0).
141 */
142 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type) {
143   PetscInt  j;
144   PetscInt  nnz1  = irow_in[i1 + 1] - irow_in[i1];
145   PetscInt  nnz2  = irow_in[i2 + 1] - irow_in[i2];
146   PetscInt *icol1 = &icol_in[irow_in[i1]];
147   PetscInt *icol2 = &icol_in[irow_in[i2]];
148 
149   if (nnz1 < nnz2) return -1;
150   if (nnz1 > nnz2) return 1;
151 
152   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
153     for (j = 0; j < nnz1; j++) {
154       if (icol1[j] < icol2[j]) return -1;
155       if (icol1[j] > icol2[j]) return 1;
156     }
157   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
158     for (j = 0; j < nnz1; j++) {
159       if (icol1[j] - i1 < icol2[j] - i2) return -1;
160       if (icol1[j] - i1 > icol2[j] - i2) return 1;
161     }
162   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
163     for (j = 1; j < nnz1; j++) {
164       if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1;
165       if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1;
166     }
167   }
168   return 0;
169 }
170 
171 /*
172   spbas_mergesort_icols:
173     return a sorting of the rows in which identical sparseness patterns are
174     next to each other
175 */
176 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort) {
177   PetscInt  istep;                /* Chunk-sizes of already sorted parts of arrays */
178   PetscInt  i, i1, i2;            /* Loop counters for (partly) sorted arrays */
179   PetscInt  istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
180   PetscInt *ialloc;               /* Allocated arrays */
181   PetscInt *iswap;                /* auxiliary pointers for swapping */
182   PetscInt *ihlp1;                /* Pointers to new version of arrays, */
183   PetscInt *ihlp2;                /* Pointers to previous version of arrays, */
184 
185   PetscFunctionBegin;
186   PetscCall(PetscMalloc1(nrows, &ialloc));
187 
188   ihlp1 = ialloc;
189   ihlp2 = isort;
190 
191   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
192   for (istep = 1; istep < nrows; istep *= 2) {
193     /*
194       Combine sorted parts
195           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
196       of ihlp2 and vhlp2
197 
198       into one sorted part
199           istart:istart+2*istep-1
200       of ihlp1 and vhlp1
201     */
202     for (istart = 0; istart < nrows; istart += 2 * istep) {
203       /* Set counters and bound array part endings */
204       i1    = istart;
205       i1end = i1 + istep;
206       if (i1end > nrows) i1end = nrows;
207       i2    = istart + istep;
208       i2end = i2 + istep;
209       if (i2end > nrows) i2end = nrows;
210 
211       /* Merge the two array parts */
212       for (i = istart; i < i2end; i++) {
213         if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
214           ihlp1[i] = ihlp2[i1];
215           i1++;
216         } else if (i2 < i2end) {
217           ihlp1[i] = ihlp2[i2];
218           i2++;
219         } else {
220           ihlp1[i] = ihlp2[i1];
221           i1++;
222         }
223       }
224     }
225 
226     /* Swap the two array sets */
227     iswap = ihlp2;
228     ihlp2 = ihlp1;
229     ihlp1 = iswap;
230   }
231 
232   /* Copy one more time in case the sorted arrays are the temporary ones */
233   if (ihlp2 != isort) {
234     for (i = 0; i < nrows; i++) isort[i] = ihlp2[i];
235   }
236   PetscCall(PetscFree(ialloc));
237   PetscFunctionReturn(0);
238 }
239 
240 /*
241   spbas_compress_pattern:
242      calculate a compressed sparseness pattern for a sparseness pattern
243      given in compressed row storage. The compressed sparseness pattern may
244      require (much) less memory.
245 */
246 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction) {
247   PetscInt        nnz      = irow_in[nrows];
248   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
249   size_t          mem_compressed;
250   PetscInt       *isort;
251   PetscInt       *icols;
252   PetscInt        row_nnz;
253   PetscInt       *ipoint;
254   PetscBool      *used;
255   PetscInt        ptr;
256   PetscInt        i, j;
257   const PetscBool no_values = PETSC_FALSE;
258 
259   PetscFunctionBegin;
260   /* Allocate the structure of the new matrix */
261   B->nrows        = nrows;
262   B->ncols        = ncols;
263   B->nnz          = nnz;
264   B->col_idx_type = col_idx_type;
265   B->block_data   = PETSC_TRUE;
266 
267   PetscCall(spbas_allocate_pattern(B, no_values));
268 
269   /* When using an offset array, set it */
270   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
271     for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
272   }
273 
274   /* Allocate the ordering for the rows */
275   PetscCall(PetscMalloc1(nrows, &isort));
276   PetscCall(PetscMalloc1(nrows, &ipoint));
277   PetscCall(PetscCalloc1(nrows, &used));
278 
279   for (i = 0; i < nrows; i++) {
280     B->row_nnz[i] = irow_in[i + 1] - irow_in[i];
281     isort[i]      = i;
282     ipoint[i]     = i;
283   }
284 
285   /* Sort the rows so that identical columns will be next to each other */
286   PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort));
287   PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n"));
288 
289   /* Replace identical rows with the first one in the list */
290   for (i = 1; i < nrows; i++) {
291     if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) ipoint[isort[i]] = ipoint[isort[i - 1]];
292   }
293 
294   /* Collect the rows which are used*/
295   for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE;
296 
297   /* Calculate needed memory */
298   B->n_alloc_icol = 0;
299   for (i = 0; i < nrows; i++) {
300     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
301   }
302   PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol));
303 
304   /* Fill in the diagonal offsets for the rows which store their own data */
305   ptr = 0;
306   for (i = 0; i < B->nrows; i++) {
307     if (used[i]) {
308       B->icols[i] = &B->alloc_icol[ptr];
309       icols       = &icol_in[irow_in[i]];
310       row_nnz     = B->row_nnz[i];
311       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
312         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j];
313       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
314         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - i;
315       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
316         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - icols[0];
317       }
318       ptr += B->row_nnz[i];
319     }
320   }
321 
322   /* Point to the right places for all data */
323   for (i = 0; i < nrows; i++) B->icols[i] = B->icols[ipoint[i]];
324   PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n"));
325   PetscCall(PetscInfo(NULL, "         (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows)));
326 
327   PetscCall(PetscFree(isort));
328   PetscCall(PetscFree(used));
329   PetscCall(PetscFree(ipoint));
330 
331   mem_compressed = spbas_memory_requirement(*B);
332   *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig;
333   PetscFunctionReturn(0);
334 }
335 
336 /*
337    spbas_incomplete_cholesky
338        Incomplete Cholesky decomposition
339 */
340 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
341 
342 /*
343   spbas_delete : de-allocate the arrays owned by this matrix
344 */
345 PetscErrorCode spbas_delete(spbas_matrix matrix) {
346   PetscInt i;
347 
348   PetscFunctionBegin;
349   if (matrix.block_data) {
350     PetscCall(PetscFree(matrix.alloc_icol));
351     if (matrix.values) PetscCall(PetscFree(matrix.alloc_val));
352   } else {
353     for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i]));
354     PetscCall(PetscFree(matrix.icols));
355     if (matrix.values) {
356       for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i]));
357     }
358   }
359 
360   PetscCall(PetscFree(matrix.row_nnz));
361   PetscCall(PetscFree(matrix.icols));
362   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0));
363   PetscCall(PetscFree(matrix.values));
364   PetscFunctionReturn(0);
365 }
366 
367 /*
368 spbas_matrix_to_crs:
369    Convert an spbas_matrix to compessed row storage
370 */
371 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) {
372   PetscInt     nrows = matrix_A.nrows;
373   PetscInt     nnz   = matrix_A.nnz;
374   PetscInt     i, j, r_nnz, i0;
375   PetscInt    *irow;
376   PetscInt    *icol;
377   PetscInt    *icol_A;
378   MatScalar   *val;
379   PetscScalar *val_A;
380   PetscInt     col_idx_type = matrix_A.col_idx_type;
381   PetscBool    do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
382 
383   PetscFunctionBegin;
384   PetscCall(PetscMalloc1(nrows + 1, &irow));
385   PetscCall(PetscMalloc1(nnz, &icol));
386   *icol_out = icol;
387   *irow_out = irow;
388   if (do_values) {
389     PetscCall(PetscMalloc1(nnz, &val));
390     *val_out  = val;
391     *icol_out = icol;
392     *irow_out = irow;
393   }
394 
395   irow[0] = 0;
396   for (i = 0; i < nrows; i++) {
397     r_nnz       = matrix_A.row_nnz[i];
398     i0          = irow[i];
399     irow[i + 1] = i0 + r_nnz;
400     icol_A      = matrix_A.icols[i];
401 
402     if (do_values) {
403       val_A = matrix_A.values[i];
404       for (j = 0; j < r_nnz; j++) {
405         icol[i0 + j] = icol_A[j];
406         val[i0 + j]  = val_A[j];
407       }
408     } else {
409       for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j];
410     }
411 
412     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
413       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i;
414     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
415       i0 = matrix_A.icol0[i];
416       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0;
417     }
418   }
419   PetscFunctionReturn(0);
420 }
421 
422 /*
423     spbas_transpose
424        return the transpose of a matrix
425 */
426 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result) {
427   PetscInt     col_idx_type = in_matrix.col_idx_type;
428   PetscInt     nnz          = in_matrix.nnz;
429   PetscInt     ncols        = in_matrix.nrows;
430   PetscInt     nrows        = in_matrix.ncols;
431   PetscInt     i, j, k;
432   PetscInt     r_nnz;
433   PetscInt    *irow;
434   PetscInt     icol0 = 0;
435   PetscScalar *val;
436 
437   PetscFunctionBegin;
438   /* Copy input values */
439   result->nrows        = nrows;
440   result->ncols        = ncols;
441   result->nnz          = nnz;
442   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
443   result->block_data   = PETSC_TRUE;
444 
445   /* Allocate sparseness pattern */
446   PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
447 
448   /*  Count the number of nonzeros in each row */
449   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
450 
451   for (i = 0; i < ncols; i++) {
452     r_nnz = in_matrix.row_nnz[i];
453     irow  = in_matrix.icols[i];
454     if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
455       for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++;
456     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
457       for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++;
458     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
459       icol0 = in_matrix.icol0[i];
460       for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++;
461     }
462   }
463 
464   /* Set the pointers to the data */
465   PetscCall(spbas_allocate_data(result));
466 
467   /* Reset the number of nonzeros in each row */
468   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
469 
470   /* Fill the data arrays */
471   if (in_matrix.values) {
472     for (i = 0; i < ncols; i++) {
473       r_nnz = in_matrix.row_nnz[i];
474       irow  = in_matrix.icols[i];
475       val   = in_matrix.values[i];
476 
477       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
478       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
479       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
480       for (j = 0; j < r_nnz; j++) {
481         k                                     = icol0 + irow[j];
482         result->icols[k][result->row_nnz[k]]  = i;
483         result->values[k][result->row_nnz[k]] = val[j];
484         result->row_nnz[k]++;
485       }
486     }
487   } else {
488     for (i = 0; i < ncols; i++) {
489       r_nnz = in_matrix.row_nnz[i];
490       irow  = in_matrix.icols[i];
491 
492       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
493       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
494       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
495 
496       for (j = 0; j < r_nnz; j++) {
497         k                                    = icol0 + irow[j];
498         result->icols[k][result->row_nnz[k]] = i;
499         result->row_nnz[k]++;
500       }
501     }
502   }
503   PetscFunctionReturn(0);
504 }
505 
506 /*
507    spbas_mergesort
508 
509       mergesort for an array of integers and an array of associated
510       reals
511 
512       on output, icol[0..nnz-1] is increasing;
513                   val[0..nnz-1] has undergone the same permutation as icol
514 
515       NB: val may be NULL: in that case, only the integers are sorted
516 
517 */
518 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) {
519   PetscInt     istep;                /* Chunk-sizes of already sorted parts of arrays */
520   PetscInt     i, i1, i2;            /* Loop counters for (partly) sorted arrays */
521   PetscInt     istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
522   PetscInt    *ialloc;               /* Allocated arrays */
523   PetscScalar *valloc = NULL;
524   PetscInt    *iswap; /* auxiliary pointers for swapping */
525   PetscScalar *vswap;
526   PetscInt    *ihlp1;        /* Pointers to new version of arrays, */
527   PetscScalar *vhlp1 = NULL; /* (arrays under construction) */
528   PetscInt    *ihlp2;        /* Pointers to previous version of arrays, */
529   PetscScalar *vhlp2 = NULL;
530 
531   PetscFunctionBegin;
532   PetscCall(PetscMalloc1(nnz, &ialloc));
533   ihlp1 = ialloc;
534   ihlp2 = icol;
535 
536   if (val) {
537     PetscCall(PetscMalloc1(nnz, &valloc));
538     vhlp1 = valloc;
539     vhlp2 = val;
540   }
541 
542   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
543   for (istep = 1; istep < nnz; istep *= 2) {
544     /*
545       Combine sorted parts
546           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
547       of ihlp2 and vhlp2
548 
549       into one sorted part
550           istart:istart+2*istep-1
551       of ihlp1 and vhlp1
552     */
553     for (istart = 0; istart < nnz; istart += 2 * istep) {
554       /* Set counters and bound array part endings */
555       i1    = istart;
556       i1end = i1 + istep;
557       if (i1end > nnz) i1end = nnz;
558       i2    = istart + istep;
559       i2end = i2 + istep;
560       if (i2end > nnz) i2end = nnz;
561 
562       /* Merge the two array parts */
563       if (val) {
564         for (i = istart; i < i2end; i++) {
565           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
566             ihlp1[i] = ihlp2[i1];
567             vhlp1[i] = vhlp2[i1];
568             i1++;
569           } else if (i2 < i2end) {
570             ihlp1[i] = ihlp2[i2];
571             vhlp1[i] = vhlp2[i2];
572             i2++;
573           } else {
574             ihlp1[i] = ihlp2[i1];
575             vhlp1[i] = vhlp2[i1];
576             i1++;
577           }
578         }
579       } else {
580         for (i = istart; i < i2end; i++) {
581           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
582             ihlp1[i] = ihlp2[i1];
583             i1++;
584           } else if (i2 < i2end) {
585             ihlp1[i] = ihlp2[i2];
586             i2++;
587           } else {
588             ihlp1[i] = ihlp2[i1];
589             i1++;
590           }
591         }
592       }
593     }
594 
595     /* Swap the two array sets */
596     iswap = ihlp2;
597     ihlp2 = ihlp1;
598     ihlp1 = iswap;
599     vswap = vhlp2;
600     vhlp2 = vhlp1;
601     vhlp1 = vswap;
602   }
603 
604   /* Copy one more time in case the sorted arrays are the temporary ones */
605   if (ihlp2 != icol) {
606     for (i = 0; i < nnz; i++) icol[i] = ihlp2[i];
607     if (val) {
608       for (i = 0; i < nnz; i++) val[i] = vhlp2[i];
609     }
610   }
611 
612   PetscCall(PetscFree(ialloc));
613   if (val) PetscCall(PetscFree(valloc));
614   PetscFunctionReturn(0);
615 }
616 
617 /*
618   spbas_apply_reordering_rows:
619     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
620 */
621 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) {
622   PetscInt      i, j, ip;
623   PetscInt      nrows = matrix_A->nrows;
624   PetscInt     *row_nnz;
625   PetscInt    **icols;
626   PetscBool     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
627   PetscScalar **vals      = NULL;
628 
629   PetscFunctionBegin;
630   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
631 
632   if (do_values) PetscCall(PetscMalloc1(nrows, &vals));
633   PetscCall(PetscMalloc1(nrows, &row_nnz));
634   PetscCall(PetscMalloc1(nrows, &icols));
635 
636   for (i = 0; i < nrows; i++) {
637     ip = permutation[i];
638     if (do_values) vals[i] = matrix_A->values[ip];
639     icols[i]   = matrix_A->icols[ip];
640     row_nnz[i] = matrix_A->row_nnz[ip];
641     for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i;
642   }
643 
644   if (do_values) PetscCall(PetscFree(matrix_A->values));
645   PetscCall(PetscFree(matrix_A->icols));
646   PetscCall(PetscFree(matrix_A->row_nnz));
647 
648   if (do_values) matrix_A->values = vals;
649   matrix_A->icols   = icols;
650   matrix_A->row_nnz = row_nnz;
651   PetscFunctionReturn(0);
652 }
653 
654 /*
655   spbas_apply_reordering_cols:
656     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
657 */
658 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation) {
659   PetscInt     i, j;
660   PetscInt     nrows = matrix_A->nrows;
661   PetscInt     row_nnz;
662   PetscInt    *icols;
663   PetscBool    do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
664   PetscScalar *vals      = NULL;
665 
666   PetscFunctionBegin;
667   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
668 
669   for (i = 0; i < nrows; i++) {
670     icols   = matrix_A->icols[i];
671     row_nnz = matrix_A->row_nnz[i];
672     if (do_values) vals = matrix_A->values[i];
673 
674     for (j = 0; j < row_nnz; j++) icols[j] = permutation[i + icols[j]] - i;
675     PetscCall(spbas_mergesort(row_nnz, icols, vals));
676   }
677   PetscFunctionReturn(0);
678 }
679 
680 /*
681   spbas_apply_reordering:
682     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
683 */
684 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm) {
685   PetscFunctionBegin;
686   PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm));
687   PetscCall(spbas_apply_reordering_cols(matrix_A, permutation));
688   PetscFunctionReturn(0);
689 }
690 
691 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result) {
692   spbas_matrix retval;
693   PetscInt     i, j, i0, r_nnz;
694 
695   PetscFunctionBegin;
696   /* Copy input values */
697   retval.nrows = nrows;
698   retval.ncols = ncols;
699   retval.nnz   = ai[nrows];
700 
701   retval.block_data   = PETSC_TRUE;
702   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
703 
704   /* Allocate output matrix */
705   PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE));
706   for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i];
707   PetscCall(spbas_allocate_data(&retval));
708   /* Copy the structure */
709   for (i = 0; i < retval.nrows; i++) {
710     i0    = ai[i];
711     r_nnz = ai[i + 1] - i0;
712 
713     for (j = 0; j < r_nnz; j++) retval.icols[i][j] = aj[i0 + j] - i;
714   }
715   *result = retval;
716   PetscFunctionReturn(0);
717 }
718 
719 /*
720    spbas_mark_row_power:
721       Mark the columns in row 'row' which are nonzero in
722           matrix^2log(marker).
723 */
724 PetscErrorCode spbas_mark_row_power(PetscInt     *iwork,     /* marker-vector */
725                                     PetscInt      row,       /* row for which the columns are marked */
726                                     spbas_matrix *in_matrix, /* matrix for which the power is being  calculated */
727                                     PetscInt      marker,    /* marker-value: 2^power */
728                                     PetscInt      minmrk,    /* lower bound for marked points */
729                                     PetscInt      maxmrk)         /* upper bound for marked points */
730 {
731   PetscInt i, j, nnz;
732 
733   PetscFunctionBegin;
734   nnz = in_matrix->row_nnz[row];
735 
736   /* For higher powers, call this function recursively */
737   if (marker > 1) {
738     for (i = 0; i < nnz; i++) {
739       j = row + in_matrix->icols[row][i];
740       if (minmrk <= j && j < maxmrk && iwork[j] < marker) {
741         PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk));
742         iwork[j] |= marker;
743       }
744     }
745   } else {
746     /*  Mark the columns reached */
747     for (i = 0; i < nnz; i++) {
748       j = row + in_matrix->icols[row][i];
749       if (minmrk <= j && j < maxmrk) iwork[j] |= 1;
750     }
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 /*
756    spbas_power
757       Calculate sparseness patterns for incomplete Cholesky decompositions
758       of a given order: (almost) all nonzeros of the matrix^(order+1) which
759       are inside the band width are found and stored in the output sparseness
760       pattern.
761 */
762 PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result) {
763   spbas_matrix retval;
764   PetscInt     nrows = in_matrix.nrows;
765   PetscInt     ncols = in_matrix.ncols;
766   PetscInt     i, j, kend;
767   PetscInt     nnz, inz;
768   PetscInt    *iwork;
769   PetscInt     marker;
770   PetscInt     maxmrk = 0;
771 
772   PetscFunctionBegin;
773   PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
774   PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error");
775   PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
776   PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
777 
778   /* Copy input values*/
779   retval.nrows        = ncols;
780   retval.ncols        = nrows;
781   retval.nnz          = 0;
782   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
783   retval.block_data   = PETSC_FALSE;
784 
785   /* Allocate sparseness pattern */
786   PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
787 
788   /* Allocate marker array: note sure the max needed so use the max of the two */
789   PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork));
790 
791   /* Calculate marker values */
792   marker = 1;
793   for (i = 1; i < power; i++) marker *= 2;
794 
795   for (i = 0; i < nrows; i++) {
796     /* Calculate the pattern for each row */
797 
798     nnz  = in_matrix.row_nnz[i];
799     kend = i + in_matrix.icols[i][nnz - 1];
800     if (maxmrk <= kend) maxmrk = kend + 1;
801     PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk));
802 
803     /* Count the columns*/
804     nnz = 0;
805     for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0);
806 
807     /* Allocate the column indices */
808     retval.row_nnz[i] = nnz;
809     PetscCall(PetscMalloc1(nnz, &retval.icols[i]));
810 
811     /* Administrate the column indices */
812     inz = 0;
813     for (j = i; j < maxmrk; j++) {
814       if (iwork[j]) {
815         retval.icols[i][inz] = j - i;
816         inz++;
817         iwork[j] = 0;
818       }
819     }
820     retval.nnz += nnz;
821   };
822   PetscCall(PetscFree(iwork));
823   *result = retval;
824   PetscFunctionReturn(0);
825 }
826 
827 /*
828    spbas_keep_upper:
829       remove the lower part of the matrix: keep the upper part
830 */
831 PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix) {
832   PetscInt i, j;
833   PetscInt jstart;
834 
835   PetscFunctionBegin;
836   PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices");
837   for (i = 0; i < inout_matrix->nrows; i++) {
838     for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { }
839     if (jstart > 0) {
840       for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart];
841 
842       if (inout_matrix->values) {
843         for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart];
844       }
845 
846       inout_matrix->row_nnz[i] -= jstart;
847 
848       inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt));
849 
850       if (inout_matrix->values) inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar));
851       inout_matrix->nnz -= jstart;
852     }
853   }
854   PetscFunctionReturn(0);
855 }
856