xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with `MATAIJ`  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Note:
18     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
19      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
20      we recommend not using this functionality.
21 
22 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()`
23 M*/
24 
25 /*
26   spbas_memory_requirement:
27     Calculate the number of bytes needed to store tha matrix
28 */
29 size_t spbas_memory_requirement(spbas_matrix matrix) {
30   size_t memreq = 6 * sizeof(PetscInt) +             /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
31                   sizeof(PetscBool) +                /* block_data */
32                   sizeof(PetscScalar **) +           /* values */
33                   sizeof(PetscScalar *) +            /* alloc_val */
34                   2 * sizeof(PetscInt **) +          /* icols, icols0 */
35                   2 * sizeof(PetscInt *) +           /* row_nnz, alloc_icol */
36                   matrix.nrows * sizeof(PetscInt) +  /* row_nnz[*] */
37                   matrix.nrows * sizeof(PetscInt *); /* icols[*] */
38 
39   /* icol0[*] */
40   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
41 
42   /* icols[*][*] */
43   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
44   else memreq += matrix.nnz * sizeof(PetscInt);
45 
46   if (matrix.values) {
47     memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */
48     /* values[*][*] */
49     if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar);
50     else memreq += matrix.nnz * sizeof(PetscScalar);
51   }
52   return memreq;
53 }
54 
55 /*
56   spbas_allocate_pattern:
57     allocate the pattern arrays row_nnz, icols and optionally values
58 */
59 PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values) {
60   PetscInt nrows        = result->nrows;
61   PetscInt col_idx_type = result->col_idx_type;
62 
63   PetscFunctionBegin;
64   /* Allocate sparseness pattern */
65   PetscCall(PetscMalloc1(nrows, &result->row_nnz));
66   PetscCall(PetscMalloc1(nrows, &result->icols));
67 
68   /* If offsets are given wrt an array, create array */
69   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
70     PetscCall(PetscMalloc1(nrows, &result->icol0));
71   } else {
72     result->icol0 = NULL;
73   }
74 
75   /* If values are given, allocate values array */
76   if (do_values) {
77     PetscCall(PetscMalloc1(nrows, &result->values));
78   } else {
79     result->values = NULL;
80   }
81   PetscFunctionReturn(0);
82 }
83 
84 /*
85 spbas_allocate_data:
86    in case of block_data:
87        Allocate the data arrays alloc_icol and optionally alloc_val,
88        set appropriate pointers from icols and values;
89    in case of !block_data:
90        Allocate the arrays icols[i] and optionally values[i]
91 */
92 PetscErrorCode spbas_allocate_data(spbas_matrix *result) {
93   PetscInt  i;
94   PetscInt  nnz   = result->nnz;
95   PetscInt  nrows = result->nrows;
96   PetscInt  r_nnz;
97   PetscBool do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
98   PetscBool block_data = result->block_data;
99 
100   PetscFunctionBegin;
101   if (block_data) {
102     /* Allocate the column number array and point to it */
103     result->n_alloc_icol = nnz;
104 
105     PetscCall(PetscMalloc1(nnz, &result->alloc_icol));
106 
107     result->icols[0] = result->alloc_icol;
108     for (i = 1; i < nrows; i++) result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1];
109 
110     /* Allocate the value array and point to it */
111     if (do_values) {
112       result->n_alloc_val = nnz;
113 
114       PetscCall(PetscMalloc1(nnz, &result->alloc_val));
115 
116       result->values[0] = result->alloc_val;
117       for (i = 1; i < nrows; i++) result->values[i] = result->values[i - 1] + result->row_nnz[i - 1];
118     }
119   } else {
120     for (i = 0; i < nrows; i++) {
121       r_nnz = result->row_nnz[i];
122       PetscCall(PetscMalloc1(r_nnz, &result->icols[i]));
123     }
124     if (do_values) {
125       for (i = 0; i < nrows; i++) {
126         r_nnz = result->row_nnz[i];
127         PetscCall(PetscMalloc1(r_nnz, &result->values[i]));
128       }
129     }
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 /*
135   spbas_row_order_icol
136      determine if row i1 should come
137        + before row i2 in the sorted rows (return -1),
138        + after (return 1)
139        + is identical (return 0).
140 */
141 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type) {
142   PetscInt  j;
143   PetscInt  nnz1  = irow_in[i1 + 1] - irow_in[i1];
144   PetscInt  nnz2  = irow_in[i2 + 1] - irow_in[i2];
145   PetscInt *icol1 = &icol_in[irow_in[i1]];
146   PetscInt *icol2 = &icol_in[irow_in[i2]];
147 
148   if (nnz1 < nnz2) return -1;
149   if (nnz1 > nnz2) return 1;
150 
151   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
152     for (j = 0; j < nnz1; j++) {
153       if (icol1[j] < icol2[j]) return -1;
154       if (icol1[j] > icol2[j]) return 1;
155     }
156   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
157     for (j = 0; j < nnz1; j++) {
158       if (icol1[j] - i1 < icol2[j] - i2) return -1;
159       if (icol1[j] - i1 > icol2[j] - i2) return 1;
160     }
161   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
162     for (j = 1; j < nnz1; j++) {
163       if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1;
164       if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1;
165     }
166   }
167   return 0;
168 }
169 
170 /*
171   spbas_mergesort_icols:
172     return a sorting of the rows in which identical sparseness patterns are
173     next to each other
174 */
175 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort) {
176   PetscInt  istep;                /* Chunk-sizes of already sorted parts of arrays */
177   PetscInt  i, i1, i2;            /* Loop counters for (partly) sorted arrays */
178   PetscInt  istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
179   PetscInt *ialloc;               /* Allocated arrays */
180   PetscInt *iswap;                /* auxiliary pointers for swapping */
181   PetscInt *ihlp1;                /* Pointers to new version of arrays, */
182   PetscInt *ihlp2;                /* Pointers to previous version of arrays, */
183 
184   PetscFunctionBegin;
185   PetscCall(PetscMalloc1(nrows, &ialloc));
186 
187   ihlp1 = ialloc;
188   ihlp2 = isort;
189 
190   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
191   for (istep = 1; istep < nrows; istep *= 2) {
192     /*
193       Combine sorted parts
194           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
195       of ihlp2 and vhlp2
196 
197       into one sorted part
198           istart:istart+2*istep-1
199       of ihlp1 and vhlp1
200     */
201     for (istart = 0; istart < nrows; istart += 2 * istep) {
202       /* Set counters and bound array part endings */
203       i1    = istart;
204       i1end = i1 + istep;
205       if (i1end > nrows) i1end = nrows;
206       i2    = istart + istep;
207       i2end = i2 + istep;
208       if (i2end > nrows) i2end = nrows;
209 
210       /* Merge the two array parts */
211       for (i = istart; i < i2end; i++) {
212         if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
213           ihlp1[i] = ihlp2[i1];
214           i1++;
215         } else if (i2 < i2end) {
216           ihlp1[i] = ihlp2[i2];
217           i2++;
218         } else {
219           ihlp1[i] = ihlp2[i1];
220           i1++;
221         }
222       }
223     }
224 
225     /* Swap the two array sets */
226     iswap = ihlp2;
227     ihlp2 = ihlp1;
228     ihlp1 = iswap;
229   }
230 
231   /* Copy one more time in case the sorted arrays are the temporary ones */
232   if (ihlp2 != isort) {
233     for (i = 0; i < nrows; i++) isort[i] = ihlp2[i];
234   }
235   PetscCall(PetscFree(ialloc));
236   PetscFunctionReturn(0);
237 }
238 
239 /*
240   spbas_compress_pattern:
241      calculate a compressed sparseness pattern for a sparseness pattern
242      given in compressed row storage. The compressed sparseness pattern may
243      require (much) less memory.
244 */
245 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction) {
246   PetscInt        nnz      = irow_in[nrows];
247   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
248   size_t          mem_compressed;
249   PetscInt       *isort;
250   PetscInt       *icols;
251   PetscInt        row_nnz;
252   PetscInt       *ipoint;
253   PetscBool      *used;
254   PetscInt        ptr;
255   PetscInt        i, j;
256   const PetscBool no_values = PETSC_FALSE;
257 
258   PetscFunctionBegin;
259   /* Allocate the structure of the new matrix */
260   B->nrows        = nrows;
261   B->ncols        = ncols;
262   B->nnz          = nnz;
263   B->col_idx_type = col_idx_type;
264   B->block_data   = PETSC_TRUE;
265 
266   PetscCall(spbas_allocate_pattern(B, no_values));
267 
268   /* When using an offset array, set it */
269   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
270     for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
271   }
272 
273   /* Allocate the ordering for the rows */
274   PetscCall(PetscMalloc1(nrows, &isort));
275   PetscCall(PetscMalloc1(nrows, &ipoint));
276   PetscCall(PetscCalloc1(nrows, &used));
277 
278   for (i = 0; i < nrows; i++) {
279     B->row_nnz[i] = irow_in[i + 1] - irow_in[i];
280     isort[i]      = i;
281     ipoint[i]     = i;
282   }
283 
284   /* Sort the rows so that identical columns will be next to each other */
285   PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort));
286   PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n"));
287 
288   /* Replace identical rows with the first one in the list */
289   for (i = 1; i < nrows; i++) {
290     if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) ipoint[isort[i]] = ipoint[isort[i - 1]];
291   }
292 
293   /* Collect the rows which are used*/
294   for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE;
295 
296   /* Calculate needed memory */
297   B->n_alloc_icol = 0;
298   for (i = 0; i < nrows; i++) {
299     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
300   }
301   PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol));
302 
303   /* Fill in the diagonal offsets for the rows which store their own data */
304   ptr = 0;
305   for (i = 0; i < B->nrows; i++) {
306     if (used[i]) {
307       B->icols[i] = &B->alloc_icol[ptr];
308       icols       = &icol_in[irow_in[i]];
309       row_nnz     = B->row_nnz[i];
310       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
311         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j];
312       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
313         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - i;
314       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
315         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - icols[0];
316       }
317       ptr += B->row_nnz[i];
318     }
319   }
320 
321   /* Point to the right places for all data */
322   for (i = 0; i < nrows; i++) B->icols[i] = B->icols[ipoint[i]];
323   PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n"));
324   PetscCall(PetscInfo(NULL, "         (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows)));
325 
326   PetscCall(PetscFree(isort));
327   PetscCall(PetscFree(used));
328   PetscCall(PetscFree(ipoint));
329 
330   mem_compressed = spbas_memory_requirement(*B);
331   *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig;
332   PetscFunctionReturn(0);
333 }
334 
335 /*
336    spbas_incomplete_cholesky
337        Incomplete Cholesky decomposition
338 */
339 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
340 
341 /*
342   spbas_delete : de-allocate the arrays owned by this matrix
343 */
344 PetscErrorCode spbas_delete(spbas_matrix matrix) {
345   PetscInt i;
346 
347   PetscFunctionBegin;
348   if (matrix.block_data) {
349     PetscCall(PetscFree(matrix.alloc_icol));
350     if (matrix.values) PetscCall(PetscFree(matrix.alloc_val));
351   } else {
352     for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i]));
353     PetscCall(PetscFree(matrix.icols));
354     if (matrix.values) {
355       for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i]));
356     }
357   }
358 
359   PetscCall(PetscFree(matrix.row_nnz));
360   PetscCall(PetscFree(matrix.icols));
361   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0));
362   PetscCall(PetscFree(matrix.values));
363   PetscFunctionReturn(0);
364 }
365 
366 /*
367 spbas_matrix_to_crs:
368    Convert an spbas_matrix to compessed row storage
369 */
370 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out) {
371   PetscInt     nrows = matrix_A.nrows;
372   PetscInt     nnz   = matrix_A.nnz;
373   PetscInt     i, j, r_nnz, i0;
374   PetscInt    *irow;
375   PetscInt    *icol;
376   PetscInt    *icol_A;
377   MatScalar   *val;
378   PetscScalar *val_A;
379   PetscInt     col_idx_type = matrix_A.col_idx_type;
380   PetscBool    do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
381 
382   PetscFunctionBegin;
383   PetscCall(PetscMalloc1(nrows + 1, &irow));
384   PetscCall(PetscMalloc1(nnz, &icol));
385   *icol_out = icol;
386   *irow_out = irow;
387   if (do_values) {
388     PetscCall(PetscMalloc1(nnz, &val));
389     *val_out  = val;
390     *icol_out = icol;
391     *irow_out = irow;
392   }
393 
394   irow[0] = 0;
395   for (i = 0; i < nrows; i++) {
396     r_nnz       = matrix_A.row_nnz[i];
397     i0          = irow[i];
398     irow[i + 1] = i0 + r_nnz;
399     icol_A      = matrix_A.icols[i];
400 
401     if (do_values) {
402       val_A = matrix_A.values[i];
403       for (j = 0; j < r_nnz; j++) {
404         icol[i0 + j] = icol_A[j];
405         val[i0 + j]  = val_A[j];
406       }
407     } else {
408       for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j];
409     }
410 
411     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
412       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i;
413     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
414       i0 = matrix_A.icol0[i];
415       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0;
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 /*
422     spbas_transpose
423        return the transpose of a matrix
424 */
425 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result) {
426   PetscInt     col_idx_type = in_matrix.col_idx_type;
427   PetscInt     nnz          = in_matrix.nnz;
428   PetscInt     ncols        = in_matrix.nrows;
429   PetscInt     nrows        = in_matrix.ncols;
430   PetscInt     i, j, k;
431   PetscInt     r_nnz;
432   PetscInt    *irow;
433   PetscInt     icol0 = 0;
434   PetscScalar *val;
435 
436   PetscFunctionBegin;
437   /* Copy input values */
438   result->nrows        = nrows;
439   result->ncols        = ncols;
440   result->nnz          = nnz;
441   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
442   result->block_data   = PETSC_TRUE;
443 
444   /* Allocate sparseness pattern */
445   PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
446 
447   /*  Count the number of nonzeros in each row */
448   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
449 
450   for (i = 0; i < ncols; i++) {
451     r_nnz = in_matrix.row_nnz[i];
452     irow  = in_matrix.icols[i];
453     if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
454       for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++;
455     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
456       for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++;
457     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
458       icol0 = in_matrix.icol0[i];
459       for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++;
460     }
461   }
462 
463   /* Set the pointers to the data */
464   PetscCall(spbas_allocate_data(result));
465 
466   /* Reset the number of nonzeros in each row */
467   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
468 
469   /* Fill the data arrays */
470   if (in_matrix.values) {
471     for (i = 0; i < ncols; i++) {
472       r_nnz = in_matrix.row_nnz[i];
473       irow  = in_matrix.icols[i];
474       val   = in_matrix.values[i];
475 
476       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
477       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
478       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
479       for (j = 0; j < r_nnz; j++) {
480         k                                     = icol0 + irow[j];
481         result->icols[k][result->row_nnz[k]]  = i;
482         result->values[k][result->row_nnz[k]] = val[j];
483         result->row_nnz[k]++;
484       }
485     }
486   } else {
487     for (i = 0; i < ncols; i++) {
488       r_nnz = in_matrix.row_nnz[i];
489       irow  = in_matrix.icols[i];
490 
491       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
492       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
493       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
494 
495       for (j = 0; j < r_nnz; j++) {
496         k                                    = icol0 + irow[j];
497         result->icols[k][result->row_nnz[k]] = i;
498         result->row_nnz[k]++;
499       }
500     }
501   }
502   PetscFunctionReturn(0);
503 }
504 
505 /*
506    spbas_mergesort
507 
508       mergesort for an array of integers and an array of associated
509       reals
510 
511       on output, icol[0..nnz-1] is increasing;
512                   val[0..nnz-1] has undergone the same permutation as icol
513 
514       NB: val may be NULL: in that case, only the integers are sorted
515 
516 */
517 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val) {
518   PetscInt     istep;                /* Chunk-sizes of already sorted parts of arrays */
519   PetscInt     i, i1, i2;            /* Loop counters for (partly) sorted arrays */
520   PetscInt     istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
521   PetscInt    *ialloc;               /* Allocated arrays */
522   PetscScalar *valloc = NULL;
523   PetscInt    *iswap; /* auxiliary pointers for swapping */
524   PetscScalar *vswap;
525   PetscInt    *ihlp1;        /* Pointers to new version of arrays, */
526   PetscScalar *vhlp1 = NULL; /* (arrays under construction) */
527   PetscInt    *ihlp2;        /* Pointers to previous version of arrays, */
528   PetscScalar *vhlp2 = NULL;
529 
530   PetscFunctionBegin;
531   PetscCall(PetscMalloc1(nnz, &ialloc));
532   ihlp1 = ialloc;
533   ihlp2 = icol;
534 
535   if (val) {
536     PetscCall(PetscMalloc1(nnz, &valloc));
537     vhlp1 = valloc;
538     vhlp2 = val;
539   }
540 
541   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
542   for (istep = 1; istep < nnz; istep *= 2) {
543     /*
544       Combine sorted parts
545           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
546       of ihlp2 and vhlp2
547 
548       into one sorted part
549           istart:istart+2*istep-1
550       of ihlp1 and vhlp1
551     */
552     for (istart = 0; istart < nnz; istart += 2 * istep) {
553       /* Set counters and bound array part endings */
554       i1    = istart;
555       i1end = i1 + istep;
556       if (i1end > nnz) i1end = nnz;
557       i2    = istart + istep;
558       i2end = i2 + istep;
559       if (i2end > nnz) i2end = nnz;
560 
561       /* Merge the two array parts */
562       if (val) {
563         for (i = istart; i < i2end; i++) {
564           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
565             ihlp1[i] = ihlp2[i1];
566             vhlp1[i] = vhlp2[i1];
567             i1++;
568           } else if (i2 < i2end) {
569             ihlp1[i] = ihlp2[i2];
570             vhlp1[i] = vhlp2[i2];
571             i2++;
572           } else {
573             ihlp1[i] = ihlp2[i1];
574             vhlp1[i] = vhlp2[i1];
575             i1++;
576           }
577         }
578       } else {
579         for (i = istart; i < i2end; i++) {
580           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
581             ihlp1[i] = ihlp2[i1];
582             i1++;
583           } else if (i2 < i2end) {
584             ihlp1[i] = ihlp2[i2];
585             i2++;
586           } else {
587             ihlp1[i] = ihlp2[i1];
588             i1++;
589           }
590         }
591       }
592     }
593 
594     /* Swap the two array sets */
595     iswap = ihlp2;
596     ihlp2 = ihlp1;
597     ihlp1 = iswap;
598     vswap = vhlp2;
599     vhlp2 = vhlp1;
600     vhlp1 = vswap;
601   }
602 
603   /* Copy one more time in case the sorted arrays are the temporary ones */
604   if (ihlp2 != icol) {
605     for (i = 0; i < nnz; i++) icol[i] = ihlp2[i];
606     if (val) {
607       for (i = 0; i < nnz; i++) val[i] = vhlp2[i];
608     }
609   }
610 
611   PetscCall(PetscFree(ialloc));
612   if (val) PetscCall(PetscFree(valloc));
613   PetscFunctionReturn(0);
614 }
615 
616 /*
617   spbas_apply_reordering_rows:
618     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
619 */
620 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation) {
621   PetscInt      i, j, ip;
622   PetscInt      nrows = matrix_A->nrows;
623   PetscInt     *row_nnz;
624   PetscInt    **icols;
625   PetscBool     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
626   PetscScalar **vals      = NULL;
627 
628   PetscFunctionBegin;
629   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
630 
631   if (do_values) PetscCall(PetscMalloc1(nrows, &vals));
632   PetscCall(PetscMalloc1(nrows, &row_nnz));
633   PetscCall(PetscMalloc1(nrows, &icols));
634 
635   for (i = 0; i < nrows; i++) {
636     ip = permutation[i];
637     if (do_values) vals[i] = matrix_A->values[ip];
638     icols[i]   = matrix_A->icols[ip];
639     row_nnz[i] = matrix_A->row_nnz[ip];
640     for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i;
641   }
642 
643   if (do_values) PetscCall(PetscFree(matrix_A->values));
644   PetscCall(PetscFree(matrix_A->icols));
645   PetscCall(PetscFree(matrix_A->row_nnz));
646 
647   if (do_values) matrix_A->values = vals;
648   matrix_A->icols   = icols;
649   matrix_A->row_nnz = row_nnz;
650   PetscFunctionReturn(0);
651 }
652 
653 /*
654   spbas_apply_reordering_cols:
655     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
656 */
657 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation) {
658   PetscInt     i, j;
659   PetscInt     nrows = matrix_A->nrows;
660   PetscInt     row_nnz;
661   PetscInt    *icols;
662   PetscBool    do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
663   PetscScalar *vals      = NULL;
664 
665   PetscFunctionBegin;
666   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
667 
668   for (i = 0; i < nrows; i++) {
669     icols   = matrix_A->icols[i];
670     row_nnz = matrix_A->row_nnz[i];
671     if (do_values) vals = matrix_A->values[i];
672 
673     for (j = 0; j < row_nnz; j++) icols[j] = permutation[i + icols[j]] - i;
674     PetscCall(spbas_mergesort(row_nnz, icols, vals));
675   }
676   PetscFunctionReturn(0);
677 }
678 
679 /*
680   spbas_apply_reordering:
681     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
682 */
683 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm) {
684   PetscFunctionBegin;
685   PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm));
686   PetscCall(spbas_apply_reordering_cols(matrix_A, permutation));
687   PetscFunctionReturn(0);
688 }
689 
690 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result) {
691   spbas_matrix retval;
692   PetscInt     i, j, i0, r_nnz;
693 
694   PetscFunctionBegin;
695   /* Copy input values */
696   retval.nrows = nrows;
697   retval.ncols = ncols;
698   retval.nnz   = ai[nrows];
699 
700   retval.block_data   = PETSC_TRUE;
701   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
702 
703   /* Allocate output matrix */
704   PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE));
705   for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i];
706   PetscCall(spbas_allocate_data(&retval));
707   /* Copy the structure */
708   for (i = 0; i < retval.nrows; i++) {
709     i0    = ai[i];
710     r_nnz = ai[i + 1] - i0;
711 
712     for (j = 0; j < r_nnz; j++) retval.icols[i][j] = aj[i0 + j] - i;
713   }
714   *result = retval;
715   PetscFunctionReturn(0);
716 }
717 
718 /*
719    spbas_mark_row_power:
720       Mark the columns in row 'row' which are nonzero in
721           matrix^2log(marker).
722 */
723 PetscErrorCode spbas_mark_row_power(PetscInt     *iwork,     /* marker-vector */
724                                     PetscInt      row,       /* row for which the columns are marked */
725                                     spbas_matrix *in_matrix, /* matrix for which the power is being  calculated */
726                                     PetscInt      marker,    /* marker-value: 2^power */
727                                     PetscInt      minmrk,    /* lower bound for marked points */
728                                     PetscInt      maxmrk)         /* upper bound for marked points */
729 {
730   PetscInt i, j, nnz;
731 
732   PetscFunctionBegin;
733   nnz = in_matrix->row_nnz[row];
734 
735   /* For higher powers, call this function recursively */
736   if (marker > 1) {
737     for (i = 0; i < nnz; i++) {
738       j = row + in_matrix->icols[row][i];
739       if (minmrk <= j && j < maxmrk && iwork[j] < marker) {
740         PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk));
741         iwork[j] |= marker;
742       }
743     }
744   } else {
745     /*  Mark the columns reached */
746     for (i = 0; i < nnz; i++) {
747       j = row + in_matrix->icols[row][i];
748       if (minmrk <= j && j < maxmrk) iwork[j] |= 1;
749     }
750   }
751   PetscFunctionReturn(0);
752 }
753 
754 /*
755    spbas_power
756       Calculate sparseness patterns for incomplete Cholesky decompositions
757       of a given order: (almost) all nonzeros of the matrix^(order+1) which
758       are inside the band width are found and stored in the output sparseness
759       pattern.
760 */
761 PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result) {
762   spbas_matrix retval;
763   PetscInt     nrows = in_matrix.nrows;
764   PetscInt     ncols = in_matrix.ncols;
765   PetscInt     i, j, kend;
766   PetscInt     nnz, inz;
767   PetscInt    *iwork;
768   PetscInt     marker;
769   PetscInt     maxmrk = 0;
770 
771   PetscFunctionBegin;
772   PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
773   PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error");
774   PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
775   PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
776 
777   /* Copy input values*/
778   retval.nrows        = ncols;
779   retval.ncols        = nrows;
780   retval.nnz          = 0;
781   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
782   retval.block_data   = PETSC_FALSE;
783 
784   /* Allocate sparseness pattern */
785   PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
786 
787   /* Allocate marker array: note sure the max needed so use the max of the two */
788   PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork));
789 
790   /* Calculate marker values */
791   marker = 1;
792   for (i = 1; i < power; i++) marker *= 2;
793 
794   for (i = 0; i < nrows; i++) {
795     /* Calculate the pattern for each row */
796 
797     nnz  = in_matrix.row_nnz[i];
798     kend = i + in_matrix.icols[i][nnz - 1];
799     if (maxmrk <= kend) maxmrk = kend + 1;
800     PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk));
801 
802     /* Count the columns*/
803     nnz = 0;
804     for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0);
805 
806     /* Allocate the column indices */
807     retval.row_nnz[i] = nnz;
808     PetscCall(PetscMalloc1(nnz, &retval.icols[i]));
809 
810     /* Administrate the column indices */
811     inz = 0;
812     for (j = i; j < maxmrk; j++) {
813       if (iwork[j]) {
814         retval.icols[i][inz] = j - i;
815         inz++;
816         iwork[j] = 0;
817       }
818     }
819     retval.nnz += nnz;
820   };
821   PetscCall(PetscFree(iwork));
822   *result = retval;
823   PetscFunctionReturn(0);
824 }
825 
826 /*
827    spbas_keep_upper:
828       remove the lower part of the matrix: keep the upper part
829 */
830 PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix) {
831   PetscInt i, j;
832   PetscInt jstart;
833 
834   PetscFunctionBegin;
835   PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices");
836   for (i = 0; i < inout_matrix->nrows; i++) {
837     for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { }
838     if (jstart > 0) {
839       for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart];
840 
841       if (inout_matrix->values) {
842         for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart];
843       }
844 
845       inout_matrix->row_nnz[i] -= jstart;
846 
847       inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt));
848 
849       if (inout_matrix->values) inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar));
850       inout_matrix->nnz -= jstart;
851     }
852   }
853   PetscFunctionReturn(0);
854 }
855