xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 89669be4d29968dc8d4c19ce1b69194a6a561ea4)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Notes:
18     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
19      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
20      we recommend not using this functionality.
21 
22 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()`
23 
24 M*/
25 
26 /*
27   spbas_memory_requirement:
28     Calculate the number of bytes needed to store tha matrix
29 */
30 size_t spbas_memory_requirement(spbas_matrix matrix)
31 {
32   size_t memreq = 6 * sizeof(PetscInt)  + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
33                     sizeof(PetscBool)               + /* block_data */
34                     sizeof(PetscScalar**)           + /* values */
35                     sizeof(PetscScalar*)            + /* alloc_val */
36                     2 * sizeof(PetscInt**)          + /* icols, icols0 */
37                     2 * sizeof(PetscInt*)           + /* row_nnz, alloc_icol */
38                     matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */
39                     matrix.nrows * sizeof(PetscInt*); /* icols[*] */
40 
41   /* icol0[*] */
42   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
43 
44   /* icols[*][*] */
45   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
46   else memreq += matrix.nnz * sizeof(PetscInt);
47 
48   if (matrix.values) {
49     memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
50     /* values[*][*] */
51     if (matrix.block_data) memreq += matrix.n_alloc_val  * sizeof(PetscScalar);
52     else memreq += matrix.nnz * sizeof(PetscScalar);
53   }
54   return memreq;
55 }
56 
57 /*
58   spbas_allocate_pattern:
59     allocate the pattern arrays row_nnz, icols and optionally values
60 */
61 PetscErrorCode spbas_allocate_pattern(spbas_matrix * result, PetscBool do_values)
62 {
63   PetscInt nrows        = result->nrows;
64   PetscInt col_idx_type = result->col_idx_type;
65 
66   PetscFunctionBegin;
67   /* Allocate sparseness pattern */
68   PetscCall(PetscMalloc1(nrows,&result->row_nnz));
69   PetscCall(PetscMalloc1(nrows,&result->icols));
70 
71   /* If offsets are given wrt an array, create array */
72   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
73     PetscCall(PetscMalloc1(nrows,&result->icol0));
74   } else  {
75     result->icol0 = NULL;
76   }
77 
78   /* If values are given, allocate values array */
79   if (do_values)  {
80     PetscCall(PetscMalloc1(nrows,&result->values));
81   } else {
82     result->values = NULL;
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 /*
88 spbas_allocate_data:
89    in case of block_data:
90        Allocate the data arrays alloc_icol and optionally alloc_val,
91        set appropriate pointers from icols and values;
92    in case of !block_data:
93        Allocate the arrays icols[i] and optionally values[i]
94 */
95 PetscErrorCode spbas_allocate_data(spbas_matrix * result)
96 {
97   PetscInt  i;
98   PetscInt  nnz        = result->nnz;
99   PetscInt  nrows      = result->nrows;
100   PetscInt  r_nnz;
101   PetscBool do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
102   PetscBool block_data = result->block_data;
103 
104   PetscFunctionBegin;
105   if (block_data) {
106     /* Allocate the column number array and point to it */
107     result->n_alloc_icol = nnz;
108 
109     PetscCall(PetscMalloc1(nnz, &result->alloc_icol));
110 
111     result->icols[0] = result->alloc_icol;
112     for (i=1; i<nrows; i++)  {
113       result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
114     }
115 
116     /* Allocate the value array and point to it */
117     if (do_values) {
118       result->n_alloc_val = nnz;
119 
120       PetscCall(PetscMalloc1(nnz, &result->alloc_val));
121 
122       result->values[0] = result->alloc_val;
123       for (i=1; i<nrows; i++) {
124         result->values[i] = result->values[i-1] + result->row_nnz[i-1];
125       }
126     }
127   } else {
128     for (i=0; i<nrows; i++)   {
129       r_nnz = result->row_nnz[i];
130       PetscCall(PetscMalloc1(r_nnz, &result->icols[i]));
131     }
132     if (do_values) {
133       for (i=0; i<nrows; i++)  {
134         r_nnz = result->row_nnz[i];
135         PetscCall(PetscMalloc1(r_nnz, &result->values[i]));
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 /*
143   spbas_row_order_icol
144      determine if row i1 should come
145        + before row i2 in the sorted rows (return -1),
146        + after (return 1)
147        + is identical (return 0).
148 */
149 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
150 {
151   PetscInt j;
152   PetscInt nnz1    = irow_in[i1+1] - irow_in[i1];
153   PetscInt nnz2    = irow_in[i2+1] - irow_in[i2];
154   PetscInt * icol1 = &icol_in[irow_in[i1]];
155   PetscInt * icol2 = &icol_in[irow_in[i2]];
156 
157   if (nnz1<nnz2) return -1;
158   if (nnz1>nnz2) return 1;
159 
160   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
161     for (j=0; j<nnz1; j++) {
162       if (icol1[j]< icol2[j]) return -1;
163       if (icol1[j]> icol2[j]) return 1;
164     }
165   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
166     for (j=0; j<nnz1; j++) {
167       if (icol1[j]-i1< icol2[j]-i2) return -1;
168       if (icol1[j]-i1> icol2[j]-i2) return 1;
169     }
170   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
171     for (j=1; j<nnz1; j++) {
172       if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) return -1;
173       if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) return 1;
174     }
175   }
176   return 0;
177 }
178 
179 /*
180   spbas_mergesort_icols:
181     return a sorting of the rows in which identical sparseness patterns are
182     next to each other
183 */
184 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
185 {
186   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
187   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
188   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
189   PetscInt       *ialloc;     /* Allocated arrays */
190   PetscInt       *iswap;      /* auxiliary pointers for swapping */
191   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
192   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
193 
194   PetscFunctionBegin;
195   PetscCall(PetscMalloc1(nrows,&ialloc));
196 
197   ihlp1 = ialloc;
198   ihlp2 = isort;
199 
200   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
201   for (istep=1; istep<nrows; istep*=2) {
202     /*
203       Combine sorted parts
204           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
205       of ihlp2 and vhlp2
206 
207       into one sorted part
208           istart:istart+2*istep-1
209       of ihlp1 and vhlp1
210     */
211     for (istart=0; istart<nrows; istart+=2*istep) {
212       /* Set counters and bound array part endings */
213       i1=istart;        i1end = i1+istep;  if (i1end>nrows) i1end=nrows;
214       i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) i2end=nrows;
215 
216       /* Merge the two array parts */
217       for (i=istart; i<i2end; i++)  {
218         if (i1<i1end && i2<i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
219           ihlp1[i] = ihlp2[i1];
220           i1++;
221         } else if (i2<i2end) {
222           ihlp1[i] = ihlp2[i2];
223           i2++;
224         } else {
225           ihlp1[i] = ihlp2[i1];
226           i1++;
227         }
228       }
229     }
230 
231     /* Swap the two array sets */
232     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
233   }
234 
235   /* Copy one more time in case the sorted arrays are the temporary ones */
236   if (ihlp2 != isort) {
237     for (i=0; i<nrows; i++) isort[i] = ihlp2[i];
238   }
239   PetscCall(PetscFree(ialloc));
240   PetscFunctionReturn(0);
241 }
242 
243 /*
244   spbas_compress_pattern:
245      calculate a compressed sparseness pattern for a sparseness pattern
246      given in compressed row storage. The compressed sparseness pattern may
247      require (much) less memory.
248 */
249 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
250 {
251   PetscInt        nnz      = irow_in[nrows];
252   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
253   size_t          mem_compressed;
254   PetscInt        *isort;
255   PetscInt        *icols;
256   PetscInt        row_nnz;
257   PetscInt        *ipoint;
258   PetscBool       *used;
259   PetscInt        ptr;
260   PetscInt        i,j;
261   const PetscBool no_values = PETSC_FALSE;
262 
263   PetscFunctionBegin;
264   /* Allocate the structure of the new matrix */
265   B->nrows        = nrows;
266   B->ncols        = ncols;
267   B->nnz          = nnz;
268   B->col_idx_type = col_idx_type;
269   B->block_data   = PETSC_TRUE;
270 
271   PetscCall(spbas_allocate_pattern(B, no_values));
272 
273   /* When using an offset array, set it */
274   if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
275     for (i=0; i<nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
276   }
277 
278   /* Allocate the ordering for the rows */
279   PetscCall(PetscMalloc1(nrows,&isort));
280   PetscCall(PetscMalloc1(nrows,&ipoint));
281   PetscCall(PetscCalloc1(nrows,&used));
282 
283   for (i = 0; i<nrows; i++)  {
284     B->row_nnz[i] = irow_in[i+1]-irow_in[i];
285     isort[i]      = i;
286     ipoint[i]     = i;
287   }
288 
289   /* Sort the rows so that identical columns will be next to each other */
290   PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort));
291   PetscCall(PetscInfo(NULL,"Rows have been sorted for patterns\n"));
292 
293   /* Replace identical rows with the first one in the list */
294   for (i=1; i<nrows; i++) {
295     if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
296       ipoint[isort[i]] = ipoint[isort[i-1]];
297     }
298   }
299 
300   /* Collect the rows which are used*/
301   for (i=0; i<nrows; i++) used[ipoint[i]] = PETSC_TRUE;
302 
303   /* Calculate needed memory */
304   B->n_alloc_icol = 0;
305   for (i=0; i<nrows; i++)  {
306     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
307   }
308   PetscCall(PetscMalloc1(B->n_alloc_icol,&B->alloc_icol));
309 
310   /* Fill in the diagonal offsets for the rows which store their own data */
311   ptr = 0;
312   for (i=0; i<B->nrows; i++) {
313     if (used[i]) {
314       B->icols[i] = &B->alloc_icol[ptr];
315       icols = &icol_in[irow_in[i]];
316       row_nnz = B->row_nnz[i];
317       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
318         for (j=0; j<row_nnz; j++) {
319           B->icols[i][j] = icols[j];
320         }
321       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
322         for (j=0; j<row_nnz; j++) {
323           B->icols[i][j] = icols[j]-i;
324         }
325       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
326         for (j=0; j<row_nnz; j++) {
327           B->icols[i][j] = icols[j]-icols[0];
328         }
329       }
330       ptr += B->row_nnz[i];
331     }
332   }
333 
334   /* Point to the right places for all data */
335   for (i=0; i<nrows; i++) {
336     B->icols[i] = B->icols[ipoint[i]];
337   }
338   PetscCall(PetscInfo(NULL,"Row patterns have been compressed\n"));
339   PetscCall(PetscInfo(NULL,"         (%g nonzeros per row)\n", (double) ((PetscReal) nnz / (PetscReal) nrows)));
340 
341   PetscCall(PetscFree(isort));
342   PetscCall(PetscFree(used));
343   PetscCall(PetscFree(ipoint));
344 
345   mem_compressed = spbas_memory_requirement(*B);
346   *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
347   PetscFunctionReturn(0);
348 }
349 
350 /*
351    spbas_incomplete_cholesky
352        Incomplete Cholesky decomposition
353 */
354 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
355 
356 /*
357   spbas_delete : de-allocate the arrays owned by this matrix
358 */
359 PetscErrorCode spbas_delete(spbas_matrix matrix)
360 {
361   PetscInt       i;
362 
363   PetscFunctionBegin;
364   if (matrix.block_data) {
365     PetscCall(PetscFree(matrix.alloc_icol));
366     if (matrix.values) PetscCall(PetscFree(matrix.alloc_val));
367   } else {
368     for (i=0; i<matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i]));
369     PetscCall(PetscFree(matrix.icols));
370     if (matrix.values) {
371       for (i=0; i<matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i]));
372     }
373   }
374 
375   PetscCall(PetscFree(matrix.row_nnz));
376   PetscCall(PetscFree(matrix.icols));
377   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0));
378   PetscCall(PetscFree(matrix.values));
379   PetscFunctionReturn(0);
380 }
381 
382 /*
383 spbas_matrix_to_crs:
384    Convert an spbas_matrix to compessed row storage
385 */
386 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
387 {
388   PetscInt       nrows = matrix_A.nrows;
389   PetscInt       nnz   = matrix_A.nnz;
390   PetscInt       i,j,r_nnz,i0;
391   PetscInt       *irow;
392   PetscInt       *icol;
393   PetscInt       *icol_A;
394   MatScalar      *val;
395   PetscScalar    *val_A;
396   PetscInt       col_idx_type = matrix_A.col_idx_type;
397   PetscBool      do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
398 
399   PetscFunctionBegin;
400   PetscCall(PetscMalloc1(nrows+1, &irow));
401   PetscCall(PetscMalloc1(nnz, &icol));
402   *icol_out = icol;
403   *irow_out = irow;
404   if (do_values) {
405     PetscCall(PetscMalloc1(nnz, &val));
406     *val_out = val; *icol_out = icol; *irow_out=irow;
407   }
408 
409   irow[0]=0;
410   for (i=0; i<nrows; i++) {
411     r_nnz     = matrix_A.row_nnz[i];
412     i0        = irow[i];
413     irow[i+1] = i0 + r_nnz;
414     icol_A    = matrix_A.icols[i];
415 
416     if (do_values) {
417       val_A = matrix_A.values[i];
418       for (j=0; j<r_nnz; j++) {
419         icol[i0+j] = icol_A[j];
420         val[i0+j]  = val_A[j];
421       }
422     } else {
423       for (j=0; j<r_nnz; j++) icol[i0+j] = icol_A[j];
424     }
425 
426     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
427       for (j=0; j<r_nnz; j++) icol[i0+j] += i;
428     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
429       i0 = matrix_A.icol0[i];
430       for (j=0; j<r_nnz; j++) icol[i0+j] += i0;
431     }
432   }
433   PetscFunctionReturn(0);
434 }
435 
436 /*
437     spbas_transpose
438        return the transpose of a matrix
439 */
440 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix * result)
441 {
442   PetscInt       col_idx_type = in_matrix.col_idx_type;
443   PetscInt       nnz          = in_matrix.nnz;
444   PetscInt       ncols        = in_matrix.nrows;
445   PetscInt       nrows        = in_matrix.ncols;
446   PetscInt       i,j,k;
447   PetscInt       r_nnz;
448   PetscInt       *irow;
449   PetscInt       icol0 = 0;
450   PetscScalar    * val;
451 
452   PetscFunctionBegin;
453   /* Copy input values */
454   result->nrows        = nrows;
455   result->ncols        = ncols;
456   result->nnz          = nnz;
457   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
458   result->block_data   = PETSC_TRUE;
459 
460   /* Allocate sparseness pattern */
461   PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
462 
463   /*  Count the number of nonzeros in each row */
464   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
465 
466   for (i=0; i<ncols; i++) {
467     r_nnz = in_matrix.row_nnz[i];
468     irow  = in_matrix.icols[i];
469     if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
470       for (j=0; j<r_nnz; j++) result->row_nnz[irow[j]]++;
471     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
472       for (j=0; j<r_nnz; j++) result->row_nnz[i+irow[j]]++;
473     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
474       icol0=in_matrix.icol0[i];
475       for (j=0; j<r_nnz; j++) result->row_nnz[icol0+irow[j]]++;
476     }
477   }
478 
479   /* Set the pointers to the data */
480   PetscCall(spbas_allocate_data(result));
481 
482   /* Reset the number of nonzeros in each row */
483   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
484 
485   /* Fill the data arrays */
486   if (in_matrix.values) {
487     for (i=0; i<ncols; i++) {
488       r_nnz = in_matrix.row_nnz[i];
489       irow  = in_matrix.icols[i];
490       val   = in_matrix.values[i];
491 
492       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0 = 0;
493       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
494       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0 = in_matrix.icol0[i];
495       for (j=0; j<r_nnz; j++)  {
496         k = icol0 + irow[j];
497         result->icols[k][result->row_nnz[k]]  = i;
498         result->values[k][result->row_nnz[k]] = val[j];
499         result->row_nnz[k]++;
500       }
501     }
502   } else {
503     for (i=0; i<ncols; i++) {
504       r_nnz = in_matrix.row_nnz[i];
505       irow  = in_matrix.icols[i];
506 
507       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0=0;
508       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0=i;
509       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0=in_matrix.icol0[i];
510 
511       for (j=0; j<r_nnz; j++) {
512         k = icol0 + irow[j];
513         result->icols[k][result->row_nnz[k]] = i;
514         result->row_nnz[k]++;
515       }
516     }
517   }
518   PetscFunctionReturn(0);
519 }
520 
521 /*
522    spbas_mergesort
523 
524       mergesort for an array of integers and an array of associated
525       reals
526 
527       on output, icol[0..nnz-1] is increasing;
528                   val[0..nnz-1] has undergone the same permutation as icol
529 
530       NB: val may be NULL: in that case, only the integers are sorted
531 
532 */
533 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
534 {
535   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
536   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
537   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
538   PetscInt       *ialloc;     /* Allocated arrays */
539   PetscScalar    *valloc=NULL;
540   PetscInt       *iswap;      /* auxiliary pointers for swapping */
541   PetscScalar    *vswap;
542   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
543   PetscScalar    *vhlp1=NULL;  /* (arrays under construction) */
544   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
545   PetscScalar    *vhlp2=NULL;
546 
547   PetscFunctionBegin;
548   PetscCall(PetscMalloc1(nnz,&ialloc));
549   ihlp1 = ialloc;
550   ihlp2 = icol;
551 
552   if (val) {
553     PetscCall(PetscMalloc1(nnz,&valloc));
554     vhlp1 = valloc;
555     vhlp2 = val;
556   }
557 
558   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
559   for (istep=1; istep<nnz; istep*=2) {
560     /*
561       Combine sorted parts
562           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
563       of ihlp2 and vhlp2
564 
565       into one sorted part
566           istart:istart+2*istep-1
567       of ihlp1 and vhlp1
568     */
569     for (istart=0; istart<nnz; istart+=2*istep) {
570       /* Set counters and bound array part endings */
571       i1=istart;        i1end = i1+istep;  if (i1end>nnz) i1end=nnz;
572       i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) i2end=nnz;
573 
574       /* Merge the two array parts */
575       if (val) {
576         for (i=istart; i<i2end; i++) {
577           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
578             ihlp1[i] = ihlp2[i1];
579             vhlp1[i] = vhlp2[i1];
580             i1++;
581           } else if (i2<i2end) {
582             ihlp1[i] = ihlp2[i2];
583             vhlp1[i] = vhlp2[i2];
584             i2++;
585           } else {
586             ihlp1[i] = ihlp2[i1];
587             vhlp1[i] = vhlp2[i1];
588             i1++;
589           }
590         }
591       } else {
592         for (i=istart; i<i2end; i++) {
593           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
594             ihlp1[i] = ihlp2[i1];
595             i1++;
596           } else if (i2<i2end) {
597             ihlp1[i] = ihlp2[i2];
598             i2++;
599           } else {
600             ihlp1[i] = ihlp2[i1];
601             i1++;
602           }
603         }
604       }
605     }
606 
607     /* Swap the two array sets */
608     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
609     vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
610   }
611 
612   /* Copy one more time in case the sorted arrays are the temporary ones */
613   if (ihlp2 != icol) {
614     for (i=0; i<nnz; i++) icol[i] = ihlp2[i];
615     if (val) {
616       for (i=0; i<nnz; i++) val[i] = vhlp2[i];
617     }
618   }
619 
620   PetscCall(PetscFree(ialloc));
621   if (val) PetscCall(PetscFree(valloc));
622   PetscFunctionReturn(0);
623 }
624 
625 /*
626   spbas_apply_reordering_rows:
627     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
628 */
629 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
630 {
631   PetscInt       i,j,ip;
632   PetscInt       nrows=matrix_A->nrows;
633   PetscInt       * row_nnz;
634   PetscInt       **icols;
635   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
636   PetscScalar    **vals    = NULL;
637 
638   PetscFunctionBegin;
639   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS,PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern");
640 
641   if (do_values) {
642     PetscCall(PetscMalloc1(nrows, &vals));
643   }
644   PetscCall(PetscMalloc1(nrows, &row_nnz));
645   PetscCall(PetscMalloc1(nrows, &icols));
646 
647   for (i=0; i<nrows; i++) {
648     ip = permutation[i];
649     if (do_values) vals[i] = matrix_A->values[ip];
650     icols[i]   = matrix_A->icols[ip];
651     row_nnz[i] = matrix_A->row_nnz[ip];
652     for (j=0; j<row_nnz[i]; j++) icols[i][j] += ip-i;
653   }
654 
655   if (do_values) PetscCall(PetscFree(matrix_A->values));
656   PetscCall(PetscFree(matrix_A->icols));
657   PetscCall(PetscFree(matrix_A->row_nnz));
658 
659   if (do_values) matrix_A->values = vals;
660   matrix_A->icols   = icols;
661   matrix_A->row_nnz = row_nnz;
662   PetscFunctionReturn(0);
663 }
664 
665 /*
666   spbas_apply_reordering_cols:
667     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
668 */
669 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A,const PetscInt *permutation)
670 {
671   PetscInt       i,j;
672   PetscInt       nrows=matrix_A->nrows;
673   PetscInt       row_nnz;
674   PetscInt       *icols;
675   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
676   PetscScalar    *vals     = NULL;
677 
678   PetscFunctionBegin;
679   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS,PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
680 
681   for (i=0; i<nrows; i++) {
682     icols   = matrix_A->icols[i];
683     row_nnz = matrix_A->row_nnz[i];
684     if (do_values) vals = matrix_A->values[i];
685 
686     for (j=0; j<row_nnz; j++) {
687       icols[j] = permutation[i+icols[j]]-i;
688     }
689     PetscCall(spbas_mergesort(row_nnz, icols, vals));
690   }
691   PetscFunctionReturn(0);
692 }
693 
694 /*
695   spbas_apply_reordering:
696     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
697 */
698 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
699 {
700   PetscFunctionBegin;
701   PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm));
702   PetscCall(spbas_apply_reordering_cols(matrix_A, permutation));
703   PetscFunctionReturn(0);
704 }
705 
706 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
707 {
708   spbas_matrix   retval;
709   PetscInt       i, j, i0, r_nnz;
710 
711   PetscFunctionBegin;
712   /* Copy input values */
713   retval.nrows = nrows;
714   retval.ncols = ncols;
715   retval.nnz   = ai[nrows];
716 
717   retval.block_data   = PETSC_TRUE;
718   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
719 
720   /* Allocate output matrix */
721   PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE));
722   for (i=0; i<nrows; i++) retval.row_nnz[i] = ai[i+1]-ai[i];
723   PetscCall(spbas_allocate_data(&retval));
724   /* Copy the structure */
725   for (i = 0; i<retval.nrows; i++)  {
726     i0    = ai[i];
727     r_nnz = ai[i+1]-i0;
728 
729     for (j=0; j<r_nnz; j++) {
730       retval.icols[i][j] = aj[i0+j]-i;
731     }
732   }
733   *result = retval;
734   PetscFunctionReturn(0);
735 }
736 
737 /*
738    spbas_mark_row_power:
739       Mark the columns in row 'row' which are nonzero in
740           matrix^2log(marker).
741 */
742 PetscErrorCode spbas_mark_row_power(PetscInt *iwork,             /* marker-vector */
743                                     PetscInt row,                /* row for which the columns are marked */
744                                     spbas_matrix * in_matrix,    /* matrix for which the power is being  calculated */
745                                     PetscInt marker,             /* marker-value: 2^power */
746                                     PetscInt minmrk,             /* lower bound for marked points */
747                                     PetscInt maxmrk)             /* upper bound for marked points */
748 {
749   PetscInt       i,j, nnz;
750 
751   PetscFunctionBegin;
752   nnz = in_matrix->row_nnz[row];
753 
754   /* For higher powers, call this function recursively */
755   if (marker>1) {
756     for (i=0; i<nnz; i++) {
757       j = row + in_matrix->icols[row][i];
758       if (minmrk<=j && j<maxmrk && iwork[j] < marker) {
759         PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk));
760         iwork[j] |= marker;
761       }
762     }
763   } else {
764     /*  Mark the columns reached */
765     for (i=0; i<nnz; i++)  {
766       j = row + in_matrix->icols[row][i];
767       if (minmrk<=j && j<maxmrk) iwork[j] |= 1;
768     }
769   }
770   PetscFunctionReturn(0);
771 }
772 
773 /*
774    spbas_power
775       Calculate sparseness patterns for incomplete Cholesky decompositions
776       of a given order: (almost) all nonzeros of the matrix^(order+1) which
777       are inside the band width are found and stored in the output sparseness
778       pattern.
779 */
780 PetscErrorCode spbas_power(spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
781 {
782   spbas_matrix   retval;
783   PetscInt       nrows = in_matrix.nrows;
784   PetscInt       ncols = in_matrix.ncols;
785   PetscInt       i, j, kend;
786   PetscInt       nnz, inz;
787   PetscInt       *iwork;
788   PetscInt       marker;
789   PetscInt       maxmrk=0;
790 
791   PetscFunctionBegin;
792   PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS,PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern");
793   PetscCheck(ncols == nrows,PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error");
794   PetscCheck(!in_matrix.values,PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
795   PetscCheck(power>0,PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
796 
797   /* Copy input values*/
798   retval.nrows        = ncols;
799   retval.ncols        = nrows;
800   retval.nnz          = 0;
801   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
802   retval.block_data   = PETSC_FALSE;
803 
804   /* Allocate sparseness pattern */
805   PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
806 
807   /* Allocate marker array: note sure the max needed so use the max of the two */
808   PetscCall(PetscCalloc1(PetscMax(ncols,nrows), &iwork));
809 
810   /* Calculate marker values */
811   marker = 1; for (i=1; i<power; i++) marker*=2;
812 
813   for (i=0; i<nrows; i++)  {
814     /* Calculate the pattern for each row */
815 
816     nnz  = in_matrix.row_nnz[i];
817     kend = i+in_matrix.icols[i][nnz-1];
818     if (maxmrk<=kend) maxmrk=kend+1;
819     PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk));
820 
821     /* Count the columns*/
822     nnz = 0;
823     for (j=i; j<maxmrk; j++) nnz+= (iwork[j]!=0);
824 
825     /* Allocate the column indices */
826     retval.row_nnz[i] = nnz;
827     PetscCall(PetscMalloc1(nnz,&retval.icols[i]));
828 
829     /* Administrate the column indices */
830     inz = 0;
831     for (j=i; j<maxmrk; j++) {
832       if (iwork[j]) {
833         retval.icols[i][inz] = j-i;
834         inz++;
835         iwork[j] = 0;
836       }
837     }
838     retval.nnz += nnz;
839   };
840   PetscCall(PetscFree(iwork));
841   *result = retval;
842   PetscFunctionReturn(0);
843 }
844 
845 /*
846    spbas_keep_upper:
847       remove the lower part of the matrix: keep the upper part
848 */
849 PetscErrorCode spbas_keep_upper(spbas_matrix * inout_matrix)
850 {
851   PetscInt i, j;
852   PetscInt jstart;
853 
854   PetscFunctionBegin;
855   PetscCheck(!inout_matrix->block_data,PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices");
856   for (i=0; i<inout_matrix->nrows; i++)  {
857     for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++) {}
858     if (jstart>0) {
859       for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
860         inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart];
861       }
862 
863       if (inout_matrix->values) {
864         for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
865           inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
866         }
867       }
868 
869       inout_matrix->row_nnz[i] -= jstart;
870 
871       inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
872 
873       if (inout_matrix->values) {
874         inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
875       }
876       inout_matrix->nnz -= jstart;
877     }
878   }
879   PetscFunctionReturn(0);
880 }
881