xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 9c5460f9064ca60dd71a234a1f6faf93e7a6b0c9)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with `MATAIJ`  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Note:
18     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
19      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
20      we recommend not using this functionality.
21 
22 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `PCFactorSetLevels()`, `PCFactorSetDropTolerance()`
23 M*/
24 
25 /*
26   spbas_memory_requirement:
27     Calculate the number of bytes needed to store the matrix
28 */
spbas_memory_requirement(spbas_matrix matrix)29 size_t spbas_memory_requirement(spbas_matrix matrix)
30 {
31   size_t memreq = 6 * sizeof(PetscInt) +             /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
32                   sizeof(PetscBool) +                /* block_data */
33                   sizeof(PetscScalar **) +           /* values */
34                   sizeof(PetscScalar *) +            /* alloc_val */
35                   2 * sizeof(PetscInt **) +          /* icols, icols0 */
36                   2 * sizeof(PetscInt *) +           /* row_nnz, alloc_icol */
37                   matrix.nrows * sizeof(PetscInt) +  /* row_nnz[*] */
38                   matrix.nrows * sizeof(PetscInt *); /* icols[*] */
39 
40   /* icol0[*] */
41   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
42 
43   /* icols[*][*] */
44   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
45   else memreq += matrix.nnz * sizeof(PetscInt);
46 
47   if (matrix.values) {
48     memreq += matrix.nrows * sizeof(PetscScalar *); /* values[*] */
49     /* values[*][*] */
50     if (matrix.block_data) memreq += matrix.n_alloc_val * sizeof(PetscScalar);
51     else memreq += matrix.nnz * sizeof(PetscScalar);
52   }
53   return memreq;
54 }
55 
56 /*
57   spbas_allocate_pattern:
58     allocate the pattern arrays row_nnz, icols and optionally values
59 */
spbas_allocate_pattern(spbas_matrix * result,PetscBool do_values)60 static PetscErrorCode spbas_allocate_pattern(spbas_matrix *result, PetscBool do_values)
61 {
62   PetscInt nrows        = result->nrows;
63   PetscInt col_idx_type = result->col_idx_type;
64 
65   PetscFunctionBegin;
66   /* Allocate sparseness pattern */
67   PetscCall(PetscMalloc1(nrows, &result->row_nnz));
68   PetscCall(PetscMalloc1(nrows, &result->icols));
69 
70   /* If offsets are given wrt an array, create array */
71   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
72     PetscCall(PetscMalloc1(nrows, &result->icol0));
73   } else {
74     result->icol0 = NULL;
75   }
76 
77   /* If values are given, allocate values array */
78   if (do_values) {
79     PetscCall(PetscMalloc1(nrows, &result->values));
80   } else {
81     result->values = NULL;
82   }
83   PetscFunctionReturn(PETSC_SUCCESS);
84 }
85 
86 /*
87 spbas_allocate_data:
88    in case of block_data:
89        Allocate the data arrays alloc_icol and optionally alloc_val,
90        set appropriate pointers from icols and values;
91    in case of !block_data:
92        Allocate the arrays icols[i] and optionally values[i]
93 */
spbas_allocate_data(spbas_matrix * result)94 static PetscErrorCode spbas_allocate_data(spbas_matrix *result)
95 {
96   PetscInt  i;
97   PetscInt  nnz   = result->nnz;
98   PetscInt  nrows = result->nrows;
99   PetscInt  r_nnz;
100   PetscBool do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
101   PetscBool block_data = result->block_data;
102 
103   PetscFunctionBegin;
104   if (block_data) {
105     /* Allocate the column number array and point to it */
106     result->n_alloc_icol = nnz;
107 
108     PetscCall(PetscMalloc1(nnz, &result->alloc_icol));
109 
110     result->icols[0] = result->alloc_icol;
111     for (i = 1; i < nrows; i++) result->icols[i] = result->icols[i - 1] + result->row_nnz[i - 1];
112 
113     /* Allocate the value array and point to it */
114     if (do_values) {
115       result->n_alloc_val = nnz;
116 
117       PetscCall(PetscMalloc1(nnz, &result->alloc_val));
118 
119       result->values[0] = result->alloc_val;
120       for (i = 1; i < nrows; i++) result->values[i] = result->values[i - 1] + result->row_nnz[i - 1];
121     }
122   } else {
123     for (i = 0; i < nrows; i++) {
124       r_nnz = result->row_nnz[i];
125       PetscCall(PetscMalloc1(r_nnz, &result->icols[i]));
126     }
127     if (do_values) {
128       for (i = 0; i < nrows; i++) {
129         r_nnz = result->row_nnz[i];
130         PetscCall(PetscMalloc1(r_nnz, &result->values[i]));
131       }
132     }
133   }
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /*
138   spbas_row_order_icol
139      determine if row i1 should come
140        + before row i2 in the sorted rows (return -1),
141        + after (return 1)
142        + is identical (return 0).
143 */
spbas_row_order_icol(PetscInt i1,PetscInt i2,PetscInt * irow_in,PetscInt * icol_in,PetscInt col_idx_type)144 static int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type)
145 {
146   PetscInt  j;
147   PetscInt  nnz1  = irow_in[i1 + 1] - irow_in[i1];
148   PetscInt  nnz2  = irow_in[i2 + 1] - irow_in[i2];
149   PetscInt *icol1 = &icol_in[irow_in[i1]];
150   PetscInt *icol2 = &icol_in[irow_in[i2]];
151 
152   if (nnz1 < nnz2) return -1;
153   if (nnz1 > nnz2) return 1;
154 
155   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
156     for (j = 0; j < nnz1; j++) {
157       if (icol1[j] < icol2[j]) return -1;
158       if (icol1[j] > icol2[j]) return 1;
159     }
160   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
161     for (j = 0; j < nnz1; j++) {
162       if (icol1[j] - i1 < icol2[j] - i2) return -1;
163       if (icol1[j] - i1 > icol2[j] - i2) return 1;
164     }
165   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
166     for (j = 1; j < nnz1; j++) {
167       if (icol1[j] - icol1[0] < icol2[j] - icol2[0]) return -1;
168       if (icol1[j] - icol1[0] > icol2[j] - icol2[0]) return 1;
169     }
170   }
171   return 0;
172 }
173 
174 /*
175   spbas_mergesort_icols:
176     return a sorting of the rows in which identical sparseness patterns are
177     next to each other
178 */
spbas_mergesort_icols(PetscInt nrows,PetscInt * irow_in,PetscInt * icol_in,PetscInt col_idx_type,PetscInt * isort)179 static PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt *irow_in, PetscInt *icol_in, PetscInt col_idx_type, PetscInt *isort)
180 {
181   PetscInt  istep;                /* Chunk-sizes of already sorted parts of arrays */
182   PetscInt  i, i1, i2;            /* Loop counters for (partly) sorted arrays */
183   PetscInt  istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
184   PetscInt *ialloc;               /* Allocated arrays */
185   PetscInt *iswap;                /* auxiliary pointers for swapping */
186   PetscInt *ihlp1;                /* Pointers to new version of arrays, */
187   PetscInt *ihlp2;                /* Pointers to previous version of arrays, */
188 
189   PetscFunctionBegin;
190   PetscCall(PetscMalloc1(nrows, &ialloc));
191 
192   ihlp1 = ialloc;
193   ihlp2 = isort;
194 
195   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
196   for (istep = 1; istep < nrows; istep *= 2) {
197     /*
198       Combine sorted parts
199           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
200       of ihlp2 and vhlp2
201 
202       into one sorted part
203           istart:istart+2*istep-1
204       of ihlp1 and vhlp1
205     */
206     for (istart = 0; istart < nrows; istart += 2 * istep) {
207       /* Set counters and bound array part endings */
208       i1    = istart;
209       i1end = i1 + istep;
210       if (i1end > nrows) i1end = nrows;
211       i2    = istart + istep;
212       i2end = i2 + istep;
213       if (i2end > nrows) i2end = nrows;
214 
215       /* Merge the two array parts */
216       for (i = istart; i < i2end; i++) {
217         if (i1 < i1end && i2 < i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
218           ihlp1[i] = ihlp2[i1];
219           i1++;
220         } else if (i2 < i2end) {
221           ihlp1[i] = ihlp2[i2];
222           i2++;
223         } else {
224           ihlp1[i] = ihlp2[i1];
225           i1++;
226         }
227       }
228     }
229 
230     /* Swap the two array sets */
231     iswap = ihlp2;
232     ihlp2 = ihlp1;
233     ihlp1 = iswap;
234   }
235 
236   /* Copy one more time in case the sorted arrays are the temporary ones */
237   if (ihlp2 != isort) {
238     for (i = 0; i < nrows; i++) isort[i] = ihlp2[i];
239   }
240   PetscCall(PetscFree(ialloc));
241   PetscFunctionReturn(PETSC_SUCCESS);
242 }
243 
244 /*
245   spbas_compress_pattern:
246      calculate a compressed sparseness pattern for a sparseness pattern
247      given in compressed row storage. The compressed sparseness pattern may
248      require (much) less memory.
249 */
spbas_compress_pattern(PetscInt * irow_in,PetscInt * icol_in,PetscInt nrows,PetscInt ncols,PetscInt col_idx_type,spbas_matrix * B,PetscReal * mem_reduction)250 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B, PetscReal *mem_reduction)
251 {
252   PetscInt        nnz      = irow_in[nrows];
253   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
254   size_t          mem_compressed;
255   PetscInt       *isort;
256   PetscInt       *icols;
257   PetscInt        row_nnz;
258   PetscInt       *ipoint;
259   PetscBool      *used;
260   PetscInt        ptr;
261   PetscInt        i, j;
262   const PetscBool no_values = PETSC_FALSE;
263 
264   PetscFunctionBegin;
265   /* Allocate the structure of the new matrix */
266   B->nrows        = nrows;
267   B->ncols        = ncols;
268   B->nnz          = nnz;
269   B->col_idx_type = col_idx_type;
270   B->block_data   = PETSC_TRUE;
271 
272   PetscCall(spbas_allocate_pattern(B, no_values));
273 
274   /* When using an offset array, set it */
275   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
276     for (i = 0; i < nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
277   }
278 
279   /* Allocate the ordering for the rows */
280   PetscCall(PetscMalloc1(nrows, &isort));
281   PetscCall(PetscMalloc1(nrows, &ipoint));
282   PetscCall(PetscCalloc1(nrows, &used));
283 
284   for (i = 0; i < nrows; i++) {
285     B->row_nnz[i] = irow_in[i + 1] - irow_in[i];
286     isort[i]      = i;
287     ipoint[i]     = i;
288   }
289 
290   /* Sort the rows so that identical columns will be next to each other */
291   PetscCall(spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort));
292   PetscCall(PetscInfo(NULL, "Rows have been sorted for patterns\n"));
293 
294   /* Replace identical rows with the first one in the list */
295   for (i = 1; i < nrows; i++) {
296     if (spbas_row_order_icol(isort[i - 1], isort[i], irow_in, icol_in, col_idx_type) == 0) ipoint[isort[i]] = ipoint[isort[i - 1]];
297   }
298 
299   /* Collect the rows which are used*/
300   for (i = 0; i < nrows; i++) used[ipoint[i]] = PETSC_TRUE;
301 
302   /* Calculate needed memory */
303   B->n_alloc_icol = 0;
304   for (i = 0; i < nrows; i++) {
305     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
306   }
307   PetscCall(PetscMalloc1(B->n_alloc_icol, &B->alloc_icol));
308 
309   /* Fill in the diagonal offsets for the rows which store their own data */
310   ptr = 0;
311   for (i = 0; i < B->nrows; i++) {
312     if (used[i]) {
313       B->icols[i] = &B->alloc_icol[ptr];
314       icols       = &icol_in[irow_in[i]];
315       row_nnz     = B->row_nnz[i];
316       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
317         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j];
318       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
319         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - i;
320       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
321         for (j = 0; j < row_nnz; j++) B->icols[i][j] = icols[j] - icols[0];
322       }
323       ptr += B->row_nnz[i];
324     }
325   }
326 
327   /* Point to the right places for all data */
328   for (i = 0; i < nrows; i++) B->icols[i] = B->icols[ipoint[i]];
329   PetscCall(PetscInfo(NULL, "Row patterns have been compressed\n"));
330   PetscCall(PetscInfo(NULL, "         (%g nonzeros per row)\n", (double)((PetscReal)nnz / (PetscReal)nrows)));
331 
332   PetscCall(PetscFree(isort));
333   PetscCall(PetscFree(used));
334   PetscCall(PetscFree(ipoint));
335 
336   mem_compressed = spbas_memory_requirement(*B);
337   *mem_reduction = 100.0 * (PetscReal)(mem_orig - mem_compressed) / (PetscReal)mem_orig;
338   PetscFunctionReturn(PETSC_SUCCESS);
339 }
340 
341 /*
342    spbas_incomplete_cholesky
343        Incomplete Cholesky decomposition
344 */
345 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
346 
347 /*
348   spbas_delete : de-allocate the arrays owned by this matrix
349 */
spbas_delete(spbas_matrix matrix)350 PetscErrorCode spbas_delete(spbas_matrix matrix)
351 {
352   PetscInt i;
353 
354   PetscFunctionBegin;
355   if (matrix.block_data) {
356     PetscCall(PetscFree(matrix.alloc_icol));
357     if (matrix.values) PetscCall(PetscFree(matrix.alloc_val));
358   } else {
359     for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.icols[i]));
360     PetscCall(PetscFree(matrix.icols));
361     if (matrix.values) {
362       for (i = 0; i < matrix.nrows; i++) PetscCall(PetscFree(matrix.values[i]));
363     }
364   }
365 
366   PetscCall(PetscFree(matrix.row_nnz));
367   PetscCall(PetscFree(matrix.icols));
368   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) PetscCall(PetscFree(matrix.icol0));
369   PetscCall(PetscFree(matrix.values));
370   PetscFunctionReturn(PETSC_SUCCESS);
371 }
372 
373 /*
374 spbas_matrix_to_crs:
375    Convert an spbas_matrix to compessed row storage
376 */
spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar ** val_out,PetscInt ** irow_out,PetscInt ** icol_out)377 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A, MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
378 {
379   PetscInt     nrows = matrix_A.nrows;
380   PetscInt     nnz   = matrix_A.nnz;
381   PetscInt     i, j, r_nnz, i0;
382   PetscInt    *irow;
383   PetscInt    *icol;
384   PetscInt    *icol_A;
385   MatScalar   *val;
386   PetscScalar *val_A;
387   PetscInt     col_idx_type = matrix_A.col_idx_type;
388   PetscBool    do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
389 
390   PetscFunctionBegin;
391   PetscCall(PetscMalloc1(nrows + 1, &irow));
392   PetscCall(PetscMalloc1(nnz, &icol));
393   *icol_out = icol;
394   *irow_out = irow;
395   if (do_values) {
396     PetscCall(PetscMalloc1(nnz, &val));
397     *val_out  = val;
398     *icol_out = icol;
399     *irow_out = irow;
400   }
401 
402   irow[0] = 0;
403   for (i = 0; i < nrows; i++) {
404     r_nnz       = matrix_A.row_nnz[i];
405     i0          = irow[i];
406     irow[i + 1] = i0 + r_nnz;
407     icol_A      = matrix_A.icols[i];
408 
409     if (do_values) {
410       val_A = matrix_A.values[i];
411       for (j = 0; j < r_nnz; j++) {
412         icol[i0 + j] = icol_A[j];
413         val[i0 + j]  = val_A[j];
414       }
415     } else {
416       for (j = 0; j < r_nnz; j++) icol[i0 + j] = icol_A[j];
417     }
418 
419     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
420       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i;
421     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
422       i0 = matrix_A.icol0[i];
423       for (j = 0; j < r_nnz; j++) icol[i0 + j] += i0;
424     }
425   }
426   PetscFunctionReturn(PETSC_SUCCESS);
427 }
428 
429 /*
430     spbas_transpose
431        return the transpose of a matrix
432 */
spbas_transpose(spbas_matrix in_matrix,spbas_matrix * result)433 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix *result)
434 {
435   PetscInt     col_idx_type = in_matrix.col_idx_type;
436   PetscInt     nnz          = in_matrix.nnz;
437   PetscInt     ncols        = in_matrix.nrows;
438   PetscInt     nrows        = in_matrix.ncols;
439   PetscInt     i, j, k;
440   PetscInt     r_nnz;
441   PetscInt    *irow;
442   PetscInt     icol0 = 0;
443   PetscScalar *val;
444 
445   PetscFunctionBegin;
446   /* Copy input values */
447   result->nrows        = nrows;
448   result->ncols        = ncols;
449   result->nnz          = nnz;
450   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
451   result->block_data   = PETSC_TRUE;
452 
453   /* Allocate sparseness pattern */
454   PetscCall(spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
455 
456   /*  Count the number of nonzeros in each row */
457   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
458 
459   for (i = 0; i < ncols; i++) {
460     r_nnz = in_matrix.row_nnz[i];
461     irow  = in_matrix.icols[i];
462     if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
463       for (j = 0; j < r_nnz; j++) result->row_nnz[irow[j]]++;
464     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
465       for (j = 0; j < r_nnz; j++) result->row_nnz[i + irow[j]]++;
466     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
467       icol0 = in_matrix.icol0[i];
468       for (j = 0; j < r_nnz; j++) result->row_nnz[icol0 + irow[j]]++;
469     }
470   }
471 
472   /* Set the pointers to the data */
473   PetscCall(spbas_allocate_data(result));
474 
475   /* Reset the number of nonzeros in each row */
476   for (i = 0; i < nrows; i++) result->row_nnz[i] = 0;
477 
478   /* Fill the data arrays */
479   if (in_matrix.values) {
480     for (i = 0; i < ncols; i++) {
481       r_nnz = in_matrix.row_nnz[i];
482       irow  = in_matrix.icols[i];
483       val   = in_matrix.values[i];
484 
485       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
486       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
487       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
488       for (j = 0; j < r_nnz; j++) {
489         k                                     = icol0 + irow[j];
490         result->icols[k][result->row_nnz[k]]  = i;
491         result->values[k][result->row_nnz[k]] = val[j];
492         result->row_nnz[k]++;
493       }
494     }
495   } else {
496     for (i = 0; i < ncols; i++) {
497       r_nnz = in_matrix.row_nnz[i];
498       irow  = in_matrix.icols[i];
499 
500       if (col_idx_type == SPBAS_COLUMN_NUMBERS) icol0 = 0;
501       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
502       else if (col_idx_type == SPBAS_OFFSET_ARRAY) icol0 = in_matrix.icol0[i];
503 
504       for (j = 0; j < r_nnz; j++) {
505         k                                    = icol0 + irow[j];
506         result->icols[k][result->row_nnz[k]] = i;
507         result->row_nnz[k]++;
508       }
509     }
510   }
511   PetscFunctionReturn(PETSC_SUCCESS);
512 }
513 
514 /*
515    spbas_mergesort
516 
517       mergesort for an array of integers and an array of associated
518       reals
519 
520       on output, icol[0..nnz-1] is increasing;
521                   val[0..nnz-1] has undergone the same permutation as icol
522 
523       NB: val may be NULL: in that case, only the integers are sorted
524 
525 */
spbas_mergesort(PetscInt nnz,PetscInt * icol,PetscScalar * val)526 static PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
527 {
528   PetscInt     istep;                /* Chunk-sizes of already sorted parts of arrays */
529   PetscInt     i, i1, i2;            /* Loop counters for (partly) sorted arrays */
530   PetscInt     istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
531   PetscInt    *ialloc;               /* Allocated arrays */
532   PetscScalar *valloc = NULL;
533   PetscInt    *iswap; /* auxiliary pointers for swapping */
534   PetscScalar *vswap;
535   PetscInt    *ihlp1;        /* Pointers to new version of arrays, */
536   PetscScalar *vhlp1 = NULL; /* (arrays under construction) */
537   PetscInt    *ihlp2;        /* Pointers to previous version of arrays, */
538   PetscScalar *vhlp2 = NULL;
539 
540   PetscFunctionBegin;
541   PetscCall(PetscMalloc1(nnz, &ialloc));
542   ihlp1 = ialloc;
543   ihlp2 = icol;
544 
545   if (val) {
546     PetscCall(PetscMalloc1(nnz, &valloc));
547     vhlp1 = valloc;
548     vhlp2 = val;
549   }
550 
551   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
552   for (istep = 1; istep < nnz; istep *= 2) {
553     /*
554       Combine sorted parts
555           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
556       of ihlp2 and vhlp2
557 
558       into one sorted part
559           istart:istart+2*istep-1
560       of ihlp1 and vhlp1
561     */
562     for (istart = 0; istart < nnz; istart += 2 * istep) {
563       /* Set counters and bound array part endings */
564       i1    = istart;
565       i1end = i1 + istep;
566       if (i1end > nnz) i1end = nnz;
567       i2    = istart + istep;
568       i2end = i2 + istep;
569       if (i2end > nnz) i2end = nnz;
570 
571       /* Merge the two array parts */
572       if (val) {
573         for (i = istart; i < i2end; i++) {
574           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
575             ihlp1[i] = ihlp2[i1];
576             vhlp1[i] = vhlp2[i1];
577             i1++;
578           } else if (i2 < i2end) {
579             ihlp1[i] = ihlp2[i2];
580             vhlp1[i] = vhlp2[i2];
581             i2++;
582           } else {
583             ihlp1[i] = ihlp2[i1];
584             vhlp1[i] = vhlp2[i1];
585             i1++;
586           }
587         }
588       } else {
589         for (i = istart; i < i2end; i++) {
590           if (i1 < i1end && i2 < i2end && ihlp2[i1] < ihlp2[i2]) {
591             ihlp1[i] = ihlp2[i1];
592             i1++;
593           } else if (i2 < i2end) {
594             ihlp1[i] = ihlp2[i2];
595             i2++;
596           } else {
597             ihlp1[i] = ihlp2[i1];
598             i1++;
599           }
600         }
601       }
602     }
603 
604     /* Swap the two array sets */
605     iswap = ihlp2;
606     ihlp2 = ihlp1;
607     ihlp1 = iswap;
608     vswap = vhlp2;
609     vhlp2 = vhlp1;
610     vhlp1 = vswap;
611   }
612 
613   /* Copy one more time in case the sorted arrays are the temporary ones */
614   if (ihlp2 != icol) {
615     for (i = 0; i < nnz; i++) icol[i] = ihlp2[i];
616     if (val) {
617       for (i = 0; i < nnz; i++) val[i] = vhlp2[i];
618     }
619   }
620 
621   PetscCall(PetscFree(ialloc));
622   if (val) PetscCall(PetscFree(valloc));
623   PetscFunctionReturn(PETSC_SUCCESS);
624 }
625 
626 /*
627   spbas_apply_reordering_rows:
628     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
629 */
spbas_apply_reordering_rows(spbas_matrix * matrix_A,const PetscInt * permutation)630 static PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
631 {
632   PetscInt      i, j, ip;
633   PetscInt      nrows = matrix_A->nrows;
634   PetscInt     *row_nnz;
635   PetscInt    **icols;
636   PetscBool     do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
637   PetscScalar **vals      = NULL;
638 
639   PetscFunctionBegin;
640   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
641 
642   if (do_values) PetscCall(PetscMalloc1(nrows, &vals));
643   PetscCall(PetscMalloc1(nrows, &row_nnz));
644   PetscCall(PetscMalloc1(nrows, &icols));
645 
646   for (i = 0; i < nrows; i++) {
647     ip = permutation[i];
648     if (do_values) vals[i] = matrix_A->values[ip];
649     icols[i]   = matrix_A->icols[ip];
650     row_nnz[i] = matrix_A->row_nnz[ip];
651     for (j = 0; j < row_nnz[i]; j++) icols[i][j] += ip - i;
652   }
653 
654   if (do_values) PetscCall(PetscFree(matrix_A->values));
655   PetscCall(PetscFree(matrix_A->icols));
656   PetscCall(PetscFree(matrix_A->row_nnz));
657 
658   if (do_values) matrix_A->values = vals;
659   matrix_A->icols   = icols;
660   matrix_A->row_nnz = row_nnz;
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 /*
665   spbas_apply_reordering_cols:
666     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
667 */
spbas_apply_reordering_cols(spbas_matrix * matrix_A,const PetscInt * permutation)668 static PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A, const PetscInt *permutation)
669 {
670   PetscInt     i, j;
671   PetscInt     nrows = matrix_A->nrows;
672   PetscInt     row_nnz;
673   PetscInt    *icols;
674   PetscBool    do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
675   PetscScalar *vals      = NULL;
676 
677   PetscFunctionBegin;
678   PetscCheck(matrix_A->col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
679 
680   for (i = 0; i < nrows; i++) {
681     icols   = matrix_A->icols[i];
682     row_nnz = matrix_A->row_nnz[i];
683     if (do_values) vals = matrix_A->values[i];
684 
685     for (j = 0; j < row_nnz; j++) icols[j] = permutation[i + icols[j]] - i;
686     PetscCall(spbas_mergesort(row_nnz, icols, vals));
687   }
688   PetscFunctionReturn(PETSC_SUCCESS);
689 }
690 
691 /*
692   spbas_apply_reordering:
693     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
694 */
spbas_apply_reordering(spbas_matrix * matrix_A,const PetscInt * permutation,const PetscInt * inv_perm)695 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt *inv_perm)
696 {
697   PetscFunctionBegin;
698   PetscCall(spbas_apply_reordering_rows(matrix_A, inv_perm));
699   PetscCall(spbas_apply_reordering_cols(matrix_A, permutation));
700   PetscFunctionReturn(PETSC_SUCCESS);
701 }
702 
spbas_pattern_only(PetscInt nrows,PetscInt ncols,PetscInt * ai,PetscInt * aj,spbas_matrix * result)703 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix *result)
704 {
705   spbas_matrix retval;
706   PetscInt     i, j, i0, r_nnz;
707 
708   PetscFunctionBegin;
709   /* Copy input values */
710   retval.nrows = nrows;
711   retval.ncols = ncols;
712   retval.nnz   = ai[nrows];
713 
714   retval.block_data   = PETSC_TRUE;
715   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
716 
717   /* Allocate output matrix */
718   PetscCall(spbas_allocate_pattern(&retval, PETSC_FALSE));
719   for (i = 0; i < nrows; i++) retval.row_nnz[i] = ai[i + 1] - ai[i];
720   PetscCall(spbas_allocate_data(&retval));
721   /* Copy the structure */
722   for (i = 0; i < retval.nrows; i++) {
723     i0    = ai[i];
724     r_nnz = ai[i + 1] - i0;
725 
726     for (j = 0; j < r_nnz; j++) retval.icols[i][j] = aj[i0 + j] - i;
727   }
728   *result = retval;
729   PetscFunctionReturn(PETSC_SUCCESS);
730 }
731 
732 /*
733    spbas_mark_row_power:
734       Mark the columns in row 'row' which are nonzero in
735           matrix^2log(marker).
736 */
spbas_mark_row_power(PetscInt * iwork,PetscInt row,spbas_matrix * in_matrix,PetscInt marker,PetscInt minmrk,PetscInt maxmrk)737 static PetscErrorCode spbas_mark_row_power(PetscInt     *iwork,     /* marker-vector */
738                                            PetscInt      row,       /* row for which the columns are marked */
739                                            spbas_matrix *in_matrix, /* matrix for which the power is being  calculated */
740                                            PetscInt      marker,    /* marker-value: 2^power */
741                                            PetscInt      minmrk,    /* lower bound for marked points */
742                                            PetscInt      maxmrk)         /* upper bound for marked points */
743 {
744   PetscInt i, j, nnz;
745 
746   PetscFunctionBegin;
747   nnz = in_matrix->row_nnz[row];
748 
749   /* For higher powers, call this function recursively */
750   if (marker > 1) {
751     for (i = 0; i < nnz; i++) {
752       j = row + in_matrix->icols[row][i];
753       if (minmrk <= j && j < maxmrk && iwork[j] < marker) {
754         PetscCall(spbas_mark_row_power(iwork, row + in_matrix->icols[row][i], in_matrix, marker / 2, minmrk, maxmrk));
755         iwork[j] |= marker;
756       }
757     }
758   } else {
759     /*  Mark the columns reached */
760     for (i = 0; i < nnz; i++) {
761       j = row + in_matrix->icols[row][i];
762       if (minmrk <= j && j < maxmrk) iwork[j] |= 1;
763     }
764   }
765   PetscFunctionReturn(PETSC_SUCCESS);
766 }
767 
768 /*
769    spbas_power
770       Calculate sparseness patterns for incomplete Cholesky decompositions
771       of a given order: (almost) all nonzeros of the matrix^(order+1) which
772       are inside the band width are found and stored in the output sparseness
773       pattern.
774 */
spbas_power(spbas_matrix in_matrix,PetscInt power,spbas_matrix * result)775 PetscErrorCode spbas_power(spbas_matrix in_matrix, PetscInt power, spbas_matrix *result)
776 {
777   spbas_matrix retval;
778   PetscInt     nrows = in_matrix.nrows;
779   PetscInt     ncols = in_matrix.ncols;
780   PetscInt     i, j, kend;
781   PetscInt     nnz, inz;
782   PetscInt    *iwork;
783   PetscInt     marker;
784   PetscInt     maxmrk = 0;
785 
786   PetscFunctionBegin;
787   PetscCheck(in_matrix.col_idx_type == SPBAS_DIAGONAL_OFFSETS, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern");
788   PetscCheck(ncols == nrows, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error");
789   PetscCheck(!in_matrix.values, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
790   PetscCheck(power > 0, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
791 
792   /* Copy input values*/
793   retval.nrows        = ncols;
794   retval.ncols        = nrows;
795   retval.nnz          = 0;
796   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
797   retval.block_data   = PETSC_FALSE;
798 
799   /* Allocate sparseness pattern */
800   PetscCall(spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE));
801 
802   /* Allocate marker array: note sure the max needed so use the max of the two */
803   PetscCall(PetscCalloc1(PetscMax(ncols, nrows), &iwork));
804 
805   /* Calculate marker values */
806   marker = 1;
807   for (i = 1; i < power; i++) marker *= 2;
808 
809   for (i = 0; i < nrows; i++) {
810     /* Calculate the pattern for each row */
811 
812     nnz  = in_matrix.row_nnz[i];
813     kend = i + in_matrix.icols[i][nnz - 1];
814     if (maxmrk <= kend) maxmrk = kend + 1;
815     PetscCall(spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk));
816 
817     /* Count the columns*/
818     nnz = 0;
819     for (j = i; j < maxmrk; j++) nnz += (iwork[j] != 0);
820 
821     /* Allocate the column indices */
822     retval.row_nnz[i] = nnz;
823     PetscCall(PetscMalloc1(nnz, &retval.icols[i]));
824 
825     /* Administrate the column indices */
826     inz = 0;
827     for (j = i; j < maxmrk; j++) {
828       if (iwork[j]) {
829         retval.icols[i][inz] = j - i;
830         inz++;
831         iwork[j] = 0;
832       }
833     }
834     retval.nnz += nnz;
835   }
836   PetscCall(PetscFree(iwork));
837   *result = retval;
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 /*
842    spbas_keep_upper:
843       remove the lower part of the matrix: keep the upper part
844 */
spbas_keep_upper(spbas_matrix * inout_matrix)845 PetscErrorCode spbas_keep_upper(spbas_matrix *inout_matrix)
846 {
847   PetscInt i, j;
848   PetscInt jstart;
849 
850   PetscFunctionBegin;
851   PetscCheck(!inout_matrix->block_data, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices");
852   for (i = 0; i < inout_matrix->nrows; i++) {
853     for (jstart = 0; (jstart < inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart] < 0); jstart++) { }
854     if (jstart > 0) {
855       for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->icols[i][j] = inout_matrix->icols[i][j + jstart];
856 
857       if (inout_matrix->values) {
858         for (j = 0; j < inout_matrix->row_nnz[i] - jstart; j++) inout_matrix->values[i][j] = inout_matrix->values[i][j + jstart];
859       }
860 
861       inout_matrix->row_nnz[i] -= jstart;
862 
863       inout_matrix->icols[i] = (PetscInt *)realloc((void *)inout_matrix->icols[i], inout_matrix->row_nnz[i] * sizeof(PetscInt));
864 
865       if (inout_matrix->values) inout_matrix->values[i] = (PetscScalar *)realloc((void *)inout_matrix->values[i], inout_matrix->row_nnz[i] * sizeof(PetscScalar));
866       inout_matrix->nnz -= jstart;
867     }
868   }
869   PetscFunctionReturn(PETSC_SUCCESS);
870 }
871