xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 4e278199b78715991f5c71ebbd945c1489263e6c)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Notes:
18     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
19      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
20      we recommend not using this functionality.
21 
22 .seealso: PCFactorSetMatSolverType(), MatSolverType, PCFactorSetLevels(), PCFactorSetDropTolerance()
23 
24 M*/
25 
26 /*
27   spbas_memory_requirement:
28     Calculate the number of bytes needed to store tha matrix
29 */
30 size_t spbas_memory_requirement(spbas_matrix matrix)
31 {
32   size_t memreq = 6 * sizeof(PetscInt)  + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
33                     sizeof(PetscBool)               + /* block_data */
34                     sizeof(PetscScalar**)           + /* values */
35                     sizeof(PetscScalar*)            + /* alloc_val */
36                     2 * sizeof(PetscInt**)          + /* icols, icols0 */
37                     2 * sizeof(PetscInt*)           + /* row_nnz, alloc_icol */
38                     matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */
39                     matrix.nrows * sizeof(PetscInt*); /* icols[*] */
40 
41   /* icol0[*] */
42   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
43 
44   /* icols[*][*] */
45   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
46   else memreq += matrix.nnz * sizeof(PetscInt);
47 
48   if (matrix.values) {
49     memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
50     /* values[*][*] */
51     if (matrix.block_data) memreq += matrix.n_alloc_val  * sizeof(PetscScalar);
52     else memreq += matrix.nnz * sizeof(PetscScalar);
53   }
54   return memreq;
55 }
56 
57 /*
58   spbas_allocate_pattern:
59     allocate the pattern arrays row_nnz, icols and optionally values
60 */
61 PetscErrorCode spbas_allocate_pattern(spbas_matrix * result, PetscBool do_values)
62 {
63   PetscErrorCode ierr;
64   PetscInt       nrows        = result->nrows;
65   PetscInt       col_idx_type = result->col_idx_type;
66 
67   PetscFunctionBegin;
68   /* Allocate sparseness pattern */
69   ierr = PetscMalloc1(nrows,&result->row_nnz);CHKERRQ(ierr);
70   ierr = PetscMalloc1(nrows,&result->icols);CHKERRQ(ierr);
71 
72   /* If offsets are given wrt an array, create array */
73   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
74     ierr = PetscMalloc1(nrows,&result->icol0);CHKERRQ(ierr);
75   } else  {
76     result->icol0 = NULL;
77   }
78 
79   /* If values are given, allocate values array */
80   if (do_values)  {
81     ierr = PetscMalloc1(nrows,&result->values);CHKERRQ(ierr);
82   } else {
83     result->values = NULL;
84   }
85   PetscFunctionReturn(0);
86 }
87 
88 /*
89 spbas_allocate_data:
90    in case of block_data:
91        Allocate the data arrays alloc_icol and optionally alloc_val,
92        set appropriate pointers from icols and values;
93    in case of !block_data:
94        Allocate the arrays icols[i] and optionally values[i]
95 */
96 PetscErrorCode spbas_allocate_data(spbas_matrix * result)
97 {
98   PetscInt       i;
99   PetscInt       nnz   = result->nnz;
100   PetscInt       nrows = result->nrows;
101   PetscInt       r_nnz;
102   PetscErrorCode ierr;
103   PetscBool      do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
104   PetscBool      block_data = result->block_data;
105 
106   PetscFunctionBegin;
107   if (block_data) {
108     /* Allocate the column number array and point to it */
109     result->n_alloc_icol = nnz;
110 
111     ierr = PetscMalloc1(nnz, &result->alloc_icol);CHKERRQ(ierr);
112 
113     result->icols[0] = result->alloc_icol;
114     for (i=1; i<nrows; i++)  {
115       result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
116     }
117 
118     /* Allocate the value array and point to it */
119     if (do_values) {
120       result->n_alloc_val = nnz;
121 
122       ierr = PetscMalloc1(nnz, &result->alloc_val);CHKERRQ(ierr);
123 
124       result->values[0] = result->alloc_val;
125       for (i=1; i<nrows; i++) {
126         result->values[i] = result->values[i-1] + result->row_nnz[i-1];
127       }
128     }
129   } else {
130     for (i=0; i<nrows; i++)   {
131       r_nnz = result->row_nnz[i];
132       ierr  = PetscMalloc1(r_nnz, &result->icols[i]);CHKERRQ(ierr);
133     }
134     if (do_values) {
135       for (i=0; i<nrows; i++)  {
136         r_nnz = result->row_nnz[i];
137         ierr  = PetscMalloc1(r_nnz, &result->values[i]);CHKERRQ(ierr);
138       }
139     }
140   }
141   PetscFunctionReturn(0);
142 }
143 
144 /*
145   spbas_row_order_icol
146      determine if row i1 should come
147        + before row i2 in the sorted rows (return -1),
148        + after (return 1)
149        + is identical (return 0).
150 */
151 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
152 {
153   PetscInt j;
154   PetscInt nnz1    = irow_in[i1+1] - irow_in[i1];
155   PetscInt nnz2    = irow_in[i2+1] - irow_in[i2];
156   PetscInt * icol1 = &icol_in[irow_in[i1]];
157   PetscInt * icol2 = &icol_in[irow_in[i2]];
158 
159   if (nnz1<nnz2) return -1;
160   if (nnz1>nnz2) return 1;
161 
162   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
163     for (j=0; j<nnz1; j++) {
164       if (icol1[j]< icol2[j]) return -1;
165       if (icol1[j]> icol2[j]) return 1;
166     }
167   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
168     for (j=0; j<nnz1; j++) {
169       if (icol1[j]-i1< icol2[j]-i2) return -1;
170       if (icol1[j]-i1> icol2[j]-i2) return 1;
171     }
172   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
173     for (j=1; j<nnz1; j++) {
174       if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) return -1;
175       if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) return 1;
176     }
177   }
178   return 0;
179 }
180 
181 /*
182   spbas_mergesort_icols:
183     return a sorting of the rows in which identical sparseness patterns are
184     next to each other
185 */
186 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
187 {
188   PetscErrorCode ierr;
189   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
190   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
191   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
192   PetscInt       *ialloc;     /* Allocated arrays */
193   PetscInt       *iswap;      /* auxiliary pointers for swapping */
194   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
195   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
196 
197   PetscFunctionBegin;
198   ierr = PetscMalloc1(nrows,&ialloc);CHKERRQ(ierr);
199 
200   ihlp1 = ialloc;
201   ihlp2 = isort;
202 
203   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
204   for (istep=1; istep<nrows; istep*=2) {
205     /*
206       Combine sorted parts
207           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
208       of ihlp2 and vhlp2
209 
210       into one sorted part
211           istart:istart+2*istep-1
212       of ihlp1 and vhlp1
213     */
214     for (istart=0; istart<nrows; istart+=2*istep) {
215       /* Set counters and bound array part endings */
216       i1=istart;        i1end = i1+istep;  if (i1end>nrows) i1end=nrows;
217       i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) i2end=nrows;
218 
219       /* Merge the two array parts */
220       for (i=istart; i<i2end; i++)  {
221         if (i1<i1end && i2<i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
222           ihlp1[i] = ihlp2[i1];
223           i1++;
224         } else if (i2<i2end) {
225           ihlp1[i] = ihlp2[i2];
226           i2++;
227         } else {
228           ihlp1[i] = ihlp2[i1];
229           i1++;
230         }
231       }
232     }
233 
234     /* Swap the two array sets */
235     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
236   }
237 
238   /* Copy one more time in case the sorted arrays are the temporary ones */
239   if (ihlp2 != isort) {
240     for (i=0; i<nrows; i++) isort[i] = ihlp2[i];
241   }
242   ierr = PetscFree(ialloc);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 /*
247   spbas_compress_pattern:
248      calculate a compressed sparseness pattern for a sparseness pattern
249      given in compressed row storage. The compressed sparseness pattern may
250      require (much) less memory.
251 */
252 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
253 {
254   PetscInt        nnz      = irow_in[nrows];
255   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
256   size_t          mem_compressed;
257   PetscErrorCode  ierr;
258   PetscInt        *isort;
259   PetscInt        *icols;
260   PetscInt        row_nnz;
261   PetscInt        *ipoint;
262   PetscBool       *used;
263   PetscInt        ptr;
264   PetscInt        i,j;
265   const PetscBool no_values = PETSC_FALSE;
266 
267   PetscFunctionBegin;
268   /* Allocate the structure of the new matrix */
269   B->nrows        = nrows;
270   B->ncols        = ncols;
271   B->nnz          = nnz;
272   B->col_idx_type = col_idx_type;
273   B->block_data   = PETSC_TRUE;
274 
275   ierr = spbas_allocate_pattern(B, no_values);CHKERRQ(ierr);
276 
277   /* When using an offset array, set it */
278   if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
279     for (i=0; i<nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
280   }
281 
282   /* Allocate the ordering for the rows */
283   ierr = PetscMalloc1(nrows,&isort);CHKERRQ(ierr);
284   ierr = PetscMalloc1(nrows,&ipoint);CHKERRQ(ierr);
285   ierr = PetscCalloc1(nrows,&used);CHKERRQ(ierr);
286 
287   for (i = 0; i<nrows; i++)  {
288     B->row_nnz[i] = irow_in[i+1]-irow_in[i];
289     isort[i]      = i;
290     ipoint[i]     = i;
291   }
292 
293   /* Sort the rows so that identical columns will be next to each other */
294   ierr = spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
295   ierr = PetscInfo(NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
296 
297   /* Replace identical rows with the first one in the list */
298   for (i=1; i<nrows; i++) {
299     if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
300       ipoint[isort[i]] = ipoint[isort[i-1]];
301     }
302   }
303 
304   /* Collect the rows which are used*/
305   for (i=0; i<nrows; i++) used[ipoint[i]] = PETSC_TRUE;
306 
307   /* Calculate needed memory */
308   B->n_alloc_icol = 0;
309   for (i=0; i<nrows; i++)  {
310     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
311   }
312   ierr = PetscMalloc1(B->n_alloc_icol,&B->alloc_icol);CHKERRQ(ierr);
313 
314   /* Fill in the diagonal offsets for the rows which store their own data */
315   ptr = 0;
316   for (i=0; i<B->nrows; i++) {
317     if (used[i]) {
318       B->icols[i] = &B->alloc_icol[ptr];
319       icols = &icol_in[irow_in[i]];
320       row_nnz = B->row_nnz[i];
321       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
322         for (j=0; j<row_nnz; j++) {
323           B->icols[i][j] = icols[j];
324         }
325       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
326         for (j=0; j<row_nnz; j++) {
327           B->icols[i][j] = icols[j]-i;
328         }
329       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
330         for (j=0; j<row_nnz; j++) {
331           B->icols[i][j] = icols[j]-icols[0];
332         }
333       }
334       ptr += B->row_nnz[i];
335     }
336   }
337 
338   /* Point to the right places for all data */
339   for (i=0; i<nrows; i++) {
340     B->icols[i] = B->icols[ipoint[i]];
341   }
342   ierr = PetscInfo(NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
343   ierr = PetscInfo1(NULL,"         (%g nonzeros per row)\n", (double) ((PetscReal) nnz / (PetscReal) nrows));CHKERRQ(ierr);
344 
345   ierr=PetscFree(isort);CHKERRQ(ierr);
346   ierr=PetscFree(used);CHKERRQ(ierr);
347   ierr=PetscFree(ipoint);CHKERRQ(ierr);
348 
349   mem_compressed = spbas_memory_requirement(*B);
350   *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
351   PetscFunctionReturn(0);
352 }
353 
354 /*
355    spbas_incomplete_cholesky
356        Incomplete Cholesky decomposition
357 */
358 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
359 
360 /*
361   spbas_delete : de-allocate the arrays owned by this matrix
362 */
363 PetscErrorCode spbas_delete(spbas_matrix matrix)
364 {
365   PetscInt       i;
366   PetscErrorCode ierr;
367 
368   PetscFunctionBegin;
369   if (matrix.block_data) {
370     ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr);
371     if (matrix.values) {ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
372   } else {
373     for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
374     ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
375     if (matrix.values) {
376       for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
377     }
378   }
379 
380   ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
381   ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
382   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
383   ierr=PetscFree(matrix.values);CHKERRQ(ierr);
384   PetscFunctionReturn(0);
385 }
386 
387 /*
388 spbas_matrix_to_crs:
389    Convert an spbas_matrix to compessed row storage
390 */
391 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
392 {
393   PetscInt       nrows = matrix_A.nrows;
394   PetscInt       nnz   = matrix_A.nnz;
395   PetscInt       i,j,r_nnz,i0;
396   PetscInt       *irow;
397   PetscInt       *icol;
398   PetscInt       *icol_A;
399   MatScalar      *val;
400   PetscScalar    *val_A;
401   PetscInt       col_idx_type = matrix_A.col_idx_type;
402   PetscBool      do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
403   PetscErrorCode ierr;
404 
405   PetscFunctionBegin;
406   ierr      = PetscMalloc1(nrows+1, &irow);CHKERRQ(ierr);
407   ierr      = PetscMalloc1(nnz, &icol);CHKERRQ(ierr);
408   *icol_out = icol;
409   *irow_out = irow;
410   if (do_values) {
411     ierr     = PetscMalloc1(nnz, &val);CHKERRQ(ierr);
412     *val_out = val; *icol_out = icol; *irow_out=irow;
413   }
414 
415   irow[0]=0;
416   for (i=0; i<nrows; i++) {
417     r_nnz     = matrix_A.row_nnz[i];
418     i0        = irow[i];
419     irow[i+1] = i0 + r_nnz;
420     icol_A    = matrix_A.icols[i];
421 
422     if (do_values) {
423       val_A = matrix_A.values[i];
424       for (j=0; j<r_nnz; j++) {
425         icol[i0+j] = icol_A[j];
426         val[i0+j]  = val_A[j];
427       }
428     } else {
429       for (j=0; j<r_nnz; j++) icol[i0+j] = icol_A[j];
430     }
431 
432     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
433       for (j=0; j<r_nnz; j++) icol[i0+j] += i;
434     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
435       i0 = matrix_A.icol0[i];
436       for (j=0; j<r_nnz; j++) icol[i0+j] += i0;
437     }
438   }
439   PetscFunctionReturn(0);
440 }
441 
442 /*
443     spbas_transpose
444        return the transpose of a matrix
445 */
446 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix * result)
447 {
448   PetscInt       col_idx_type = in_matrix.col_idx_type;
449   PetscInt       nnz          = in_matrix.nnz;
450   PetscInt       ncols        = in_matrix.nrows;
451   PetscInt       nrows        = in_matrix.ncols;
452   PetscInt       i,j,k;
453   PetscInt       r_nnz;
454   PetscInt       *irow;
455   PetscInt       icol0 = 0;
456   PetscScalar    * val;
457   PetscErrorCode ierr;
458 
459   PetscFunctionBegin;
460   /* Copy input values */
461   result->nrows        = nrows;
462   result->ncols        = ncols;
463   result->nnz          = nnz;
464   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
465   result->block_data   = PETSC_TRUE;
466 
467   /* Allocate sparseness pattern */
468   ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
469 
470   /*  Count the number of nonzeros in each row */
471   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
472 
473   for (i=0; i<ncols; i++) {
474     r_nnz = in_matrix.row_nnz[i];
475     irow  = in_matrix.icols[i];
476     if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
477       for (j=0; j<r_nnz; j++) result->row_nnz[irow[j]]++;
478     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
479       for (j=0; j<r_nnz; j++) result->row_nnz[i+irow[j]]++;
480     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
481       icol0=in_matrix.icol0[i];
482       for (j=0; j<r_nnz; j++) result->row_nnz[icol0+irow[j]]++;
483     }
484   }
485 
486   /* Set the pointers to the data */
487   ierr = spbas_allocate_data(result);CHKERRQ(ierr);
488 
489   /* Reset the number of nonzeros in each row */
490   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
491 
492   /* Fill the data arrays */
493   if (in_matrix.values) {
494     for (i=0; i<ncols; i++) {
495       r_nnz = in_matrix.row_nnz[i];
496       irow  = in_matrix.icols[i];
497       val   = in_matrix.values[i];
498 
499       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0 = 0;
500       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
501       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0 = in_matrix.icol0[i];
502       for (j=0; j<r_nnz; j++)  {
503         k = icol0 + irow[j];
504         result->icols[k][result->row_nnz[k]]  = i;
505         result->values[k][result->row_nnz[k]] = val[j];
506         result->row_nnz[k]++;
507       }
508     }
509   } else {
510     for (i=0; i<ncols; i++) {
511       r_nnz = in_matrix.row_nnz[i];
512       irow  = in_matrix.icols[i];
513 
514       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0=0;
515       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0=i;
516       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0=in_matrix.icol0[i];
517 
518       for (j=0; j<r_nnz; j++) {
519         k = icol0 + irow[j];
520         result->icols[k][result->row_nnz[k]] = i;
521         result->row_nnz[k]++;
522       }
523     }
524   }
525   PetscFunctionReturn(0);
526 }
527 
528 /*
529    spbas_mergesort
530 
531       mergesort for an array of integers and an array of associated
532       reals
533 
534       on output, icol[0..nnz-1] is increasing;
535                   val[0..nnz-1] has undergone the same permutation as icol
536 
537       NB: val may be NULL: in that case, only the integers are sorted
538 
539 */
540 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
541 {
542   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
543   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
544   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
545   PetscInt       *ialloc;     /* Allocated arrays */
546   PetscScalar    *valloc=NULL;
547   PetscInt       *iswap;      /* auxiliary pointers for swapping */
548   PetscScalar    *vswap;
549   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
550   PetscScalar    *vhlp1=NULL;  /* (arrays under construction) */
551   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
552   PetscScalar    *vhlp2=NULL;
553   PetscErrorCode ierr;
554 
555   ierr  = PetscMalloc1(nnz,&ialloc);CHKERRQ(ierr);
556   ihlp1 = ialloc;
557   ihlp2 = icol;
558 
559   if (val) {
560     ierr  = PetscMalloc1(nnz,&valloc);CHKERRQ(ierr);
561     vhlp1 = valloc;
562     vhlp2 = val;
563   }
564 
565   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
566   for (istep=1; istep<nnz; istep*=2) {
567     /*
568       Combine sorted parts
569           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
570       of ihlp2 and vhlp2
571 
572       into one sorted part
573           istart:istart+2*istep-1
574       of ihlp1 and vhlp1
575     */
576     for (istart=0; istart<nnz; istart+=2*istep) {
577       /* Set counters and bound array part endings */
578       i1=istart;        i1end = i1+istep;  if (i1end>nnz) i1end=nnz;
579       i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) i2end=nnz;
580 
581       /* Merge the two array parts */
582       if (val) {
583         for (i=istart; i<i2end; i++) {
584           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
585             ihlp1[i] = ihlp2[i1];
586             vhlp1[i] = vhlp2[i1];
587             i1++;
588           } else if (i2<i2end) {
589             ihlp1[i] = ihlp2[i2];
590             vhlp1[i] = vhlp2[i2];
591             i2++;
592           } else {
593             ihlp1[i] = ihlp2[i1];
594             vhlp1[i] = vhlp2[i1];
595             i1++;
596           }
597         }
598       } else {
599         for (i=istart; i<i2end; i++) {
600           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
601             ihlp1[i] = ihlp2[i1];
602             i1++;
603           } else if (i2<i2end) {
604             ihlp1[i] = ihlp2[i2];
605             i2++;
606           } else {
607             ihlp1[i] = ihlp2[i1];
608             i1++;
609           }
610         }
611       }
612     }
613 
614     /* Swap the two array sets */
615     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
616     vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
617   }
618 
619   /* Copy one more time in case the sorted arrays are the temporary ones */
620   if (ihlp2 != icol) {
621     for (i=0; i<nnz; i++) icol[i] = ihlp2[i];
622     if (val) {
623       for (i=0; i<nnz; i++) val[i] = vhlp2[i];
624     }
625   }
626 
627   ierr = PetscFree(ialloc);CHKERRQ(ierr);
628   if (val) {ierr = PetscFree(valloc);CHKERRQ(ierr);}
629   PetscFunctionReturn(0);
630 }
631 
632 /*
633   spbas_apply_reordering_rows:
634     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
635 */
636 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
637 {
638   PetscInt       i,j,ip;
639   PetscInt       nrows=matrix_A->nrows;
640   PetscInt       * row_nnz;
641   PetscInt       **icols;
642   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
643   PetscScalar    **vals    = NULL;
644   PetscErrorCode ierr;
645 
646   PetscFunctionBegin;
647   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
648 
649   if (do_values) {
650     ierr = PetscMalloc1(nrows, &vals);CHKERRQ(ierr);
651   }
652   ierr = PetscMalloc1(nrows, &row_nnz);CHKERRQ(ierr);
653   ierr = PetscMalloc1(nrows, &icols);CHKERRQ(ierr);
654 
655   for (i=0; i<nrows; i++) {
656     ip = permutation[i];
657     if (do_values) vals[i] = matrix_A->values[ip];
658     icols[i]   = matrix_A->icols[ip];
659     row_nnz[i] = matrix_A->row_nnz[ip];
660     for (j=0; j<row_nnz[i]; j++) icols[i][j] += ip-i;
661   }
662 
663   if (do_values) { ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
664   ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
665   ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
666 
667   if (do_values) matrix_A->values = vals;
668   matrix_A->icols   = icols;
669   matrix_A->row_nnz = row_nnz;
670   PetscFunctionReturn(0);
671 }
672 
673 /*
674   spbas_apply_reordering_cols:
675     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
676 */
677 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A,const PetscInt *permutation)
678 {
679   PetscInt       i,j;
680   PetscInt       nrows=matrix_A->nrows;
681   PetscInt       row_nnz;
682   PetscInt       *icols;
683   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
684   PetscScalar    *vals     = NULL;
685   PetscErrorCode ierr;
686 
687   PetscFunctionBegin;
688   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
689 
690   for (i=0; i<nrows; i++) {
691     icols   = matrix_A->icols[i];
692     row_nnz = matrix_A->row_nnz[i];
693     if (do_values) vals = matrix_A->values[i];
694 
695     for (j=0; j<row_nnz; j++) {
696       icols[j] = permutation[i+icols[j]]-i;
697     }
698     ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
699   }
700   PetscFunctionReturn(0);
701 }
702 
703 /*
704   spbas_apply_reordering:
705     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
706 */
707 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
708 {
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712   ierr = spbas_apply_reordering_rows(matrix_A, inv_perm);CHKERRQ(ierr);
713   ierr = spbas_apply_reordering_cols(matrix_A, permutation);CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 
717 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
718 {
719   spbas_matrix   retval;
720   PetscInt       i, j, i0, r_nnz;
721   PetscErrorCode ierr;
722 
723   PetscFunctionBegin;
724   /* Copy input values */
725   retval.nrows = nrows;
726   retval.ncols = ncols;
727   retval.nnz   = ai[nrows];
728 
729   retval.block_data   = PETSC_TRUE;
730   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
731 
732   /* Allocate output matrix */
733   ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
734   for (i=0; i<nrows; i++) retval.row_nnz[i] = ai[i+1]-ai[i];
735   ierr = spbas_allocate_data(&retval);CHKERRQ(ierr);
736   /* Copy the structure */
737   for (i = 0; i<retval.nrows; i++)  {
738     i0    = ai[i];
739     r_nnz = ai[i+1]-i0;
740 
741     for (j=0; j<r_nnz; j++) {
742       retval.icols[i][j] = aj[i0+j]-i;
743     }
744   }
745   *result = retval;
746   PetscFunctionReturn(0);
747 }
748 
749 /*
750    spbas_mark_row_power:
751       Mark the columns in row 'row' which are nonzero in
752           matrix^2log(marker).
753 */
754 PetscErrorCode spbas_mark_row_power(PetscInt *iwork,             /* marker-vector */
755                                     PetscInt row,                /* row for which the columns are marked */
756                                     spbas_matrix * in_matrix,    /* matrix for which the power is being  calculated */
757                                     PetscInt marker,             /* marker-value: 2^power */
758                                     PetscInt minmrk,             /* lower bound for marked points */
759                                     PetscInt maxmrk)             /* upper bound for marked points */
760 {
761   PetscErrorCode ierr;
762   PetscInt       i,j, nnz;
763 
764   PetscFunctionBegin;
765   nnz = in_matrix->row_nnz[row];
766 
767   /* For higher powers, call this function recursively */
768   if (marker>1) {
769     for (i=0; i<nnz; i++) {
770       j = row + in_matrix->icols[row][i];
771       if (minmrk<=j && j<maxmrk && iwork[j] < marker) {
772         ierr      = spbas_mark_row_power(iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
773         iwork[j] |= marker;
774       }
775     }
776   } else {
777     /*  Mark the columns reached */
778     for (i=0; i<nnz; i++)  {
779       j = row + in_matrix->icols[row][i];
780       if (minmrk<=j && j<maxmrk) iwork[j] |= 1;
781     }
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 /*
787    spbas_power
788       Calculate sparseness patterns for incomplete Cholesky decompositions
789       of a given order: (almost) all nonzeros of the matrix^(order+1) which
790       are inside the band width are found and stored in the output sparseness
791       pattern.
792 */
793 PetscErrorCode spbas_power(spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
794 {
795   spbas_matrix   retval;
796   PetscInt       nrows = in_matrix.nrows;
797   PetscInt       ncols = in_matrix.ncols;
798   PetscInt       i, j, kend;
799   PetscInt       nnz, inz;
800   PetscInt       *iwork;
801   PetscInt       marker;
802   PetscInt       maxmrk=0;
803   PetscErrorCode ierr;
804 
805   PetscFunctionBegin;
806   if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
807   if (ncols != nrows) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error\n");
808   if (in_matrix.values) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
809   if (power<=0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
810 
811   /* Copy input values*/
812   retval.nrows        = ncols;
813   retval.ncols        = nrows;
814   retval.nnz          = 0;
815   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
816   retval.block_data   = PETSC_FALSE;
817 
818   /* Allocate sparseness pattern */
819   ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
820 
821   /* Allocate marker array: note sure the max needed so use the max of the two */
822   ierr = PetscCalloc1(PetscMax(ncols,nrows), &iwork);CHKERRQ(ierr);
823 
824   /* Calculate marker values */
825   marker = 1; for (i=1; i<power; i++) marker*=2;
826 
827   for (i=0; i<nrows; i++)  {
828     /* Calculate the pattern for each row */
829 
830     nnz  = in_matrix.row_nnz[i];
831     kend = i+in_matrix.icols[i][nnz-1];
832     if (maxmrk<=kend) maxmrk=kend+1;
833     ierr = spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
834 
835     /* Count the columns*/
836     nnz = 0;
837     for (j=i; j<maxmrk; j++) nnz+= (iwork[j]!=0);
838 
839     /* Allocate the column indices */
840     retval.row_nnz[i] = nnz;
841     ierr = PetscMalloc1(nnz,&retval.icols[i]);CHKERRQ(ierr);
842 
843     /* Administrate the column indices */
844     inz = 0;
845     for (j=i; j<maxmrk; j++) {
846       if (iwork[j]) {
847         retval.icols[i][inz] = j-i;
848         inz++;
849         iwork[j] = 0;
850       }
851     }
852     retval.nnz += nnz;
853   };
854   ierr    = PetscFree(iwork);CHKERRQ(ierr);
855   *result = retval;
856   PetscFunctionReturn(0);
857 }
858 
859 /*
860    spbas_keep_upper:
861       remove the lower part of the matrix: keep the upper part
862 */
863 PetscErrorCode spbas_keep_upper(spbas_matrix * inout_matrix)
864 {
865   PetscInt i, j;
866   PetscInt jstart;
867 
868   PetscFunctionBegin;
869   if (inout_matrix->block_data) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
870   for (i=0; i<inout_matrix->nrows; i++)  {
871     for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++) {}
872     if (jstart>0) {
873       for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
874         inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart];
875       }
876 
877       if (inout_matrix->values) {
878         for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
879           inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
880         }
881       }
882 
883       inout_matrix->row_nnz[i] -= jstart;
884 
885       inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
886 
887       if (inout_matrix->values) {
888         inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
889       }
890       inout_matrix->nnz -= jstart;
891     }
892   }
893   PetscFunctionReturn(0);
894 }
895