xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 5b6bfdb9644f185dbf5e5a09b808ec241507e1e7)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Notes: Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
18      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
19      we recommend not using this functionality.
20 
21 .seealso: PCFactorSetMatSolverType(), MatSolverType, PCFactorSetLevels(), PCFactorSetDropTolerance()
22 
23 M*/
24 
25 /*
26   spbas_memory_requirement:
27     Calculate the number of bytes needed to store tha matrix
28 */
29 size_t spbas_memory_requirement(spbas_matrix matrix)
30 {
31   size_t memreq = 6 * sizeof(PetscInt)  + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
32                     sizeof(PetscBool)               + /* block_data */
33                     sizeof(PetscScalar**)           + /* values */
34                     sizeof(PetscScalar*)            + /* alloc_val */
35                     2 * sizeof(PetscInt**)          + /* icols, icols0 */
36                     2 * sizeof(PetscInt*)           + /* row_nnz, alloc_icol */
37                     matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */
38                     matrix.nrows * sizeof(PetscInt*); /* icols[*] */
39 
40   /* icol0[*] */
41   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
42 
43   /* icols[*][*] */
44   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
45   else memreq += matrix.nnz * sizeof(PetscInt);
46 
47   if (matrix.values) {
48     memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
49     /* values[*][*] */
50     if (matrix.block_data) memreq += matrix.n_alloc_val  * sizeof(PetscScalar);
51     else memreq += matrix.nnz * sizeof(PetscScalar);
52   }
53   return memreq;
54 }
55 
56 /*
57   spbas_allocate_pattern:
58     allocate the pattern arrays row_nnz, icols and optionally values
59 */
60 PetscErrorCode spbas_allocate_pattern(spbas_matrix * result, PetscBool do_values)
61 {
62   PetscErrorCode ierr;
63   PetscInt       nrows        = result->nrows;
64   PetscInt       col_idx_type = result->col_idx_type;
65 
66   PetscFunctionBegin;
67   /* Allocate sparseness pattern */
68   ierr = PetscMalloc1(nrows,&result->row_nnz);CHKERRQ(ierr);
69   ierr = PetscMalloc1(nrows,&result->icols);CHKERRQ(ierr);
70 
71   /* If offsets are given wrt an array, create array */
72   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
73     ierr = PetscMalloc1(nrows,&result->icol0);CHKERRQ(ierr);
74   } else  {
75     result->icol0 = NULL;
76   }
77 
78   /* If values are given, allocate values array */
79   if (do_values)  {
80     ierr = PetscMalloc1(nrows,&result->values);CHKERRQ(ierr);
81   } else {
82     result->values = NULL;
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 /*
88 spbas_allocate_data:
89    in case of block_data:
90        Allocate the data arrays alloc_icol and optionally alloc_val,
91        set appropriate pointers from icols and values;
92    in case of !block_data:
93        Allocate the arrays icols[i] and optionally values[i]
94 */
95 PetscErrorCode spbas_allocate_data(spbas_matrix * result)
96 {
97   PetscInt       i;
98   PetscInt       nnz   = result->nnz;
99   PetscInt       nrows = result->nrows;
100   PetscInt       r_nnz;
101   PetscErrorCode ierr;
102   PetscBool      do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
103   PetscBool      block_data = result->block_data;
104 
105   PetscFunctionBegin;
106   if (block_data) {
107     /* Allocate the column number array and point to it */
108     result->n_alloc_icol = nnz;
109 
110     ierr = PetscMalloc1(nnz, &result->alloc_icol);CHKERRQ(ierr);
111 
112     result->icols[0] = result->alloc_icol;
113     for (i=1; i<nrows; i++)  {
114       result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
115     }
116 
117     /* Allocate the value array and point to it */
118     if (do_values) {
119       result->n_alloc_val = nnz;
120 
121       ierr = PetscMalloc1(nnz, &result->alloc_val);CHKERRQ(ierr);
122 
123       result->values[0] = result->alloc_val;
124       for (i=1; i<nrows; i++) {
125         result->values[i] = result->values[i-1] + result->row_nnz[i-1];
126       }
127     }
128   } else {
129     for (i=0; i<nrows; i++)   {
130       r_nnz = result->row_nnz[i];
131       ierr  = PetscMalloc1(r_nnz, &result->icols[i]);CHKERRQ(ierr);
132     }
133     if (do_values) {
134       for (i=0; i<nrows; i++)  {
135         r_nnz = result->row_nnz[i];
136         ierr  = PetscMalloc1(r_nnz, &result->values[i]);CHKERRQ(ierr);
137       }
138     }
139   }
140   PetscFunctionReturn(0);
141 }
142 
143 /*
144   spbas_row_order_icol
145      determine if row i1 should come
146        + before row i2 in the sorted rows (return -1),
147        + after (return 1)
148        + is identical (return 0).
149 */
150 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
151 {
152   PetscInt j;
153   PetscInt nnz1    = irow_in[i1+1] - irow_in[i1];
154   PetscInt nnz2    = irow_in[i2+1] - irow_in[i2];
155   PetscInt * icol1 = &icol_in[irow_in[i1]];
156   PetscInt * icol2 = &icol_in[irow_in[i2]];
157 
158   if (nnz1<nnz2) return -1;
159   if (nnz1>nnz2) return 1;
160 
161   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
162     for (j=0; j<nnz1; j++) {
163       if (icol1[j]< icol2[j]) return -1;
164       if (icol1[j]> icol2[j]) return 1;
165     }
166   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
167     for (j=0; j<nnz1; j++) {
168       if (icol1[j]-i1< icol2[j]-i2) return -1;
169       if (icol1[j]-i1> icol2[j]-i2) return 1;
170     }
171   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
172     for (j=1; j<nnz1; j++) {
173       if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) return -1;
174       if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) return 1;
175     }
176   }
177   return 0;
178 }
179 
180 /*
181   spbas_mergesort_icols:
182     return a sorting of the rows in which identical sparseness patterns are
183     next to each other
184 */
185 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
186 {
187   PetscErrorCode ierr;
188   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
189   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
190   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
191   PetscInt       *ialloc;     /* Allocated arrays */
192   PetscInt       *iswap;      /* auxiliary pointers for swapping */
193   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
194   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
195 
196   PetscFunctionBegin;
197   ierr = PetscMalloc1(nrows,&ialloc);CHKERRQ(ierr);
198 
199   ihlp1 = ialloc;
200   ihlp2 = isort;
201 
202   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
203   for (istep=1; istep<nrows; istep*=2) {
204     /*
205       Combine sorted parts
206           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
207       of ihlp2 and vhlp2
208 
209       into one sorted part
210           istart:istart+2*istep-1
211       of ihlp1 and vhlp1
212     */
213     for (istart=0; istart<nrows; istart+=2*istep) {
214       /* Set counters and bound array part endings */
215       i1=istart;        i1end = i1+istep;  if (i1end>nrows) i1end=nrows;
216       i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) i2end=nrows;
217 
218       /* Merge the two array parts */
219       for (i=istart; i<i2end; i++)  {
220         if (i1<i1end && i2<i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
221           ihlp1[i] = ihlp2[i1];
222           i1++;
223         } else if (i2<i2end) {
224           ihlp1[i] = ihlp2[i2];
225           i2++;
226         } else {
227           ihlp1[i] = ihlp2[i1];
228           i1++;
229         }
230       }
231     }
232 
233     /* Swap the two array sets */
234     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
235   }
236 
237   /* Copy one more time in case the sorted arrays are the temporary ones */
238   if (ihlp2 != isort) {
239     for (i=0; i<nrows; i++) isort[i] = ihlp2[i];
240   }
241   ierr = PetscFree(ialloc);CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 
245 
246 
247 /*
248   spbas_compress_pattern:
249      calculate a compressed sparseness pattern for a sparseness pattern
250      given in compressed row storage. The compressed sparseness pattern may
251      require (much) less memory.
252 */
253 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
254 {
255   PetscInt        nnz      = irow_in[nrows];
256   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
257   size_t          mem_compressed;
258   PetscErrorCode  ierr;
259   PetscInt        *isort;
260   PetscInt        *icols;
261   PetscInt        row_nnz;
262   PetscInt        *ipoint;
263   PetscBool       *used;
264   PetscInt        ptr;
265   PetscInt        i,j;
266   const PetscBool no_values = PETSC_FALSE;
267 
268   PetscFunctionBegin;
269   /* Allocate the structure of the new matrix */
270   B->nrows        = nrows;
271   B->ncols        = ncols;
272   B->nnz          = nnz;
273   B->col_idx_type = col_idx_type;
274   B->block_data   = PETSC_TRUE;
275 
276   ierr = spbas_allocate_pattern(B, no_values);CHKERRQ(ierr);
277 
278   /* When using an offset array, set it */
279   if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
280     for (i=0; i<nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
281   }
282 
283   /* Allocate the ordering for the rows */
284   ierr = PetscMalloc1(nrows,&isort);CHKERRQ(ierr);
285   ierr = PetscMalloc1(nrows,&ipoint);CHKERRQ(ierr);
286   ierr = PetscMalloc1(nrows,&used);CHKERRQ(ierr);
287 
288   /*  Initialize the sorting */
289   ierr = PetscMemzero((void*) used, nrows*sizeof(PetscBool));CHKERRQ(ierr);
290   for (i = 0; i<nrows; i++)  {
291     B->row_nnz[i] = irow_in[i+1]-irow_in[i];
292     isort[i]      = i;
293     ipoint[i]     = i;
294   }
295 
296   /* Sort the rows so that identical columns will be next to each other */
297   ierr = spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
298   ierr = PetscInfo(NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
299 
300   /* Replace identical rows with the first one in the list */
301   for (i=1; i<nrows; i++) {
302     if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
303       ipoint[isort[i]] = ipoint[isort[i-1]];
304     }
305   }
306 
307   /* Collect the rows which are used*/
308   for (i=0; i<nrows; i++) used[ipoint[i]] = PETSC_TRUE;
309 
310   /* Calculate needed memory */
311   B->n_alloc_icol = 0;
312   for (i=0; i<nrows; i++)  {
313     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
314   }
315   ierr = PetscMalloc1(B->n_alloc_icol,&B->alloc_icol);CHKERRQ(ierr);
316 
317   /* Fill in the diagonal offsets for the rows which store their own data */
318   ptr = 0;
319   for (i=0; i<B->nrows; i++) {
320     if (used[i]) {
321       B->icols[i] = &B->alloc_icol[ptr];
322       icols = &icol_in[irow_in[i]];
323       row_nnz = B->row_nnz[i];
324       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
325         for (j=0; j<row_nnz; j++) {
326           B->icols[i][j] = icols[j];
327         }
328       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
329         for (j=0; j<row_nnz; j++) {
330           B->icols[i][j] = icols[j]-i;
331         }
332       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
333         for (j=0; j<row_nnz; j++) {
334           B->icols[i][j] = icols[j]-icols[0];
335         }
336       }
337       ptr += B->row_nnz[i];
338     }
339   }
340 
341   /* Point to the right places for all data */
342   for (i=0; i<nrows; i++) {
343     B->icols[i] = B->icols[ipoint[i]];
344   }
345   ierr = PetscInfo(NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
346   ierr = PetscInfo1(NULL,"         (%g nonzeros per row)\n", (double) ((PetscReal) nnz / (PetscReal) nrows));CHKERRQ(ierr);
347 
348   ierr=PetscFree(isort);CHKERRQ(ierr);
349   ierr=PetscFree(used);CHKERRQ(ierr);
350   ierr=PetscFree(ipoint);CHKERRQ(ierr);
351 
352   mem_compressed = spbas_memory_requirement(*B);
353   *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
354   PetscFunctionReturn(0);
355 }
356 
357 /*
358    spbas_incomplete_cholesky
359        Incomplete Cholesky decomposition
360 */
361 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
362 
363 /*
364   spbas_delete : de-allocate the arrays owned by this matrix
365 */
366 PetscErrorCode spbas_delete(spbas_matrix matrix)
367 {
368   PetscInt       i;
369   PetscErrorCode ierr;
370 
371   PetscFunctionBegin;
372   if (matrix.block_data) {
373     ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr);
374     if (matrix.values) {ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
375   } else {
376     for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
377     ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
378     if (matrix.values) {
379       for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
380     }
381   }
382 
383   ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
384   ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
385   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
386   ierr=PetscFree(matrix.values);CHKERRQ(ierr);
387   PetscFunctionReturn(0);
388 }
389 
390 /*
391 spbas_matrix_to_crs:
392    Convert an spbas_matrix to compessed row storage
393 */
394 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
395 {
396   PetscInt       nrows = matrix_A.nrows;
397   PetscInt       nnz   = matrix_A.nnz;
398   PetscInt       i,j,r_nnz,i0;
399   PetscInt       *irow;
400   PetscInt       *icol;
401   PetscInt       *icol_A;
402   MatScalar      *val;
403   PetscScalar    *val_A;
404   PetscInt       col_idx_type = matrix_A.col_idx_type;
405   PetscBool      do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
406   PetscErrorCode ierr;
407 
408   PetscFunctionBegin;
409   ierr      = PetscMalloc1(nrows+1, &irow);CHKERRQ(ierr);
410   ierr      = PetscMalloc1(nnz, &icol);CHKERRQ(ierr);
411   *icol_out = icol;
412   *irow_out = irow;
413   if (do_values) {
414     ierr     = PetscMalloc1(nnz, &val);CHKERRQ(ierr);
415     *val_out = val; *icol_out = icol; *irow_out=irow;
416   }
417 
418   irow[0]=0;
419   for (i=0; i<nrows; i++) {
420     r_nnz     = matrix_A.row_nnz[i];
421     i0        = irow[i];
422     irow[i+1] = i0 + r_nnz;
423     icol_A    = matrix_A.icols[i];
424 
425     if (do_values) {
426       val_A = matrix_A.values[i];
427       for (j=0; j<r_nnz; j++) {
428         icol[i0+j] = icol_A[j];
429         val[i0+j]  = val_A[j];
430       }
431     } else {
432       for (j=0; j<r_nnz; j++) icol[i0+j] = icol_A[j];
433     }
434 
435     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
436       for (j=0; j<r_nnz; j++) icol[i0+j] += i;
437     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
438       i0 = matrix_A.icol0[i];
439       for (j=0; j<r_nnz; j++) icol[i0+j] += i0;
440     }
441   }
442   PetscFunctionReturn(0);
443 }
444 
445 
446 /*
447     spbas_transpose
448        return the transpose of a matrix
449 */
450 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix * result)
451 {
452   PetscInt       col_idx_type = in_matrix.col_idx_type;
453   PetscInt       nnz          = in_matrix.nnz;
454   PetscInt       ncols        = in_matrix.nrows;
455   PetscInt       nrows        = in_matrix.ncols;
456   PetscInt       i,j,k;
457   PetscInt       r_nnz;
458   PetscInt       *irow;
459   PetscInt       icol0 = 0;
460   PetscScalar    * val;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   /* Copy input values */
465   result->nrows        = nrows;
466   result->ncols        = ncols;
467   result->nnz          = nnz;
468   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
469   result->block_data   = PETSC_TRUE;
470 
471   /* Allocate sparseness pattern */
472   ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
473 
474   /*  Count the number of nonzeros in each row */
475   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
476 
477   for (i=0; i<ncols; i++) {
478     r_nnz = in_matrix.row_nnz[i];
479     irow  = in_matrix.icols[i];
480     if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
481       for (j=0; j<r_nnz; j++) result->row_nnz[irow[j]]++;
482     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
483       for (j=0; j<r_nnz; j++) result->row_nnz[i+irow[j]]++;
484     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
485       icol0=in_matrix.icol0[i];
486       for (j=0; j<r_nnz; j++) result->row_nnz[icol0+irow[j]]++;
487     }
488   }
489 
490   /* Set the pointers to the data */
491   ierr = spbas_allocate_data(result);CHKERRQ(ierr);
492 
493   /* Reset the number of nonzeros in each row */
494   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
495 
496   /* Fill the data arrays */
497   if (in_matrix.values) {
498     for (i=0; i<ncols; i++) {
499       r_nnz = in_matrix.row_nnz[i];
500       irow  = in_matrix.icols[i];
501       val   = in_matrix.values[i];
502 
503       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0 = 0;
504       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
505       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0 = in_matrix.icol0[i];
506       for (j=0; j<r_nnz; j++)  {
507         k = icol0 + irow[j];
508         result->icols[k][result->row_nnz[k]]  = i;
509         result->values[k][result->row_nnz[k]] = val[j];
510         result->row_nnz[k]++;
511       }
512     }
513   } else {
514     for (i=0; i<ncols; i++) {
515       r_nnz = in_matrix.row_nnz[i];
516       irow  = in_matrix.icols[i];
517 
518       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0=0;
519       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0=i;
520       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0=in_matrix.icol0[i];
521 
522       for (j=0; j<r_nnz; j++) {
523         k = icol0 + irow[j];
524         result->icols[k][result->row_nnz[k]] = i;
525         result->row_nnz[k]++;
526       }
527     }
528   }
529   PetscFunctionReturn(0);
530 }
531 
532 /*
533    spbas_mergesort
534 
535       mergesort for an array of integers and an array of associated
536       reals
537 
538       on output, icol[0..nnz-1] is increasing;
539                   val[0..nnz-1] has undergone the same permutation as icol
540 
541       NB: val may be NULL: in that case, only the integers are sorted
542 
543 */
544 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
545 {
546   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
547   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
548   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
549   PetscInt       *ialloc;     /* Allocated arrays */
550   PetscScalar    *valloc=NULL;
551   PetscInt       *iswap;      /* auxiliary pointers for swapping */
552   PetscScalar    *vswap;
553   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
554   PetscScalar    *vhlp1=NULL;  /* (arrays under construction) */
555   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
556   PetscScalar    *vhlp2=NULL;
557   PetscErrorCode ierr;
558 
559   ierr  = PetscMalloc1(nnz,&ialloc);CHKERRQ(ierr);
560   ihlp1 = ialloc;
561   ihlp2 = icol;
562 
563   if (val) {
564     ierr  = PetscMalloc1(nnz,&valloc);CHKERRQ(ierr);
565     vhlp1 = valloc;
566     vhlp2 = val;
567   }
568 
569 
570   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
571   for (istep=1; istep<nnz; istep*=2) {
572     /*
573       Combine sorted parts
574           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
575       of ihlp2 and vhlp2
576 
577       into one sorted part
578           istart:istart+2*istep-1
579       of ihlp1 and vhlp1
580     */
581     for (istart=0; istart<nnz; istart+=2*istep) {
582       /* Set counters and bound array part endings */
583       i1=istart;        i1end = i1+istep;  if (i1end>nnz) i1end=nnz;
584       i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) i2end=nnz;
585 
586       /* Merge the two array parts */
587       if (val) {
588         for (i=istart; i<i2end; i++) {
589           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
590             ihlp1[i] = ihlp2[i1];
591             vhlp1[i] = vhlp2[i1];
592             i1++;
593           } else if (i2<i2end) {
594             ihlp1[i] = ihlp2[i2];
595             vhlp1[i] = vhlp2[i2];
596             i2++;
597           } else {
598             ihlp1[i] = ihlp2[i1];
599             vhlp1[i] = vhlp2[i1];
600             i1++;
601           }
602         }
603       } else {
604         for (i=istart; i<i2end; i++) {
605           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
606             ihlp1[i] = ihlp2[i1];
607             i1++;
608           } else if (i2<i2end) {
609             ihlp1[i] = ihlp2[i2];
610             i2++;
611           } else {
612             ihlp1[i] = ihlp2[i1];
613             i1++;
614           }
615         }
616       }
617     }
618 
619     /* Swap the two array sets */
620     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
621     vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
622   }
623 
624   /* Copy one more time in case the sorted arrays are the temporary ones */
625   if (ihlp2 != icol) {
626     for (i=0; i<nnz; i++) icol[i] = ihlp2[i];
627     if (val) {
628       for (i=0; i<nnz; i++) val[i] = vhlp2[i];
629     }
630   }
631 
632   ierr = PetscFree(ialloc);CHKERRQ(ierr);
633   if (val) {ierr = PetscFree(valloc);CHKERRQ(ierr);}
634   PetscFunctionReturn(0);
635 }
636 
637 /*
638   spbas_apply_reordering_rows:
639     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
640 */
641 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
642 {
643   PetscInt       i,j,ip;
644   PetscInt       nrows=matrix_A->nrows;
645   PetscInt       * row_nnz;
646   PetscInt       **icols;
647   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
648   PetscScalar    **vals    = NULL;
649   PetscErrorCode ierr;
650 
651   PetscFunctionBegin;
652   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
653 
654   if (do_values) {
655     ierr = PetscMalloc1(nrows, &vals);CHKERRQ(ierr);
656   }
657   ierr = PetscMalloc1(nrows, &row_nnz);CHKERRQ(ierr);
658   ierr = PetscMalloc1(nrows, &icols);CHKERRQ(ierr);
659 
660   for (i=0; i<nrows; i++) {
661     ip = permutation[i];
662     if (do_values) vals[i] = matrix_A->values[ip];
663     icols[i]   = matrix_A->icols[ip];
664     row_nnz[i] = matrix_A->row_nnz[ip];
665     for (j=0; j<row_nnz[i]; j++) icols[i][j] += ip-i;
666   }
667 
668   if (do_values) { ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
669   ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
670   ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
671 
672   if (do_values) matrix_A->values = vals;
673   matrix_A->icols   = icols;
674   matrix_A->row_nnz = row_nnz;
675   PetscFunctionReturn(0);
676 }
677 
678 
679 /*
680   spbas_apply_reordering_cols:
681     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
682 */
683 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A,const PetscInt *permutation)
684 {
685   PetscInt       i,j;
686   PetscInt       nrows=matrix_A->nrows;
687   PetscInt       row_nnz;
688   PetscInt       *icols;
689   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
690   PetscScalar    *vals     = NULL;
691   PetscErrorCode ierr;
692 
693   PetscFunctionBegin;
694   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
695 
696   for (i=0; i<nrows; i++) {
697     icols   = matrix_A->icols[i];
698     row_nnz = matrix_A->row_nnz[i];
699     if (do_values) vals = matrix_A->values[i];
700 
701     for (j=0; j<row_nnz; j++) {
702       icols[j] = permutation[i+icols[j]]-i;
703     }
704     ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
705   }
706   PetscFunctionReturn(0);
707 }
708 
709 /*
710   spbas_apply_reordering:
711     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
712 */
713 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
714 {
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   ierr = spbas_apply_reordering_rows(matrix_A, inv_perm);CHKERRQ(ierr);
719   ierr = spbas_apply_reordering_cols(matrix_A, permutation);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
724 {
725   spbas_matrix   retval;
726   PetscInt       i, j, i0, r_nnz;
727   PetscErrorCode ierr;
728 
729   PetscFunctionBegin;
730   /* Copy input values */
731   retval.nrows = nrows;
732   retval.ncols = ncols;
733   retval.nnz   = ai[nrows];
734 
735   retval.block_data   = PETSC_TRUE;
736   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
737 
738   /* Allocate output matrix */
739   ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
740   for (i=0; i<nrows; i++) retval.row_nnz[i] = ai[i+1]-ai[i];
741   ierr = spbas_allocate_data(&retval);CHKERRQ(ierr);
742   /* Copy the structure */
743   for (i = 0; i<retval.nrows; i++)  {
744     i0    = ai[i];
745     r_nnz = ai[i+1]-i0;
746 
747     for (j=0; j<r_nnz; j++) {
748       retval.icols[i][j] = aj[i0+j]-i;
749     }
750   }
751   *result = retval;
752   PetscFunctionReturn(0);
753 }
754 
755 
756 /*
757    spbas_mark_row_power:
758       Mark the columns in row 'row' which are nonzero in
759           matrix^2log(marker).
760 */
761 PetscErrorCode spbas_mark_row_power(PetscInt *iwork,             /* marker-vector */
762                                     PetscInt row,                /* row for which the columns are marked */
763                                     spbas_matrix * in_matrix,    /* matrix for which the power is being  calculated */
764                                     PetscInt marker,             /* marker-value: 2^power */
765                                     PetscInt minmrk,             /* lower bound for marked points */
766                                     PetscInt maxmrk)             /* upper bound for marked points */
767 {
768   PetscErrorCode ierr;
769   PetscInt       i,j, nnz;
770 
771   PetscFunctionBegin;
772   nnz = in_matrix->row_nnz[row];
773 
774   /* For higher powers, call this function recursively */
775   if (marker>1) {
776     for (i=0; i<nnz; i++) {
777       j = row + in_matrix->icols[row][i];
778       if (minmrk<=j && j<maxmrk && iwork[j] < marker) {
779         ierr      = spbas_mark_row_power(iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
780         iwork[j] |= marker;
781       }
782     }
783   } else {
784     /*  Mark the columns reached */
785     for (i=0; i<nnz; i++)  {
786       j = row + in_matrix->icols[row][i];
787       if (minmrk<=j && j<maxmrk) iwork[j] |= 1;
788     }
789   }
790   PetscFunctionReturn(0);
791 }
792 
793 
794 /*
795    spbas_power
796       Calculate sparseness patterns for incomplete Cholesky decompositions
797       of a given order: (almost) all nonzeros of the matrix^(order+1) which
798       are inside the band width are found and stored in the output sparseness
799       pattern.
800 */
801 PetscErrorCode spbas_power(spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
802 {
803   spbas_matrix   retval;
804   PetscInt       nrows = in_matrix.nrows;
805   PetscInt       ncols = in_matrix.ncols;
806   PetscInt       i, j, kend;
807   PetscInt       nnz, inz;
808   PetscInt       *iwork;
809   PetscInt       marker;
810   PetscInt       maxmrk=0;
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814   if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
815   if (ncols != nrows) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error\n");
816   if (in_matrix.values) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
817   if (power<=0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
818 
819   /* Copy input values*/
820   retval.nrows        = ncols;
821   retval.ncols        = nrows;
822   retval.nnz          = 0;
823   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
824   retval.block_data   = PETSC_FALSE;
825 
826   /* Allocate sparseness pattern */
827   ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
828 
829   /* Allocate marker array */
830   ierr = PetscMalloc1(nrows, &iwork);CHKERRQ(ierr);
831 
832   /* Erase the pattern for this row */
833   ierr = PetscMemzero((void*) iwork, retval.nrows*sizeof(PetscInt));CHKERRQ(ierr);
834 
835   /* Calculate marker values */
836   marker = 1; for (i=1; i<power; i++) marker*=2;
837 
838   for (i=0; i<nrows; i++)  {
839     /* Calculate the pattern for each row */
840 
841     nnz  = in_matrix.row_nnz[i];
842     kend = i+in_matrix.icols[i][nnz-1];
843     if (maxmrk<=kend) maxmrk=kend+1;
844     ierr = spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
845 
846     /* Count the columns*/
847     nnz = 0;
848     for (j=i; j<maxmrk; j++) nnz+= (iwork[j]!=0);
849 
850     /* Allocate the column indices */
851     retval.row_nnz[i] = nnz;
852     ierr = PetscMalloc1(nnz,&retval.icols[i]);CHKERRQ(ierr);
853 
854     /* Administrate the column indices */
855     inz = 0;
856     for (j=i; j<maxmrk; j++) {
857       if (iwork[j]) {
858         retval.icols[i][inz] = j-i;
859         inz++;
860         iwork[j]=0;
861       }
862     }
863     retval.nnz += nnz;
864   };
865   ierr    = PetscFree(iwork);CHKERRQ(ierr);
866   *result = retval;
867   PetscFunctionReturn(0);
868 }
869 
870 
871 
872 /*
873    spbas_keep_upper:
874       remove the lower part of the matrix: keep the upper part
875 */
876 PetscErrorCode spbas_keep_upper(spbas_matrix * inout_matrix)
877 {
878   PetscInt i, j;
879   PetscInt jstart;
880 
881   PetscFunctionBegin;
882   if (inout_matrix->block_data) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
883   for (i=0; i<inout_matrix->nrows; i++)  {
884     for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++) {}
885     if (jstart>0) {
886       for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
887         inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart];
888       }
889 
890       if (inout_matrix->values) {
891         for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
892           inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
893         }
894       }
895 
896       inout_matrix->row_nnz[i] -= jstart;
897 
898       inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
899 
900       if (inout_matrix->values) {
901         inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
902       }
903       inout_matrix->nnz -= jstart;
904     }
905   }
906   PetscFunctionReturn(0);
907 }
908 
909