xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Notes:
18     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
19      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
20      we recommend not using this functionality.
21 
22 .seealso: PCFactorSetMatSolverType(), MatSolverType, PCFactorSetLevels(), PCFactorSetDropTolerance()
23 
24 M*/
25 
26 /*
27   spbas_memory_requirement:
28     Calculate the number of bytes needed to store tha matrix
29 */
30 size_t spbas_memory_requirement(spbas_matrix matrix)
31 {
32   size_t memreq = 6 * sizeof(PetscInt)  + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
33                     sizeof(PetscBool)               + /* block_data */
34                     sizeof(PetscScalar**)           + /* values */
35                     sizeof(PetscScalar*)            + /* alloc_val */
36                     2 * sizeof(PetscInt**)          + /* icols, icols0 */
37                     2 * sizeof(PetscInt*)           + /* row_nnz, alloc_icol */
38                     matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */
39                     matrix.nrows * sizeof(PetscInt*); /* icols[*] */
40 
41   /* icol0[*] */
42   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
43 
44   /* icols[*][*] */
45   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
46   else memreq += matrix.nnz * sizeof(PetscInt);
47 
48   if (matrix.values) {
49     memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
50     /* values[*][*] */
51     if (matrix.block_data) memreq += matrix.n_alloc_val  * sizeof(PetscScalar);
52     else memreq += matrix.nnz * sizeof(PetscScalar);
53   }
54   return memreq;
55 }
56 
57 /*
58   spbas_allocate_pattern:
59     allocate the pattern arrays row_nnz, icols and optionally values
60 */
61 PetscErrorCode spbas_allocate_pattern(spbas_matrix * result, PetscBool do_values)
62 {
63   PetscErrorCode ierr;
64   PetscInt       nrows        = result->nrows;
65   PetscInt       col_idx_type = result->col_idx_type;
66 
67   PetscFunctionBegin;
68   /* Allocate sparseness pattern */
69   ierr = PetscMalloc1(nrows,&result->row_nnz);CHKERRQ(ierr);
70   ierr = PetscMalloc1(nrows,&result->icols);CHKERRQ(ierr);
71 
72   /* If offsets are given wrt an array, create array */
73   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
74     ierr = PetscMalloc1(nrows,&result->icol0);CHKERRQ(ierr);
75   } else  {
76     result->icol0 = NULL;
77   }
78 
79   /* If values are given, allocate values array */
80   if (do_values)  {
81     ierr = PetscMalloc1(nrows,&result->values);CHKERRQ(ierr);
82   } else {
83     result->values = NULL;
84   }
85   PetscFunctionReturn(0);
86 }
87 
88 /*
89 spbas_allocate_data:
90    in case of block_data:
91        Allocate the data arrays alloc_icol and optionally alloc_val,
92        set appropriate pointers from icols and values;
93    in case of !block_data:
94        Allocate the arrays icols[i] and optionally values[i]
95 */
96 PetscErrorCode spbas_allocate_data(spbas_matrix * result)
97 {
98   PetscInt       i;
99   PetscInt       nnz   = result->nnz;
100   PetscInt       nrows = result->nrows;
101   PetscInt       r_nnz;
102   PetscErrorCode ierr;
103   PetscBool      do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
104   PetscBool      block_data = result->block_data;
105 
106   PetscFunctionBegin;
107   if (block_data) {
108     /* Allocate the column number array and point to it */
109     result->n_alloc_icol = nnz;
110 
111     ierr = PetscMalloc1(nnz, &result->alloc_icol);CHKERRQ(ierr);
112 
113     result->icols[0] = result->alloc_icol;
114     for (i=1; i<nrows; i++)  {
115       result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
116     }
117 
118     /* Allocate the value array and point to it */
119     if (do_values) {
120       result->n_alloc_val = nnz;
121 
122       ierr = PetscMalloc1(nnz, &result->alloc_val);CHKERRQ(ierr);
123 
124       result->values[0] = result->alloc_val;
125       for (i=1; i<nrows; i++) {
126         result->values[i] = result->values[i-1] + result->row_nnz[i-1];
127       }
128     }
129   } else {
130     for (i=0; i<nrows; i++)   {
131       r_nnz = result->row_nnz[i];
132       ierr  = PetscMalloc1(r_nnz, &result->icols[i]);CHKERRQ(ierr);
133     }
134     if (do_values) {
135       for (i=0; i<nrows; i++)  {
136         r_nnz = result->row_nnz[i];
137         ierr  = PetscMalloc1(r_nnz, &result->values[i]);CHKERRQ(ierr);
138       }
139     }
140   }
141   PetscFunctionReturn(0);
142 }
143 
144 /*
145   spbas_row_order_icol
146      determine if row i1 should come
147        + before row i2 in the sorted rows (return -1),
148        + after (return 1)
149        + is identical (return 0).
150 */
151 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
152 {
153   PetscInt j;
154   PetscInt nnz1    = irow_in[i1+1] - irow_in[i1];
155   PetscInt nnz2    = irow_in[i2+1] - irow_in[i2];
156   PetscInt * icol1 = &icol_in[irow_in[i1]];
157   PetscInt * icol2 = &icol_in[irow_in[i2]];
158 
159   if (nnz1<nnz2) return -1;
160   if (nnz1>nnz2) return 1;
161 
162   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
163     for (j=0; j<nnz1; j++) {
164       if (icol1[j]< icol2[j]) return -1;
165       if (icol1[j]> icol2[j]) return 1;
166     }
167   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
168     for (j=0; j<nnz1; j++) {
169       if (icol1[j]-i1< icol2[j]-i2) return -1;
170       if (icol1[j]-i1> icol2[j]-i2) return 1;
171     }
172   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
173     for (j=1; j<nnz1; j++) {
174       if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) return -1;
175       if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) return 1;
176     }
177   }
178   return 0;
179 }
180 
181 /*
182   spbas_mergesort_icols:
183     return a sorting of the rows in which identical sparseness patterns are
184     next to each other
185 */
186 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
187 {
188   PetscErrorCode ierr;
189   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
190   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
191   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
192   PetscInt       *ialloc;     /* Allocated arrays */
193   PetscInt       *iswap;      /* auxiliary pointers for swapping */
194   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
195   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
196 
197   PetscFunctionBegin;
198   ierr = PetscMalloc1(nrows,&ialloc);CHKERRQ(ierr);
199 
200   ihlp1 = ialloc;
201   ihlp2 = isort;
202 
203   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
204   for (istep=1; istep<nrows; istep*=2) {
205     /*
206       Combine sorted parts
207           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
208       of ihlp2 and vhlp2
209 
210       into one sorted part
211           istart:istart+2*istep-1
212       of ihlp1 and vhlp1
213     */
214     for (istart=0; istart<nrows; istart+=2*istep) {
215       /* Set counters and bound array part endings */
216       i1=istart;        i1end = i1+istep;  if (i1end>nrows) i1end=nrows;
217       i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) i2end=nrows;
218 
219       /* Merge the two array parts */
220       for (i=istart; i<i2end; i++)  {
221         if (i1<i1end && i2<i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
222           ihlp1[i] = ihlp2[i1];
223           i1++;
224         } else if (i2<i2end) {
225           ihlp1[i] = ihlp2[i2];
226           i2++;
227         } else {
228           ihlp1[i] = ihlp2[i1];
229           i1++;
230         }
231       }
232     }
233 
234     /* Swap the two array sets */
235     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
236   }
237 
238   /* Copy one more time in case the sorted arrays are the temporary ones */
239   if (ihlp2 != isort) {
240     for (i=0; i<nrows; i++) isort[i] = ihlp2[i];
241   }
242   ierr = PetscFree(ialloc);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 
247 
248 /*
249   spbas_compress_pattern:
250      calculate a compressed sparseness pattern for a sparseness pattern
251      given in compressed row storage. The compressed sparseness pattern may
252      require (much) less memory.
253 */
254 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
255 {
256   PetscInt        nnz      = irow_in[nrows];
257   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
258   size_t          mem_compressed;
259   PetscErrorCode  ierr;
260   PetscInt        *isort;
261   PetscInt        *icols;
262   PetscInt        row_nnz;
263   PetscInt        *ipoint;
264   PetscBool       *used;
265   PetscInt        ptr;
266   PetscInt        i,j;
267   const PetscBool no_values = PETSC_FALSE;
268 
269   PetscFunctionBegin;
270   /* Allocate the structure of the new matrix */
271   B->nrows        = nrows;
272   B->ncols        = ncols;
273   B->nnz          = nnz;
274   B->col_idx_type = col_idx_type;
275   B->block_data   = PETSC_TRUE;
276 
277   ierr = spbas_allocate_pattern(B, no_values);CHKERRQ(ierr);
278 
279   /* When using an offset array, set it */
280   if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
281     for (i=0; i<nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
282   }
283 
284   /* Allocate the ordering for the rows */
285   ierr = PetscMalloc1(nrows,&isort);CHKERRQ(ierr);
286   ierr = PetscMalloc1(nrows,&ipoint);CHKERRQ(ierr);
287   ierr = PetscCalloc1(nrows,&used);CHKERRQ(ierr);
288 
289   for (i = 0; i<nrows; i++)  {
290     B->row_nnz[i] = irow_in[i+1]-irow_in[i];
291     isort[i]      = i;
292     ipoint[i]     = i;
293   }
294 
295   /* Sort the rows so that identical columns will be next to each other */
296   ierr = spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
297   ierr = PetscInfo(NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
298 
299   /* Replace identical rows with the first one in the list */
300   for (i=1; i<nrows; i++) {
301     if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
302       ipoint[isort[i]] = ipoint[isort[i-1]];
303     }
304   }
305 
306   /* Collect the rows which are used*/
307   for (i=0; i<nrows; i++) used[ipoint[i]] = PETSC_TRUE;
308 
309   /* Calculate needed memory */
310   B->n_alloc_icol = 0;
311   for (i=0; i<nrows; i++)  {
312     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
313   }
314   ierr = PetscMalloc1(B->n_alloc_icol,&B->alloc_icol);CHKERRQ(ierr);
315 
316   /* Fill in the diagonal offsets for the rows which store their own data */
317   ptr = 0;
318   for (i=0; i<B->nrows; i++) {
319     if (used[i]) {
320       B->icols[i] = &B->alloc_icol[ptr];
321       icols = &icol_in[irow_in[i]];
322       row_nnz = B->row_nnz[i];
323       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
324         for (j=0; j<row_nnz; j++) {
325           B->icols[i][j] = icols[j];
326         }
327       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
328         for (j=0; j<row_nnz; j++) {
329           B->icols[i][j] = icols[j]-i;
330         }
331       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
332         for (j=0; j<row_nnz; j++) {
333           B->icols[i][j] = icols[j]-icols[0];
334         }
335       }
336       ptr += B->row_nnz[i];
337     }
338   }
339 
340   /* Point to the right places for all data */
341   for (i=0; i<nrows; i++) {
342     B->icols[i] = B->icols[ipoint[i]];
343   }
344   ierr = PetscInfo(NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
345   ierr = PetscInfo1(NULL,"         (%g nonzeros per row)\n", (double) ((PetscReal) nnz / (PetscReal) nrows));CHKERRQ(ierr);
346 
347   ierr=PetscFree(isort);CHKERRQ(ierr);
348   ierr=PetscFree(used);CHKERRQ(ierr);
349   ierr=PetscFree(ipoint);CHKERRQ(ierr);
350 
351   mem_compressed = spbas_memory_requirement(*B);
352   *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
353   PetscFunctionReturn(0);
354 }
355 
356 /*
357    spbas_incomplete_cholesky
358        Incomplete Cholesky decomposition
359 */
360 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
361 
362 /*
363   spbas_delete : de-allocate the arrays owned by this matrix
364 */
365 PetscErrorCode spbas_delete(spbas_matrix matrix)
366 {
367   PetscInt       i;
368   PetscErrorCode ierr;
369 
370   PetscFunctionBegin;
371   if (matrix.block_data) {
372     ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr);
373     if (matrix.values) {ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
374   } else {
375     for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
376     ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
377     if (matrix.values) {
378       for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
379     }
380   }
381 
382   ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
383   ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
384   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
385   ierr=PetscFree(matrix.values);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 /*
390 spbas_matrix_to_crs:
391    Convert an spbas_matrix to compessed row storage
392 */
393 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
394 {
395   PetscInt       nrows = matrix_A.nrows;
396   PetscInt       nnz   = matrix_A.nnz;
397   PetscInt       i,j,r_nnz,i0;
398   PetscInt       *irow;
399   PetscInt       *icol;
400   PetscInt       *icol_A;
401   MatScalar      *val;
402   PetscScalar    *val_A;
403   PetscInt       col_idx_type = matrix_A.col_idx_type;
404   PetscBool      do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
405   PetscErrorCode ierr;
406 
407   PetscFunctionBegin;
408   ierr      = PetscMalloc1(nrows+1, &irow);CHKERRQ(ierr);
409   ierr      = PetscMalloc1(nnz, &icol);CHKERRQ(ierr);
410   *icol_out = icol;
411   *irow_out = irow;
412   if (do_values) {
413     ierr     = PetscMalloc1(nnz, &val);CHKERRQ(ierr);
414     *val_out = val; *icol_out = icol; *irow_out=irow;
415   }
416 
417   irow[0]=0;
418   for (i=0; i<nrows; i++) {
419     r_nnz     = matrix_A.row_nnz[i];
420     i0        = irow[i];
421     irow[i+1] = i0 + r_nnz;
422     icol_A    = matrix_A.icols[i];
423 
424     if (do_values) {
425       val_A = matrix_A.values[i];
426       for (j=0; j<r_nnz; j++) {
427         icol[i0+j] = icol_A[j];
428         val[i0+j]  = val_A[j];
429       }
430     } else {
431       for (j=0; j<r_nnz; j++) icol[i0+j] = icol_A[j];
432     }
433 
434     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
435       for (j=0; j<r_nnz; j++) icol[i0+j] += i;
436     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
437       i0 = matrix_A.icol0[i];
438       for (j=0; j<r_nnz; j++) icol[i0+j] += i0;
439     }
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 
445 /*
446     spbas_transpose
447        return the transpose of a matrix
448 */
449 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix * result)
450 {
451   PetscInt       col_idx_type = in_matrix.col_idx_type;
452   PetscInt       nnz          = in_matrix.nnz;
453   PetscInt       ncols        = in_matrix.nrows;
454   PetscInt       nrows        = in_matrix.ncols;
455   PetscInt       i,j,k;
456   PetscInt       r_nnz;
457   PetscInt       *irow;
458   PetscInt       icol0 = 0;
459   PetscScalar    * val;
460   PetscErrorCode ierr;
461 
462   PetscFunctionBegin;
463   /* Copy input values */
464   result->nrows        = nrows;
465   result->ncols        = ncols;
466   result->nnz          = nnz;
467   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
468   result->block_data   = PETSC_TRUE;
469 
470   /* Allocate sparseness pattern */
471   ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
472 
473   /*  Count the number of nonzeros in each row */
474   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
475 
476   for (i=0; i<ncols; i++) {
477     r_nnz = in_matrix.row_nnz[i];
478     irow  = in_matrix.icols[i];
479     if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
480       for (j=0; j<r_nnz; j++) result->row_nnz[irow[j]]++;
481     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
482       for (j=0; j<r_nnz; j++) result->row_nnz[i+irow[j]]++;
483     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
484       icol0=in_matrix.icol0[i];
485       for (j=0; j<r_nnz; j++) result->row_nnz[icol0+irow[j]]++;
486     }
487   }
488 
489   /* Set the pointers to the data */
490   ierr = spbas_allocate_data(result);CHKERRQ(ierr);
491 
492   /* Reset the number of nonzeros in each row */
493   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
494 
495   /* Fill the data arrays */
496   if (in_matrix.values) {
497     for (i=0; i<ncols; i++) {
498       r_nnz = in_matrix.row_nnz[i];
499       irow  = in_matrix.icols[i];
500       val   = in_matrix.values[i];
501 
502       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0 = 0;
503       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
504       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0 = in_matrix.icol0[i];
505       for (j=0; j<r_nnz; j++)  {
506         k = icol0 + irow[j];
507         result->icols[k][result->row_nnz[k]]  = i;
508         result->values[k][result->row_nnz[k]] = val[j];
509         result->row_nnz[k]++;
510       }
511     }
512   } else {
513     for (i=0; i<ncols; i++) {
514       r_nnz = in_matrix.row_nnz[i];
515       irow  = in_matrix.icols[i];
516 
517       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0=0;
518       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0=i;
519       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0=in_matrix.icol0[i];
520 
521       for (j=0; j<r_nnz; j++) {
522         k = icol0 + irow[j];
523         result->icols[k][result->row_nnz[k]] = i;
524         result->row_nnz[k]++;
525       }
526     }
527   }
528   PetscFunctionReturn(0);
529 }
530 
531 /*
532    spbas_mergesort
533 
534       mergesort for an array of integers and an array of associated
535       reals
536 
537       on output, icol[0..nnz-1] is increasing;
538                   val[0..nnz-1] has undergone the same permutation as icol
539 
540       NB: val may be NULL: in that case, only the integers are sorted
541 
542 */
543 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
544 {
545   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
546   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
547   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
548   PetscInt       *ialloc;     /* Allocated arrays */
549   PetscScalar    *valloc=NULL;
550   PetscInt       *iswap;      /* auxiliary pointers for swapping */
551   PetscScalar    *vswap;
552   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
553   PetscScalar    *vhlp1=NULL;  /* (arrays under construction) */
554   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
555   PetscScalar    *vhlp2=NULL;
556   PetscErrorCode ierr;
557 
558   ierr  = PetscMalloc1(nnz,&ialloc);CHKERRQ(ierr);
559   ihlp1 = ialloc;
560   ihlp2 = icol;
561 
562   if (val) {
563     ierr  = PetscMalloc1(nnz,&valloc);CHKERRQ(ierr);
564     vhlp1 = valloc;
565     vhlp2 = val;
566   }
567 
568 
569   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
570   for (istep=1; istep<nnz; istep*=2) {
571     /*
572       Combine sorted parts
573           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
574       of ihlp2 and vhlp2
575 
576       into one sorted part
577           istart:istart+2*istep-1
578       of ihlp1 and vhlp1
579     */
580     for (istart=0; istart<nnz; istart+=2*istep) {
581       /* Set counters and bound array part endings */
582       i1=istart;        i1end = i1+istep;  if (i1end>nnz) i1end=nnz;
583       i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) i2end=nnz;
584 
585       /* Merge the two array parts */
586       if (val) {
587         for (i=istart; i<i2end; i++) {
588           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
589             ihlp1[i] = ihlp2[i1];
590             vhlp1[i] = vhlp2[i1];
591             i1++;
592           } else if (i2<i2end) {
593             ihlp1[i] = ihlp2[i2];
594             vhlp1[i] = vhlp2[i2];
595             i2++;
596           } else {
597             ihlp1[i] = ihlp2[i1];
598             vhlp1[i] = vhlp2[i1];
599             i1++;
600           }
601         }
602       } else {
603         for (i=istart; i<i2end; i++) {
604           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
605             ihlp1[i] = ihlp2[i1];
606             i1++;
607           } else if (i2<i2end) {
608             ihlp1[i] = ihlp2[i2];
609             i2++;
610           } else {
611             ihlp1[i] = ihlp2[i1];
612             i1++;
613           }
614         }
615       }
616     }
617 
618     /* Swap the two array sets */
619     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
620     vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
621   }
622 
623   /* Copy one more time in case the sorted arrays are the temporary ones */
624   if (ihlp2 != icol) {
625     for (i=0; i<nnz; i++) icol[i] = ihlp2[i];
626     if (val) {
627       for (i=0; i<nnz; i++) val[i] = vhlp2[i];
628     }
629   }
630 
631   ierr = PetscFree(ialloc);CHKERRQ(ierr);
632   if (val) {ierr = PetscFree(valloc);CHKERRQ(ierr);}
633   PetscFunctionReturn(0);
634 }
635 
636 /*
637   spbas_apply_reordering_rows:
638     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
639 */
640 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
641 {
642   PetscInt       i,j,ip;
643   PetscInt       nrows=matrix_A->nrows;
644   PetscInt       * row_nnz;
645   PetscInt       **icols;
646   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
647   PetscScalar    **vals    = NULL;
648   PetscErrorCode ierr;
649 
650   PetscFunctionBegin;
651   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
652 
653   if (do_values) {
654     ierr = PetscMalloc1(nrows, &vals);CHKERRQ(ierr);
655   }
656   ierr = PetscMalloc1(nrows, &row_nnz);CHKERRQ(ierr);
657   ierr = PetscMalloc1(nrows, &icols);CHKERRQ(ierr);
658 
659   for (i=0; i<nrows; i++) {
660     ip = permutation[i];
661     if (do_values) vals[i] = matrix_A->values[ip];
662     icols[i]   = matrix_A->icols[ip];
663     row_nnz[i] = matrix_A->row_nnz[ip];
664     for (j=0; j<row_nnz[i]; j++) icols[i][j] += ip-i;
665   }
666 
667   if (do_values) { ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
668   ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
669   ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
670 
671   if (do_values) matrix_A->values = vals;
672   matrix_A->icols   = icols;
673   matrix_A->row_nnz = row_nnz;
674   PetscFunctionReturn(0);
675 }
676 
677 
678 /*
679   spbas_apply_reordering_cols:
680     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
681 */
682 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A,const PetscInt *permutation)
683 {
684   PetscInt       i,j;
685   PetscInt       nrows=matrix_A->nrows;
686   PetscInt       row_nnz;
687   PetscInt       *icols;
688   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
689   PetscScalar    *vals     = NULL;
690   PetscErrorCode ierr;
691 
692   PetscFunctionBegin;
693   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
694 
695   for (i=0; i<nrows; i++) {
696     icols   = matrix_A->icols[i];
697     row_nnz = matrix_A->row_nnz[i];
698     if (do_values) vals = matrix_A->values[i];
699 
700     for (j=0; j<row_nnz; j++) {
701       icols[j] = permutation[i+icols[j]]-i;
702     }
703     ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
704   }
705   PetscFunctionReturn(0);
706 }
707 
708 /*
709   spbas_apply_reordering:
710     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
711 */
712 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
713 {
714   PetscErrorCode ierr;
715 
716   PetscFunctionBegin;
717   ierr = spbas_apply_reordering_rows(matrix_A, inv_perm);CHKERRQ(ierr);
718   ierr = spbas_apply_reordering_cols(matrix_A, permutation);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
722 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
723 {
724   spbas_matrix   retval;
725   PetscInt       i, j, i0, r_nnz;
726   PetscErrorCode ierr;
727 
728   PetscFunctionBegin;
729   /* Copy input values */
730   retval.nrows = nrows;
731   retval.ncols = ncols;
732   retval.nnz   = ai[nrows];
733 
734   retval.block_data   = PETSC_TRUE;
735   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
736 
737   /* Allocate output matrix */
738   ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
739   for (i=0; i<nrows; i++) retval.row_nnz[i] = ai[i+1]-ai[i];
740   ierr = spbas_allocate_data(&retval);CHKERRQ(ierr);
741   /* Copy the structure */
742   for (i = 0; i<retval.nrows; i++)  {
743     i0    = ai[i];
744     r_nnz = ai[i+1]-i0;
745 
746     for (j=0; j<r_nnz; j++) {
747       retval.icols[i][j] = aj[i0+j]-i;
748     }
749   }
750   *result = retval;
751   PetscFunctionReturn(0);
752 }
753 
754 
755 /*
756    spbas_mark_row_power:
757       Mark the columns in row 'row' which are nonzero in
758           matrix^2log(marker).
759 */
760 PetscErrorCode spbas_mark_row_power(PetscInt *iwork,             /* marker-vector */
761                                     PetscInt row,                /* row for which the columns are marked */
762                                     spbas_matrix * in_matrix,    /* matrix for which the power is being  calculated */
763                                     PetscInt marker,             /* marker-value: 2^power */
764                                     PetscInt minmrk,             /* lower bound for marked points */
765                                     PetscInt maxmrk)             /* upper bound for marked points */
766 {
767   PetscErrorCode ierr;
768   PetscInt       i,j, nnz;
769 
770   PetscFunctionBegin;
771   nnz = in_matrix->row_nnz[row];
772 
773   /* For higher powers, call this function recursively */
774   if (marker>1) {
775     for (i=0; i<nnz; i++) {
776       j = row + in_matrix->icols[row][i];
777       if (minmrk<=j && j<maxmrk && iwork[j] < marker) {
778         ierr      = spbas_mark_row_power(iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
779         iwork[j] |= marker;
780       }
781     }
782   } else {
783     /*  Mark the columns reached */
784     for (i=0; i<nnz; i++)  {
785       j = row + in_matrix->icols[row][i];
786       if (minmrk<=j && j<maxmrk) iwork[j] |= 1;
787     }
788   }
789   PetscFunctionReturn(0);
790 }
791 
792 
793 /*
794    spbas_power
795       Calculate sparseness patterns for incomplete Cholesky decompositions
796       of a given order: (almost) all nonzeros of the matrix^(order+1) which
797       are inside the band width are found and stored in the output sparseness
798       pattern.
799 */
800 PetscErrorCode spbas_power(spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
801 {
802   spbas_matrix   retval;
803   PetscInt       nrows = in_matrix.nrows;
804   PetscInt       ncols = in_matrix.ncols;
805   PetscInt       i, j, kend;
806   PetscInt       nnz, inz;
807   PetscInt       *iwork;
808   PetscInt       marker;
809   PetscInt       maxmrk=0;
810   PetscErrorCode ierr;
811 
812   PetscFunctionBegin;
813   if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
814   if (ncols != nrows) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error\n");
815   if (in_matrix.values) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
816   if (power<=0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
817 
818   /* Copy input values*/
819   retval.nrows        = ncols;
820   retval.ncols        = nrows;
821   retval.nnz          = 0;
822   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
823   retval.block_data   = PETSC_FALSE;
824 
825   /* Allocate sparseness pattern */
826   ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
827 
828   /* Allocate marker array: note sure the max needed so use the max of the two */
829   ierr = PetscCalloc1(PetscMax(ncols,nrows), &iwork);CHKERRQ(ierr);
830 
831   /* Calculate marker values */
832   marker = 1; for (i=1; i<power; i++) marker*=2;
833 
834   for (i=0; i<nrows; i++)  {
835     /* Calculate the pattern for each row */
836 
837     nnz  = in_matrix.row_nnz[i];
838     kend = i+in_matrix.icols[i][nnz-1];
839     if (maxmrk<=kend) maxmrk=kend+1;
840     ierr = spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
841 
842     /* Count the columns*/
843     nnz = 0;
844     for (j=i; j<maxmrk; j++) nnz+= (iwork[j]!=0);
845 
846     /* Allocate the column indices */
847     retval.row_nnz[i] = nnz;
848     ierr = PetscMalloc1(nnz,&retval.icols[i]);CHKERRQ(ierr);
849 
850     /* Administrate the column indices */
851     inz = 0;
852     for (j=i; j<maxmrk; j++) {
853       if (iwork[j]) {
854         retval.icols[i][inz] = j-i;
855         inz++;
856         iwork[j] = 0;
857       }
858     }
859     retval.nnz += nnz;
860   };
861   ierr    = PetscFree(iwork);CHKERRQ(ierr);
862   *result = retval;
863   PetscFunctionReturn(0);
864 }
865 
866 
867 
868 /*
869    spbas_keep_upper:
870       remove the lower part of the matrix: keep the upper part
871 */
872 PetscErrorCode spbas_keep_upper(spbas_matrix * inout_matrix)
873 {
874   PetscInt i, j;
875   PetscInt jstart;
876 
877   PetscFunctionBegin;
878   if (inout_matrix->block_data) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
879   for (i=0; i<inout_matrix->nrows; i++)  {
880     for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++) {}
881     if (jstart>0) {
882       for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
883         inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart];
884       }
885 
886       if (inout_matrix->values) {
887         for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
888           inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
889         }
890       }
891 
892       inout_matrix->row_nnz[i] -= jstart;
893 
894       inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
895 
896       if (inout_matrix->values) {
897         inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
898       }
899       inout_matrix->nnz -= jstart;
900     }
901   }
902   PetscFunctionReturn(0);
903 }
904 
905