xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l>
11 - -pc_factor_drop_tolerance
12 
13   Level: beginner
14 
15    Contributed by: Bas van 't Hof
16 
17 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage, PCFactorSetLevels(), PCFactorSetDropTolerance()
18 
19 M*/
20 
21 /*
22   spbas_memory_requirement:
23     Calculate the number of bytes needed to store tha matrix
24 */
25 #undef __FUNCT__
26 #define __FUNCT__ "spbas_memory_requirement"
27 long int spbas_memory_requirement(spbas_matrix matrix)
28 {
29   long int memreq = 6 * sizeof(PetscInt)  + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
30                     sizeof(PetscBool)               + /* block_data */
31                     sizeof(PetscScalar**)           + /* values */
32                     sizeof(PetscScalar*)            + /* alloc_val */
33                     2 * sizeof(int**)               + /* icols, icols0 */
34                     2 * sizeof(PetscInt*)           + /* row_nnz, alloc_icol */
35                     matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */
36                     matrix.nrows * sizeof(PetscInt*); /* icols[*] */
37 
38   /* icol0[*] */
39   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
40 
41   /* icols[*][*] */
42   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
43   else memreq += matrix.nnz * sizeof(PetscInt);
44 
45   if (matrix.values) {
46     memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
47     /* values[*][*] */
48     if (matrix.block_data) memreq += matrix.n_alloc_val  * sizeof(PetscScalar);
49     else memreq += matrix.nnz * sizeof(PetscScalar);
50   }
51   return memreq;
52 }
53 
54 /*
55   spbas_allocate_pattern:
56     allocate the pattern arrays row_nnz, icols and optionally values
57 */
58 #undef __FUNCT__
59 #define __FUNCT__ "spbas_allocate_pattern"
60 PetscErrorCode spbas_allocate_pattern(spbas_matrix * result, PetscBool do_values)
61 {
62   PetscErrorCode ierr;
63   PetscInt       nrows        = result->nrows;
64   PetscInt       col_idx_type = result->col_idx_type;
65 
66   PetscFunctionBegin;
67   /* Allocate sparseness pattern */
68   ierr = PetscMalloc1(nrows,&result->row_nnz);CHKERRQ(ierr);
69   ierr = PetscMalloc1(nrows,&result->icols);CHKERRQ(ierr);
70 
71   /* If offsets are given wrt an array, create array */
72   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
73     ierr = PetscMalloc1(nrows,&result->icol0);CHKERRQ(ierr);
74   } else  {
75     result->icol0 = NULL;
76   }
77 
78   /* If values are given, allocate values array */
79   if (do_values)  {
80     ierr = PetscMalloc1(nrows,&result->values);CHKERRQ(ierr);
81   } else {
82     result->values = NULL;
83   }
84   PetscFunctionReturn(0);
85 }
86 
87 /*
88 spbas_allocate_data:
89    in case of block_data:
90        Allocate the data arrays alloc_icol and optionally alloc_val,
91        set appropriate pointers from icols and values;
92    in case of !block_data:
93        Allocate the arrays icols[i] and optionally values[i]
94 */
95 #undef __FUNCT__
96 #define __FUNCT__ "spbas_allocate_data"
97 PetscErrorCode spbas_allocate_data(spbas_matrix * result)
98 {
99   PetscInt       i;
100   PetscInt       nnz   = result->nnz;
101   PetscInt       nrows = result->nrows;
102   PetscInt       r_nnz;
103   PetscErrorCode ierr;
104   PetscBool      do_values  = (result->values != NULL) ? PETSC_TRUE : PETSC_FALSE;
105   PetscBool      block_data = result->block_data;
106 
107   PetscFunctionBegin;
108   if (block_data) {
109     /* Allocate the column number array and point to it */
110     result->n_alloc_icol = nnz;
111 
112     ierr = PetscMalloc1(nnz, &result->alloc_icol);CHKERRQ(ierr);
113 
114     result->icols[0] = result->alloc_icol;
115     for (i=1; i<nrows; i++)  {
116       result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
117     }
118 
119     /* Allocate the value array and point to it */
120     if (do_values) {
121       result->n_alloc_val = nnz;
122 
123       ierr = PetscMalloc1(nnz, &result->alloc_val);CHKERRQ(ierr);
124 
125       result->values[0] = result->alloc_val;
126       for (i=1; i<nrows; i++) {
127         result->values[i] = result->values[i-1] + result->row_nnz[i-1];
128       }
129     }
130   } else {
131     for (i=0; i<nrows; i++)   {
132       r_nnz = result->row_nnz[i];
133       ierr  = PetscMalloc1(r_nnz, &result->icols[i]);CHKERRQ(ierr);
134     }
135     if (do_values) {
136       for (i=0; i<nrows; i++)  {
137         r_nnz = result->row_nnz[i];
138         ierr  = PetscMalloc1(r_nnz, &result->values[i]);CHKERRQ(ierr);
139       }
140     }
141   }
142   PetscFunctionReturn(0);
143 }
144 
145 /*
146   spbas_row_order_icol
147      determine if row i1 should come
148        + before row i2 in the sorted rows (return -1),
149        + after (return 1)
150        + is identical (return 0).
151 */
152 #undef __FUNCT__
153 #define __FUNCT__ "spbas_row_order_icol"
154 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
155 {
156   PetscInt j;
157   PetscInt nnz1    = irow_in[i1+1] - irow_in[i1];
158   PetscInt nnz2    = irow_in[i2+1] - irow_in[i2];
159   PetscInt * icol1 = &icol_in[irow_in[i1]];
160   PetscInt * icol2 = &icol_in[irow_in[i2]];
161 
162   if (nnz1<nnz2) return -1;
163   if (nnz1>nnz2) return 1;
164 
165   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
166     for (j=0; j<nnz1; j++) {
167       if (icol1[j]< icol2[j]) return -1;
168       if (icol1[j]> icol2[j]) return 1;
169     }
170   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
171     for (j=0; j<nnz1; j++) {
172       if (icol1[j]-i1< icol2[j]-i2) return -1;
173       if (icol1[j]-i1> icol2[j]-i2) return 1;
174     }
175   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
176     for (j=1; j<nnz1; j++) {
177       if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) return -1;
178       if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) return 1;
179     }
180   }
181   return 0;
182 }
183 
184 /*
185   spbas_mergesort_icols:
186     return a sorting of the rows in which identical sparseness patterns are
187     next to each other
188 */
189 #undef __FUNCT__
190 #define __FUNCT__ "spbas_mergesort_icols"
191 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
192 {
193   PetscErrorCode ierr;
194   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
195   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
196   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
197   PetscInt       *ialloc;     /* Allocated arrays */
198   PetscInt       *iswap;      /* auxiliary pointers for swapping */
199   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
200   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
201 
202   PetscFunctionBegin;
203   ierr = PetscMalloc1(nrows,&ialloc);CHKERRQ(ierr);
204 
205   ihlp1 = ialloc;
206   ihlp2 = isort;
207 
208   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
209   for (istep=1; istep<nrows; istep*=2) {
210     /*
211       Combine sorted parts
212           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
213       of ihlp2 and vhlp2
214 
215       into one sorted part
216           istart:istart+2*istep-1
217       of ihlp1 and vhlp1
218     */
219     for (istart=0; istart<nrows; istart+=2*istep) {
220       /* Set counters and bound array part endings */
221       i1=istart;        i1end = i1+istep;  if (i1end>nrows) i1end=nrows;
222       i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) i2end=nrows;
223 
224       /* Merge the two array parts */
225       for (i=istart; i<i2end; i++)  {
226         if (i1<i1end && i2<i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
227           ihlp1[i] = ihlp2[i1];
228           i1++;
229         } else if (i2<i2end) {
230           ihlp1[i] = ihlp2[i2];
231           i2++;
232         } else {
233           ihlp1[i] = ihlp2[i1];
234           i1++;
235         }
236       }
237     }
238 
239     /* Swap the two array sets */
240     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
241   }
242 
243   /* Copy one more time in case the sorted arrays are the temporary ones */
244   if (ihlp2 != isort) {
245     for (i=0; i<nrows; i++) isort[i] = ihlp2[i];
246   }
247   ierr = PetscFree(ialloc);CHKERRQ(ierr);
248   PetscFunctionReturn(0);
249 }
250 
251 
252 
253 /*
254   spbas_compress_pattern:
255      calculate a compressed sparseness pattern for a sparseness pattern
256      given in compressed row storage. The compressed sparseness pattern may
257      require (much) less memory.
258 */
259 #undef __FUNCT__
260 #define __FUNCT__ "spbas_compress_pattern"
261 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
262 {
263   PetscInt        nnz      = irow_in[nrows];
264   long int        mem_orig = (nrows + nnz) * sizeof(PetscInt);
265   long int        mem_compressed;
266   PetscErrorCode  ierr;
267   PetscInt        *isort;
268   PetscInt        *icols;
269   PetscInt        row_nnz;
270   PetscInt        *ipoint;
271   PetscBool       *used;
272   PetscInt        ptr;
273   PetscInt        i,j;
274   const PetscBool no_values = PETSC_FALSE;
275 
276   PetscFunctionBegin;
277   /* Allocate the structure of the new matrix */
278   B->nrows        = nrows;
279   B->ncols        = ncols;
280   B->nnz          = nnz;
281   B->col_idx_type = col_idx_type;
282   B->block_data   = PETSC_TRUE;
283 
284   ierr = spbas_allocate_pattern(B, no_values);CHKERRQ(ierr);
285 
286   /* When using an offset array, set it */
287   if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
288     for (i=0; i<nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
289   }
290 
291   /* Allocate the ordering for the rows */
292   ierr = PetscMalloc1(nrows,&isort);CHKERRQ(ierr);
293   ierr = PetscMalloc1(nrows,&ipoint);CHKERRQ(ierr);
294   ierr = PetscMalloc1(nrows,&used);CHKERRQ(ierr);
295 
296   /*  Initialize the sorting */
297   ierr = PetscMemzero((void*) used, nrows*sizeof(PetscBool));CHKERRQ(ierr);
298   for (i = 0; i<nrows; i++)  {
299     B->row_nnz[i] = irow_in[i+1]-irow_in[i];
300     isort[i]      = i;
301     ipoint[i]     = i;
302   }
303 
304   /* Sort the rows so that identical columns will be next to each other */
305   ierr = spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
306   ierr = PetscInfo(NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
307 
308   /* Replace identical rows with the first one in the list */
309   for (i=1; i<nrows; i++) {
310     if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
311       ipoint[isort[i]] = ipoint[isort[i-1]];
312     }
313   }
314 
315   /* Collect the rows which are used*/
316   for (i=0; i<nrows; i++) used[ipoint[i]] = PETSC_TRUE;
317 
318   /* Calculate needed memory */
319   B->n_alloc_icol = 0;
320   for (i=0; i<nrows; i++)  {
321     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
322   }
323   ierr = PetscMalloc1(B->n_alloc_icol,&B->alloc_icol);CHKERRQ(ierr);
324 
325   /* Fill in the diagonal offsets for the rows which store their own data */
326   ptr = 0;
327   for (i=0; i<B->nrows; i++) {
328     if (used[i]) {
329       B->icols[i] = &B->alloc_icol[ptr];
330       icols = &icol_in[irow_in[i]];
331       row_nnz = B->row_nnz[i];
332       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
333         for (j=0; j<row_nnz; j++) {
334           B->icols[i][j] = icols[j];
335         }
336       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
337         for (j=0; j<row_nnz; j++) {
338           B->icols[i][j] = icols[j]-i;
339         }
340       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
341         for (j=0; j<row_nnz; j++) {
342           B->icols[i][j] = icols[j]-icols[0];
343         }
344       }
345       ptr += B->row_nnz[i];
346     }
347   }
348 
349   /* Point to the right places for all data */
350   for (i=0; i<nrows; i++) {
351     B->icols[i] = B->icols[ipoint[i]];
352   }
353   ierr = PetscInfo(NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
354   ierr = PetscInfo1(NULL,"         (%g nonzeros per row)\n", (double) ((PetscReal) nnz / (PetscReal) nrows));CHKERRQ(ierr);
355 
356   ierr=PetscFree(isort);CHKERRQ(ierr);
357   ierr=PetscFree(used);CHKERRQ(ierr);
358   ierr=PetscFree(ipoint);CHKERRQ(ierr);
359 
360   mem_compressed = spbas_memory_requirement(*B);
361   *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
362   PetscFunctionReturn(0);
363 }
364 
365 /*
366    spbas_incomplete_cholesky
367        Incomplete Cholesky decomposition
368 */
369 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
370 
371 /*
372   spbas_delete : de-allocate the arrays owned by this matrix
373 */
374 #undef __FUNCT__
375 #define __FUNCT__ "spbas_delete"
376 PetscErrorCode spbas_delete(spbas_matrix matrix)
377 {
378   PetscInt       i;
379   PetscErrorCode ierr;
380 
381   PetscFunctionBegin;
382   if (matrix.block_data) {
383     ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr);
384     if (matrix.values) {ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
385   } else {
386     for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
387     ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
388     if (matrix.values) {
389       for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
390     }
391   }
392 
393   ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
394   ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
395   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
396   ierr=PetscFree(matrix.values);CHKERRQ(ierr);
397   PetscFunctionReturn(0);
398 }
399 
400 /*
401 spbas_matrix_to_crs:
402    Convert an spbas_matrix to compessed row storage
403 */
404 #undef __FUNCT__
405 #define __FUNCT__ "spbas_matrix_to_crs"
406 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
407 {
408   PetscInt       nrows = matrix_A.nrows;
409   PetscInt       nnz   = matrix_A.nnz;
410   PetscInt       i,j,r_nnz,i0;
411   PetscInt       *irow;
412   PetscInt       *icol;
413   PetscInt       *icol_A;
414   MatScalar      *val;
415   PetscScalar    *val_A;
416   PetscInt       col_idx_type = matrix_A.col_idx_type;
417   PetscBool      do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
418   PetscErrorCode ierr;
419 
420   PetscFunctionBegin;
421   ierr      = PetscMalloc1(nrows+1, &irow);CHKERRQ(ierr);
422   ierr      = PetscMalloc1(nnz, &icol);CHKERRQ(ierr);
423   *icol_out = icol;
424   *irow_out = irow;
425   if (do_values) {
426     ierr     = PetscMalloc1(nnz, &val);CHKERRQ(ierr);
427     *val_out = val; *icol_out = icol; *irow_out=irow;
428   }
429 
430   irow[0]=0;
431   for (i=0; i<nrows; i++) {
432     r_nnz     = matrix_A.row_nnz[i];
433     i0        = irow[i];
434     irow[i+1] = i0 + r_nnz;
435     icol_A    = matrix_A.icols[i];
436 
437     if (do_values) {
438       val_A = matrix_A.values[i];
439       for (j=0; j<r_nnz; j++) {
440         icol[i0+j] = icol_A[j];
441         val[i0+j]  = val_A[j];
442       }
443     } else {
444       for (j=0; j<r_nnz; j++) icol[i0+j] = icol_A[j];
445     }
446 
447     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
448       for (j=0; j<r_nnz; j++) icol[i0+j] += i;
449     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
450       i0 = matrix_A.icol0[i];
451       for (j=0; j<r_nnz; j++) icol[i0+j] += i0;
452     }
453   }
454   PetscFunctionReturn(0);
455 }
456 
457 
458 /*
459     spbas_transpose
460        return the transpose of a matrix
461 */
462 #undef __FUNCT__
463 #define __FUNCT__ "spbas_transpose"
464 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix * result)
465 {
466   PetscInt       col_idx_type = in_matrix.col_idx_type;
467   PetscInt       nnz          = in_matrix.nnz;
468   PetscInt       ncols        = in_matrix.nrows;
469   PetscInt       nrows        = in_matrix.ncols;
470   PetscInt       i,j,k;
471   PetscInt       r_nnz;
472   PetscInt       *irow;
473   PetscInt       icol0 = 0;
474   PetscScalar    * val;
475   PetscErrorCode ierr;
476 
477   PetscFunctionBegin;
478   /* Copy input values */
479   result->nrows        = nrows;
480   result->ncols        = ncols;
481   result->nnz          = nnz;
482   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
483   result->block_data   = PETSC_TRUE;
484 
485   /* Allocate sparseness pattern */
486   ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
487 
488   /*  Count the number of nonzeros in each row */
489   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
490 
491   for (i=0; i<ncols; i++) {
492     r_nnz = in_matrix.row_nnz[i];
493     irow  = in_matrix.icols[i];
494     if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
495       for (j=0; j<r_nnz; j++) result->row_nnz[irow[j]]++;
496     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
497       for (j=0; j<r_nnz; j++) result->row_nnz[i+irow[j]]++;
498     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
499       icol0=in_matrix.icol0[i];
500       for (j=0; j<r_nnz; j++) result->row_nnz[icol0+irow[j]]++;
501     }
502   }
503 
504   /* Set the pointers to the data */
505   ierr = spbas_allocate_data(result);CHKERRQ(ierr);
506 
507   /* Reset the number of nonzeros in each row */
508   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
509 
510   /* Fill the data arrays */
511   if (in_matrix.values) {
512     for (i=0; i<ncols; i++) {
513       r_nnz = in_matrix.row_nnz[i];
514       irow  = in_matrix.icols[i];
515       val   = in_matrix.values[i];
516 
517       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0 = 0;
518       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
519       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0 = in_matrix.icol0[i];
520       for (j=0; j<r_nnz; j++)  {
521         k = icol0 + irow[j];
522         result->icols[k][result->row_nnz[k]]  = i;
523         result->values[k][result->row_nnz[k]] = val[j];
524         result->row_nnz[k]++;
525       }
526     }
527   } else {
528     for (i=0; i<ncols; i++) {
529       r_nnz = in_matrix.row_nnz[i];
530       irow  = in_matrix.icols[i];
531 
532       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0=0;
533       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0=i;
534       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0=in_matrix.icol0[i];
535 
536       for (j=0; j<r_nnz; j++) {
537         k = icol0 + irow[j];
538         result->icols[k][result->row_nnz[k]] = i;
539         result->row_nnz[k]++;
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
545 
546 /*
547    spbas_mergesort
548 
549       mergesort for an array of intergers and an array of associated
550       reals
551 
552       on output, icol[0..nnz-1] is increasing;
553                   val[0..nnz-1] has undergone the same permutation as icol
554 
555       NB: val may be NULL: in that case, only the integers are sorted
556 
557 */
558 #undef __FUNCT__
559 #define __FUNCT__ "spbas_mergesort"
560 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
561 {
562   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
563   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
564   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
565   PetscInt       *ialloc;     /* Allocated arrays */
566   PetscScalar    *valloc=NULL;
567   PetscInt       *iswap;      /* auxiliary pointers for swapping */
568   PetscScalar    *vswap;
569   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
570   PetscScalar    *vhlp1=NULL;  /* (arrays under construction) */
571   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
572   PetscScalar    *vhlp2=NULL;
573   PetscErrorCode ierr;
574 
575   ierr  = PetscMalloc1(nnz,&ialloc);CHKERRQ(ierr);
576   ihlp1 = ialloc;
577   ihlp2 = icol;
578 
579   if (val) {
580     ierr  = PetscMalloc1(nnz,&valloc);CHKERRQ(ierr);
581     vhlp1 = valloc;
582     vhlp2 = val;
583   }
584 
585 
586   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
587   for (istep=1; istep<nnz; istep*=2) {
588     /*
589       Combine sorted parts
590           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
591       of ihlp2 and vhlp2
592 
593       into one sorted part
594           istart:istart+2*istep-1
595       of ihlp1 and vhlp1
596     */
597     for (istart=0; istart<nnz; istart+=2*istep) {
598       /* Set counters and bound array part endings */
599       i1=istart;        i1end = i1+istep;  if (i1end>nnz) i1end=nnz;
600       i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) i2end=nnz;
601 
602       /* Merge the two array parts */
603       if (val) {
604         for (i=istart; i<i2end; i++) {
605           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
606             ihlp1[i] = ihlp2[i1];
607             vhlp1[i] = vhlp2[i1];
608             i1++;
609           } else if (i2<i2end) {
610             ihlp1[i] = ihlp2[i2];
611             vhlp1[i] = vhlp2[i2];
612             i2++;
613           } else {
614             ihlp1[i] = ihlp2[i1];
615             vhlp1[i] = vhlp2[i1];
616             i1++;
617           }
618         }
619       } else {
620         for (i=istart; i<i2end; i++) {
621           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
622             ihlp1[i] = ihlp2[i1];
623             i1++;
624           } else if (i2<i2end) {
625             ihlp1[i] = ihlp2[i2];
626             i2++;
627           } else {
628             ihlp1[i] = ihlp2[i1];
629             i1++;
630           }
631         }
632       }
633     }
634 
635     /* Swap the two array sets */
636     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
637     vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
638   }
639 
640   /* Copy one more time in case the sorted arrays are the temporary ones */
641   if (ihlp2 != icol) {
642     for (i=0; i<nnz; i++) icol[i] = ihlp2[i];
643     if (val) {
644       for (i=0; i<nnz; i++) val[i] = vhlp2[i];
645     }
646   }
647 
648   ierr = PetscFree(ialloc);CHKERRQ(ierr);
649   if (val) {ierr = PetscFree(valloc);CHKERRQ(ierr);}
650   PetscFunctionReturn(0);
651 }
652 
653 /*
654   spbas_apply_reordering_rows:
655     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
656 */
657 #undef __FUNCT__
658 #define __FUNCT__ "spbas_apply_reordering_rows"
659 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
660 {
661   PetscInt       i,j,ip;
662   PetscInt       nrows=matrix_A->nrows;
663   PetscInt       * row_nnz;
664   PetscInt       **icols;
665   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
666   PetscScalar    **vals    = NULL;
667   PetscErrorCode ierr;
668 
669   PetscFunctionBegin;
670   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
671 
672   if (do_values) {
673     ierr = PetscMalloc1(nrows, &vals);CHKERRQ(ierr);
674   }
675   ierr = PetscMalloc1(nrows, &row_nnz);CHKERRQ(ierr);
676   ierr = PetscMalloc1(nrows, &icols);CHKERRQ(ierr);
677 
678   for (i=0; i<nrows; i++) {
679     ip = permutation[i];
680     if (do_values) vals[i] = matrix_A->values[ip];
681     icols[i]   = matrix_A->icols[ip];
682     row_nnz[i] = matrix_A->row_nnz[ip];
683     for (j=0; j<row_nnz[i]; j++) icols[i][j] += ip-i;
684   }
685 
686   if (do_values) { ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
687   ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
688   ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
689 
690   if (do_values) matrix_A->values = vals;
691   matrix_A->icols   = icols;
692   matrix_A->row_nnz = row_nnz;
693   PetscFunctionReturn(0);
694 }
695 
696 
697 /*
698   spbas_apply_reordering_cols:
699     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
700 */
701 #undef __FUNCT__
702 #define __FUNCT__ "spbas_apply_reordering_cols"
703 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A,const PetscInt *permutation)
704 {
705   PetscInt       i,j;
706   PetscInt       nrows=matrix_A->nrows;
707   PetscInt       row_nnz;
708   PetscInt       *icols;
709   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
710   PetscScalar    *vals     = NULL;
711   PetscErrorCode ierr;
712 
713   PetscFunctionBegin;
714   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
715 
716   for (i=0; i<nrows; i++) {
717     icols   = matrix_A->icols[i];
718     row_nnz = matrix_A->row_nnz[i];
719     if (do_values) vals = matrix_A->values[i];
720 
721     for (j=0; j<row_nnz; j++) {
722       icols[j] = permutation[i+icols[j]]-i;
723     }
724     ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
725   }
726   PetscFunctionReturn(0);
727 }
728 
729 /*
730   spbas_apply_reordering:
731     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
732 */
733 #undef __FUNCT__
734 #define __FUNCT__ "spbas_apply_reordering"
735 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
736 {
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740   ierr = spbas_apply_reordering_rows(matrix_A, inv_perm);CHKERRQ(ierr);
741   ierr = spbas_apply_reordering_cols(matrix_A, permutation);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "spbas_pattern_only"
747 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
748 {
749   spbas_matrix   retval;
750   PetscInt       i, j, i0, r_nnz;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   /* Copy input values */
755   retval.nrows = nrows;
756   retval.ncols = ncols;
757   retval.nnz   = ai[nrows];
758 
759   retval.block_data   = PETSC_TRUE;
760   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
761 
762   /* Allocate output matrix */
763   ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
764   for (i=0; i<nrows; i++) retval.row_nnz[i] = ai[i+1]-ai[i];
765   ierr = spbas_allocate_data(&retval);CHKERRQ(ierr);
766   /* Copy the structure */
767   for (i = 0; i<retval.nrows; i++)  {
768     i0    = ai[i];
769     r_nnz = ai[i+1]-i0;
770 
771     for (j=0; j<r_nnz; j++) {
772       retval.icols[i][j] = aj[i0+j]-i;
773     }
774   }
775   *result = retval;
776   PetscFunctionReturn(0);
777 }
778 
779 
780 /*
781    spbas_mark_row_power:
782       Mark the columns in row 'row' which are nonzero in
783           matrix^2log(marker).
784 */
785 #undef __FUNCT__
786 #define __FUNCT__ "spbas_mark_row_power"
787 PetscErrorCode spbas_mark_row_power(PetscInt *iwork,             /* marker-vector */
788                                     PetscInt row,                /* row for which the columns are marked */
789                                     spbas_matrix * in_matrix,    /* matrix for which the power is being  calculated */
790                                     PetscInt marker,             /* marker-value: 2^power */
791                                     PetscInt minmrk,             /* lower bound for marked points */
792                                     PetscInt maxmrk)             /* upper bound for marked points */
793 {
794   PetscErrorCode ierr;
795   PetscInt       i,j, nnz;
796 
797   PetscFunctionBegin;
798   nnz = in_matrix->row_nnz[row];
799 
800   /* For higher powers, call this function recursively */
801   if (marker>1) {
802     for (i=0; i<nnz; i++) {
803       j = row + in_matrix->icols[row][i];
804       if (minmrk<=j && j<maxmrk && iwork[j] < marker) {
805         ierr      = spbas_mark_row_power(iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
806         iwork[j] |= marker;
807       }
808     }
809   } else {
810     /*  Mark the columns reached */
811     for (i=0; i<nnz; i++)  {
812       j = row + in_matrix->icols[row][i];
813       if (minmrk<=j && j<maxmrk) iwork[j] |= 1;
814     }
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 
820 /*
821    spbas_power
822       Calculate sparseness patterns for incomplete Cholesky decompositions
823       of a given order: (almost) all nonzeros of the matrix^(order+1) which
824       are inside the band width are found and stored in the output sparseness
825       pattern.
826 */
827 #undef __FUNCT__
828 #define __FUNCT__ "spbas_power"
829 PetscErrorCode spbas_power(spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
830 {
831   spbas_matrix   retval;
832   PetscInt       nrows = in_matrix.nrows;
833   PetscInt       ncols = in_matrix.ncols;
834   PetscInt       i, j, kend;
835   PetscInt       nnz, inz;
836   PetscInt       *iwork;
837   PetscInt       marker;
838   PetscInt       maxmrk=0;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
843   if (ncols != nrows) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error\n");
844   if (in_matrix.values) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
845   if (power<=0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
846 
847   /* Copy input values*/
848   retval.nrows        = ncols;
849   retval.ncols        = nrows;
850   retval.nnz          = 0;
851   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
852   retval.block_data   = PETSC_FALSE;
853 
854   /* Allocate sparseness pattern */
855   ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
856 
857   /* Allocate marker array */
858   ierr = PetscMalloc1(nrows, &iwork);CHKERRQ(ierr);
859 
860   /* Erase the pattern for this row */
861   ierr = PetscMemzero((void*) iwork, retval.nrows*sizeof(PetscInt));CHKERRQ(ierr);
862 
863   /* Calculate marker values */
864   marker = 1; for (i=1; i<power; i++) marker*=2;
865 
866   for (i=0; i<nrows; i++)  {
867     /* Calculate the pattern for each row */
868 
869     nnz  = in_matrix.row_nnz[i];
870     kend = i+in_matrix.icols[i][nnz-1];
871     if (maxmrk<=kend) maxmrk=kend+1;
872     ierr = spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
873 
874     /* Count the columns*/
875     nnz = 0;
876     for (j=i; j<maxmrk; j++) nnz+= (iwork[j]!=0);
877 
878     /* Allocate the column indices */
879     retval.row_nnz[i] = nnz;
880     ierr = PetscMalloc1(nnz,&retval.icols[i]);CHKERRQ(ierr);
881 
882     /* Administrate the column indices */
883     inz = 0;
884     for (j=i; j<maxmrk; j++) {
885       if (iwork[j]) {
886         retval.icols[i][inz] = j-i;
887         inz++;
888         iwork[j]=0;
889       }
890     }
891     retval.nnz += nnz;
892   };
893   ierr    = PetscFree(iwork);CHKERRQ(ierr);
894   *result = retval;
895   PetscFunctionReturn(0);
896 }
897 
898 
899 
900 /*
901    spbas_keep_upper:
902       remove the lower part of the matrix: keep the upper part
903 */
904 #undef __FUNCT__
905 #define __FUNCT__ "spbas_keep_upper"
906 PetscErrorCode spbas_keep_upper(spbas_matrix * inout_matrix)
907 {
908   PetscInt i, j;
909   PetscInt jstart;
910 
911   PetscFunctionBegin;
912   if (inout_matrix->block_data) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
913   for (i=0; i<inout_matrix->nrows; i++)  {
914     for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++) {}
915     if (jstart>0) {
916       for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
917         inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart];
918       }
919 
920       if (inout_matrix->values) {
921         for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
922           inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
923         }
924       }
925 
926       inout_matrix->row_nnz[i] -= jstart;
927 
928       inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
929 
930       if (inout_matrix->values) {
931         inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
932       }
933       inout_matrix->nnz -= jstart;
934     }
935   }
936   PetscFunctionReturn(0);
937 }
938 
939