xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision fa2bb9fe3cba50bac3b093e83d6f95e65b517df9)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <../src/mat/impls/aij/seq/bas/spbas.h>
3 
4 /*MC
5   MATSOLVERBAS -  Provides ICC(k) with drop tolerance
6 
7   Works with MATAIJ  matrices
8 
9   Options Database Keys:
10 + -pc_factor_levels <l> - number of levels of fill
11 - -pc_factor_drop_tolerance - is not currently hooked up to do anything
12 
13   Level: intermediate
14 
15    Contributed by: Bas van 't Hof
16 
17    Notes:
18     Since this currently hooked up to use drop tolerance it should produce the same factors and hence convergence as the PETSc ICC, for higher
19      levels of fill it does not. This needs to be investigated. Unless you are interested in drop tolerance ICC and willing to work through the code
20      we recommend not using this functionality.
21 
22 .seealso: PCFactorSetMatSolverType(), MatSolverType, PCFactorSetLevels(), PCFactorSetDropTolerance()
23 
24 M*/
25 
26 /*
27   spbas_memory_requirement:
28     Calculate the number of bytes needed to store tha matrix
29 */
30 size_t spbas_memory_requirement(spbas_matrix matrix)
31 {
32   size_t memreq = 6 * sizeof(PetscInt)  + /* nrows, ncols, nnz, n_alloc_icol, n_alloc_val, col_idx_type */
33                     sizeof(PetscBool)               + /* block_data */
34                     sizeof(PetscScalar**)           + /* values */
35                     sizeof(PetscScalar*)            + /* alloc_val */
36                     2 * sizeof(PetscInt**)          + /* icols, icols0 */
37                     2 * sizeof(PetscInt*)           + /* row_nnz, alloc_icol */
38                     matrix.nrows * sizeof(PetscInt) + /* row_nnz[*] */
39                     matrix.nrows * sizeof(PetscInt*); /* icols[*] */
40 
41   /* icol0[*] */
42   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) memreq += matrix.nrows * sizeof(PetscInt);
43 
44   /* icols[*][*] */
45   if (matrix.block_data) memreq += matrix.n_alloc_icol * sizeof(PetscInt);
46   else memreq += matrix.nnz * sizeof(PetscInt);
47 
48   if (matrix.values) {
49     memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
50     /* values[*][*] */
51     if (matrix.block_data) memreq += matrix.n_alloc_val  * sizeof(PetscScalar);
52     else memreq += matrix.nnz * sizeof(PetscScalar);
53   }
54   return memreq;
55 }
56 
57 /*
58   spbas_allocate_pattern:
59     allocate the pattern arrays row_nnz, icols and optionally values
60 */
61 PetscErrorCode spbas_allocate_pattern(spbas_matrix * result, PetscBool do_values)
62 {
63   PetscErrorCode ierr;
64   PetscInt       nrows        = result->nrows;
65   PetscInt       col_idx_type = result->col_idx_type;
66 
67   PetscFunctionBegin;
68   /* Allocate sparseness pattern */
69   ierr = PetscMalloc1(nrows,&result->row_nnz);CHKERRQ(ierr);
70   ierr = PetscMalloc1(nrows,&result->icols);CHKERRQ(ierr);
71 
72   /* If offsets are given wrt an array, create array */
73   if (col_idx_type == SPBAS_OFFSET_ARRAY) {
74     ierr = PetscMalloc1(nrows,&result->icol0);CHKERRQ(ierr);
75   } else  {
76     result->icol0 = NULL;
77   }
78 
79   /* If values are given, allocate values array */
80   if (do_values)  {
81     ierr = PetscMalloc1(nrows,&result->values);CHKERRQ(ierr);
82   } else {
83     result->values = NULL;
84   }
85   PetscFunctionReturn(0);
86 }
87 
88 /*
89 spbas_allocate_data:
90    in case of block_data:
91        Allocate the data arrays alloc_icol and optionally alloc_val,
92        set appropriate pointers from icols and values;
93    in case of !block_data:
94        Allocate the arrays icols[i] and optionally values[i]
95 */
96 PetscErrorCode spbas_allocate_data(spbas_matrix * result)
97 {
98   PetscInt       i;
99   PetscInt       nnz   = result->nnz;
100   PetscInt       nrows = result->nrows;
101   PetscInt       r_nnz;
102   PetscErrorCode ierr;
103   PetscBool      do_values  = (result->values) ? PETSC_TRUE : PETSC_FALSE;
104   PetscBool      block_data = result->block_data;
105 
106   PetscFunctionBegin;
107   if (block_data) {
108     /* Allocate the column number array and point to it */
109     result->n_alloc_icol = nnz;
110 
111     ierr = PetscMalloc1(nnz, &result->alloc_icol);CHKERRQ(ierr);
112 
113     result->icols[0] = result->alloc_icol;
114     for (i=1; i<nrows; i++)  {
115       result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
116     }
117 
118     /* Allocate the value array and point to it */
119     if (do_values) {
120       result->n_alloc_val = nnz;
121 
122       ierr = PetscMalloc1(nnz, &result->alloc_val);CHKERRQ(ierr);
123 
124       result->values[0] = result->alloc_val;
125       for (i=1; i<nrows; i++) {
126         result->values[i] = result->values[i-1] + result->row_nnz[i-1];
127       }
128     }
129   } else {
130     for (i=0; i<nrows; i++)   {
131       r_nnz = result->row_nnz[i];
132       ierr  = PetscMalloc1(r_nnz, &result->icols[i]);CHKERRQ(ierr);
133     }
134     if (do_values) {
135       for (i=0; i<nrows; i++)  {
136         r_nnz = result->row_nnz[i];
137         ierr  = PetscMalloc1(r_nnz, &result->values[i]);CHKERRQ(ierr);
138       }
139     }
140   }
141   PetscFunctionReturn(0);
142 }
143 
144 /*
145   spbas_row_order_icol
146      determine if row i1 should come
147        + before row i2 in the sorted rows (return -1),
148        + after (return 1)
149        + is identical (return 0).
150 */
151 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
152 {
153   PetscInt j;
154   PetscInt nnz1    = irow_in[i1+1] - irow_in[i1];
155   PetscInt nnz2    = irow_in[i2+1] - irow_in[i2];
156   PetscInt * icol1 = &icol_in[irow_in[i1]];
157   PetscInt * icol2 = &icol_in[irow_in[i2]];
158 
159   if (nnz1<nnz2) return -1;
160   if (nnz1>nnz2) return 1;
161 
162   if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
163     for (j=0; j<nnz1; j++) {
164       if (icol1[j]< icol2[j]) return -1;
165       if (icol1[j]> icol2[j]) return 1;
166     }
167   } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
168     for (j=0; j<nnz1; j++) {
169       if (icol1[j]-i1< icol2[j]-i2) return -1;
170       if (icol1[j]-i1> icol2[j]-i2) return 1;
171     }
172   } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
173     for (j=1; j<nnz1; j++) {
174       if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) return -1;
175       if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) return 1;
176     }
177   }
178   return 0;
179 }
180 
181 /*
182   spbas_mergesort_icols:
183     return a sorting of the rows in which identical sparseness patterns are
184     next to each other
185 */
186 PetscErrorCode spbas_mergesort_icols(PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
187 {
188   PetscErrorCode ierr;
189   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
190   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
191   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
192   PetscInt       *ialloc;     /* Allocated arrays */
193   PetscInt       *iswap;      /* auxiliary pointers for swapping */
194   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
195   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
196 
197   PetscFunctionBegin;
198   ierr = PetscMalloc1(nrows,&ialloc);CHKERRQ(ierr);
199 
200   ihlp1 = ialloc;
201   ihlp2 = isort;
202 
203   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
204   for (istep=1; istep<nrows; istep*=2) {
205     /*
206       Combine sorted parts
207           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
208       of ihlp2 and vhlp2
209 
210       into one sorted part
211           istart:istart+2*istep-1
212       of ihlp1 and vhlp1
213     */
214     for (istart=0; istart<nrows; istart+=2*istep) {
215       /* Set counters and bound array part endings */
216       i1=istart;        i1end = i1+istep;  if (i1end>nrows) i1end=nrows;
217       i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) i2end=nrows;
218 
219       /* Merge the two array parts */
220       for (i=istart; i<i2end; i++)  {
221         if (i1<i1end && i2<i2end && spbas_row_order_icol(ihlp2[i1], ihlp2[i2], irow_in, icol_in, col_idx_type) < 0) {
222           ihlp1[i] = ihlp2[i1];
223           i1++;
224         } else if (i2<i2end) {
225           ihlp1[i] = ihlp2[i2];
226           i2++;
227         } else {
228           ihlp1[i] = ihlp2[i1];
229           i1++;
230         }
231       }
232     }
233 
234     /* Swap the two array sets */
235     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
236   }
237 
238   /* Copy one more time in case the sorted arrays are the temporary ones */
239   if (ihlp2 != isort) {
240     for (i=0; i<nrows; i++) isort[i] = ihlp2[i];
241   }
242   ierr = PetscFree(ialloc);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 
247 
248 /*
249   spbas_compress_pattern:
250      calculate a compressed sparseness pattern for a sparseness pattern
251      given in compressed row storage. The compressed sparseness pattern may
252      require (much) less memory.
253 */
254 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols, PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
255 {
256   PetscInt        nnz      = irow_in[nrows];
257   size_t          mem_orig = (nrows + nnz) * sizeof(PetscInt);
258   size_t          mem_compressed;
259   PetscErrorCode  ierr;
260   PetscInt        *isort;
261   PetscInt        *icols;
262   PetscInt        row_nnz;
263   PetscInt        *ipoint;
264   PetscBool       *used;
265   PetscInt        ptr;
266   PetscInt        i,j;
267   const PetscBool no_values = PETSC_FALSE;
268 
269   PetscFunctionBegin;
270   /* Allocate the structure of the new matrix */
271   B->nrows        = nrows;
272   B->ncols        = ncols;
273   B->nnz          = nnz;
274   B->col_idx_type = col_idx_type;
275   B->block_data   = PETSC_TRUE;
276 
277   ierr = spbas_allocate_pattern(B, no_values);CHKERRQ(ierr);
278 
279   /* When using an offset array, set it */
280   if (col_idx_type==SPBAS_OFFSET_ARRAY)  {
281     for (i=0; i<nrows; i++) B->icol0[i] = icol_in[irow_in[i]];
282   }
283 
284   /* Allocate the ordering for the rows */
285   ierr = PetscMalloc1(nrows,&isort);CHKERRQ(ierr);
286   ierr = PetscMalloc1(nrows,&ipoint);CHKERRQ(ierr);
287   ierr = PetscMalloc1(nrows,&used);CHKERRQ(ierr);
288 
289   /*  Initialize the sorting */
290   ierr = PetscMemzero((void*) used, nrows*sizeof(PetscBool));CHKERRQ(ierr);
291   for (i = 0; i<nrows; i++)  {
292     B->row_nnz[i] = irow_in[i+1]-irow_in[i];
293     isort[i]      = i;
294     ipoint[i]     = i;
295   }
296 
297   /* Sort the rows so that identical columns will be next to each other */
298   ierr = spbas_mergesort_icols(nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
299   ierr = PetscInfo(NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
300 
301   /* Replace identical rows with the first one in the list */
302   for (i=1; i<nrows; i++) {
303     if (spbas_row_order_icol(isort[i-1], isort[i], irow_in, icol_in, col_idx_type) == 0) {
304       ipoint[isort[i]] = ipoint[isort[i-1]];
305     }
306   }
307 
308   /* Collect the rows which are used*/
309   for (i=0; i<nrows; i++) used[ipoint[i]] = PETSC_TRUE;
310 
311   /* Calculate needed memory */
312   B->n_alloc_icol = 0;
313   for (i=0; i<nrows; i++)  {
314     if (used[i]) B->n_alloc_icol += B->row_nnz[i];
315   }
316   ierr = PetscMalloc1(B->n_alloc_icol,&B->alloc_icol);CHKERRQ(ierr);
317 
318   /* Fill in the diagonal offsets for the rows which store their own data */
319   ptr = 0;
320   for (i=0; i<B->nrows; i++) {
321     if (used[i]) {
322       B->icols[i] = &B->alloc_icol[ptr];
323       icols = &icol_in[irow_in[i]];
324       row_nnz = B->row_nnz[i];
325       if (col_idx_type == SPBAS_COLUMN_NUMBERS) {
326         for (j=0; j<row_nnz; j++) {
327           B->icols[i][j] = icols[j];
328         }
329       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
330         for (j=0; j<row_nnz; j++) {
331           B->icols[i][j] = icols[j]-i;
332         }
333       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
334         for (j=0; j<row_nnz; j++) {
335           B->icols[i][j] = icols[j]-icols[0];
336         }
337       }
338       ptr += B->row_nnz[i];
339     }
340   }
341 
342   /* Point to the right places for all data */
343   for (i=0; i<nrows; i++) {
344     B->icols[i] = B->icols[ipoint[i]];
345   }
346   ierr = PetscInfo(NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
347   ierr = PetscInfo1(NULL,"         (%g nonzeros per row)\n", (double) ((PetscReal) nnz / (PetscReal) nrows));CHKERRQ(ierr);
348 
349   ierr=PetscFree(isort);CHKERRQ(ierr);
350   ierr=PetscFree(used);CHKERRQ(ierr);
351   ierr=PetscFree(ipoint);CHKERRQ(ierr);
352 
353   mem_compressed = spbas_memory_requirement(*B);
354   *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/ (PetscReal) mem_orig;
355   PetscFunctionReturn(0);
356 }
357 
358 /*
359    spbas_incomplete_cholesky
360        Incomplete Cholesky decomposition
361 */
362 #include <../src/mat/impls/aij/seq/bas/spbas_cholesky.h>
363 
364 /*
365   spbas_delete : de-allocate the arrays owned by this matrix
366 */
367 PetscErrorCode spbas_delete(spbas_matrix matrix)
368 {
369   PetscInt       i;
370   PetscErrorCode ierr;
371 
372   PetscFunctionBegin;
373   if (matrix.block_data) {
374     ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr);
375     if (matrix.values) {ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
376   } else {
377     for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
378     ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
379     if (matrix.values) {
380       for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
381     }
382   }
383 
384   ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
385   ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
386   if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
387   ierr=PetscFree(matrix.values);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
391 /*
392 spbas_matrix_to_crs:
393    Convert an spbas_matrix to compessed row storage
394 */
395 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
396 {
397   PetscInt       nrows = matrix_A.nrows;
398   PetscInt       nnz   = matrix_A.nnz;
399   PetscInt       i,j,r_nnz,i0;
400   PetscInt       *irow;
401   PetscInt       *icol;
402   PetscInt       *icol_A;
403   MatScalar      *val;
404   PetscScalar    *val_A;
405   PetscInt       col_idx_type = matrix_A.col_idx_type;
406   PetscBool      do_values    = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
407   PetscErrorCode ierr;
408 
409   PetscFunctionBegin;
410   ierr      = PetscMalloc1(nrows+1, &irow);CHKERRQ(ierr);
411   ierr      = PetscMalloc1(nnz, &icol);CHKERRQ(ierr);
412   *icol_out = icol;
413   *irow_out = irow;
414   if (do_values) {
415     ierr     = PetscMalloc1(nnz, &val);CHKERRQ(ierr);
416     *val_out = val; *icol_out = icol; *irow_out=irow;
417   }
418 
419   irow[0]=0;
420   for (i=0; i<nrows; i++) {
421     r_nnz     = matrix_A.row_nnz[i];
422     i0        = irow[i];
423     irow[i+1] = i0 + r_nnz;
424     icol_A    = matrix_A.icols[i];
425 
426     if (do_values) {
427       val_A = matrix_A.values[i];
428       for (j=0; j<r_nnz; j++) {
429         icol[i0+j] = icol_A[j];
430         val[i0+j]  = val_A[j];
431       }
432     } else {
433       for (j=0; j<r_nnz; j++) icol[i0+j] = icol_A[j];
434     }
435 
436     if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
437       for (j=0; j<r_nnz; j++) icol[i0+j] += i;
438     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
439       i0 = matrix_A.icol0[i];
440       for (j=0; j<r_nnz; j++) icol[i0+j] += i0;
441     }
442   }
443   PetscFunctionReturn(0);
444 }
445 
446 
447 /*
448     spbas_transpose
449        return the transpose of a matrix
450 */
451 PetscErrorCode spbas_transpose(spbas_matrix in_matrix, spbas_matrix * result)
452 {
453   PetscInt       col_idx_type = in_matrix.col_idx_type;
454   PetscInt       nnz          = in_matrix.nnz;
455   PetscInt       ncols        = in_matrix.nrows;
456   PetscInt       nrows        = in_matrix.ncols;
457   PetscInt       i,j,k;
458   PetscInt       r_nnz;
459   PetscInt       *irow;
460   PetscInt       icol0 = 0;
461   PetscScalar    * val;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   /* Copy input values */
466   result->nrows        = nrows;
467   result->ncols        = ncols;
468   result->nnz          = nnz;
469   result->col_idx_type = SPBAS_COLUMN_NUMBERS;
470   result->block_data   = PETSC_TRUE;
471 
472   /* Allocate sparseness pattern */
473   ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
474 
475   /*  Count the number of nonzeros in each row */
476   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
477 
478   for (i=0; i<ncols; i++) {
479     r_nnz = in_matrix.row_nnz[i];
480     irow  = in_matrix.icols[i];
481     if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
482       for (j=0; j<r_nnz; j++) result->row_nnz[irow[j]]++;
483     } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
484       for (j=0; j<r_nnz; j++) result->row_nnz[i+irow[j]]++;
485     } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
486       icol0=in_matrix.icol0[i];
487       for (j=0; j<r_nnz; j++) result->row_nnz[icol0+irow[j]]++;
488     }
489   }
490 
491   /* Set the pointers to the data */
492   ierr = spbas_allocate_data(result);CHKERRQ(ierr);
493 
494   /* Reset the number of nonzeros in each row */
495   for (i = 0; i<nrows; i++) result->row_nnz[i] = 0;
496 
497   /* Fill the data arrays */
498   if (in_matrix.values) {
499     for (i=0; i<ncols; i++) {
500       r_nnz = in_matrix.row_nnz[i];
501       irow  = in_matrix.icols[i];
502       val   = in_matrix.values[i];
503 
504       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0 = 0;
505       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0 = i;
506       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0 = in_matrix.icol0[i];
507       for (j=0; j<r_nnz; j++)  {
508         k = icol0 + irow[j];
509         result->icols[k][result->row_nnz[k]]  = i;
510         result->values[k][result->row_nnz[k]] = val[j];
511         result->row_nnz[k]++;
512       }
513     }
514   } else {
515     for (i=0; i<ncols; i++) {
516       r_nnz = in_matrix.row_nnz[i];
517       irow  = in_matrix.icols[i];
518 
519       if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   icol0=0;
520       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) icol0=i;
521       else if (col_idx_type == SPBAS_OFFSET_ARRAY)     icol0=in_matrix.icol0[i];
522 
523       for (j=0; j<r_nnz; j++) {
524         k = icol0 + irow[j];
525         result->icols[k][result->row_nnz[k]] = i;
526         result->row_nnz[k]++;
527       }
528     }
529   }
530   PetscFunctionReturn(0);
531 }
532 
533 /*
534    spbas_mergesort
535 
536       mergesort for an array of integers and an array of associated
537       reals
538 
539       on output, icol[0..nnz-1] is increasing;
540                   val[0..nnz-1] has undergone the same permutation as icol
541 
542       NB: val may be NULL: in that case, only the integers are sorted
543 
544 */
545 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
546 {
547   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
548   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
549   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
550   PetscInt       *ialloc;     /* Allocated arrays */
551   PetscScalar    *valloc=NULL;
552   PetscInt       *iswap;      /* auxiliary pointers for swapping */
553   PetscScalar    *vswap;
554   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
555   PetscScalar    *vhlp1=NULL;  /* (arrays under construction) */
556   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
557   PetscScalar    *vhlp2=NULL;
558   PetscErrorCode ierr;
559 
560   ierr  = PetscMalloc1(nnz,&ialloc);CHKERRQ(ierr);
561   ihlp1 = ialloc;
562   ihlp2 = icol;
563 
564   if (val) {
565     ierr  = PetscMalloc1(nnz,&valloc);CHKERRQ(ierr);
566     vhlp1 = valloc;
567     vhlp2 = val;
568   }
569 
570 
571   /* Sorted array chunks are first 1 long, and increase until they are the complete array */
572   for (istep=1; istep<nnz; istep*=2) {
573     /*
574       Combine sorted parts
575           istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
576       of ihlp2 and vhlp2
577 
578       into one sorted part
579           istart:istart+2*istep-1
580       of ihlp1 and vhlp1
581     */
582     for (istart=0; istart<nnz; istart+=2*istep) {
583       /* Set counters and bound array part endings */
584       i1=istart;        i1end = i1+istep;  if (i1end>nnz) i1end=nnz;
585       i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) i2end=nnz;
586 
587       /* Merge the two array parts */
588       if (val) {
589         for (i=istart; i<i2end; i++) {
590           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
591             ihlp1[i] = ihlp2[i1];
592             vhlp1[i] = vhlp2[i1];
593             i1++;
594           } else if (i2<i2end) {
595             ihlp1[i] = ihlp2[i2];
596             vhlp1[i] = vhlp2[i2];
597             i2++;
598           } else {
599             ihlp1[i] = ihlp2[i1];
600             vhlp1[i] = vhlp2[i1];
601             i1++;
602           }
603         }
604       } else {
605         for (i=istart; i<i2end; i++) {
606           if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
607             ihlp1[i] = ihlp2[i1];
608             i1++;
609           } else if (i2<i2end) {
610             ihlp1[i] = ihlp2[i2];
611             i2++;
612           } else {
613             ihlp1[i] = ihlp2[i1];
614             i1++;
615           }
616         }
617       }
618     }
619 
620     /* Swap the two array sets */
621     iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
622     vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
623   }
624 
625   /* Copy one more time in case the sorted arrays are the temporary ones */
626   if (ihlp2 != icol) {
627     for (i=0; i<nnz; i++) icol[i] = ihlp2[i];
628     if (val) {
629       for (i=0; i<nnz; i++) val[i] = vhlp2[i];
630     }
631   }
632 
633   ierr = PetscFree(ialloc);CHKERRQ(ierr);
634   if (val) {ierr = PetscFree(valloc);CHKERRQ(ierr);}
635   PetscFunctionReturn(0);
636 }
637 
638 /*
639   spbas_apply_reordering_rows:
640     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
641 */
642 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
643 {
644   PetscInt       i,j,ip;
645   PetscInt       nrows=matrix_A->nrows;
646   PetscInt       * row_nnz;
647   PetscInt       **icols;
648   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
649   PetscScalar    **vals    = NULL;
650   PetscErrorCode ierr;
651 
652   PetscFunctionBegin;
653   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
654 
655   if (do_values) {
656     ierr = PetscMalloc1(nrows, &vals);CHKERRQ(ierr);
657   }
658   ierr = PetscMalloc1(nrows, &row_nnz);CHKERRQ(ierr);
659   ierr = PetscMalloc1(nrows, &icols);CHKERRQ(ierr);
660 
661   for (i=0; i<nrows; i++) {
662     ip = permutation[i];
663     if (do_values) vals[i] = matrix_A->values[ip];
664     icols[i]   = matrix_A->icols[ip];
665     row_nnz[i] = matrix_A->row_nnz[ip];
666     for (j=0; j<row_nnz[i]; j++) icols[i][j] += ip-i;
667   }
668 
669   if (do_values) { ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
670   ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
671   ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
672 
673   if (do_values) matrix_A->values = vals;
674   matrix_A->icols   = icols;
675   matrix_A->row_nnz = row_nnz;
676   PetscFunctionReturn(0);
677 }
678 
679 
680 /*
681   spbas_apply_reordering_cols:
682     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
683 */
684 PetscErrorCode spbas_apply_reordering_cols(spbas_matrix *matrix_A,const PetscInt *permutation)
685 {
686   PetscInt       i,j;
687   PetscInt       nrows=matrix_A->nrows;
688   PetscInt       row_nnz;
689   PetscInt       *icols;
690   PetscBool      do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
691   PetscScalar    *vals     = NULL;
692   PetscErrorCode ierr;
693 
694   PetscFunctionBegin;
695   if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "must have diagonal offsets in pattern\n");
696 
697   for (i=0; i<nrows; i++) {
698     icols   = matrix_A->icols[i];
699     row_nnz = matrix_A->row_nnz[i];
700     if (do_values) vals = matrix_A->values[i];
701 
702     for (j=0; j<row_nnz; j++) {
703       icols[j] = permutation[i+icols[j]]-i;
704     }
705     ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
706   }
707   PetscFunctionReturn(0);
708 }
709 
710 /*
711   spbas_apply_reordering:
712     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
713 */
714 PetscErrorCode spbas_apply_reordering(spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
715 {
716   PetscErrorCode ierr;
717 
718   PetscFunctionBegin;
719   ierr = spbas_apply_reordering_rows(matrix_A, inv_perm);CHKERRQ(ierr);
720   ierr = spbas_apply_reordering_cols(matrix_A, permutation);CHKERRQ(ierr);
721   PetscFunctionReturn(0);
722 }
723 
724 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
725 {
726   spbas_matrix   retval;
727   PetscInt       i, j, i0, r_nnz;
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731   /* Copy input values */
732   retval.nrows = nrows;
733   retval.ncols = ncols;
734   retval.nnz   = ai[nrows];
735 
736   retval.block_data   = PETSC_TRUE;
737   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
738 
739   /* Allocate output matrix */
740   ierr = spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
741   for (i=0; i<nrows; i++) retval.row_nnz[i] = ai[i+1]-ai[i];
742   ierr = spbas_allocate_data(&retval);CHKERRQ(ierr);
743   /* Copy the structure */
744   for (i = 0; i<retval.nrows; i++)  {
745     i0    = ai[i];
746     r_nnz = ai[i+1]-i0;
747 
748     for (j=0; j<r_nnz; j++) {
749       retval.icols[i][j] = aj[i0+j]-i;
750     }
751   }
752   *result = retval;
753   PetscFunctionReturn(0);
754 }
755 
756 
757 /*
758    spbas_mark_row_power:
759       Mark the columns in row 'row' which are nonzero in
760           matrix^2log(marker).
761 */
762 PetscErrorCode spbas_mark_row_power(PetscInt *iwork,             /* marker-vector */
763                                     PetscInt row,                /* row for which the columns are marked */
764                                     spbas_matrix * in_matrix,    /* matrix for which the power is being  calculated */
765                                     PetscInt marker,             /* marker-value: 2^power */
766                                     PetscInt minmrk,             /* lower bound for marked points */
767                                     PetscInt maxmrk)             /* upper bound for marked points */
768 {
769   PetscErrorCode ierr;
770   PetscInt       i,j, nnz;
771 
772   PetscFunctionBegin;
773   nnz = in_matrix->row_nnz[row];
774 
775   /* For higher powers, call this function recursively */
776   if (marker>1) {
777     for (i=0; i<nnz; i++) {
778       j = row + in_matrix->icols[row][i];
779       if (minmrk<=j && j<maxmrk && iwork[j] < marker) {
780         ierr      = spbas_mark_row_power(iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
781         iwork[j] |= marker;
782       }
783     }
784   } else {
785     /*  Mark the columns reached */
786     for (i=0; i<nnz; i++)  {
787       j = row + in_matrix->icols[row][i];
788       if (minmrk<=j && j<maxmrk) iwork[j] |= 1;
789     }
790   }
791   PetscFunctionReturn(0);
792 }
793 
794 
795 /*
796    spbas_power
797       Calculate sparseness patterns for incomplete Cholesky decompositions
798       of a given order: (almost) all nonzeros of the matrix^(order+1) which
799       are inside the band width are found and stored in the output sparseness
800       pattern.
801 */
802 PetscErrorCode spbas_power(spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
803 {
804   spbas_matrix   retval;
805   PetscInt       nrows = in_matrix.nrows;
806   PetscInt       ncols = in_matrix.ncols;
807   PetscInt       i, j, kend;
808   PetscInt       nnz, inz;
809   PetscInt       *iwork;
810   PetscInt       marker;
811   PetscInt       maxmrk=0;
812   PetscErrorCode ierr;
813 
814   PetscFunctionBegin;
815   if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS,"must have diagonal offsets in pattern\n");
816   if (ncols != nrows) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Dimension error\n");
817   if (in_matrix.values) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Input array must be sparseness pattern (no values)");
818   if (power<=0) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Power must be 1 or up");
819 
820   /* Copy input values*/
821   retval.nrows        = ncols;
822   retval.ncols        = nrows;
823   retval.nnz          = 0;
824   retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
825   retval.block_data   = PETSC_FALSE;
826 
827   /* Allocate sparseness pattern */
828   ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
829 
830   /* Allocate marker array */
831   ierr = PetscMalloc1(nrows, &iwork);CHKERRQ(ierr);
832 
833   /* Erase the pattern for this row */
834   ierr = PetscMemzero((void*) iwork, retval.nrows*sizeof(PetscInt));CHKERRQ(ierr);
835 
836   /* Calculate marker values */
837   marker = 1; for (i=1; i<power; i++) marker*=2;
838 
839   for (i=0; i<nrows; i++)  {
840     /* Calculate the pattern for each row */
841 
842     nnz  = in_matrix.row_nnz[i];
843     kend = i+in_matrix.icols[i][nnz-1];
844     if (maxmrk<=kend) maxmrk=kend+1;
845     ierr = spbas_mark_row_power(iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
846 
847     /* Count the columns*/
848     nnz = 0;
849     for (j=i; j<maxmrk; j++) nnz+= (iwork[j]!=0);
850 
851     /* Allocate the column indices */
852     retval.row_nnz[i] = nnz;
853     ierr = PetscMalloc1(nnz,&retval.icols[i]);CHKERRQ(ierr);
854 
855     /* Administrate the column indices */
856     inz = 0;
857     for (j=i; j<maxmrk; j++) {
858       if (iwork[j]) {
859         retval.icols[i][inz] = j-i;
860         inz++;
861         iwork[j]=0;
862       }
863     }
864     retval.nnz += nnz;
865   };
866   ierr    = PetscFree(iwork);CHKERRQ(ierr);
867   *result = retval;
868   PetscFunctionReturn(0);
869 }
870 
871 
872 
873 /*
874    spbas_keep_upper:
875       remove the lower part of the matrix: keep the upper part
876 */
877 PetscErrorCode spbas_keep_upper(spbas_matrix * inout_matrix)
878 {
879   PetscInt i, j;
880   PetscInt jstart;
881 
882   PetscFunctionBegin;
883   if (inout_matrix->block_data) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Not yet for block data matrices\n");
884   for (i=0; i<inout_matrix->nrows; i++)  {
885     for (jstart=0; (jstart<inout_matrix->row_nnz[i]) && (inout_matrix->icols[i][jstart]<0); jstart++) {}
886     if (jstart>0) {
887       for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
888         inout_matrix->icols[i][j] = inout_matrix->icols[i][j+jstart];
889       }
890 
891       if (inout_matrix->values) {
892         for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++) {
893           inout_matrix->values[i][j] = inout_matrix->values[i][j+jstart];
894         }
895       }
896 
897       inout_matrix->row_nnz[i] -= jstart;
898 
899       inout_matrix->icols[i] = (PetscInt*) realloc((void*) inout_matrix->icols[i], inout_matrix->row_nnz[i]*sizeof(PetscInt));
900 
901       if (inout_matrix->values) {
902         inout_matrix->values[i] = (PetscScalar*) realloc((void*) inout_matrix->values[i], inout_matrix->row_nnz[i]*sizeof(PetscScalar));
903       }
904       inout_matrix->nnz -= jstart;
905     }
906   }
907   PetscFunctionReturn(0);
908 }
909 
910