xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 9ccc4a9b96fc3663f3953ac586c9c547e88efdff)
1 #include "petsc.h"
2 #include "../src/mat/impls/aij/seq/aij.h"
3 #include "spbas.h"
4 
5 
6 /*
7   spbas_memory_requirement:
8     Calculate the number of bytes needed to store tha matrix
9 */
10 #undef __FUNCT__
11 #define __FUNCT__ "spbas_memory_requirement"
12 long int spbas_memory_requirement( spbas_matrix matrix)
13 {
14    long int memreq = 6 * sizeof(PetscInt)         + /* nrows, ncols, nnz, n_alloc_icol,
15 						       n_alloc_val, col_idx_type */
16      sizeof(PetscTruth)                 + /* block_data */
17      sizeof(PetscScalar**)        + /* values */
18      sizeof(PetscScalar*)         + /* alloc_val */
19      2 * sizeof(int**)            + /* icols, icols0 */
20      2 * sizeof(PetscInt*)             + /* row_nnz, alloc_icol */
21      matrix.nrows * sizeof(PetscInt)   + /* row_nnz[*] */
22      matrix.nrows * sizeof(PetscInt*);   /* icols[*] */
23 
24    /* icol0[*] */
25    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY)
26       { memreq += matrix.nrows * sizeof(PetscInt); }
27 
28    /* icols[*][*] */
29    if (matrix.block_data)
30       { memreq += matrix.n_alloc_icol * sizeof(PetscInt); }
31    else
32       { memreq += matrix.nnz * sizeof(PetscInt); }
33 
34    if (matrix.values)
35    {
36      memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
37      /* values[*][*] */
38        if (matrix.block_data)
39           { memreq += matrix.n_alloc_val  * sizeof(PetscScalar); }
40        else
41           { memreq += matrix.nnz * sizeof(PetscScalar); }
42    }
43 
44    return memreq;
45 }
46 
47 /*
48   spbas_allocate_pattern:
49     allocate the pattern arrays row_nnz, icols and optionally values
50 */
51 #undef __FUNCT__
52 #define __FUNCT__ "spbas_allocate_pattern"
53 PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values)
54 {
55    PetscErrorCode ierr;
56    PetscInt nrows = result->nrows;
57    PetscInt col_idx_type = result->col_idx_type;
58 
59    PetscFunctionBegin;
60    /* Allocate sparseness pattern */
61    ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr);
62    ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr);
63 
64    /* If offsets are given wrt an array, create array */
65    if (col_idx_type == SPBAS_OFFSET_ARRAY)
66    {
67       ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr);
68    }
69    else
70    {
71       result->icol0 = PETSC_NULL;
72    }
73 
74    /* If values are given, allocate values array */
75    if (do_values)
76    {
77       ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr);
78    }
79    else
80    {
81       result->values = PETSC_NULL;
82    }
83 
84    PetscFunctionReturn(0);
85 }
86 
87 /*
88 spbas_allocate_data:
89    in case of block_data:
90        Allocate the data arrays alloc_icol and optionally alloc_val,
91        set appropriate pointers from icols and values;
92    in case of !block_data:
93        Allocate the arrays icols[i] and optionally values[i]
94 */
95 #undef __FUNCT__
96 #define __FUNCT__ "spbas_allocate_data"
97 PetscErrorCode spbas_allocate_data( spbas_matrix * result)
98 {
99    PetscInt i;
100    PetscInt nnz   = result->nnz;
101    PetscInt nrows = result->nrows;
102    PetscInt r_nnz;
103    PetscErrorCode ierr;
104    PetscTruth do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE;
105    PetscTruth block_data = result->block_data;
106 
107    PetscFunctionBegin;
108    if (block_data)
109    {
110      /* Allocate the column number array and point to it */
111       result->n_alloc_icol = nnz;
112       ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr);
113       result->icols[0]= result->alloc_icol;
114       for (i=1; i<nrows; i++)
115       {
116           result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
117       }
118 
119       /* Allocate the value array and point to it */
120       if (do_values)
121       {
122          result->n_alloc_val= nnz;
123          ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
124          result->values[0]= result->alloc_val;
125          for (i=1; i<nrows; i++)
126          {
127              result->values[i] = result->values[i-1] + result->row_nnz[i-1];
128          }
129       }
130    }
131    else
132    {
133       for (i=0; i<nrows; i++)
134       {
135          r_nnz = result->row_nnz[i];
136          ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr);
137       }
138       if (do_values)
139       {
140          for (i=0; i<nrows; i++)
141          {
142             r_nnz = result->row_nnz[i];
143             ierr = PetscMalloc(r_nnz*sizeof(PetscScalar),
144                                &result->values[i]);CHKERRQ(ierr);
145          }
146       }
147    }
148 
149    PetscFunctionReturn(0);
150 }
151 
152 /*
153   spbas_row_order_icol
154      determine if row i1 should come
155        + before row i2 in the sorted rows (return -1),
156        + after (return 1)
157        + is identical (return 0).
158 */
159 #undef __FUNCT__
160 #define __FUNCT__ "spbas_row_order_icol"
161 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
162 {
163    PetscInt j;
164    PetscInt nnz1 = irow_in[i1+1] - irow_in[i1];
165    PetscInt nnz2 = irow_in[i2+1] - irow_in[i2];
166 
167    PetscInt * icol1 = &icol_in[irow_in[i1]];
168    PetscInt * icol2 = &icol_in[irow_in[i2]];
169 
170    if (nnz1<nnz2) {return -1;}
171    if (nnz1>nnz2) {return 1;}
172 
173    if (col_idx_type == SPBAS_COLUMN_NUMBERS)
174    {
175       for (j=0; j<nnz1; j++)
176       {
177          if (icol1[j]< icol2[j]) {return -1;}
178          if (icol1[j]> icol2[j]) {return 1;}
179       }
180    }
181    else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
182    {
183       for (j=0; j<nnz1; j++)
184       {
185          if (icol1[j]-i1< icol2[j]-i2) {return -1;}
186          if (icol1[j]-i1> icol2[j]-i2) {return 1;}
187       }
188    }
189    else if (col_idx_type == SPBAS_OFFSET_ARRAY)
190    {
191       for (j=1; j<nnz1; j++)
192       {
193          if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;}
194          if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;}
195       }
196    }
197    return 0;
198 }
199 
200 /*
201   spbas_mergesort_icols:
202     return a sorting of the rows in which identical sparseness patterns are
203     next to each other
204 */
205 #undef __FUNCT__
206 #define __FUNCT__ "spbas_mergesort_icols"
207 PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
208 {
209    PetscErrorCode ierr;
210    PetscInt istep;       /* Chunk-sizes of already sorted parts of arrays */
211 
212    PetscInt i, i1, i2;   /* Loop counters for (partly) sorted arrays */
213 
214    PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
215 
216    PetscInt *ialloc;     /* Allocated arrays */
217    PetscInt *iswap;      /* auxiliary pointers for swapping */
218    PetscInt *ihlp1;      /* Pointers to new version of arrays, */
219    PetscInt *ihlp2;      /* Pointers to previous version of arrays, */
220 
221    PetscFunctionBegin;
222 
223    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
224 
225    ihlp1 = ialloc;
226    ihlp2 = isort;
227 
228    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
229    for (istep=1; istep<nrows; istep*=2)
230    {
231      /*
232        Combine sorted parts
233             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
234        of ihlp2 and vhlp2
235 
236        into one sorted part
237             istart:istart+2*istep-1
238        of ihlp1 and vhlp1
239      */
240       for (istart=0; istart<nrows; istart+=2*istep)
241       {
242 
243 	/* Set counters and bound array part endings */
244          i1=istart;        i1end = i1+istep;  if (i1end>nrows) {i1end=nrows;}
245          i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) {i2end=nrows;}
246 
247          /* Merge the two array parts */
248          for (i=istart; i<i2end; i++)
249          {
250             if (i1<i1end && i2<i2end &&
251                 spbas_row_order_icol( ihlp2[i1], ihlp2[i2],
252                              irow_in, icol_in, col_idx_type) < 0)
253             {
254                 ihlp1[i] = ihlp2[i1];
255                 i1++;
256             }
257             else if (i2<i2end )
258             {
259                 ihlp1[i] = ihlp2[i2];
260                 i2++;
261             }
262             else
263             {
264                 ihlp1[i] = ihlp2[i1];
265                 i1++;
266             }
267          }
268       }
269 
270       /* Swap the two array sets */
271       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
272    }
273 
274    /* Copy one more time in case the sorted arrays are the temporary ones */
275    if (ihlp2 != isort)
276    {
277       for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; }
278    }
279 
280    ierr = PetscFree(ialloc);CHKERRQ(ierr);
281    PetscFunctionReturn(0);
282 }
283 
284 
285 
286 /*
287   spbas_compress_pattern:
288      calculate a compressed sparseness pattern for a sparseness pattern
289      given in compressed row storage. The compressed sparseness pattern may
290      require (much) less memory.
291 */
292 #undef __FUNCT__
293 #define __FUNCT__ "spbas_compress_pattern"
294 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols,
295 				      PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
296 {
297    PetscInt nnz = irow_in[nrows];
298    long int mem_orig = (nrows + nnz) * sizeof(PetscInt);
299    long int mem_compressed;
300    PetscErrorCode ierr;
301    PetscInt *isort;
302    PetscInt *icols;
303    PetscInt row_nnz;
304    PetscInt *ipoint;
305    PetscTruth *used;
306    PetscInt ptr;
307    PetscInt i,j;
308 
309    const PetscTruth no_values = PETSC_FALSE;
310 
311    PetscFunctionBegin;
312 
313    /* Allocate the structure of the new matrix */
314    B->nrows = nrows;
315    B->ncols = ncols;
316    B->nnz   = nnz;
317    B->col_idx_type= col_idx_type;
318    B->block_data = PETSC_TRUE;
319    ierr = spbas_allocate_pattern( B, no_values);CHKERRQ(ierr);
320 
321    /* When using an offset array, set it */
322    if (col_idx_type==SPBAS_OFFSET_ARRAY)
323    {
324       for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];}
325    }
326 
327    /* Allocate the ordering for the rows */
328    ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort);CHKERRQ(ierr);
329    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint);CHKERRQ(ierr);
330    ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used);CHKERRQ(ierr);
331 
332    /*  Initialize the sorting */
333    memset((void*) used, 0, nrows*sizeof(PetscTruth));
334    for (i = 0; i<nrows; i++)
335    {
336       B->row_nnz[i] = irow_in[i+1]-irow_in[i];
337       isort[i] = i;
338       ipoint[i]= i;
339    }
340 
341    /* Sort the rows so that identical columns will be next to each other */
342    ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
343    ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
344 
345    /* Replace identical rows with the first one in the list */
346    for (i=1; i<nrows; i++)
347    {
348       if (spbas_row_order_icol(isort[i-1], isort[i],
349                       irow_in, icol_in, col_idx_type) == 0)
350       {
351          ipoint[isort[i]] = ipoint[isort[i-1]];
352       }
353    }
354 
355    /* Collect the rows which are used*/
356    for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;}
357 
358    /* Calculate needed memory */
359    B->n_alloc_icol = 0;
360    for (i=0; i<nrows; i++)
361    {
362       if (used[i]) {B->n_alloc_icol += B->row_nnz[i];}
363    }
364 
365    ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);CHKERRQ(ierr);
366 
367    /* Fill in the diagonal offsets for the rows which store their own data */
368    ptr = 0;
369    for (i=0; i<B->nrows; i++)
370    {
371       if (used[i])
372       {
373          B->icols[i] = &B->alloc_icol[ptr];
374          icols = &icol_in[irow_in[i]];
375          row_nnz = B->row_nnz[i];
376          if (col_idx_type == SPBAS_COLUMN_NUMBERS)
377          {
378             for (j=0; j<row_nnz; j++)
379             {
380                B->icols[i][j] = icols[j];
381             }
382          }
383          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
384          {
385             for (j=0; j<row_nnz; j++)
386             {
387                B->icols[i][j] = icols[j]-i;
388             }
389          }
390          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
391          {
392             for (j=0; j<row_nnz; j++)
393             {
394                B->icols[i][j] = icols[j]-icols[0];
395             }
396          }
397          ptr += B->row_nnz[i];
398       }
399    }
400 
401    /* Point to the right places for all data */
402    for (i=0; i<nrows; i++)
403    {
404       B->icols[i] = B->icols[ipoint[i]];
405    }
406    ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
407    ierr = PetscInfo1(PETSC_NULL,"         (%G nonzeros per row)\n",  (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr);
408 
409    ierr=PetscFree(isort);CHKERRQ(ierr);
410    ierr=PetscFree(used);CHKERRQ(ierr);
411    ierr=PetscFree(ipoint);CHKERRQ(ierr);
412 
413    mem_compressed = spbas_memory_requirement( *B );
414    *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/
415                             (PetscReal) mem_orig;
416    PetscFunctionReturn(0);
417 }
418 
419 /*
420    spbas_incomplete_cholesky
421        Incomplete Cholesky decomposition
422 */
423 #include "spbas_cholesky.h"
424 
425 /*
426   spbas_delete : de-allocate the arrays owned by this matrix
427 */
428 #undef __FUNCT__
429 #define __FUNCT__ "spbas_delete"
430 PetscErrorCode spbas_delete(spbas_matrix matrix)
431 {
432    PetscInt       i;
433    PetscErrorCode ierr;
434 
435    PetscFunctionBegin;
436    if (matrix.block_data) {
437       ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr)
438       if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
439    } else  {
440      for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
441      ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
442      if (matrix.values) {
443          for (i=0; i<matrix.nrows; i++) { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
444       }
445    }
446 
447    ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
448    ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
449    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY) {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
450    if (matrix.values)  {ierr=PetscFree(matrix.values);CHKERRQ(ierr);}
451    PetscFunctionReturn(0);
452 }
453 
454 /*
455 spbas_matrix_to_crs:
456    Convert an spbas_matrix to compessed row storage
457 */
458 #undef __FUNCT__
459 #define __FUNCT__ "spbas_matrix_to_crs"
460 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
461 {
462    PetscInt       nrows = matrix_A.nrows;
463    PetscInt       nnz   = matrix_A.nnz;
464    PetscInt       i,j,r_nnz,i0;
465    PetscInt       *irow;
466    PetscInt       *icol;
467    PetscInt       *icol_A;
468    MatScalar      *val;
469    PetscScalar    *val_A;
470    PetscInt       col_idx_type = matrix_A.col_idx_type;
471    PetscTruth     do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
472    PetscErrorCode ierr;
473 
474    PetscFunctionBegin;
475    ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow);CHKERRQ(ierr);
476    ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol);CHKERRQ(ierr);
477    *icol_out = icol;
478    *irow_out=irow;
479    if (do_values)  {
480       ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val);CHKERRQ(ierr);
481       *val_out = val; *icol_out = icol; *irow_out=irow;
482    }
483 
484    irow[0]=0;
485    for (i=0; i<nrows; i++) {
486       r_nnz = matrix_A.row_nnz[i];
487       i0 = irow[i];
488       irow[i+1] = i0 + r_nnz;
489       icol_A = matrix_A.icols[i];
490 
491       if (do_values) {
492           val_A = matrix_A.values[i];
493           for (j=0; j<r_nnz; j++) {
494              icol[i0+j] = icol_A[j];
495              val[i0+j]  = val_A[j];
496           }
497       } else {
498           for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; }
499       }
500 
501       if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {
502          for (j=0; j<r_nnz; j++) { icol[i0+j] += i; }
503       }
504       else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
505          i0 = matrix_A.icol0[i];
506          for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; }
507       }
508    }
509    PetscFunctionReturn(0);
510 }
511 
512 
513 /*
514     spbas_transpose
515        return the transpose of a matrix
516 */
517 #undef __FUNCT__
518 #define __FUNCT__ "spbas_transpose"
519 PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result)
520 {
521    PetscInt       col_idx_type = in_matrix.col_idx_type;
522    PetscInt       nnz   = in_matrix.nnz;
523    PetscInt       ncols = in_matrix.nrows;
524    PetscInt       nrows = in_matrix.ncols;
525    PetscInt       i,j,k;
526    PetscInt       r_nnz;
527    PetscInt       *irow;
528    PetscInt       icol0;
529    PetscScalar    * val;
530    PetscErrorCode ierr;
531    PetscFunctionBegin;
532 
533    /* Copy input values */
534    result->nrows        = nrows;
535    result->ncols        = ncols;
536    result->nnz          = nnz;
537    result->col_idx_type = SPBAS_COLUMN_NUMBERS;
538    result->block_data   = PETSC_TRUE;
539 
540    /* Allocate sparseness pattern */
541    ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
542 
543    /*  Count the number of nonzeros in each row */
544    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
545 
546    for (i=0; i<ncols; i++) {
547       r_nnz = in_matrix.row_nnz[i];
548       irow  = in_matrix.icols[i];
549       if (col_idx_type == SPBAS_COLUMN_NUMBERS)  {
550          for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; }
551       } else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)  {
552          for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; }
553       } else if (col_idx_type == SPBAS_OFFSET_ARRAY) {
554          icol0=in_matrix.icol0[i];
555          for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; }
556       }
557    }
558 
559    /* Set the pointers to the data */
560    ierr = spbas_allocate_data(result);CHKERRQ(ierr);
561 
562    /* Reset the number of nonzeros in each row */
563    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
564 
565    /* Fill the data arrays */
566    if (in_matrix.values) {
567       for (i=0; i<ncols; i++) {
568          r_nnz = in_matrix.row_nnz[i];
569          irow  = in_matrix.icols[i];
570          val   = in_matrix.values[i];
571 
572          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
573          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
574          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
575                  {icol0=in_matrix.icol0[i];}
576          for (j=0; j<r_nnz; j++)  {
577             k = icol0 + irow[j];
578             result->icols[k][result->row_nnz[k]]  = i;
579             result->values[k][result->row_nnz[k]] = val[j];
580             result->row_nnz[k]++;
581          }
582       }
583    }  else {
584       for (i=0; i<ncols; i++) {
585          r_nnz = in_matrix.row_nnz[i];
586          irow  = in_matrix.icols[i];
587 
588          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
589          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
590          else if (col_idx_type == SPBAS_OFFSET_ARRAY)     {icol0=in_matrix.icol0[i];}
591 
592          for (j=0; j<r_nnz; j++) {
593             k = icol0 + irow[j];
594             result->icols[k][result->row_nnz[k]]  = i;
595             result->row_nnz[k]++;
596          }
597       }
598    }
599    PetscFunctionReturn(0);
600 }
601 
602 /*
603    spbas_mergesort
604 
605       mergesort for an array of intergers and an array of associated
606       reals
607 
608       on output, icol[0..nnz-1] is increasing;
609                   val[0..nnz-1] has undergone the same permutation as icol
610 
611       NB: val may be PETSC_NULL: in that case, only the integers are sorted
612 
613 */
614 #undef __FUNCT__
615 #define __FUNCT__ "spbas_mergesort"
616 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
617 {
618   PetscInt       istep;       /* Chunk-sizes of already sorted parts of arrays */
619   PetscInt       i, i1, i2;   /* Loop counters for (partly) sorted arrays */
620   PetscInt       istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
621   PetscInt       *ialloc;     /* Allocated arrays */
622   PetscScalar    *valloc=PETSC_NULL;
623   PetscInt       *iswap;      /* auxiliary pointers for swapping */
624   PetscScalar    *vswap;
625   PetscInt       *ihlp1;      /* Pointers to new version of arrays, */
626   PetscScalar    *vhlp1=PETSC_NULL;  /* (arrays under construction) */
627   PetscInt       *ihlp2;      /* Pointers to previous version of arrays, */
628   PetscScalar    *vhlp2=PETSC_NULL;
629   PetscErrorCode ierr;
630 
631    ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
632    ihlp1 = ialloc;
633    ihlp2 = icol;
634 
635    if (val) {
636       ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc);CHKERRQ(ierr);
637       vhlp1 = valloc;
638       vhlp2 = val;
639    }
640 
641 
642    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
643    for (istep=1; istep<nnz; istep*=2) {
644      /*
645        Combine sorted parts
646             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
647        of ihlp2 and vhlp2
648 
649        into one sorted part
650             istart:istart+2*istep-1
651        of ihlp1 and vhlp1
652      */
653       for (istart=0; istart<nnz; istart+=2*istep) {
654 
655 	/* Set counters and bound array part endings */
656          i1=istart;        i1end = i1+istep;  if (i1end>nnz) {i1end=nnz;}
657          i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) {i2end=nnz;}
658 
659          /* Merge the two array parts */
660          if (val) {
661             for (i=istart; i<i2end; i++) {
662                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2]) {
663                    ihlp1[i] = ihlp2[i1];
664                    vhlp1[i] = vhlp2[i1];
665                    i1++;
666                } else if (i2<i2end ){
667                    ihlp1[i] = ihlp2[i2];
668                    vhlp1[i] = vhlp2[i2];
669                    i2++;
670                } else {
671                    ihlp1[i] = ihlp2[i1];
672                    vhlp1[i] = vhlp2[i1];
673                    i1++;
674                }
675             }
676          } else {
677             for (i=istart; i<i2end; i++)
678             {
679                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2])
680                {
681                    ihlp1[i] = ihlp2[i1];
682                    i1++;
683                }
684                else if (i2<i2end )
685                {
686                    ihlp1[i] = ihlp2[i2];
687                    i2++;
688                }
689                else
690                {
691                    ihlp1[i] = ihlp2[i1];
692                    i1++;
693                }
694             }
695          }
696       }
697 
698       /* Swap the two array sets */
699       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
700       vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
701    }
702 
703    /* Copy one more time in case the sorted arrays are the temporary ones */
704    if (ihlp2 != icol)
705    {
706       for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; }
707       if (val)
708       {
709          for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; }
710       }
711    }
712 
713    ierr = PetscFree(ialloc);CHKERRQ(ierr);
714    if(val){ierr = PetscFree(valloc);CHKERRQ(ierr);}
715    PetscFunctionReturn(0);
716 }
717 
718 /*
719   spbas_apply_reordering_rows:
720     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
721 */
722 #undef __FUNCT__
723 #define __FUNCT__ "spbas_apply_reordering_rows"
724 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
725 {
726    PetscInt i,j,ip;
727    PetscInt nrows=matrix_A->nrows;
728    PetscInt * row_nnz;
729    PetscInt **icols;
730    PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
731    PetscScalar **vals=PETSC_NULL;
732    PetscErrorCode ierr;
733 
734    PetscFunctionBegin;
735    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS)
736    {
737       SETERRQ( PETSC_ERR_SUP_SYS,
738                "must have diagonal offsets in pattern\n");
739    }
740 
741    if (do_values)
742    {
743      ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals);CHKERRQ(ierr);
744    }
745    ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz);CHKERRQ(ierr);
746    ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols);CHKERRQ(ierr);
747 
748    for (i=0; i<nrows;i++)
749    {
750       ip = permutation[i];
751       if (do_values) {vals[i]    = matrix_A->values[ip];}
752       icols[i]   = matrix_A->icols[ip];
753       row_nnz[i] = matrix_A->row_nnz[ip];
754       for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; }
755    }
756 
757    if (do_values){ ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
758    ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
759    ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
760 
761    if (do_values) { matrix_A->values  = vals; }
762    matrix_A->icols   = icols;
763    matrix_A->row_nnz = row_nnz;
764 
765    PetscFunctionReturn(0);
766 }
767 
768 
769 /*
770   spbas_apply_reordering_cols:
771     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
772 */
773 #undef __FUNCT__
774 #define __FUNCT__ "spbas_apply_reordering_cols"
775 PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation)
776 {
777    PetscInt i,j;
778    PetscInt nrows=matrix_A->nrows;
779    PetscInt row_nnz;
780    PetscInt *icols;
781    PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
782    PetscScalar *vals=PETSC_NULL;
783 
784    PetscErrorCode ierr;
785 
786    PetscFunctionBegin;
787 
788    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS)
789    {
790       SETERRQ( PETSC_ERR_SUP_SYS,
791                "must have diagonal offsets in pattern\n");
792    }
793 
794    for (i=0; i<nrows;i++)
795    {
796       icols   = matrix_A->icols[i];
797       row_nnz = matrix_A->row_nnz[i];
798       if (do_values) { vals = matrix_A->values[i]; }
799 
800       for (j=0; j<row_nnz; j++)
801       {
802          icols[j] = permutation[i+icols[j]]-i;
803       }
804       ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
805    }
806 
807    PetscFunctionReturn(0);
808 }
809 
810 /*
811   spbas_apply_reordering:
812     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
813 */
814 #undef __FUNCT__
815 #define __FUNCT__ "spbas_apply_reordering"
816 PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
817 {
818    PetscErrorCode ierr;
819    PetscFunctionBegin;
820    ierr = spbas_apply_reordering_rows( matrix_A, inv_perm);CHKERRQ(ierr);
821    ierr = spbas_apply_reordering_cols( matrix_A, permutation);CHKERRQ(ierr);
822    PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "spbas_pattern_only"
827 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
828 {
829    spbas_matrix retval;
830    PetscInt i, j, i0, r_nnz;
831    PetscErrorCode ierr;
832 
833    PetscFunctionBegin;
834 
835    /* Copy input values */
836    retval.nrows = nrows;
837    retval.ncols = ncols;
838    retval.nnz   = ai[nrows];
839 
840    retval.block_data   = PETSC_TRUE;
841    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
842 
843    /* Allocate output matrix */
844    ierr =  spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
845    for (i=0; i<nrows; i++)  {retval.row_nnz[i] = ai[i+1]-ai[i];}
846    ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
847    /* Copy the structure */
848    for (i = 0; i<retval.nrows; i++)
849    {
850       i0 = ai[i];
851       r_nnz = ai[i+1]-i0;
852 
853       for (j=0; j<r_nnz; j++)
854       {
855          retval.icols[i][j]   = aj[i0+j]-i;
856       }
857    }
858 
859    *result = retval;
860    PetscFunctionReturn(0);
861 }
862 
863 
864 /*
865    spbas_mark_row_power:
866       Mark the columns in row 'row' which are nonzero in
867           matrix^2log(marker).
868 */
869 #undef __FUNCT__
870 #define __FUNCT__ "spbas_mark_row_power"
871 PetscErrorCode spbas_mark_row_power(
872 				    PetscInt *iwork,             /* marker-vector */
873 				    PetscInt row,                /* row for which the columns are marked */
874 				    spbas_matrix * in_matrix, /* matrix for which the power is being  calculated */
875 				    PetscInt marker,             /* marker-value: 2^power */
876 				    PetscInt minmrk,            /* lower bound for marked points */
877 				    PetscInt maxmrk)            /* upper bound for marked points */
878 {
879    PetscErrorCode ierr;
880    PetscInt i,j, nnz;
881 
882    PetscFunctionBegin;
883    nnz = in_matrix->row_nnz[row];
884 
885    /* For higher powers, call this function recursively */
886    if (marker>1)
887    {
888       for (i=0; i<nnz;i++)
889       {
890          j = row + in_matrix->icols[row][i];
891          if (minmrk<=j && j<maxmrk && iwork[j] < marker )
892          {
893             ierr =  spbas_mark_row_power( iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
894             iwork[j] |= marker;
895          }
896       }
897    }
898    else
899    {
900      /*  Mark the columns reached */
901       for (i=0; i<nnz;i++)
902       {
903          j = row + in_matrix->icols[row][i];
904          if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; }
905       }
906    }
907 
908    PetscFunctionReturn(0);
909 }
910 
911 
912 /*
913    spbas_power
914       Calculate sparseness patterns for incomplete Cholesky decompositions
915       of a given order: (almost) all nonzeros of the matrix^(order+1) which
916       are inside the band width are found and stored in the output sparseness
917       pattern.
918 */
919 #undef __FUNCT__
920 #define __FUNCT__ "spbas_power"
921 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
922 {
923    spbas_matrix retval;
924    PetscInt nrows = in_matrix.nrows;
925    PetscInt ncols = in_matrix.ncols;
926    PetscInt i, j, kend;
927    PetscInt nnz, inz;
928    PetscInt *iwork;
929    PetscInt marker;
930    PetscInt maxmrk=0;
931    PetscErrorCode ierr;
932 
933    PetscFunctionBegin;
934    if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS)
935    {
936       SETERRQ( PETSC_ERR_SUP_SYS,
937                "must have diagonal offsets in pattern\n");
938    }
939 
940    if (ncols != nrows)
941    {
942       SETERRQ( PETSC_ERR_ARG_INCOMP,
943                "Dimension error\n");
944    }
945 
946    if (in_matrix.values)
947    {
948       SETERRQ( PETSC_ERR_ARG_INCOMP,
949                "Input array must be sparseness pattern (no values)");
950    }
951 
952    if (power<=0)
953    {
954       SETERRQ( PETSC_ERR_SUP_SYS,
955                "Power must be 1 or up");
956    }
957 
958    /* Copy input values*/
959    retval.nrows = ncols;
960    retval.ncols = nrows;
961    retval.nnz   = 0;
962    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
963    retval.block_data = PETSC_FALSE;
964 
965    /* Allocate sparseness pattern */
966    ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
967 
968    /* Allocate marker array */
969    ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork);CHKERRQ(ierr);
970 
971    /* Erase the pattern for this row */
972    memset( (void *) iwork, 0, retval.nrows*sizeof(PetscInt));
973 
974    /* Calculate marker values */
975    marker = 1; for (i=1; i<power; i++) {marker*=2;}
976 
977    for (i=0; i<nrows; i++)
978    {
979      /* Calculate the pattern for each row */
980 
981       nnz    = in_matrix.row_nnz[i];
982       kend   = i+in_matrix.icols[i][nnz-1];
983       if (maxmrk<=kend) {maxmrk=kend+1;}
984       ierr =  spbas_mark_row_power( iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
985 
986       /* Count the columns*/
987       nnz = 0;
988       for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);}
989 
990       /* Allocate the column indices */
991       retval.row_nnz[i] = nnz;
992       ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]);CHKERRQ(ierr);
993 
994       /* Administrate the column indices */
995       inz = 0;
996       for (j=i; j<maxmrk; j++)
997       {
998          if (iwork[j])
999          {
1000             retval.icols[i][inz] = j-i;
1001             inz++;
1002             iwork[j]=0;
1003          }
1004       }
1005       retval.nnz += nnz;
1006    };
1007 
1008    ierr = PetscFree(iwork);CHKERRQ(ierr);
1009 
1010    *result = retval;
1011    PetscFunctionReturn(0);
1012 }
1013 
1014 
1015 
1016 /*
1017    spbas_keep_upper:
1018       remove the lower part of the matrix: keep the upper part
1019 */
1020 #undef __FUNCT__
1021 #define __FUNCT__ "spbas_keep_upper"
1022 PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix)
1023 {
1024    PetscInt i, j;
1025    PetscInt jstart;
1026 
1027    PetscFunctionBegin;
1028    if (inout_matrix->block_data)
1029    {
1030       SETERRQ( PETSC_ERR_SUP_SYS,
1031                "Not yet for block data matrices\n");
1032    }
1033 
1034    for (i=0; i<inout_matrix->nrows; i++)
1035    {
1036        for (jstart=0;
1037             (jstart<inout_matrix->row_nnz[i]) &&
1038             (inout_matrix->icols[i][jstart]<0); jstart++){}
1039 
1040        if (jstart>0)
1041        {
1042           for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++)
1043           {
1044              inout_matrix->icols[i][j] =
1045                  inout_matrix->icols[i][j+jstart];
1046           }
1047 
1048           if (inout_matrix->values)
1049           {
1050              for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++)
1051              {
1052                 inout_matrix->values[i][j] =
1053                     inout_matrix->values[i][j+jstart];
1054              }
1055           }
1056 
1057           inout_matrix->row_nnz[i] -= jstart;
1058 
1059           inout_matrix->icols[i] = (PetscInt*) realloc(
1060               (void*) inout_matrix->icols[i],
1061               inout_matrix->row_nnz[i]*sizeof(PetscInt));
1062 
1063           if (inout_matrix->values)
1064           {
1065              inout_matrix->values[i] = (PetscScalar*) realloc(
1066                  (void*) inout_matrix->values[i],
1067                  inout_matrix->row_nnz[i]*sizeof(PetscScalar));
1068           }
1069           inout_matrix->nnz -= jstart;
1070        }
1071    }
1072    PetscFunctionReturn(0);
1073 }
1074 
1075