xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision c328eead8cc18b10fedd0a2df62d438138cde9df)
1 #include "petsc.h"
2 #include "../src/mat/impls/aij/seq/aij.h"
3 #include "spbas.h"
4 
5 
6 
7 
8 /*
9   spbas_memory_requirement:
10     Calculate the number of bytes needed to store tha matrix
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "spbas_memory_requirement"
14 long int spbas_memory_requirement( spbas_matrix matrix)
15 {
16    long int memreq = 6 * sizeof(PetscInt)         + /* nrows, ncols, nnz, n_alloc_icol,
17 						       n_alloc_val, col_idx_type */
18      sizeof(PetscTruth)                 + /* block_data */
19      sizeof(PetscScalar**)        + /* values */
20      sizeof(PetscScalar*)         + /* alloc_val */
21      2 * sizeof(int**)            + /* icols, icols0 */
22      2 * sizeof(PetscInt*)             + /* row_nnz, alloc_icol */
23      matrix.nrows * sizeof(PetscInt)   + /* row_nnz[*] */
24      matrix.nrows * sizeof(PetscInt*);   /* icols[*] */
25 
26    /* icol0[*] */
27    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY)
28       { memreq += matrix.nrows * sizeof(PetscInt); }
29 
30    /* icols[*][*] */
31    if (matrix.block_data)
32       { memreq += matrix.n_alloc_icol * sizeof(PetscInt); }
33    else
34       { memreq += matrix.nnz * sizeof(PetscInt); }
35 
36    if (matrix.values)
37    {
38      memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
39      /* values[*][*] */
40        if (matrix.block_data)
41           { memreq += matrix.n_alloc_val  * sizeof(PetscScalar); }
42        else
43           { memreq += matrix.nnz * sizeof(PetscScalar); }
44    }
45 
46    return memreq;
47 }
48 
49 /*
50   spbas_allocate_pattern:
51     allocate the pattern arrays row_nnz, icols and optionally values
52 */
53 #undef __FUNCT__
54 #define __FUNCT__ "spbas_allocate_pattern"
55 PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values)
56 {
57    PetscErrorCode ierr;
58    PetscInt nrows = result->nrows;
59    PetscInt col_idx_type = result->col_idx_type;
60 
61    PetscFunctionBegin;
62    /* Allocate sparseness pattern */
63    ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr);
64    ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr);
65 
66    /* If offsets are given wrt an array, create array */
67    if (col_idx_type == SPBAS_OFFSET_ARRAY)
68    {
69       ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr);
70    }
71    else
72    {
73       result->icol0 = PETSC_NULL;
74    }
75 
76    /* If values are given, allocate values array */
77    if (do_values)
78    {
79       ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr);
80    }
81    else
82    {
83       result->values = PETSC_NULL;
84    }
85 
86    PetscFunctionReturn(0);
87 }
88 
89 /*
90 spbas_allocate_data:
91    in case of block_data:
92        Allocate the data arrays alloc_icol and optionally alloc_val,
93        set appropriate pointers from icols and values;
94    in case of !block_data:
95        Allocate the arrays icols[i] and optionally values[i]
96 */
97 #undef __FUNCT__
98 #define __FUNCT__ "spbas_allocate_data"
99 PetscErrorCode spbas_allocate_data( spbas_matrix * result)
100 {
101    PetscInt i;
102    PetscInt nnz   = result->nnz;
103    PetscInt nrows = result->nrows;
104    PetscInt r_nnz;
105    PetscErrorCode ierr;
106    PetscTruth do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE;
107    PetscTruth block_data = result->block_data;
108 
109    PetscFunctionBegin;
110    if (block_data)
111    {
112      /* Allocate the column number array and point to it */
113       result->n_alloc_icol = nnz;
114       ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr);
115       result->icols[0]= result->alloc_icol;
116       for (i=1; i<nrows; i++)
117       {
118           result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
119       }
120 
121       /* Allocate the value array and point to it */
122       if (do_values)
123       {
124          result->n_alloc_val= nnz;
125          ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
126          result->values[0]= result->alloc_val;
127          for (i=1; i<nrows; i++)
128          {
129              result->values[i] = result->values[i-1] + result->row_nnz[i-1];
130          }
131       }
132    }
133    else
134    {
135       for (i=0; i<nrows; i++)
136       {
137          r_nnz = result->row_nnz[i];
138          ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr);
139       }
140       if (do_values)
141       {
142          for (i=0; i<nrows; i++)
143          {
144             r_nnz = result->row_nnz[i];
145             ierr = PetscMalloc(r_nnz*sizeof(PetscScalar),
146                                &result->values[i]);CHKERRQ(ierr);
147          }
148       }
149    }
150 
151    PetscFunctionReturn(0);
152 }
153 
154 /*
155   spbas_row_order_icol
156      determine if row i1 should come
157        + before row i2 in the sorted rows (return -1),
158        + after (return 1)
159        + is identical (return 0).
160 */
161 #undef __FUNCT__
162 #define __FUNCT__ "spbas_row_order_icol"
163 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
164 {
165    PetscInt j;
166    PetscInt nnz1 = irow_in[i1+1] - irow_in[i1];
167    PetscInt nnz2 = irow_in[i2+1] - irow_in[i2];
168 
169    PetscInt * icol1 = &icol_in[irow_in[i1]];
170    PetscInt * icol2 = &icol_in[irow_in[i2]];
171 
172    if (nnz1<nnz2) {return -1;}
173    if (nnz1>nnz2) {return 1;}
174 
175    if (col_idx_type == SPBAS_COLUMN_NUMBERS)
176    {
177       for (j=0; j<nnz1; j++)
178       {
179          if (icol1[j]< icol2[j]) {return -1;}
180          if (icol1[j]> icol2[j]) {return 1;}
181       }
182    }
183    else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
184    {
185       for (j=0; j<nnz1; j++)
186       {
187          if (icol1[j]-i1< icol2[j]-i2) {return -1;}
188          if (icol1[j]-i1> icol2[j]-i2) {return 1;}
189       }
190    }
191    else if (col_idx_type == SPBAS_OFFSET_ARRAY)
192    {
193       for (j=1; j<nnz1; j++)
194       {
195          if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;}
196          if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;}
197       }
198    }
199    return 0;
200 }
201 
202 /*
203   spbas_mergesort_icols:
204     return a sorting of the rows in which identical sparseness patterns are
205     next to each other
206 */
207 #undef __FUNCT__
208 #define __FUNCT__ "spbas_mergesort_icols"
209 PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
210 {
211    PetscErrorCode ierr;
212    PetscInt istep;       /* Chunk-sizes of already sorted parts of arrays */
213 
214    PetscInt i, i1, i2;   /* Loop counters for (partly) sorted arrays */
215 
216    PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
217 
218    PetscInt *ialloc;     /* Allocated arrays */
219    PetscInt *iswap;      /* auxiliary pointers for swapping */
220    PetscInt *ihlp1;      /* Pointers to new version of arrays, */
221    PetscInt *ihlp2;      /* Pointers to previous version of arrays, */
222 
223    PetscFunctionBegin;
224 
225    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
226 
227    ihlp1 = ialloc;
228    ihlp2 = isort;
229 
230    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
231    for (istep=1; istep<nrows; istep*=2)
232    {
233      /*
234        Combine sorted parts
235             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
236        of ihlp2 and vhlp2
237 
238        into one sorted part
239             istart:istart+2*istep-1
240        of ihlp1 and vhlp1
241      */
242       for (istart=0; istart<nrows; istart+=2*istep)
243       {
244 
245 	/* Set counters and bound array part endings */
246          i1=istart;        i1end = i1+istep;  if (i1end>nrows) {i1end=nrows;}
247          i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) {i2end=nrows;}
248 
249          /* Merge the two array parts */
250          for (i=istart; i<i2end; i++)
251          {
252             if (i1<i1end && i2<i2end &&
253                 spbas_row_order_icol( ihlp2[i1], ihlp2[i2],
254                              irow_in, icol_in, col_idx_type) < 0)
255             {
256                 ihlp1[i] = ihlp2[i1];
257                 i1++;
258             }
259             else if (i2<i2end )
260             {
261                 ihlp1[i] = ihlp2[i2];
262                 i2++;
263             }
264             else
265             {
266                 ihlp1[i] = ihlp2[i1];
267                 i1++;
268             }
269          }
270       }
271 
272       /* Swap the two array sets */
273       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
274    }
275 
276    /* Copy one more time in case the sorted arrays are the temporary ones */
277    if (ihlp2 != isort)
278    {
279       for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; }
280    }
281 
282    ierr = PetscFree(ialloc); CHKERRQ(ierr);
283    PetscFunctionReturn(0);
284 }
285 
286 
287 
288 /*
289   spbas_compress_pattern:
290      calculate a compressed sparseness pattern for a sparseness pattern
291      given in compressed row storage. The compressed sparseness pattern may
292      require (much) less memory.
293 */
294 #undef __FUNCT__
295 #define __FUNCT__ "spbas_compress_pattern"
296 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols,
297 				      PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
298 {
299    PetscInt nnz = irow_in[nrows];
300    long int mem_orig = (nrows + nnz) * sizeof(PetscInt);
301    long int mem_compressed;
302    PetscErrorCode ierr;
303    PetscInt *isort;
304    PetscInt *icols;
305    PetscInt row_nnz;
306    PetscInt *ipoint;
307    PetscTruth *used;
308    PetscInt ptr;
309    PetscInt i,j;
310 
311    const PetscTruth no_values = PETSC_FALSE;
312 
313    PetscFunctionBegin;
314 
315    /* Allocate the structure of the new matrix */
316    B->nrows = nrows;
317    B->ncols = ncols;
318    B->nnz   = nnz;
319    B->col_idx_type= col_idx_type;
320    B->block_data = PETSC_TRUE;
321    ierr = spbas_allocate_pattern( B, no_values); CHKERRQ(ierr);
322 
323    /* When using an offset array, set it */
324    if (col_idx_type==SPBAS_OFFSET_ARRAY)
325    {
326       for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];}
327    }
328 
329    /* Allocate the ordering for the rows */
330    ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort); CHKERRQ(ierr);
331    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint); CHKERRQ(ierr);
332    ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used); CHKERRQ(ierr);
333 
334    /*  Initialize the sorting */
335    memset((void*) used, 0, nrows*sizeof(PetscTruth));
336    for (i = 0; i<nrows; i++)
337    {
338       B->row_nnz[i] = irow_in[i+1]-irow_in[i];
339       isort[i] = i;
340       ipoint[i]= i;
341    }
342 
343    /* Sort the rows so that identical columns will be next to each other */
344    ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort); CHKERRQ(ierr);
345    //   ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
346 
347    /* Replace identical rows with the first one in the list */
348    for (i=1; i<nrows; i++)
349    {
350       if (spbas_row_order_icol(isort[i-1], isort[i],
351                       irow_in, icol_in, col_idx_type) == 0)
352       {
353          ipoint[isort[i]] = ipoint[isort[i-1]];
354       }
355    }
356 
357    /* Collect the rows which are used*/
358    for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;}
359 
360    /* Calculate needed memory */
361    B->n_alloc_icol = 0;
362    for (i=0; i<nrows; i++)
363    {
364       if (used[i]) {B->n_alloc_icol += B->row_nnz[i];}
365    }
366 
367    ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);
368    CHKERRQ(ierr);
369 
370    /* Fill in the diagonal offsets for the rows which store their own data */
371    ptr = 0;
372    for (i=0; i<B->nrows; i++)
373    {
374       if (used[i])
375       {
376          B->icols[i] = &B->alloc_icol[ptr];
377          icols = &icol_in[irow_in[i]];
378          row_nnz = B->row_nnz[i];
379          if (col_idx_type == SPBAS_COLUMN_NUMBERS)
380          {
381             for (j=0; j<row_nnz; j++)
382             {
383                B->icols[i][j] = icols[j];
384             }
385          }
386          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
387          {
388             for (j=0; j<row_nnz; j++)
389             {
390                B->icols[i][j] = icols[j]-i;
391             }
392          }
393          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
394          {
395             for (j=0; j<row_nnz; j++)
396             {
397                B->icols[i][j] = icols[j]-icols[0];
398             }
399          }
400          ptr += B->row_nnz[i];
401       }
402    }
403 
404    /* Point to the right places for all data */
405    for (i=0; i<nrows; i++)
406    {
407       B->icols[i] = B->icols[ipoint[i]];
408    }
409    ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
410    ierr = PetscInfo1(PETSC_NULL,"         (%G nonzeros per row)\n",  (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr);
411 
412    ierr=PetscFree(isort);   CHKERRQ(ierr);
413    ierr=PetscFree(used);    CHKERRQ(ierr);
414    ierr=PetscFree(ipoint);  CHKERRQ(ierr);
415 
416    mem_compressed = spbas_memory_requirement( *B );
417    *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/
418                             (PetscReal) mem_orig;
419    PetscFunctionReturn(0);
420 }
421 
422 /*
423    spbas_incomplete_cholesky
424        Incomplete Cholesky decomposition
425 */
426 #include "spbas_cholesky.h"
427 
428 /*
429   spbas_delete : de-allocate the arrays owned by this matrix
430 */
431 #undef __FUNCT__
432 #define __FUNCT__ "spbas_delete"
433 PetscErrorCode spbas_delete(spbas_matrix matrix)
434 {
435    PetscInt i;
436    PetscErrorCode ierr;
437    PetscFunctionBegin;
438    if (matrix.block_data)
439    {
440       ierr=PetscFree(matrix.alloc_icol); CHKERRQ(ierr)
441       if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
442    }
443    else
444    {
445       for (i=0; i<matrix.nrows; i++)
446 	{ ierr=PetscFree(matrix.icols[i]); CHKERRQ(ierr);}
447       ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
448       if (matrix.values)
449       {
450          for (i=0; i<matrix.nrows; i++)
451           { ierr=PetscFree(matrix.values[i]); CHKERRQ(ierr);}
452       }
453    }
454 
455    ierr=PetscFree(matrix.row_nnz); CHKERRQ(ierr);
456    ierr=PetscFree(matrix.icols); CHKERRQ(ierr);
457    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY)
458       {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
459    if (matrix.values)
460       {ierr=PetscFree(matrix.values);CHKERRQ(ierr);}
461 
462    PetscFunctionReturn(0);
463 }
464 
465 /*
466 spbas_matrix_to_crs:
467    Convert an spbas_matrix to compessed row storage
468 */
469 #undef __FUNCT__
470 #define __FUNCT__ "spbas_matrix_to_crs"
471 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
472 {
473    PetscInt nrows = matrix_A.nrows;
474    PetscInt nnz   = matrix_A.nnz;
475    PetscInt i,j,r_nnz,i0;
476    PetscInt *irow;
477    PetscInt *icol;
478    PetscInt *icol_A;
479    MatScalar *val;
480    PetscScalar *val_A;
481    PetscInt col_idx_type = matrix_A.col_idx_type;
482    PetscTruth do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
483    PetscErrorCode ierr;
484 
485    PetscFunctionBegin;
486    ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow); CHKERRQ(ierr);
487    ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol); CHKERRQ(ierr);
488    *icol_out = icol; *irow_out=irow;
489    if (do_values)
490    {
491       ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val); CHKERRQ(ierr);
492       *val_out = val; *icol_out = icol; *irow_out=irow;
493    }
494 
495    irow[0]=0;
496    for (i=0; i<nrows; i++)
497    {
498       r_nnz = matrix_A.row_nnz[i];
499       i0 = irow[i];
500       irow[i+1] = i0 + r_nnz;
501       icol_A = matrix_A.icols[i];
502 
503       if (do_values)
504       {
505           val_A = matrix_A.values[i];
506           for (j=0; j<r_nnz; j++)
507           {
508              icol[i0+j] = icol_A[j];
509              val[i0+j]  = val_A[j];
510           }
511       }
512       else
513       {
514           for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; }
515       }
516 
517       if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
518       {
519          for (j=0; j<r_nnz; j++) { icol[i0+j] += i; }
520       }
521       else if (col_idx_type == SPBAS_OFFSET_ARRAY)
522       {
523          i0 = matrix_A.icol0[i];
524          for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; }
525       }
526 
527    }
528 
529    PetscFunctionReturn(0);
530 }
531 
532 
533 /*
534     spbas_transpose
535        return the transpose of a matrix
536 */
537 #undef __FUNCT__
538 #define __FUNCT__ "spbas_transpose"
539 PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result)
540 {
541    PetscInt col_idx_type = in_matrix.col_idx_type;
542    PetscInt nnz   = in_matrix.nnz;
543    PetscInt ncols = in_matrix.nrows;
544    PetscInt nrows = in_matrix.ncols;
545    PetscInt i,j,k;
546    PetscInt r_nnz;
547    PetscInt *irow;
548    PetscInt icol0;
549    PetscScalar * val;
550    PetscErrorCode ierr;
551 
552    PetscFunctionBegin;
553 
554    /* Copy input values */
555    result->nrows        = nrows;
556    result->ncols        = ncols;
557    result->nnz          = nnz;
558    result->col_idx_type = SPBAS_COLUMN_NUMBERS;
559    result->block_data   = PETSC_TRUE;
560 
561    /* Allocate sparseness pattern */
562    ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);
563    CHKERRQ(ierr);
564 
565    /*  Count the number of nonzeros in each row */
566    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
567 
568    for (i=0; i<ncols; i++)
569    {
570       r_nnz = in_matrix.row_nnz[i];
571       irow  = in_matrix.icols[i];
572       if (col_idx_type == SPBAS_COLUMN_NUMBERS)
573       {
574          for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; }
575       }
576       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
577       {
578          for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; }
579       }
580       else if (col_idx_type == SPBAS_OFFSET_ARRAY)
581       {
582          icol0=in_matrix.icol0[i];
583          for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; }
584       }
585    }
586 
587    /* Set the pointers to the data */
588    ierr = spbas_allocate_data(result); CHKERRQ(ierr);
589 
590    /* Reset the number of nonzeros in each row */
591    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
592 
593    /* Fill the data arrays */
594    if (in_matrix.values)
595    {
596       for (i=0; i<ncols; i++)
597       {
598          r_nnz = in_matrix.row_nnz[i];
599          irow  = in_matrix.icols[i];
600          val   = in_matrix.values[i];
601 
602          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
603          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
604          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
605                  {icol0=in_matrix.icol0[i];}
606          for (j=0; j<r_nnz; j++)
607          {
608             k = icol0 + irow[j];
609             result->icols[k][result->row_nnz[k]]  = i;
610             result->values[k][result->row_nnz[k]] = val[j];
611             result->row_nnz[k]++;
612          }
613       }
614    }
615    else
616    {
617       for (i=0; i<ncols; i++)
618       {
619          r_nnz = in_matrix.row_nnz[i];
620          irow  = in_matrix.icols[i];
621 
622          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
623          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
624          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
625                  {icol0=in_matrix.icol0[i];}
626 
627          for (j=0; j<r_nnz; j++)
628          {
629             k = icol0 + irow[j];
630             result->icols[k][result->row_nnz[k]]  = i;
631             result->row_nnz[k]++;
632          }
633       }
634    }
635    PetscFunctionReturn(0);
636 }
637 
638 /*
639    spbas_mergesort
640 
641       mergesort for an array of intergers and an array of associated
642       reals
643 
644       on output, icol[0..nnz-1] is increasing;
645                   val[0..nnz-1] has undergone the same permutation as icol
646 
647       NB: val may be PETSC_NULL: in that case, only the integers are sorted
648 
649 */
650 #undef __FUNCT__
651 #define __FUNCT__ "spbas_mergesort"
652 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
653 {
654   PetscInt istep;       /* Chunk-sizes of already sorted parts of arrays */
655   PetscInt i, i1, i2;   /* Loop counters for (partly) sorted arrays */
656   PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
657   PetscInt *ialloc;     /* Allocated arrays */
658    PetscScalar *valloc=PETSC_NULL;
659    PetscInt *iswap;      /* auxiliary pointers for swapping */
660    PetscScalar *vswap;
661    PetscInt *ihlp1;      /* Pointers to new version of arrays, */
662    PetscScalar *vhlp1=PETSC_NULL;  /* (arrays under construction) */
663    PetscInt *ihlp2;      /* Pointers to previous version of arrays, */
664    PetscScalar *vhlp2=PETSC_NULL;
665    PetscErrorCode ierr;
666 
667    ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc); CHKERRQ(ierr);
668    ihlp1 = ialloc;
669    ihlp2 = icol;
670 
671    if (val)
672    {
673       ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc); CHKERRQ(ierr);
674       vhlp1 = valloc;
675       vhlp2 = val;
676    }
677 
678 
679    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
680    for (istep=1; istep<nnz; istep*=2)
681    {
682      /*
683        Combine sorted parts
684             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
685        of ihlp2 and vhlp2
686 
687        into one sorted part
688             istart:istart+2*istep-1
689        of ihlp1 and vhlp1
690      */
691       for (istart=0; istart<nnz; istart+=2*istep)
692       {
693 
694 	/* Set counters and bound array part endings */
695          i1=istart;        i1end = i1+istep;  if (i1end>nnz) {i1end=nnz;}
696          i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) {i2end=nnz;}
697 
698          /* Merge the two array parts */
699          if (val)
700          {
701             for (i=istart; i<i2end; i++)
702             {
703                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2])
704                {
705                    ihlp1[i] = ihlp2[i1];
706                    vhlp1[i] = vhlp2[i1];
707                    i1++;
708                }
709                else if (i2<i2end )
710                {
711                    ihlp1[i] = ihlp2[i2];
712                    vhlp1[i] = vhlp2[i2];
713                    i2++;
714                }
715                else
716                {
717                    ihlp1[i] = ihlp2[i1];
718                    vhlp1[i] = vhlp2[i1];
719                    i1++;
720                }
721             }
722          }
723          else
724          {
725             for (i=istart; i<i2end; i++)
726             {
727                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2])
728                {
729                    ihlp1[i] = ihlp2[i1];
730                    i1++;
731                }
732                else if (i2<i2end )
733                {
734                    ihlp1[i] = ihlp2[i2];
735                    i2++;
736                }
737                else
738                {
739                    ihlp1[i] = ihlp2[i1];
740                    i1++;
741                }
742             }
743          }
744       }
745 
746       /* Swap the two array sets */
747       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
748       vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
749    }
750 
751    /* Copy one more time in case the sorted arrays are the temporary ones */
752    if (ihlp2 != icol)
753    {
754       for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; }
755       if (val)
756       {
757          for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; }
758       }
759    }
760 
761    ierr = PetscFree(ialloc); CHKERRQ(ierr);
762    if(val){ierr = PetscFree(valloc); CHKERRQ(ierr);}
763    PetscFunctionReturn(0);
764 }
765 
766 /*
767   spbas_apply_reordering_rows:
768     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
769 */
770 #undef __FUNCT__
771 #define __FUNCT__ "spbas_apply_reordering_rows"
772 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
773 {
774    PetscInt i,j,ip;
775    PetscInt nrows=matrix_A->nrows;
776    PetscInt * row_nnz;
777    PetscInt **icols;
778    PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
779    PetscScalar **vals=PETSC_NULL;
780    PetscErrorCode ierr;
781 
782    PetscFunctionBegin;
783    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS)
784    {
785       SETERRQ( PETSC_ERR_SUP_SYS,
786                "must have diagonal offsets in pattern\n");
787    }
788 
789    if (do_values)
790    {
791      ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals); CHKERRQ(ierr);
792    }
793    ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz);       CHKERRQ(ierr);
794    ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols);        CHKERRQ(ierr);
795 
796    for (i=0; i<nrows;i++)
797    {
798       ip = permutation[i];
799       if (do_values) {vals[i]    = matrix_A->values[ip];}
800       icols[i]   = matrix_A->icols[ip];
801       row_nnz[i] = matrix_A->row_nnz[ip];
802       for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; }
803    }
804 
805    if (do_values){ ierr = PetscFree(matrix_A->values);  CHKERRQ(ierr);}
806    ierr = PetscFree(matrix_A->icols);   CHKERRQ(ierr);
807    ierr = PetscFree(matrix_A->row_nnz); CHKERRQ(ierr);
808 
809    if (do_values) { matrix_A->values  = vals; }
810    matrix_A->icols   = icols;
811    matrix_A->row_nnz = row_nnz;
812 
813    PetscFunctionReturn(0);
814 }
815 
816 
817 /*
818   spbas_apply_reordering_cols:
819     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
820 */
821 #undef __FUNCT__
822 #define __FUNCT__ "spbas_apply_reordering_cols"
823 PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation)
824 {
825    PetscInt i,j;
826    PetscInt nrows=matrix_A->nrows;
827    PetscInt row_nnz;
828    PetscInt *icols;
829    PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
830    PetscScalar *vals=PETSC_NULL;
831 
832    PetscErrorCode ierr;
833 
834    PetscFunctionBegin;
835 
836    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS)
837    {
838       SETERRQ( PETSC_ERR_SUP_SYS,
839                "must have diagonal offsets in pattern\n");
840    }
841 
842    for (i=0; i<nrows;i++)
843    {
844       icols   = matrix_A->icols[i];
845       row_nnz = matrix_A->row_nnz[i];
846       if (do_values) { vals = matrix_A->values[i]; }
847 
848       for (j=0; j<row_nnz; j++)
849       {
850          icols[j] = permutation[i+icols[j]]-i;
851       }
852       ierr = spbas_mergesort(row_nnz, icols, vals); CHKERRQ(ierr);
853    }
854 
855    PetscFunctionReturn(0);
856 }
857 
858 /*
859   spbas_apply_reordering:
860     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
861 */
862 #undef __FUNCT__
863 #define __FUNCT__ "spbas_apply_reordering"
864 PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
865 {
866    PetscErrorCode ierr;
867    PetscFunctionBegin;
868    ierr = spbas_apply_reordering_rows( matrix_A, inv_perm); CHKERRQ(ierr);
869    ierr = spbas_apply_reordering_cols( matrix_A, permutation); CHKERRQ(ierr);
870    PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "spbas_pattern_only"
875 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
876 {
877    spbas_matrix retval;
878    PetscInt i, j, i0, r_nnz;
879    PetscErrorCode ierr;
880 
881    PetscFunctionBegin;
882 
883    /* Copy input values */
884    retval.nrows = nrows;
885    retval.ncols = ncols;
886    retval.nnz   = ai[nrows];
887 
888    retval.block_data   = PETSC_TRUE;
889    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
890 
891    /* Allocate output matrix */
892    ierr =  spbas_allocate_pattern(&retval, PETSC_FALSE); CHKERRQ(ierr);
893    for (i=0; i<nrows; i++)  {retval.row_nnz[i] = ai[i+1]-ai[i];}
894    ierr =  spbas_allocate_data(&retval); CHKERRQ(ierr);
895    /* Copy the structure */
896    for (i = 0; i<retval.nrows; i++)
897    {
898       i0 = ai[i];
899       r_nnz = ai[i+1]-i0;
900 
901       for (j=0; j<r_nnz; j++)
902       {
903          retval.icols[i][j]   = aj[i0+j]-i;
904       }
905    }
906 
907    *result = retval;
908    PetscFunctionReturn(0);
909 }
910 
911 
912 /*
913    spbas_mark_row_power:
914       Mark the columns in row 'row' which are nonzero in
915           matrix^2log(marker).
916 */
917 #undef __FUNCT__
918 #define __FUNCT__ "spbas_mark_row_power"
919 PetscErrorCode spbas_mark_row_power(
920 				    PetscInt *iwork,             /* marker-vector */
921 				    PetscInt row,                /* row for which the columns are marked */
922 				    spbas_matrix * in_matrix, /* matrix for which the power is being  calculated */
923 				    PetscInt marker,             /* marker-value: 2^power */
924 				    PetscInt minmrk,            /* lower bound for marked points */
925 				    PetscInt maxmrk)            /* upper bound for marked points */
926 {
927    PetscErrorCode ierr;
928    PetscInt i,j, nnz;
929 
930    PetscFunctionBegin;
931    nnz = in_matrix->row_nnz[row];
932 
933    /* For higher powers, call this function recursively */
934    if (marker>1)
935    {
936       for (i=0; i<nnz;i++)
937       {
938          j = row + in_matrix->icols[row][i];
939          if (minmrk<=j && j<maxmrk && iwork[j] < marker )
940          {
941             ierr =  spbas_mark_row_power( iwork, row + in_matrix->icols[row][i],
942                                         in_matrix, marker/2,minmrk,maxmrk);
943             CHKERRQ(ierr);
944             iwork[j] |= marker;
945          }
946       }
947    }
948    else
949    {
950      /*  Mark the columns reached */
951       for (i=0; i<nnz;i++)
952       {
953          j = row + in_matrix->icols[row][i];
954          if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; }
955       }
956    }
957 
958    PetscFunctionReturn(0);
959 }
960 
961 
962 /*
963    spbas_power
964       Calculate sparseness patterns for incomplete Cholesky decompositions
965       of a given order: (almost) all nonzeros of the matrix^(order+1) which
966       are inside the band width are found and stored in the output sparseness
967       pattern.
968 */
969 #undef __FUNCT__
970 #define __FUNCT__ "spbas_power"
971 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
972 {
973    spbas_matrix retval;
974    PetscInt nrows = in_matrix.nrows;
975    PetscInt ncols = in_matrix.ncols;
976    PetscInt i, j, kend;
977    PetscInt nnz, inz;
978    PetscInt *iwork;
979    PetscInt marker;
980    PetscInt maxmrk=0;
981    PetscErrorCode ierr;
982 
983    PetscFunctionBegin;
984    if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS)
985    {
986       SETERRQ( PETSC_ERR_SUP_SYS,
987                "must have diagonal offsets in pattern\n");
988    }
989 
990    if (ncols != nrows)
991    {
992       SETERRQ( PETSC_ERR_ARG_INCOMP,
993                "Dimension error\n");
994    }
995 
996    if (in_matrix.values)
997    {
998       SETERRQ( PETSC_ERR_ARG_INCOMP,
999                "Input array must be sparseness pattern (no values)");
1000    }
1001 
1002    if (power<=0)
1003    {
1004       SETERRQ( PETSC_ERR_SUP_SYS,
1005                "Power must be 1 or up");
1006    }
1007 
1008    /* Copy input values*/
1009    retval.nrows = ncols;
1010    retval.ncols = nrows;
1011    retval.nnz   = 0;
1012    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
1013    retval.block_data = PETSC_FALSE;
1014 
1015    /* Allocate sparseness pattern */
1016    ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);
1017    CHKERRQ(ierr);
1018 
1019    /* Allocate marker array */
1020    ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork); CHKERRQ(ierr);
1021 
1022    /* Erase the pattern for this row */
1023    memset( (void *) iwork, 0, retval.nrows*sizeof(PetscInt));
1024 
1025    /* Calculate marker values */
1026    marker = 1; for (i=1; i<power; i++) {marker*=2;}
1027 
1028    for (i=0; i<nrows; i++)
1029    {
1030      /* Calculate the pattern for each row */
1031 
1032       nnz    = in_matrix.row_nnz[i];
1033       kend   = i+in_matrix.icols[i][nnz-1];
1034       if (maxmrk<=kend) {maxmrk=kend+1;}
1035       ierr =  spbas_mark_row_power( iwork, i, &in_matrix, marker,
1036                                     i, maxmrk); CHKERRQ(ierr);
1037 
1038       /* Count the columns*/
1039       nnz = 0;
1040       for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);}
1041 
1042       /* Allocate the column indices */
1043       retval.row_nnz[i] = nnz;
1044       ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]); CHKERRQ(ierr);
1045 
1046       /* Administrate the column indices */
1047       inz = 0;
1048       for (j=i; j<maxmrk; j++)
1049       {
1050          if (iwork[j])
1051          {
1052             retval.icols[i][inz] = j-i;
1053             inz++;
1054             iwork[j]=0;
1055          }
1056       }
1057       retval.nnz += nnz;
1058    };
1059 
1060    ierr = PetscFree(iwork); CHKERRQ(ierr);
1061 
1062    *result = retval;
1063    PetscFunctionReturn(0);
1064 }
1065 
1066 
1067 
1068 /*
1069    spbas_keep_upper:
1070       remove the lower part of the matrix: keep the upper part
1071 */
1072 #undef __FUNCT__
1073 #define __FUNCT__ "spbas_keep_upper"
1074 PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix)
1075 {
1076    PetscInt i, j;
1077    PetscInt jstart;
1078 
1079    PetscFunctionBegin;
1080    if (inout_matrix->block_data)
1081    {
1082       SETERRQ( PETSC_ERR_SUP_SYS,
1083                "Not yet for block data matrices\n");
1084    }
1085 
1086    for (i=0; i<inout_matrix->nrows; i++)
1087    {
1088        for (jstart=0;
1089             (jstart<inout_matrix->row_nnz[i]) &&
1090             (inout_matrix->icols[i][jstart]<0); jstart++){}
1091 
1092        if (jstart>0)
1093        {
1094           for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++)
1095           {
1096              inout_matrix->icols[i][j] =
1097                  inout_matrix->icols[i][j+jstart];
1098           }
1099 
1100           if (inout_matrix->values)
1101           {
1102              for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++)
1103              {
1104                 inout_matrix->values[i][j] =
1105                     inout_matrix->values[i][j+jstart];
1106              }
1107           }
1108 
1109           inout_matrix->row_nnz[i] -= jstart;
1110 
1111           inout_matrix->icols[i] = (PetscInt*) realloc(
1112               (void*) inout_matrix->icols[i],
1113               inout_matrix->row_nnz[i]*sizeof(PetscInt));
1114 
1115           if (inout_matrix->values)
1116           {
1117              inout_matrix->values[i] = (PetscScalar*) realloc(
1118                  (void*) inout_matrix->values[i],
1119                  inout_matrix->row_nnz[i]*sizeof(PetscScalar));
1120           }
1121           inout_matrix->nnz -= jstart;
1122        }
1123    }
1124    PetscFunctionReturn(0);
1125 }
1126 
1127