xref: /petsc/src/mat/impls/aij/seq/bas/spbas.c (revision 71d55d18a6cc105fecd21255dfee54cec910a2af)
1 #include "petsc.h"
2 #include "../src/mat/impls/aij/seq/aij.h"
3 #include "spbas.h"
4 
5 /*
6   spbas_memory_requirement:
7     Calculate the number of bytes needed to store tha matrix
8 */
9 #undef __FUNCT__
10 #define __FUNCT__ "spbas_memory_requirement"
11 long int spbas_memory_requirement( spbas_matrix matrix)
12 {
13    long int memreq = 6 * sizeof(PetscInt)         + /* nrows, ncols, nnz, n_alloc_icol,
14 						       n_alloc_val, col_idx_type */
15      sizeof(PetscTruth)                 + /* block_data */
16      sizeof(PetscScalar**)        + /* values */
17      sizeof(PetscScalar*)         + /* alloc_val */
18      2 * sizeof(int**)            + /* icols, icols0 */
19      2 * sizeof(PetscInt*)             + /* row_nnz, alloc_icol */
20      matrix.nrows * sizeof(PetscInt)   + /* row_nnz[*] */
21      matrix.nrows * sizeof(PetscInt*);   /* icols[*] */
22 
23    /* icol0[*] */
24    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY)
25       { memreq += matrix.nrows * sizeof(PetscInt); }
26 
27    /* icols[*][*] */
28    if (matrix.block_data)
29       { memreq += matrix.n_alloc_icol * sizeof(PetscInt); }
30    else
31       { memreq += matrix.nnz * sizeof(PetscInt); }
32 
33    if (matrix.values)
34    {
35      memreq += matrix.nrows * sizeof(PetscScalar*); /* values[*] */
36      /* values[*][*] */
37        if (matrix.block_data)
38           { memreq += matrix.n_alloc_val  * sizeof(PetscScalar); }
39        else
40           { memreq += matrix.nnz * sizeof(PetscScalar); }
41    }
42 
43    return memreq;
44 }
45 
46 /*
47   spbas_allocate_pattern:
48     allocate the pattern arrays row_nnz, icols and optionally values
49 */
50 #undef __FUNCT__
51 #define __FUNCT__ "spbas_allocate_pattern"
52 PetscErrorCode spbas_allocate_pattern( spbas_matrix * result, PetscTruth do_values)
53 {
54    PetscErrorCode ierr;
55    PetscInt nrows = result->nrows;
56    PetscInt col_idx_type = result->col_idx_type;
57 
58    PetscFunctionBegin;
59    /* Allocate sparseness pattern */
60    ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->row_nnz);CHKERRQ(ierr);
61    ierr = PetscMalloc(nrows*sizeof(PetscInt*),&result->icols);CHKERRQ(ierr);
62 
63    /* If offsets are given wrt an array, create array */
64    if (col_idx_type == SPBAS_OFFSET_ARRAY)
65    {
66       ierr = PetscMalloc(nrows*sizeof(PetscInt),&result->icol0);CHKERRQ(ierr);
67    }
68    else
69    {
70       result->icol0 = PETSC_NULL;
71    }
72 
73    /* If values are given, allocate values array */
74    if (do_values)
75    {
76       ierr = PetscMalloc(nrows*sizeof(PetscScalar*),&result->values);CHKERRQ(ierr);
77    }
78    else
79    {
80       result->values = PETSC_NULL;
81    }
82 
83    PetscFunctionReturn(0);
84 }
85 
86 /*
87 spbas_allocate_data:
88    in case of block_data:
89        Allocate the data arrays alloc_icol and optionally alloc_val,
90        set appropriate pointers from icols and values;
91    in case of !block_data:
92        Allocate the arrays icols[i] and optionally values[i]
93 */
94 #undef __FUNCT__
95 #define __FUNCT__ "spbas_allocate_data"
96 PetscErrorCode spbas_allocate_data( spbas_matrix * result)
97 {
98    PetscInt i;
99    PetscInt nnz   = result->nnz;
100    PetscInt nrows = result->nrows;
101    PetscInt r_nnz;
102    PetscErrorCode ierr;
103    PetscTruth do_values = (result->values != PETSC_NULL) ? PETSC_TRUE : PETSC_FALSE;
104    PetscTruth block_data = result->block_data;
105 
106    PetscFunctionBegin;
107    if (block_data)
108    {
109      /* Allocate the column number array and point to it */
110       result->n_alloc_icol = nnz;
111       ierr = PetscMalloc(nnz*sizeof(PetscInt), &result->alloc_icol);CHKERRQ(ierr);
112       result->icols[0]= result->alloc_icol;
113       for (i=1; i<nrows; i++)
114       {
115           result->icols[i] = result->icols[i-1] + result->row_nnz[i-1];
116       }
117 
118       /* Allocate the value array and point to it */
119       if (do_values)
120       {
121          result->n_alloc_val= nnz;
122          ierr = PetscMalloc(nnz*sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
123          result->values[0]= result->alloc_val;
124          for (i=1; i<nrows; i++)
125          {
126              result->values[i] = result->values[i-1] + result->row_nnz[i-1];
127          }
128       }
129    }
130    else
131    {
132       for (i=0; i<nrows; i++)
133       {
134          r_nnz = result->row_nnz[i];
135          ierr = PetscMalloc(r_nnz*sizeof(PetscInt), &result->icols[i]);CHKERRQ(ierr);
136       }
137       if (do_values)
138       {
139          for (i=0; i<nrows; i++)
140          {
141             r_nnz = result->row_nnz[i];
142             ierr = PetscMalloc(r_nnz*sizeof(PetscScalar),
143                                &result->values[i]);CHKERRQ(ierr);
144          }
145       }
146    }
147 
148    PetscFunctionReturn(0);
149 }
150 
151 /*
152   spbas_row_order_icol
153      determine if row i1 should come
154        + before row i2 in the sorted rows (return -1),
155        + after (return 1)
156        + is identical (return 0).
157 */
158 #undef __FUNCT__
159 #define __FUNCT__ "spbas_row_order_icol"
160 int spbas_row_order_icol(PetscInt i1, PetscInt i2, PetscInt *irow_in, PetscInt *icol_in,PetscInt col_idx_type)
161 {
162    PetscInt j;
163    PetscInt nnz1 = irow_in[i1+1] - irow_in[i1];
164    PetscInt nnz2 = irow_in[i2+1] - irow_in[i2];
165 
166    PetscInt * icol1 = &icol_in[irow_in[i1]];
167    PetscInt * icol2 = &icol_in[irow_in[i2]];
168 
169    if (nnz1<nnz2) {return -1;}
170    if (nnz1>nnz2) {return 1;}
171 
172    if (col_idx_type == SPBAS_COLUMN_NUMBERS)
173    {
174       for (j=0; j<nnz1; j++)
175       {
176          if (icol1[j]< icol2[j]) {return -1;}
177          if (icol1[j]> icol2[j]) {return 1;}
178       }
179    }
180    else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
181    {
182       for (j=0; j<nnz1; j++)
183       {
184          if (icol1[j]-i1< icol2[j]-i2) {return -1;}
185          if (icol1[j]-i1> icol2[j]-i2) {return 1;}
186       }
187    }
188    else if (col_idx_type == SPBAS_OFFSET_ARRAY)
189    {
190       for (j=1; j<nnz1; j++)
191       {
192          if (icol1[j]-icol1[0] < icol2[j]-icol2[0]) {return -1;}
193          if (icol1[j]-icol1[0] > icol2[j]-icol2[0]) {return 1;}
194       }
195    }
196    return 0;
197 }
198 
199 /*
200   spbas_mergesort_icols:
201     return a sorting of the rows in which identical sparseness patterns are
202     next to each other
203 */
204 #undef __FUNCT__
205 #define __FUNCT__ "spbas_mergesort_icols"
206 PetscErrorCode spbas_mergesort_icols( PetscInt nrows, PetscInt * irow_in, PetscInt * icol_in,PetscInt col_idx_type, PetscInt *isort)
207 {
208    PetscErrorCode ierr;
209    PetscInt istep;       /* Chunk-sizes of already sorted parts of arrays */
210 
211    PetscInt i, i1, i2;   /* Loop counters for (partly) sorted arrays */
212 
213    PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both  parts */
214 
215    PetscInt *ialloc;     /* Allocated arrays */
216    PetscInt *iswap;      /* auxiliary pointers for swapping */
217    PetscInt *ihlp1;      /* Pointers to new version of arrays, */
218    PetscInt *ihlp2;      /* Pointers to previous version of arrays, */
219 
220    PetscFunctionBegin;
221 
222    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
223 
224    ihlp1 = ialloc;
225    ihlp2 = isort;
226 
227    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
228    for (istep=1; istep<nrows; istep*=2)
229    {
230      /*
231        Combine sorted parts
232             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
233        of ihlp2 and vhlp2
234 
235        into one sorted part
236             istart:istart+2*istep-1
237        of ihlp1 and vhlp1
238      */
239       for (istart=0; istart<nrows; istart+=2*istep)
240       {
241 
242 	/* Set counters and bound array part endings */
243          i1=istart;        i1end = i1+istep;  if (i1end>nrows) {i1end=nrows;}
244          i2=istart+istep;  i2end = i2+istep;  if (i2end>nrows) {i2end=nrows;}
245 
246          /* Merge the two array parts */
247          for (i=istart; i<i2end; i++)
248          {
249             if (i1<i1end && i2<i2end &&
250                 spbas_row_order_icol( ihlp2[i1], ihlp2[i2],
251                              irow_in, icol_in, col_idx_type) < 0)
252             {
253                 ihlp1[i] = ihlp2[i1];
254                 i1++;
255             }
256             else if (i2<i2end )
257             {
258                 ihlp1[i] = ihlp2[i2];
259                 i2++;
260             }
261             else
262             {
263                 ihlp1[i] = ihlp2[i1];
264                 i1++;
265             }
266          }
267       }
268 
269       /* Swap the two array sets */
270       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
271    }
272 
273    /* Copy one more time in case the sorted arrays are the temporary ones */
274    if (ihlp2 != isort)
275    {
276       for (i=0; i<nrows; i++) { isort[i] = ihlp2[i]; }
277    }
278 
279    ierr = PetscFree(ialloc);CHKERRQ(ierr);
280    PetscFunctionReturn(0);
281 }
282 
283 
284 
285 /*
286   spbas_compress_pattern:
287      calculate a compressed sparseness pattern for a sparseness pattern
288      given in compressed row storage. The compressed sparseness pattern may
289      require (much) less memory.
290 */
291 #undef __FUNCT__
292 #define __FUNCT__ "spbas_compress_pattern"
293 PetscErrorCode spbas_compress_pattern(PetscInt *irow_in, PetscInt *icol_in, PetscInt nrows, PetscInt ncols,
294 				      PetscInt col_idx_type, spbas_matrix *B,PetscReal *mem_reduction)
295 {
296    PetscInt nnz = irow_in[nrows];
297    long int mem_orig = (nrows + nnz) * sizeof(PetscInt);
298    long int mem_compressed;
299    PetscErrorCode ierr;
300    PetscInt *isort;
301    PetscInt *icols;
302    PetscInt row_nnz;
303    PetscInt *ipoint;
304    PetscTruth *used;
305    PetscInt ptr;
306    PetscInt i,j;
307 
308    const PetscTruth no_values = PETSC_FALSE;
309 
310    PetscFunctionBegin;
311 
312    /* Allocate the structure of the new matrix */
313    B->nrows = nrows;
314    B->ncols = ncols;
315    B->nnz   = nnz;
316    B->col_idx_type= col_idx_type;
317    B->block_data = PETSC_TRUE;
318    ierr = spbas_allocate_pattern( B, no_values);CHKERRQ(ierr);
319 
320    /* When using an offset array, set it */
321    if (col_idx_type==SPBAS_OFFSET_ARRAY)
322    {
323       for (i=0; i<nrows; i++) {B->icol0[i] = icol_in[irow_in[i]];}
324    }
325 
326    /* Allocate the ordering for the rows */
327    ierr = PetscMalloc(nrows*sizeof(PetscInt),&isort);CHKERRQ(ierr);
328    ierr = PetscMalloc(nrows*sizeof(PetscInt),&ipoint);CHKERRQ(ierr);
329    ierr = PetscMalloc(nrows*sizeof(PetscTruth),&used);CHKERRQ(ierr);
330 
331    /*  Initialize the sorting */
332    memset((void*) used, 0, nrows*sizeof(PetscTruth));
333    for (i = 0; i<nrows; i++)
334    {
335       B->row_nnz[i] = irow_in[i+1]-irow_in[i];
336       isort[i] = i;
337       ipoint[i]= i;
338    }
339 
340    /* Sort the rows so that identical columns will be next to each other */
341    ierr = spbas_mergesort_icols( nrows, irow_in, icol_in, col_idx_type, isort);CHKERRQ(ierr);
342    ierr = PetscInfo(PETSC_NULL,"Rows have been sorted for patterns\n");CHKERRQ(ierr);
343 
344    /* Replace identical rows with the first one in the list */
345    for (i=1; i<nrows; i++)
346    {
347       if (spbas_row_order_icol(isort[i-1], isort[i],
348                       irow_in, icol_in, col_idx_type) == 0)
349       {
350          ipoint[isort[i]] = ipoint[isort[i-1]];
351       }
352    }
353 
354    /* Collect the rows which are used*/
355    for (i=0; i<nrows; i++) {used[ipoint[i]] = PETSC_TRUE;}
356 
357    /* Calculate needed memory */
358    B->n_alloc_icol = 0;
359    for (i=0; i<nrows; i++)
360    {
361       if (used[i]) {B->n_alloc_icol += B->row_nnz[i];}
362    }
363 
364    ierr = PetscMalloc(B->n_alloc_icol*sizeof(PetscInt),&B->alloc_icol);CHKERRQ(ierr);
365 
366    /* Fill in the diagonal offsets for the rows which store their own data */
367    ptr = 0;
368    for (i=0; i<B->nrows; i++)
369    {
370       if (used[i])
371       {
372          B->icols[i] = &B->alloc_icol[ptr];
373          icols = &icol_in[irow_in[i]];
374          row_nnz = B->row_nnz[i];
375          if (col_idx_type == SPBAS_COLUMN_NUMBERS)
376          {
377             for (j=0; j<row_nnz; j++)
378             {
379                B->icols[i][j] = icols[j];
380             }
381          }
382          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
383          {
384             for (j=0; j<row_nnz; j++)
385             {
386                B->icols[i][j] = icols[j]-i;
387             }
388          }
389          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
390          {
391             for (j=0; j<row_nnz; j++)
392             {
393                B->icols[i][j] = icols[j]-icols[0];
394             }
395          }
396          ptr += B->row_nnz[i];
397       }
398    }
399 
400    /* Point to the right places for all data */
401    for (i=0; i<nrows; i++)
402    {
403       B->icols[i] = B->icols[ipoint[i]];
404    }
405    ierr = PetscInfo(PETSC_NULL,"Row patterns have been compressed\n");CHKERRQ(ierr);
406    ierr = PetscInfo1(PETSC_NULL,"         (%G nonzeros per row)\n",  (PetscReal) nnz / (PetscReal) nrows);CHKERRQ(ierr);
407 
408    ierr=PetscFree(isort);CHKERRQ(ierr);
409    ierr=PetscFree(used);CHKERRQ(ierr);
410    ierr=PetscFree(ipoint);CHKERRQ(ierr);
411 
412    mem_compressed = spbas_memory_requirement( *B );
413    *mem_reduction = 100.0 * (PetscReal)(mem_orig-mem_compressed)/
414                             (PetscReal) mem_orig;
415    PetscFunctionReturn(0);
416 }
417 
418 /*
419    spbas_incomplete_cholesky
420        Incomplete Cholesky decomposition
421 */
422 #include "spbas_cholesky.h"
423 
424 /*
425   spbas_delete : de-allocate the arrays owned by this matrix
426 */
427 #undef __FUNCT__
428 #define __FUNCT__ "spbas_delete"
429 PetscErrorCode spbas_delete(spbas_matrix matrix)
430 {
431    PetscInt i;
432    PetscErrorCode ierr;
433    PetscFunctionBegin;
434    if (matrix.block_data)
435    {
436       ierr=PetscFree(matrix.alloc_icol);CHKERRQ(ierr)
437       if (matrix.values){ierr=PetscFree(matrix.alloc_val);CHKERRQ(ierr);}
438    }
439    else
440    {
441       for (i=0; i<matrix.nrows; i++)
442 	{ ierr=PetscFree(matrix.icols[i]);CHKERRQ(ierr);}
443       ierr = PetscFree(matrix.icols);CHKERRQ(ierr);
444       if (matrix.values)
445       {
446          for (i=0; i<matrix.nrows; i++)
447           { ierr=PetscFree(matrix.values[i]);CHKERRQ(ierr);}
448       }
449    }
450 
451    ierr=PetscFree(matrix.row_nnz);CHKERRQ(ierr);
452    ierr=PetscFree(matrix.icols);CHKERRQ(ierr);
453    if (matrix.col_idx_type == SPBAS_OFFSET_ARRAY)
454       {ierr=PetscFree(matrix.icol0);CHKERRQ(ierr);}
455    if (matrix.values)
456       {ierr=PetscFree(matrix.values);CHKERRQ(ierr);}
457 
458    PetscFunctionReturn(0);
459 }
460 
461 /*
462 spbas_matrix_to_crs:
463    Convert an spbas_matrix to compessed row storage
464 */
465 #undef __FUNCT__
466 #define __FUNCT__ "spbas_matrix_to_crs"
467 PetscErrorCode spbas_matrix_to_crs(spbas_matrix matrix_A,MatScalar **val_out, PetscInt **irow_out, PetscInt **icol_out)
468 {
469    PetscInt nrows = matrix_A.nrows;
470    PetscInt nnz   = matrix_A.nnz;
471    PetscInt i,j,r_nnz,i0;
472    PetscInt *irow;
473    PetscInt *icol;
474    PetscInt *icol_A;
475    MatScalar *val;
476    PetscScalar *val_A;
477    PetscInt col_idx_type = matrix_A.col_idx_type;
478    PetscTruth do_values = matrix_A.values ? PETSC_TRUE : PETSC_FALSE;
479    PetscErrorCode ierr;
480 
481    PetscFunctionBegin;
482    ierr = PetscMalloc( sizeof(PetscInt) * (nrows+1), &irow);CHKERRQ(ierr);
483    ierr = PetscMalloc( sizeof(PetscInt) * nnz, &icol);CHKERRQ(ierr);
484    *icol_out = icol; *irow_out=irow;
485    if (do_values)
486    {
487       ierr = PetscMalloc( sizeof(MatScalar) * nnz, &val);CHKERRQ(ierr);
488       *val_out = val; *icol_out = icol; *irow_out=irow;
489    }
490 
491    irow[0]=0;
492    for (i=0; i<nrows; i++)
493    {
494       r_nnz = matrix_A.row_nnz[i];
495       i0 = irow[i];
496       irow[i+1] = i0 + r_nnz;
497       icol_A = matrix_A.icols[i];
498 
499       if (do_values)
500       {
501           val_A = matrix_A.values[i];
502           for (j=0; j<r_nnz; j++)
503           {
504              icol[i0+j] = icol_A[j];
505              val[i0+j]  = val_A[j];
506           }
507       }
508       else
509       {
510           for (j=0; j<r_nnz; j++) { icol[i0+j] = icol_A[j]; }
511       }
512 
513       if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
514       {
515          for (j=0; j<r_nnz; j++) { icol[i0+j] += i; }
516       }
517       else if (col_idx_type == SPBAS_OFFSET_ARRAY)
518       {
519          i0 = matrix_A.icol0[i];
520          for (j=0; j<r_nnz; j++) { icol[i0+j] += i0; }
521       }
522 
523    }
524 
525    PetscFunctionReturn(0);
526 }
527 
528 
529 /*
530     spbas_transpose
531        return the transpose of a matrix
532 */
533 #undef __FUNCT__
534 #define __FUNCT__ "spbas_transpose"
535 PetscErrorCode spbas_transpose( spbas_matrix in_matrix, spbas_matrix * result)
536 {
537    PetscInt col_idx_type = in_matrix.col_idx_type;
538    PetscInt nnz   = in_matrix.nnz;
539    PetscInt ncols = in_matrix.nrows;
540    PetscInt nrows = in_matrix.ncols;
541    PetscInt i,j,k;
542    PetscInt r_nnz;
543    PetscInt *irow;
544    PetscInt icol0;
545    PetscScalar * val;
546    PetscErrorCode ierr;
547 
548    PetscFunctionBegin;
549 
550    /* Copy input values */
551    result->nrows        = nrows;
552    result->ncols        = ncols;
553    result->nnz          = nnz;
554    result->col_idx_type = SPBAS_COLUMN_NUMBERS;
555    result->block_data   = PETSC_TRUE;
556 
557    /* Allocate sparseness pattern */
558    ierr =  spbas_allocate_pattern(result, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
559 
560    /*  Count the number of nonzeros in each row */
561    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
562 
563    for (i=0; i<ncols; i++)
564    {
565       r_nnz = in_matrix.row_nnz[i];
566       irow  = in_matrix.icols[i];
567       if (col_idx_type == SPBAS_COLUMN_NUMBERS)
568       {
569          for (j=0; j<r_nnz; j++) { result->row_nnz[irow[j]]++; }
570       }
571       else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS)
572       {
573          for (j=0; j<r_nnz; j++) { result->row_nnz[i+irow[j]]++; }
574       }
575       else if (col_idx_type == SPBAS_OFFSET_ARRAY)
576       {
577          icol0=in_matrix.icol0[i];
578          for (j=0; j<r_nnz; j++) { result->row_nnz[icol0+irow[j]]++; }
579       }
580    }
581 
582    /* Set the pointers to the data */
583    ierr = spbas_allocate_data(result);CHKERRQ(ierr);
584 
585    /* Reset the number of nonzeros in each row */
586    for (i = 0; i<nrows; i++) { result->row_nnz[i] = 0; }
587 
588    /* Fill the data arrays */
589    if (in_matrix.values)
590    {
591       for (i=0; i<ncols; i++)
592       {
593          r_nnz = in_matrix.row_nnz[i];
594          irow  = in_matrix.icols[i];
595          val   = in_matrix.values[i];
596 
597          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
598          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
599          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
600                  {icol0=in_matrix.icol0[i];}
601          for (j=0; j<r_nnz; j++)
602          {
603             k = icol0 + irow[j];
604             result->icols[k][result->row_nnz[k]]  = i;
605             result->values[k][result->row_nnz[k]] = val[j];
606             result->row_nnz[k]++;
607          }
608       }
609    }
610    else
611    {
612       for (i=0; i<ncols; i++)
613       {
614          r_nnz = in_matrix.row_nnz[i];
615          irow  = in_matrix.icols[i];
616 
617          if      (col_idx_type == SPBAS_COLUMN_NUMBERS)   {icol0=0;}
618          else if (col_idx_type == SPBAS_DIAGONAL_OFFSETS) {icol0=i;}
619          else if (col_idx_type == SPBAS_OFFSET_ARRAY)
620                  {icol0=in_matrix.icol0[i];}
621 
622          for (j=0; j<r_nnz; j++)
623          {
624             k = icol0 + irow[j];
625             result->icols[k][result->row_nnz[k]]  = i;
626             result->row_nnz[k]++;
627          }
628       }
629    }
630    PetscFunctionReturn(0);
631 }
632 
633 /*
634    spbas_mergesort
635 
636       mergesort for an array of intergers and an array of associated
637       reals
638 
639       on output, icol[0..nnz-1] is increasing;
640                   val[0..nnz-1] has undergone the same permutation as icol
641 
642       NB: val may be PETSC_NULL: in that case, only the integers are sorted
643 
644 */
645 #undef __FUNCT__
646 #define __FUNCT__ "spbas_mergesort"
647 PetscErrorCode spbas_mergesort(PetscInt nnz, PetscInt *icol, PetscScalar *val)
648 {
649   PetscInt istep;       /* Chunk-sizes of already sorted parts of arrays */
650   PetscInt i, i1, i2;   /* Loop counters for (partly) sorted arrays */
651   PetscInt istart, i1end, i2end; /* start of newly sorted array part, end of both parts */
652   PetscInt *ialloc;     /* Allocated arrays */
653    PetscScalar *valloc=PETSC_NULL;
654    PetscInt *iswap;      /* auxiliary pointers for swapping */
655    PetscScalar *vswap;
656    PetscInt *ihlp1;      /* Pointers to new version of arrays, */
657    PetscScalar *vhlp1=PETSC_NULL;  /* (arrays under construction) */
658    PetscInt *ihlp2;      /* Pointers to previous version of arrays, */
659    PetscScalar *vhlp2=PETSC_NULL;
660    PetscErrorCode ierr;
661 
662    ierr = PetscMalloc(nnz*sizeof(PetscInt),&ialloc);CHKERRQ(ierr);
663    ihlp1 = ialloc;
664    ihlp2 = icol;
665 
666    if (val)
667    {
668       ierr = PetscMalloc(nnz*sizeof(PetscScalar),&valloc);CHKERRQ(ierr);
669       vhlp1 = valloc;
670       vhlp2 = val;
671    }
672 
673 
674    /* Sorted array chunks are first 1 long, and increase until they are the complete array */
675    for (istep=1; istep<nnz; istep*=2)
676    {
677      /*
678        Combine sorted parts
679             istart:istart+istep-1 and istart+istep-1:istart+2*istep-1
680        of ihlp2 and vhlp2
681 
682        into one sorted part
683             istart:istart+2*istep-1
684        of ihlp1 and vhlp1
685      */
686       for (istart=0; istart<nnz; istart+=2*istep)
687       {
688 
689 	/* Set counters and bound array part endings */
690          i1=istart;        i1end = i1+istep;  if (i1end>nnz) {i1end=nnz;}
691          i2=istart+istep;  i2end = i2+istep;  if (i2end>nnz) {i2end=nnz;}
692 
693          /* Merge the two array parts */
694          if (val)
695          {
696             for (i=istart; i<i2end; i++)
697             {
698                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2])
699                {
700                    ihlp1[i] = ihlp2[i1];
701                    vhlp1[i] = vhlp2[i1];
702                    i1++;
703                }
704                else if (i2<i2end )
705                {
706                    ihlp1[i] = ihlp2[i2];
707                    vhlp1[i] = vhlp2[i2];
708                    i2++;
709                }
710                else
711                {
712                    ihlp1[i] = ihlp2[i1];
713                    vhlp1[i] = vhlp2[i1];
714                    i1++;
715                }
716             }
717          }
718          else
719          {
720             for (i=istart; i<i2end; i++)
721             {
722                if (i1<i1end && i2<i2end && ihlp2[i1] < ihlp2[i2])
723                {
724                    ihlp1[i] = ihlp2[i1];
725                    i1++;
726                }
727                else if (i2<i2end )
728                {
729                    ihlp1[i] = ihlp2[i2];
730                    i2++;
731                }
732                else
733                {
734                    ihlp1[i] = ihlp2[i1];
735                    i1++;
736                }
737             }
738          }
739       }
740 
741       /* Swap the two array sets */
742       iswap = ihlp2; ihlp2 = ihlp1; ihlp1 = iswap;
743       vswap = vhlp2; vhlp2 = vhlp1; vhlp1 = vswap;
744    }
745 
746    /* Copy one more time in case the sorted arrays are the temporary ones */
747    if (ihlp2 != icol)
748    {
749       for (i=0; i<nnz; i++) { icol[i] = ihlp2[i]; }
750       if (val)
751       {
752          for (i=0; i<nnz; i++) { val[i] = vhlp2[i]; }
753       }
754    }
755 
756    ierr = PetscFree(ialloc);CHKERRQ(ierr);
757    if(val){ierr = PetscFree(valloc);CHKERRQ(ierr);}
758    PetscFunctionReturn(0);
759 }
760 
761 /*
762   spbas_apply_reordering_rows:
763     apply the given reordering to the rows:  matrix_A = matrix_A(perm,:);
764 */
765 #undef __FUNCT__
766 #define __FUNCT__ "spbas_apply_reordering_rows"
767 PetscErrorCode spbas_apply_reordering_rows(spbas_matrix *matrix_A, const PetscInt *permutation)
768 {
769    PetscInt i,j,ip;
770    PetscInt nrows=matrix_A->nrows;
771    PetscInt * row_nnz;
772    PetscInt **icols;
773    PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
774    PetscScalar **vals=PETSC_NULL;
775    PetscErrorCode ierr;
776 
777    PetscFunctionBegin;
778    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS)
779    {
780       SETERRQ( PETSC_ERR_SUP_SYS,
781                "must have diagonal offsets in pattern\n");
782    }
783 
784    if (do_values)
785    {
786      ierr = PetscMalloc( sizeof(PetscScalar*)*nrows, &vals);CHKERRQ(ierr);
787    }
788    ierr = PetscMalloc( sizeof(PetscInt)*nrows, &row_nnz);CHKERRQ(ierr);
789    ierr = PetscMalloc( sizeof(PetscInt*)*nrows, &icols);CHKERRQ(ierr);
790 
791    for (i=0; i<nrows;i++)
792    {
793       ip = permutation[i];
794       if (do_values) {vals[i]    = matrix_A->values[ip];}
795       icols[i]   = matrix_A->icols[ip];
796       row_nnz[i] = matrix_A->row_nnz[ip];
797       for (j=0; j<row_nnz[i]; j++) { icols[i][j] += ip-i; }
798    }
799 
800    if (do_values){ ierr = PetscFree(matrix_A->values);CHKERRQ(ierr);}
801    ierr = PetscFree(matrix_A->icols);CHKERRQ(ierr);
802    ierr = PetscFree(matrix_A->row_nnz);CHKERRQ(ierr);
803 
804    if (do_values) { matrix_A->values  = vals; }
805    matrix_A->icols   = icols;
806    matrix_A->row_nnz = row_nnz;
807 
808    PetscFunctionReturn(0);
809 }
810 
811 
812 /*
813   spbas_apply_reordering_cols:
814     apply the given reordering to the columns:  matrix_A(:,perm) = matrix_A;
815 */
816 #undef __FUNCT__
817 #define __FUNCT__ "spbas_apply_reordering_cols"
818 PetscErrorCode spbas_apply_reordering_cols( spbas_matrix *matrix_A,const PetscInt *permutation)
819 {
820    PetscInt i,j;
821    PetscInt nrows=matrix_A->nrows;
822    PetscInt row_nnz;
823    PetscInt *icols;
824    PetscTruth do_values = matrix_A->values ? PETSC_TRUE : PETSC_FALSE;
825    PetscScalar *vals=PETSC_NULL;
826 
827    PetscErrorCode ierr;
828 
829    PetscFunctionBegin;
830 
831    if (matrix_A->col_idx_type != SPBAS_DIAGONAL_OFFSETS)
832    {
833       SETERRQ( PETSC_ERR_SUP_SYS,
834                "must have diagonal offsets in pattern\n");
835    }
836 
837    for (i=0; i<nrows;i++)
838    {
839       icols   = matrix_A->icols[i];
840       row_nnz = matrix_A->row_nnz[i];
841       if (do_values) { vals = matrix_A->values[i]; }
842 
843       for (j=0; j<row_nnz; j++)
844       {
845          icols[j] = permutation[i+icols[j]]-i;
846       }
847       ierr = spbas_mergesort(row_nnz, icols, vals);CHKERRQ(ierr);
848    }
849 
850    PetscFunctionReturn(0);
851 }
852 
853 /*
854   spbas_apply_reordering:
855     apply the given reordering:  matrix_A(perm,perm) = matrix_A;
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "spbas_apply_reordering"
859 PetscErrorCode spbas_apply_reordering( spbas_matrix *matrix_A, const PetscInt *permutation, const PetscInt * inv_perm)
860 {
861    PetscErrorCode ierr;
862    PetscFunctionBegin;
863    ierr = spbas_apply_reordering_rows( matrix_A, inv_perm);CHKERRQ(ierr);
864    ierr = spbas_apply_reordering_cols( matrix_A, permutation);CHKERRQ(ierr);
865    PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "spbas_pattern_only"
870 PetscErrorCode spbas_pattern_only(PetscInt nrows, PetscInt ncols, PetscInt *ai, PetscInt *aj, spbas_matrix * result)
871 {
872    spbas_matrix retval;
873    PetscInt i, j, i0, r_nnz;
874    PetscErrorCode ierr;
875 
876    PetscFunctionBegin;
877 
878    /* Copy input values */
879    retval.nrows = nrows;
880    retval.ncols = ncols;
881    retval.nnz   = ai[nrows];
882 
883    retval.block_data   = PETSC_TRUE;
884    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
885 
886    /* Allocate output matrix */
887    ierr =  spbas_allocate_pattern(&retval, PETSC_FALSE);CHKERRQ(ierr);
888    for (i=0; i<nrows; i++)  {retval.row_nnz[i] = ai[i+1]-ai[i];}
889    ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
890    /* Copy the structure */
891    for (i = 0; i<retval.nrows; i++)
892    {
893       i0 = ai[i];
894       r_nnz = ai[i+1]-i0;
895 
896       for (j=0; j<r_nnz; j++)
897       {
898          retval.icols[i][j]   = aj[i0+j]-i;
899       }
900    }
901 
902    *result = retval;
903    PetscFunctionReturn(0);
904 }
905 
906 
907 /*
908    spbas_mark_row_power:
909       Mark the columns in row 'row' which are nonzero in
910           matrix^2log(marker).
911 */
912 #undef __FUNCT__
913 #define __FUNCT__ "spbas_mark_row_power"
914 PetscErrorCode spbas_mark_row_power(
915 				    PetscInt *iwork,             /* marker-vector */
916 				    PetscInt row,                /* row for which the columns are marked */
917 				    spbas_matrix * in_matrix, /* matrix for which the power is being  calculated */
918 				    PetscInt marker,             /* marker-value: 2^power */
919 				    PetscInt minmrk,            /* lower bound for marked points */
920 				    PetscInt maxmrk)            /* upper bound for marked points */
921 {
922    PetscErrorCode ierr;
923    PetscInt i,j, nnz;
924 
925    PetscFunctionBegin;
926    nnz = in_matrix->row_nnz[row];
927 
928    /* For higher powers, call this function recursively */
929    if (marker>1)
930    {
931       for (i=0; i<nnz;i++)
932       {
933          j = row + in_matrix->icols[row][i];
934          if (minmrk<=j && j<maxmrk && iwork[j] < marker )
935          {
936             ierr =  spbas_mark_row_power( iwork, row + in_matrix->icols[row][i],in_matrix, marker/2,minmrk,maxmrk);CHKERRQ(ierr);
937             iwork[j] |= marker;
938          }
939       }
940    }
941    else
942    {
943      /*  Mark the columns reached */
944       for (i=0; i<nnz;i++)
945       {
946          j = row + in_matrix->icols[row][i];
947          if (minmrk<=j && j<maxmrk) { iwork[j] |= 1; }
948       }
949    }
950 
951    PetscFunctionReturn(0);
952 }
953 
954 
955 /*
956    spbas_power
957       Calculate sparseness patterns for incomplete Cholesky decompositions
958       of a given order: (almost) all nonzeros of the matrix^(order+1) which
959       are inside the band width are found and stored in the output sparseness
960       pattern.
961 */
962 #undef __FUNCT__
963 #define __FUNCT__ "spbas_power"
964 PetscErrorCode spbas_power (spbas_matrix in_matrix,PetscInt power, spbas_matrix * result)
965 {
966    spbas_matrix retval;
967    PetscInt nrows = in_matrix.nrows;
968    PetscInt ncols = in_matrix.ncols;
969    PetscInt i, j, kend;
970    PetscInt nnz, inz;
971    PetscInt *iwork;
972    PetscInt marker;
973    PetscInt maxmrk=0;
974    PetscErrorCode ierr;
975 
976    PetscFunctionBegin;
977    if (in_matrix.col_idx_type != SPBAS_DIAGONAL_OFFSETS)
978    {
979       SETERRQ( PETSC_ERR_SUP_SYS,
980                "must have diagonal offsets in pattern\n");
981    }
982 
983    if (ncols != nrows)
984    {
985       SETERRQ( PETSC_ERR_ARG_INCOMP,
986                "Dimension error\n");
987    }
988 
989    if (in_matrix.values)
990    {
991       SETERRQ( PETSC_ERR_ARG_INCOMP,
992                "Input array must be sparseness pattern (no values)");
993    }
994 
995    if (power<=0)
996    {
997       SETERRQ( PETSC_ERR_SUP_SYS,
998                "Power must be 1 or up");
999    }
1000 
1001    /* Copy input values*/
1002    retval.nrows = ncols;
1003    retval.ncols = nrows;
1004    retval.nnz   = 0;
1005    retval.col_idx_type = SPBAS_DIAGONAL_OFFSETS;
1006    retval.block_data = PETSC_FALSE;
1007 
1008    /* Allocate sparseness pattern */
1009    ierr =  spbas_allocate_pattern(&retval, in_matrix.values ? PETSC_TRUE : PETSC_FALSE);CHKERRQ(ierr);
1010 
1011    /* Allocate marker array */
1012    ierr = PetscMalloc(nrows * sizeof(PetscInt), &iwork);CHKERRQ(ierr);
1013 
1014    /* Erase the pattern for this row */
1015    memset( (void *) iwork, 0, retval.nrows*sizeof(PetscInt));
1016 
1017    /* Calculate marker values */
1018    marker = 1; for (i=1; i<power; i++) {marker*=2;}
1019 
1020    for (i=0; i<nrows; i++)
1021    {
1022      /* Calculate the pattern for each row */
1023 
1024       nnz    = in_matrix.row_nnz[i];
1025       kend   = i+in_matrix.icols[i][nnz-1];
1026       if (maxmrk<=kend) {maxmrk=kend+1;}
1027       ierr =  spbas_mark_row_power( iwork, i, &in_matrix, marker, i, maxmrk);CHKERRQ(ierr);
1028 
1029       /* Count the columns*/
1030       nnz = 0;
1031       for (j=i; j<maxmrk; j++) {nnz+= (iwork[j]!=0);}
1032 
1033       /* Allocate the column indices */
1034       retval.row_nnz[i] = nnz;
1035       ierr = PetscMalloc(nnz*sizeof(PetscInt),&retval.icols[i]);CHKERRQ(ierr);
1036 
1037       /* Administrate the column indices */
1038       inz = 0;
1039       for (j=i; j<maxmrk; j++)
1040       {
1041          if (iwork[j])
1042          {
1043             retval.icols[i][inz] = j-i;
1044             inz++;
1045             iwork[j]=0;
1046          }
1047       }
1048       retval.nnz += nnz;
1049    };
1050 
1051    ierr = PetscFree(iwork);CHKERRQ(ierr);
1052 
1053    *result = retval;
1054    PetscFunctionReturn(0);
1055 }
1056 
1057 
1058 
1059 /*
1060    spbas_keep_upper:
1061       remove the lower part of the matrix: keep the upper part
1062 */
1063 #undef __FUNCT__
1064 #define __FUNCT__ "spbas_keep_upper"
1065 PetscErrorCode spbas_keep_upper( spbas_matrix * inout_matrix)
1066 {
1067    PetscInt i, j;
1068    PetscInt jstart;
1069 
1070    PetscFunctionBegin;
1071    if (inout_matrix->block_data)
1072    {
1073       SETERRQ( PETSC_ERR_SUP_SYS,
1074                "Not yet for block data matrices\n");
1075    }
1076 
1077    for (i=0; i<inout_matrix->nrows; i++)
1078    {
1079        for (jstart=0;
1080             (jstart<inout_matrix->row_nnz[i]) &&
1081             (inout_matrix->icols[i][jstart]<0); jstart++){}
1082 
1083        if (jstart>0)
1084        {
1085           for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++)
1086           {
1087              inout_matrix->icols[i][j] =
1088                  inout_matrix->icols[i][j+jstart];
1089           }
1090 
1091           if (inout_matrix->values)
1092           {
1093              for (j=0; j<inout_matrix->row_nnz[i]-jstart; j++)
1094              {
1095                 inout_matrix->values[i][j] =
1096                     inout_matrix->values[i][j+jstart];
1097              }
1098           }
1099 
1100           inout_matrix->row_nnz[i] -= jstart;
1101 
1102           inout_matrix->icols[i] = (PetscInt*) realloc(
1103               (void*) inout_matrix->icols[i],
1104               inout_matrix->row_nnz[i]*sizeof(PetscInt));
1105 
1106           if (inout_matrix->values)
1107           {
1108              inout_matrix->values[i] = (PetscScalar*) realloc(
1109                  (void*) inout_matrix->values[i],
1110                  inout_matrix->row_nnz[i]*sizeof(PetscScalar));
1111           }
1112           inout_matrix->nnz -= jstart;
1113        }
1114    }
1115    PetscFunctionReturn(0);
1116 }
1117 
1118