xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 6abefa4f446b2ea91915da4e25ea0defa83db48e)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 #undef __FUNCT__
9 #define __FUNCT__ "spbas_cholesky_row_alloc"
10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11 {
12    PetscFunctionBegin;
13    retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14    retval.values[k] = &retval.alloc_val[*n_alloc_used];
15    *n_alloc_used   += r_nnz;
16    if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
17    PetscFunctionReturn(0);
18 }
19 
20 
21 /*
22   spbas_cholesky_garbage_collect:
23      move the rows which have been calculated so far, as well as
24      those currently under construction, to the front of the
25      array, while putting them in the proper order.
26      When it seems necessary, enlarge the current arrays.
27 
28      NB: re-allocation is being simulated using
29          PetscMalloc, memcpy, PetscFree, because
30          PetscRealloc does not seem to exist.
31 
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "spbas_cholesky_garbage_collect"
35 PetscErrorCode spbas_cholesky_garbage_collect(
36       spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
37                                    Only the storage, not the contents of this matrix
38                                    is changed in this function */
39       PetscInt     i_row,             /* I  : Number of rows for which the final contents are
40 				     known */
41       PetscInt     *n_row_alloc_ok,   /* I/O: Number of rows which are already in their final
42 				     places in the arrays: they need not be moved any more */
43       PetscInt     *n_alloc_used,     /* I/O:  */
44       PetscInt     *max_row_nnz       /*I  : Over-estimate of the number of nonzeros needed to
45 				    store each row */
46      )
47 {
48 
49 
50   /* PSEUDO-CODE:
51     1. Choose the appropriate size for the arrays
52     2. Rescue the arrays which would be lost during garbage collection
53     3. Reallocate and correct administration
54     4. Move all arrays so that they are in proper order */
55 
56    PetscInt        i,j;
57    PetscInt        nrows = result->nrows;
58    PetscInt        n_alloc_ok=0;
59    PetscInt        n_alloc_ok_max=0;
60    PetscErrorCode  ierr;
61    PetscInt        need_already= 0;
62    PetscInt        n_rows_ahead=0;
63    PetscInt        max_need_extra= 0;
64    PetscInt        n_alloc_max, n_alloc_est, n_alloc;
65    PetscInt        n_alloc_now = result->n_alloc_icol;
66    PetscInt        *alloc_icol_old = result->alloc_icol;
67    PetscScalar     *alloc_val_old  = result->alloc_val;
68    PetscInt        *icol_rescue;
69    PetscScalar     *val_rescue;
70    PetscInt        n_rescue;
71    PetscInt        n_row_rescue;
72    PetscInt        i_here, i_last, n_copy;
73    const PetscReal xtra_perc = 20;
74 
75    PetscFunctionBegin;
76 
77    /*********************************************************
78     1. Choose appropriate array size
79     Count number of rows and memory usage which is already final */
80    for (i=0;i<i_row; i++)  {
81       n_alloc_ok += result->row_nnz[i];
82       n_alloc_ok_max += max_row_nnz[i];
83    }
84 
85    /* Count currently needed memory usage and future memory requirements
86       (max, predicted)*/
87    for (i=i_row; i<nrows; i++) {
88      if (!result->row_nnz[i]) {
89          max_need_extra  += max_row_nnz[i];
90       } else {
91          need_already    += max_row_nnz[i];
92          n_rows_ahead++;
93       }
94    }
95 
96    /* Make maximal and realistic memory requirement estimates */
97    n_alloc_max = n_alloc_ok + need_already + max_need_extra;
98    n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
99 
100    /* Choose array sizes */
101    if (n_alloc_max == n_alloc_est)  { n_alloc = n_alloc_max; }
102    else if (n_alloc_now >= n_alloc_est)  { n_alloc = n_alloc_now; }
103    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))  { n_alloc  = n_alloc_max;  }
104    else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
105 
106    /* If new estimate is less than what we already have,
107       don't reallocate, just garbage-collect */
108    if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
109       n_alloc = result->n_alloc_icol;
110    }
111 
112    /* Motivate dimension choice */
113    ierr = PetscInfo1(PETSC_NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
114    if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); }
115    else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); }
116    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); }
117    else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); }
118 
119 
120    /**********************************************************
121     2. Rescue arrays which would be lost
122     Count how many rows and nonzeros will have to be rescued
123     when moving all arrays in place */
124    n_row_rescue = 0; n_rescue = 0;
125    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
126    else  {
127       i = *n_row_alloc_ok - 1;
128       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
129    }
130 
131    for (i=*n_row_alloc_ok; i<nrows; i++) {
132       i_here = result->icols[i]-result->alloc_icol;
133       i_last = i_here + result->row_nnz[i];
134       if (result->row_nnz[i]>0) {
135          if (*n_alloc_used > i_here || i_last > n_alloc) {
136             n_rescue += result->row_nnz[i];
137             n_row_rescue++;
138          }
139 
140          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
141          else         {  *n_alloc_used += max_row_nnz[i];}
142       }
143    }
144 
145    /* Allocate rescue arrays */
146    ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr);
147    ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr);
148 
149    /* Rescue the arrays which need rescuing */
150    n_row_rescue = 0; n_rescue = 0;
151    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
152    else {
153       i = *n_row_alloc_ok - 1;
154       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
155    }
156 
157    for (i=*n_row_alloc_ok; i<nrows; i++) {
158       i_here = result->icols[i]-result->alloc_icol;
159       i_last = i_here + result->row_nnz[i];
160       if (result->row_nnz[i]>0) {
161          if (*n_alloc_used > i_here || i_last > n_alloc) {
162             ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
163             ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
164             n_rescue += result->row_nnz[i];
165             n_row_rescue++;
166          }
167 
168          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
169          else         {  *n_alloc_used += max_row_nnz[i];}
170       }
171    }
172 
173    /**********************************************************
174     3. Reallocate and correct administration */
175 
176    if (n_alloc != result->n_alloc_icol)  {
177       n_copy = PetscMin(n_alloc,result->n_alloc_icol);
178 
179       /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
180 
181 	 Allocate new icol-data, copy old contents */
182       ierr = PetscMalloc(  n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr);
183       ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr);
184 
185       /* Update administration, Reset pointers to new arrays  */
186       result->n_alloc_icol = n_alloc;
187       for (i=0; i<nrows; i++) {
188           result->icols[i] =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
189           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
190       }
191 
192       /* Delete old array */
193       ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
194 
195       /* Allocate new value-data, copy old contents */
196       ierr = PetscMalloc(  n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
197       ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr);
198 
199       /* Update administration, Reset pointers to new arrays  */
200       result->n_alloc_val  = n_alloc;
201       for (i=0; i<nrows; i++) {
202           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
203       }
204 
205       /* Delete old array */
206       ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
207    }
208 
209 
210    /*********************************************************
211     4. Copy all the arrays to their proper places */
212    n_row_rescue = 0; n_rescue = 0;
213    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
214    else {
215       i = *n_row_alloc_ok - 1;
216       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
217    }
218 
219    for (i=*n_row_alloc_ok; i<nrows; i++) {
220       i_here = result->icols[i]-result->alloc_icol;
221       i_last = i_here + result->row_nnz[i];
222 
223       result->icols[i] = result->alloc_icol + *n_alloc_used;
224       result->values[i]= result->alloc_val  + *n_alloc_used;
225 
226       if (result->row_nnz[i]>0) {
227          if (*n_alloc_used > i_here || i_last > n_alloc)  {
228             ierr = PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
229             ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
230             n_rescue += result->row_nnz[i];
231             n_row_rescue++;
232          } else {
233             for (j=0; j<result->row_nnz[i]; j++) {
234                 result->icols[i][j]   = result->alloc_icol[i_here+j];
235                 result->values[i][j]  = result->alloc_val[ i_here+j];
236             }
237          }
238          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
239          else         {  *n_alloc_used += max_row_nnz[i];}
240       }
241    }
242 
243    /* Delete the rescue arrays */
244    ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
245    ierr = PetscFree(val_rescue);CHKERRQ(ierr);
246    *n_row_alloc_ok = i_row;
247    PetscFunctionReturn(0);
248 }
249 
250 /*
251   spbas_incomplete_cholesky:
252      incomplete Cholesky decomposition of a square, symmetric,
253      positive definite matrix.
254 
255      In case negative diagonals are encountered, function returns
256      NEGATIVE_DIAGONAL. When this happens, call this function again
257      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
258      droptol
259 */
260 #undef __FUNCT__
261 #define __FUNCT__ "spbas_incomplete_cholesky"
262 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
263 {
264    PetscInt         jL;
265    Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data;
266    PetscInt         *ai=a->i,*aj=a->j;
267    MatScalar        *aa=a->a;
268    PetscInt         nrows, ncols;
269    PetscInt         *max_row_nnz;
270    PetscErrorCode   ierr;
271    spbas_matrix     retval;
272    PetscScalar      * diag;
273    PetscScalar      * val;
274    PetscScalar      * lvec;
275    PetscScalar      epsdiag;
276    PetscInt         i,j,k;
277    const PetscTruth do_values = PETSC_TRUE;
278    PetscInt         * r1_icol;
279    PetscScalar      *r1_val;
280    PetscInt         * r_icol;
281    PetscInt         r_nnz;
282    PetscScalar      *r_val;
283    PetscInt         * A_icol;
284    PetscInt         A_nnz;
285    PetscScalar      *A_val;
286    PetscInt         * p_icol;
287    PetscInt         p_nnz;
288    PetscInt         n_row_alloc_ok = 0;  /* number of rows which have been stored   correctly in the matrix */
289    PetscInt         n_alloc_used = 0;    /* part of result->icols and result->values   which is currently being used */
290 
291    PetscFunctionBegin;
292    /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
293    ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
294    ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
295    epsdiag *= epsdiag_in / nrows;
296    ierr = PetscInfo2(PETSC_NULL,"   Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr);
297 
298    if (droptol<1e-10) {droptol=1e-10;}
299 
300    if ( (nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows) ) SETERRQ( PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
301 
302    retval.nrows = nrows;
303    retval.ncols = nrows;
304    retval.nnz   = pattern.nnz/10;
305    retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
306    retval.block_data = PETSC_TRUE;
307 
308    ierr =  spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
309    ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
310    ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
311    retval.nnz   = 0;
312 
313    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr);
314    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr);
315    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr);
316    ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr);
317    if (!(diag && val && lvec && max_row_nnz))  SETERRQ( PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");
318 
319    ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
320    ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
321    ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
322 
323    /* Check correct format of sparseness pattern */
324    if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)  SETERRQ( PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
325 
326    /* Count the nonzeros on transpose of pattern */
327    for (i = 0; i<nrows; i++)  {
328       p_nnz  = pattern.row_nnz[i];
329       p_icol = pattern.icols[i];
330       for (j=0; j<p_nnz; j++)  {
331          max_row_nnz[i+p_icol[j]]++;
332       }
333    }
334 
335    /* Calculate rows of L */
336    for (i = 0; i<nrows; i++)  {
337       p_nnz  = pattern.row_nnz[i];
338       p_icol = pattern.icols[i];
339 
340       r_nnz  = retval.row_nnz[i];
341       r_icol = retval.icols[i];
342       r_val  = retval.values[i];
343       A_nnz  = ai[rip[i]+1] - ai[rip[i]];
344       A_icol = &aj[ai[rip[i]]];
345       A_val  = &aa[ai[rip[i]]];
346 
347       /* Calculate  val += A(i,i:n)'; */
348       for (j=0; j<A_nnz; j++) {
349          k = riip[A_icol[j]];
350          if (k>=i) { val[k] = A_val[j]; }
351       }
352 
353       /*  Add regularization */
354       val[i] += epsdiag;
355 
356       /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
357 	 val(i) = A(i,i) - L(0:i-1,i)' * lvec */
358       for ( j=0; j<r_nnz; j++)  {
359          k           = r_icol[j];
360          lvec[k]     = diag[k] * r_val[j];
361          val[i]     -= r_val[j] * lvec[k];
362       }
363 
364       /* Calculate the new diagonal */
365       diag[i] = val[i];
366       if (PetscRealPart(diag[i])<droptol)  {
367          ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
368          ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);
369 
370          /* Delete the whole matrix at once. */
371          ierr = spbas_delete(retval);
372          return NEGATIVE_DIAGONAL;
373       }
374 
375       /* If necessary, allocate arrays */
376       if (r_nnz==0) {
377          ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
378          if (ierr == PETSC_ERR_MEM) {
379             ierr = spbas_cholesky_garbage_collect( &retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
380             ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
381          }
382          r_icol = retval.icols[i];
383          r_val  = retval.values[i];
384       }
385 
386       /* Now, fill in */
387       r_icol[r_nnz] = i;
388       r_val [r_nnz] = 1.0;
389       r_nnz++;
390       retval.row_nnz[i]++;
391 
392       retval.nnz += r_nnz;
393 
394       /* Calculate
395          val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
396       for (j=1; j<p_nnz; j++)  {
397          k = i+p_icol[j];
398          r1_icol = retval.icols[k];
399          r1_val  = retval.values[k];
400          for (jL=0; jL<retval.row_nnz[k]; jL++)   {
401             val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
402          }
403          val[k] /= diag[i];
404 
405          if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
406 	   /* If necessary, allocate arrays */
407             if (retval.row_nnz[k]==0) {
408                ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);
409                if (ierr == PETSC_ERR_MEM) {
410                   ierr = spbas_cholesky_garbage_collect( &retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
411                   ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
412                   r_icol = retval.icols[i];
413                   r_val  = retval.values[i];
414                }
415             }
416 
417             retval.icols[k][retval.row_nnz[k]] = i;
418             retval.values[k][retval.row_nnz[k]] = val[k];
419             retval.row_nnz[k]++;
420          }
421          val[k] = 0;
422       }
423 
424       /*Erase the values used in the work arrays */
425       for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
426    }
427 
428    ierr=PetscFree(lvec);CHKERRQ(ierr);
429    ierr=PetscFree(val);CHKERRQ(ierr);
430 
431    ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
432    ierr=PetscFree(max_row_nnz);CHKERRQ(ierr);
433 
434    /* Place the inverse of the diagonals in the matrix */
435    for (i=0; i<nrows; i++) {
436       r_nnz = retval.row_nnz[i];
437       retval.values[i][r_nnz-1] = 1.0 / diag[i];
438       for (j=0; j<r_nnz-1; j++) {
439          retval.values[i][j] *= -1;
440       }
441    }
442    ierr=PetscFree(diag);CHKERRQ(ierr);
443    *matrix_L = retval;
444    PetscFunctionReturn(0);
445 }
446 
447