xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9 {
10   PetscFunctionBegin;
11   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
12   retval.values[k] = &retval.alloc_val[*n_alloc_used];
13   *n_alloc_used   += r_nnz;
14   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_FALSE);
15   PetscFunctionReturn(PETSC_TRUE);
16 }
17 
18 /*
19   spbas_cholesky_garbage_collect:
20      move the rows which have been calculated so far, as well as
21      those currently under construction, to the front of the
22      array, while putting them in the proper order.
23      When it seems necessary, enlarge the current arrays.
24 
25      NB: re-allocation is being simulated using
26          PetscMalloc, memcpy, PetscFree, because
27          PetscRealloc does not seem to exist.
28 
29 */
30 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
31                                                                                     Only the storage, not the contents of this matrix is changed in this function */
32                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
33                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
34                                                                                     places in the arrays: they need not be moved any more */
35                                               PetscInt     *n_alloc_used,   /* I/O:  */
36                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
37 {
38 /* PSEUDO-CODE:
39   1. Choose the appropriate size for the arrays
40   2. Rescue the arrays which would be lost during garbage collection
41   3. Reallocate and correct administration
42   4. Move all arrays so that they are in proper order */
43 
44   PetscInt        i,j;
45   PetscInt        nrows         = result->nrows;
46   PetscInt        n_alloc_ok    =0;
47   PetscInt        n_alloc_ok_max=0;
48   PetscErrorCode  ierr;
49   PetscInt        need_already  = 0;
50   PetscInt        n_rows_ahead  =0;
51   PetscInt        max_need_extra= 0;
52   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
53   PetscInt        n_alloc_now     = result->n_alloc_icol;
54   PetscInt        *alloc_icol_old = result->alloc_icol;
55   PetscScalar     *alloc_val_old  = result->alloc_val;
56   PetscInt        *icol_rescue;
57   PetscScalar     *val_rescue;
58   PetscInt        n_rescue;
59   PetscInt        n_row_rescue;
60   PetscInt        i_here, i_last, n_copy;
61   const PetscReal xtra_perc = 20;
62 
63   PetscFunctionBegin;
64   /*********************************************************
65   1. Choose appropriate array size
66   Count number of rows and memory usage which is already final */
67   for (i=0; i<i_row; i++)  {
68     n_alloc_ok     += result->row_nnz[i];
69     n_alloc_ok_max += max_row_nnz[i];
70   }
71 
72   /* Count currently needed memory usage and future memory requirements
73     (max, predicted)*/
74   for (i=i_row; i<nrows; i++) {
75     if (!result->row_nnz[i]) {
76       max_need_extra += max_row_nnz[i];
77     } else {
78       need_already += max_row_nnz[i];
79       n_rows_ahead++;
80     }
81   }
82 
83   /* Make maximal and realistic memory requirement estimates */
84   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
85   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
86 
87   /* Choose array sizes */
88   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
89   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
90   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
91   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
92 
93   /* If new estimate is less than what we already have,
94     don't reallocate, just garbage-collect */
95   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
96     n_alloc = result->n_alloc_icol;
97   }
98 
99   /* Motivate dimension choice */
100   ierr = PetscInfo1(NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
101   if (n_alloc_max == n_alloc_est) {
102     ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr);
103   } else if (n_alloc_now >= n_alloc_est) {
104     ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr);
105   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
106     ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr);
107   } else {
108     ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr);
109   }
110 
111   /**********************************************************
112   2. Rescue arrays which would be lost
113   Count how many rows and nonzeros will have to be rescued
114   when moving all arrays in place */
115   n_row_rescue = 0; n_rescue = 0;
116   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
117   else {
118     i = *n_row_alloc_ok - 1;
119 
120     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
121   }
122 
123   for (i=*n_row_alloc_ok; i<nrows; i++) {
124     i_here = result->icols[i]-result->alloc_icol;
125     i_last = i_here + result->row_nnz[i];
126     if (result->row_nnz[i]>0) {
127       if (*n_alloc_used > i_here || i_last > n_alloc) {
128         n_rescue += result->row_nnz[i];
129         n_row_rescue++;
130       }
131 
132       if (i<i_row) *n_alloc_used += result->row_nnz[i];
133       else         *n_alloc_used += max_row_nnz[i];
134     }
135   }
136 
137   /* Allocate rescue arrays */
138   ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr);
139   ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr);
140 
141   /* Rescue the arrays which need rescuing */
142   n_row_rescue = 0; n_rescue = 0;
143   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
144   else {
145     i             = *n_row_alloc_ok - 1;
146     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
147   }
148 
149   for (i=*n_row_alloc_ok; i<nrows; i++) {
150     i_here = result->icols[i]-result->alloc_icol;
151     i_last = i_here + result->row_nnz[i];
152     if (result->row_nnz[i]>0) {
153       if (*n_alloc_used > i_here || i_last > n_alloc) {
154         ierr = PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);CHKERRQ(ierr);
155         ierr = PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);CHKERRQ(ierr);
156         n_rescue += result->row_nnz[i];
157         n_row_rescue++;
158       }
159 
160       if (i<i_row) *n_alloc_used += result->row_nnz[i];
161       else         *n_alloc_used += max_row_nnz[i];
162     }
163   }
164 
165   /**********************************************************
166   3. Reallocate and correct administration */
167 
168   if (n_alloc != result->n_alloc_icol) {
169     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
170 
171     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
172 
173         Allocate new icol-data, copy old contents */
174     ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr);
175     ierr = PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);CHKERRQ(ierr);
176 
177     /* Update administration, Reset pointers to new arrays  */
178     result->n_alloc_icol = n_alloc;
179     for (i=0; i<nrows; i++) {
180       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
181       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
182     }
183 
184     /* Delete old array */
185     ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
186 
187     /* Allocate new value-data, copy old contents */
188     ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr);
189     ierr = PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);CHKERRQ(ierr);
190 
191     /* Update administration, Reset pointers to new arrays  */
192     result->n_alloc_val = n_alloc;
193     for (i=0; i<nrows; i++) {
194       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
195     }
196 
197     /* Delete old array */
198     ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
199   }
200 
201   /*********************************************************
202   4. Copy all the arrays to their proper places */
203   n_row_rescue = 0; n_rescue = 0;
204   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
205   else {
206     i = *n_row_alloc_ok - 1;
207 
208     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
209   }
210 
211   for (i=*n_row_alloc_ok; i<nrows; i++) {
212     i_here = result->icols[i]-result->alloc_icol;
213     i_last = i_here + result->row_nnz[i];
214 
215     result->icols[i] = result->alloc_icol + *n_alloc_used;
216     result->values[i]= result->alloc_val  + *n_alloc_used;
217 
218     if (result->row_nnz[i]>0) {
219       if (*n_alloc_used > i_here || i_last > n_alloc) {
220         ierr = PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);CHKERRQ(ierr);
221         ierr = PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);CHKERRQ(ierr);
222 
223         n_rescue += result->row_nnz[i];
224         n_row_rescue++;
225       } else {
226         for (j=0; j<result->row_nnz[i]; j++) {
227           result->icols[i][j]  = result->alloc_icol[i_here+j];
228           result->values[i][j] = result->alloc_val[i_here+j];
229         }
230       }
231       if (i<i_row) *n_alloc_used += result->row_nnz[i];
232       else         *n_alloc_used += max_row_nnz[i];
233     }
234   }
235 
236   /* Delete the rescue arrays */
237   ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
238   ierr = PetscFree(val_rescue);CHKERRQ(ierr);
239 
240   *n_row_alloc_ok = i_row;
241   PetscFunctionReturn(0);
242 }
243 
244 /*
245   spbas_incomplete_cholesky:
246      incomplete Cholesky decomposition of a square, symmetric,
247      positive definite matrix.
248 
249      In case negative diagonals are encountered, function returns
250      NEGATIVE_DIAGONAL. When this happens, call this function again
251      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
252      droptol
253 */
254 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L,PetscBool *success)
255 {
256   PetscInt        jL;
257   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
258   PetscInt        *ai=a->i,*aj=a->j;
259   MatScalar       *aa=a->a;
260   PetscInt        nrows, ncols;
261   PetscInt        *max_row_nnz;
262   PetscErrorCode  ierr;
263   spbas_matrix    retval;
264   PetscScalar     *diag;
265   PetscScalar     *val;
266   PetscScalar     *lvec;
267   PetscScalar     epsdiag;
268   PetscInt        i,j,k;
269   const PetscBool do_values = PETSC_TRUE;
270   PetscInt        *r1_icol;
271   PetscScalar     *r1_val;
272   PetscInt        *r_icol;
273   PetscInt        r_nnz;
274   PetscScalar     *r_val;
275   PetscInt        *A_icol;
276   PetscInt        A_nnz;
277   PetscScalar     *A_val;
278   PetscInt        *p_icol;
279   PetscInt        p_nnz;
280   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
281   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */
282 
283   PetscFunctionBegin;
284   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
285   ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
286   ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
287 
288   epsdiag *= epsdiag_in / nrows;
289 
290   ierr = PetscInfo2(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr);
291 
292   if (droptol<1e-10) droptol=1e-10;
293 
294   retval.nrows        = nrows;
295   retval.ncols        = nrows;
296   retval.nnz          = pattern.nnz/10;
297   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
298   retval.block_data   = PETSC_TRUE;
299 
300   ierr       = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
301   ierr       = PetscArrayzero(retval.row_nnz, nrows);CHKERRQ(ierr);
302   ierr       = spbas_allocate_data(&retval);CHKERRQ(ierr);
303   retval.nnz = 0;
304 
305   ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr);
306   ierr = PetscCalloc1(nrows, &val);CHKERRQ(ierr);
307   ierr = PetscCalloc1(nrows, &lvec);CHKERRQ(ierr);
308   ierr = PetscCalloc1(nrows, &max_row_nnz);CHKERRQ(ierr);
309 
310   /* Count the nonzeros on transpose of pattern */
311   for (i = 0; i<nrows; i++)  {
312     p_nnz  = pattern.row_nnz[i];
313     p_icol = pattern.icols[i];
314     for (j=0; j<p_nnz; j++)  {
315       max_row_nnz[i+p_icol[j]]++;
316     }
317   }
318 
319   /* Calculate rows of L */
320   for (i = 0; i<nrows; i++)  {
321     p_nnz  = pattern.row_nnz[i];
322     p_icol = pattern.icols[i];
323 
324     r_nnz  = retval.row_nnz[i];
325     r_icol = retval.icols[i];
326     r_val  = retval.values[i];
327     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
328     A_icol = &aj[ai[rip[i]]];
329     A_val  = &aa[ai[rip[i]]];
330 
331     /* Calculate  val += A(i,i:n)'; */
332     for (j=0; j<A_nnz; j++) {
333       k = riip[A_icol[j]];
334       if (k>=i) val[k] = A_val[j];
335     }
336 
337     /*  Add regularization */
338     val[i] += epsdiag;
339 
340     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
341         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
342     for (j=0; j<r_nnz; j++)  {
343       k       = r_icol[j];
344       lvec[k] = diag[k] * r_val[j];
345       val[i] -= r_val[j] * lvec[k];
346     }
347 
348     /* Calculate the new diagonal */
349     diag[i] = val[i];
350     if (PetscRealPart(diag[i])<droptol) {
351       ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr);
352       ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr);
353 
354       /* Delete the whole matrix at once. */
355       ierr = spbas_delete(retval);CHKERRQ(ierr);
356       *success = PETSC_FALSE;
357       PetscFunctionReturn(0);
358     }
359 
360     /* If necessary, allocate arrays */
361     if (r_nnz==0) {
362       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
363       if (!success) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"spbas_cholesky_row_alloc() failed");
364       r_icol = retval.icols[i];
365       r_val  = retval.values[i];
366     }
367 
368     /* Now, fill in */
369     r_icol[r_nnz] = i;
370     r_val [r_nnz] = 1.0;
371     r_nnz++;
372     retval.row_nnz[i]++;
373 
374     retval.nnz += r_nnz;
375 
376     /* Calculate
377         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
378     for (j=1; j<p_nnz; j++)  {
379       k       = i+p_icol[j];
380       r1_icol = retval.icols[k];
381       r1_val  = retval.values[k];
382       for (jL=0; jL<retval.row_nnz[k]; jL++) {
383         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
384       }
385       val[k] /= diag[i];
386 
387       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
388         /* If necessary, allocate arrays */
389         if (!retval.row_nnz[k]) {
390           PetscBool flag,success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
391           if (!success) {
392             ierr   = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
393             flag   = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
394             if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Allocation in spbas_cholesky_row_alloc() failed");
395             r_icol = retval.icols[i];
396           }
397         }
398 
399         retval.icols[k][retval.row_nnz[k]]  = i;
400         retval.values[k][retval.row_nnz[k]] = val[k];
401         retval.row_nnz[k]++;
402       }
403       val[k] = 0;
404     }
405 
406     /* Erase the values used in the work arrays */
407     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
408   }
409 
410   ierr = PetscFree(lvec);CHKERRQ(ierr);
411   ierr = PetscFree(val);CHKERRQ(ierr);
412 
413   ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
414   ierr = PetscFree(max_row_nnz);CHKERRQ(ierr);
415 
416   /* Place the inverse of the diagonals in the matrix */
417   for (i=0; i<nrows; i++) {
418     r_nnz = retval.row_nnz[i];
419 
420     retval.values[i][r_nnz-1] = 1.0 / diag[i];
421     for (j=0; j<r_nnz-1; j++) {
422       retval.values[i][j] *= -1;
423     }
424   }
425   ierr      = PetscFree(diag);CHKERRQ(ierr);
426   *matrix_L = retval;
427   *success  = PETSC_TRUE;
428   PetscFunctionReturn(0);
429 }
430