xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 608e5a7a8a35b40c4e75a0aae313cbdeb3df9fee)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9 {
10   PetscFunctionBegin;
11   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
12   retval.values[k] = &retval.alloc_val[*n_alloc_used];
13   *n_alloc_used   += r_nnz;
14   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
15   PetscFunctionReturn(0);
16 }
17 
18 /*
19   spbas_cholesky_garbage_collect:
20      move the rows which have been calculated so far, as well as
21      those currently under construction, to the front of the
22      array, while putting them in the proper order.
23      When it seems necessary, enlarge the current arrays.
24 
25      NB: re-allocation is being simulated using
26          PetscMalloc, memcpy, PetscFree, because
27          PetscRealloc does not seem to exist.
28 
29 */
30 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
31                                                                                     Only the storage, not the contents of this matrix is changed in this function */
32                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
33                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
34                                                                                     places in the arrays: they need not be moved any more */
35                                               PetscInt     *n_alloc_used,   /* I/O:  */
36                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
37 {
38 /* PSEUDO-CODE:
39   1. Choose the appropriate size for the arrays
40   2. Rescue the arrays which would be lost during garbage collection
41   3. Reallocate and correct administration
42   4. Move all arrays so that they are in proper order */
43 
44   PetscInt        i,j;
45   PetscInt        nrows         = result->nrows;
46   PetscInt        n_alloc_ok    =0;
47   PetscInt        n_alloc_ok_max=0;
48   PetscErrorCode  ierr;
49   PetscInt        need_already  = 0;
50   PetscInt        n_rows_ahead  =0;
51   PetscInt        max_need_extra= 0;
52   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
53   PetscInt        n_alloc_now     = result->n_alloc_icol;
54   PetscInt        *alloc_icol_old = result->alloc_icol;
55   PetscScalar     *alloc_val_old  = result->alloc_val;
56   PetscInt        *icol_rescue;
57   PetscScalar     *val_rescue;
58   PetscInt        n_rescue;
59   PetscInt        n_row_rescue;
60   PetscInt        i_here, i_last, n_copy;
61   const PetscReal xtra_perc = 20;
62 
63   PetscFunctionBegin;
64   /*********************************************************
65   1. Choose appropriate array size
66   Count number of rows and memory usage which is already final */
67   for (i=0; i<i_row; i++)  {
68     n_alloc_ok     += result->row_nnz[i];
69     n_alloc_ok_max += max_row_nnz[i];
70   }
71 
72   /* Count currently needed memory usage and future memory requirements
73     (max, predicted)*/
74   for (i=i_row; i<nrows; i++) {
75     if (!result->row_nnz[i]) {
76       max_need_extra += max_row_nnz[i];
77     } else {
78       need_already += max_row_nnz[i];
79       n_rows_ahead++;
80     }
81   }
82 
83   /* Make maximal and realistic memory requirement estimates */
84   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
85   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
86 
87   /* Choose array sizes */
88   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
89   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
90   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
91   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
92 
93   /* If new estimate is less than what we already have,
94     don't reallocate, just garbage-collect */
95   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
96     n_alloc = result->n_alloc_icol;
97   }
98 
99   /* Motivate dimension choice */
100   ierr = PetscInfo1(NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
101   if (n_alloc_max == n_alloc_est) {
102     ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr);
103   } else if (n_alloc_now >= n_alloc_est) {
104     ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr);
105   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
106     ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr);
107   } else {
108     ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr);
109   }
110 
111   /**********************************************************
112   2. Rescue arrays which would be lost
113   Count how many rows and nonzeros will have to be rescued
114   when moving all arrays in place */
115   n_row_rescue = 0; n_rescue = 0;
116   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
117   else {
118     i = *n_row_alloc_ok - 1;
119 
120     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
121   }
122 
123   for (i=*n_row_alloc_ok; i<nrows; i++) {
124     i_here = result->icols[i]-result->alloc_icol;
125     i_last = i_here + result->row_nnz[i];
126     if (result->row_nnz[i]>0) {
127       if (*n_alloc_used > i_here || i_last > n_alloc) {
128         n_rescue += result->row_nnz[i];
129         n_row_rescue++;
130       }
131 
132       if (i<i_row) *n_alloc_used += result->row_nnz[i];
133       else         *n_alloc_used += max_row_nnz[i];
134     }
135   }
136 
137   /* Allocate rescue arrays */
138   ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr);
139   ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr);
140 
141   /* Rescue the arrays which need rescuing */
142   n_row_rescue = 0; n_rescue = 0;
143   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
144   else {
145     i             = *n_row_alloc_ok - 1;
146     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
147   }
148 
149   for (i=*n_row_alloc_ok; i<nrows; i++) {
150     i_here = result->icols[i]-result->alloc_icol;
151     i_last = i_here + result->row_nnz[i];
152     if (result->row_nnz[i]>0) {
153       if (*n_alloc_used > i_here || i_last > n_alloc) {
154         ierr = PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);CHKERRQ(ierr);
155         ierr = PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);CHKERRQ(ierr);
156         n_rescue += result->row_nnz[i];
157         n_row_rescue++;
158       }
159 
160       if (i<i_row) *n_alloc_used += result->row_nnz[i];
161       else         *n_alloc_used += max_row_nnz[i];
162     }
163   }
164 
165   /**********************************************************
166   3. Reallocate and correct administration */
167 
168   if (n_alloc != result->n_alloc_icol) {
169     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
170 
171     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
172 
173         Allocate new icol-data, copy old contents */
174     ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr);
175     ierr = PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);CHKERRQ(ierr);
176 
177     /* Update administration, Reset pointers to new arrays  */
178     result->n_alloc_icol = n_alloc;
179     for (i=0; i<nrows; i++) {
180       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
181       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
182     }
183 
184     /* Delete old array */
185     ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
186 
187     /* Allocate new value-data, copy old contents */
188     ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr);
189     ierr = PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);CHKERRQ(ierr);
190 
191     /* Update administration, Reset pointers to new arrays  */
192     result->n_alloc_val = n_alloc;
193     for (i=0; i<nrows; i++) {
194       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
195     }
196 
197     /* Delete old array */
198     ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
199   }
200 
201   /*********************************************************
202   4. Copy all the arrays to their proper places */
203   n_row_rescue = 0; n_rescue = 0;
204   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
205   else {
206     i = *n_row_alloc_ok - 1;
207 
208     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
209   }
210 
211   for (i=*n_row_alloc_ok; i<nrows; i++) {
212     i_here = result->icols[i]-result->alloc_icol;
213     i_last = i_here + result->row_nnz[i];
214 
215     result->icols[i] = result->alloc_icol + *n_alloc_used;
216     result->values[i]= result->alloc_val  + *n_alloc_used;
217 
218     if (result->row_nnz[i]>0) {
219       if (*n_alloc_used > i_here || i_last > n_alloc) {
220         ierr = PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);CHKERRQ(ierr);
221         ierr = PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);CHKERRQ(ierr);
222 
223         n_rescue += result->row_nnz[i];
224         n_row_rescue++;
225       } else {
226         for (j=0; j<result->row_nnz[i]; j++) {
227           result->icols[i][j]  = result->alloc_icol[i_here+j];
228           result->values[i][j] = result->alloc_val[i_here+j];
229         }
230       }
231       if (i<i_row) *n_alloc_used += result->row_nnz[i];
232       else         *n_alloc_used += max_row_nnz[i];
233     }
234   }
235 
236   /* Delete the rescue arrays */
237   ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
238   ierr = PetscFree(val_rescue);CHKERRQ(ierr);
239 
240   *n_row_alloc_ok = i_row;
241   PetscFunctionReturn(0);
242 }
243 
244 /*
245   spbas_incomplete_cholesky:
246      incomplete Cholesky decomposition of a square, symmetric,
247      positive definite matrix.
248 
249      In case negative diagonals are encountered, function returns
250      NEGATIVE_DIAGONAL. When this happens, call this function again
251      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
252      droptol
253 */
254 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
255 {
256   PetscInt        jL;
257   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
258   PetscInt        *ai=a->i,*aj=a->j;
259   MatScalar       *aa=a->a;
260   PetscInt        nrows, ncols;
261   PetscInt        *max_row_nnz;
262   PetscErrorCode  ierr;
263   spbas_matrix    retval;
264   PetscScalar     *diag;
265   PetscScalar     *val;
266   PetscScalar     *lvec;
267   PetscScalar     epsdiag;
268   PetscInt        i,j,k;
269   const PetscBool do_values = PETSC_TRUE;
270   PetscInt        *r1_icol;
271   PetscScalar     *r1_val;
272   PetscInt        *r_icol;
273   PetscInt        r_nnz;
274   PetscScalar     *r_val;
275   PetscInt        *A_icol;
276   PetscInt        A_nnz;
277   PetscScalar     *A_val;
278   PetscInt        *p_icol;
279   PetscInt        p_nnz;
280   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
281   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */
282 
283   PetscFunctionBegin;
284   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
285   ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
286   ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
287 
288   epsdiag *= epsdiag_in / nrows;
289 
290   ierr = PetscInfo2(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr);
291 
292   if (droptol<1e-10) droptol=1e-10;
293 
294   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
295 
296   retval.nrows        = nrows;
297   retval.ncols        = nrows;
298   retval.nnz          = pattern.nnz/10;
299   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
300   retval.block_data   = PETSC_TRUE;
301 
302   ierr       = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
303   ierr       = PetscArrayzero(retval.row_nnz, nrows);CHKERRQ(ierr);
304   ierr       = spbas_allocate_data(&retval);CHKERRQ(ierr);
305   retval.nnz = 0;
306 
307   ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr);
308   ierr = PetscCalloc1(nrows, &val);CHKERRQ(ierr);
309   ierr = PetscCalloc1(nrows, &lvec);CHKERRQ(ierr);
310   ierr = PetscCalloc1(nrows, &max_row_nnz);CHKERRQ(ierr);
311 
312   /* Check correct format of sparseness pattern */
313   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
314 
315   /* Count the nonzeros on transpose of pattern */
316   for (i = 0; i<nrows; i++)  {
317     p_nnz  = pattern.row_nnz[i];
318     p_icol = pattern.icols[i];
319     for (j=0; j<p_nnz; j++)  {
320       max_row_nnz[i+p_icol[j]]++;
321     }
322   }
323 
324   /* Calculate rows of L */
325   for (i = 0; i<nrows; i++)  {
326     p_nnz  = pattern.row_nnz[i];
327     p_icol = pattern.icols[i];
328 
329     r_nnz  = retval.row_nnz[i];
330     r_icol = retval.icols[i];
331     r_val  = retval.values[i];
332     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
333     A_icol = &aj[ai[rip[i]]];
334     A_val  = &aa[ai[rip[i]]];
335 
336     /* Calculate  val += A(i,i:n)'; */
337     for (j=0; j<A_nnz; j++) {
338       k = riip[A_icol[j]];
339       if (k>=i) val[k] = A_val[j];
340     }
341 
342     /*  Add regularization */
343     val[i] += epsdiag;
344 
345     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
346         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
347     for (j=0; j<r_nnz; j++)  {
348       k       = r_icol[j];
349       lvec[k] = diag[k] * r_val[j];
350       val[i] -= r_val[j] * lvec[k];
351     }
352 
353     /* Calculate the new diagonal */
354     diag[i] = val[i];
355     if (PetscRealPart(diag[i])<droptol) {
356       ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr);
357       ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr);
358 
359       /* Delete the whole matrix at once. */
360       ierr = spbas_delete(retval);CHKERRQ(ierr);
361       return NEGATIVE_DIAGONAL;
362     }
363 
364     /* If necessary, allocate arrays */
365     if (r_nnz==0) {
366       ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
367       if (ierr == PETSC_ERR_MEM) {
368         ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
369         ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
370       } else CHKERRQ(ierr);
371       r_icol = retval.icols[i];
372       r_val  = retval.values[i];
373     }
374 
375     /* Now, fill in */
376     r_icol[r_nnz] = i;
377     r_val [r_nnz] = 1.0;
378     r_nnz++;
379     retval.row_nnz[i]++;
380 
381     retval.nnz += r_nnz;
382 
383     /* Calculate
384         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
385     for (j=1; j<p_nnz; j++)  {
386       k       = i+p_icol[j];
387       r1_icol = retval.icols[k];
388       r1_val  = retval.values[k];
389       for (jL=0; jL<retval.row_nnz[k]; jL++) {
390         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
391       }
392       val[k] /= diag[i];
393 
394       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
395         /* If necessary, allocate arrays */
396         if (retval.row_nnz[k]==0) {
397           ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
398           if (ierr == PETSC_ERR_MEM) {
399             ierr   = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
400             ierr   = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
401             r_icol = retval.icols[i];
402           } else CHKERRQ(ierr);
403         }
404 
405         retval.icols[k][retval.row_nnz[k]]  = i;
406         retval.values[k][retval.row_nnz[k]] = val[k];
407         retval.row_nnz[k]++;
408       }
409       val[k] = 0;
410     }
411 
412     /* Erase the values used in the work arrays */
413     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
414   }
415 
416   ierr = PetscFree(lvec);CHKERRQ(ierr);
417   ierr = PetscFree(val);CHKERRQ(ierr);
418 
419   ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
420   ierr = PetscFree(max_row_nnz);CHKERRQ(ierr);
421 
422   /* Place the inverse of the diagonals in the matrix */
423   for (i=0; i<nrows; i++) {
424     r_nnz = retval.row_nnz[i];
425 
426     retval.values[i][r_nnz-1] = 1.0 / diag[i];
427     for (j=0; j<r_nnz-1; j++) {
428       retval.values[i][j] *= -1;
429     }
430   }
431   ierr      = PetscFree(diag);CHKERRQ(ierr);
432   *matrix_L = retval;
433   PetscFunctionReturn(0);
434 }
435