xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 7b2fcb5d6efa604aac62606659be4a62fc8a0438)
1 #pragma once
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
spbas_cholesky_row_alloc(spbas_matrix retval,PetscInt k,PetscInt r_nnz,PetscInt * n_alloc_used)8 static PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used)
9 {
10   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
11   retval.values[k] = &retval.alloc_val[*n_alloc_used];
12   *n_alloc_used += r_nnz;
13   return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE;
14 }
15 
16 /*
17   spbas_cholesky_garbage_collect:
18      move the rows which have been calculated so far, as well as
19      those currently under construction, to the front of the
20      array, while putting them in the proper order.
21      When it seems necessary, enlarge the current arrays.
22 
23      NB: re-allocation is being simulated using
24          PetscMalloc, memcpy, PetscFree, because
25          PetscRealloc does not seem to exist.
26 
27 */
spbas_cholesky_garbage_collect(spbas_matrix * result,PetscInt i_row,PetscInt * n_row_alloc_ok,PetscInt * n_alloc_used,PetscInt * max_row_nnz)28 static PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
29                                                                                     Only the storage, not the contents of this matrix is changed in this function */
30                                                      PetscInt      i_row,          /* I  : Number of rows for which the final contents are known */
31                                                      PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
32                                                                                     places in the arrays: they need not be moved any more */
33                                                      PetscInt     *n_alloc_used,   /* I/O:  */
34                                                      PetscInt     *max_row_nnz)        /* I  : Over-estimate of the number of nonzeros needed to store each row */
35 {
36   /* PSEUDO-CODE:
37   1. Choose the appropriate size for the arrays
38   2. Rescue the arrays which would be lost during garbage collection
39   3. Reallocate and correct administration
40   4. Move all arrays so that they are in proper order */
41 
42   PetscInt        i, j;
43   PetscInt        nrows          = result->nrows;
44   PetscInt        n_alloc_ok     = 0;
45   PetscInt        n_alloc_ok_max = 0;
46   PetscInt        need_already   = 0;
47   PetscInt        max_need_extra = 0;
48   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
49   PetscInt        n_alloc_now    = result->n_alloc_icol;
50   PetscInt       *alloc_icol_old = result->alloc_icol;
51   PetscScalar    *alloc_val_old  = result->alloc_val;
52   PetscInt       *icol_rescue;
53   PetscScalar    *val_rescue;
54   PetscInt        n_rescue;
55   PetscInt        i_here, i_last, n_copy;
56   const PetscReal xtra_perc = 20;
57 
58   PetscFunctionBegin;
59   /*********************************************************
60   1. Choose appropriate array size
61   Count number of rows and memory usage which is already final */
62   for (i = 0; i < i_row; i++) {
63     n_alloc_ok += result->row_nnz[i];
64     n_alloc_ok_max += max_row_nnz[i];
65   }
66 
67   /* Count currently needed memory usage and future memory requirements
68     (max, predicted)*/
69   for (i = i_row; i < nrows; i++) {
70     if (!result->row_nnz[i]) {
71       max_need_extra += max_row_nnz[i];
72     } else {
73       need_already += max_row_nnz[i];
74     }
75   }
76 
77   /* Make maximal and realistic memory requirement estimates */
78   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
79   n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max));
80 
81   /* Choose array sizes */
82   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
83   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
84   else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max;
85   else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0));
86 
87   /* If new estimate is less than what we already have,
88     don't reallocate, just garbage-collect */
89   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol;
90 
91   /* Motivate dimension choice */
92   PetscCall(PetscInfo(NULL, "   Allocating %" PetscInt_FMT " nonzeros: ", n_alloc)); /* checkbadSource \n */
93   if (n_alloc_max == n_alloc_est) {
94     PetscCall(PetscInfo(NULL, "this is the correct size\n"));
95   } else if (n_alloc_now >= n_alloc_est) {
96     PetscCall(PetscInfo(NULL, "the current size, which seems enough\n"));
97   } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) {
98     PetscCall(PetscInfo(NULL, "the maximum estimate\n"));
99   } else {
100     PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc));
101   }
102 
103   /**********************************************************
104   2. Rescue arrays which would be lost
105   Count how many rows and nonzeros will have to be rescued
106   when moving all arrays in place */
107   n_rescue = 0;
108   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
109   else {
110     i = *n_row_alloc_ok - 1;
111 
112     PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol + result->row_nnz[i], n_alloc_used));
113   }
114 
115   for (i = *n_row_alloc_ok; i < nrows; i++) {
116     PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol, &i_here));
117     i_last = i_here + result->row_nnz[i];
118     if (result->row_nnz[i] > 0) {
119       if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i];
120 
121       if (i < i_row) *n_alloc_used += result->row_nnz[i];
122       else *n_alloc_used += max_row_nnz[i];
123     }
124   }
125 
126   /* Allocate rescue arrays */
127   PetscCall(PetscMalloc1(n_rescue, &icol_rescue));
128   PetscCall(PetscMalloc1(n_rescue, &val_rescue));
129 
130   /* Rescue the arrays which need rescuing */
131   n_rescue = 0;
132   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
133   else {
134     i = *n_row_alloc_ok - 1;
135     PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol + result->row_nnz[i], n_alloc_used));
136   }
137 
138   for (i = *n_row_alloc_ok; i < nrows; i++) {
139     PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol, &i_here));
140     i_last = i_here + result->row_nnz[i];
141     if (result->row_nnz[i] > 0) {
142       if (*n_alloc_used > i_here || i_last > n_alloc) {
143         PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]));
144         PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]));
145         n_rescue += result->row_nnz[i];
146       }
147 
148       if (i < i_row) *n_alloc_used += result->row_nnz[i];
149       else *n_alloc_used += max_row_nnz[i];
150     }
151   }
152 
153   /**********************************************************
154   3. Reallocate and correct administration */
155 
156   if (n_alloc != result->n_alloc_icol) {
157     n_copy = PetscMin(n_alloc, result->n_alloc_icol);
158 
159     /* PETSc knows no REALLOC, so we'll REALLOC ourselves.
160 
161         Allocate new icol-data, copy old contents */
162     PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol));
163     PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy));
164 
165     /* Update administration, Reset pointers to new arrays  */
166     result->n_alloc_icol = n_alloc;
167     for (i = 0; i < nrows; i++) {
168       result->icols[i]  = result->alloc_icol + (result->icols[i] - alloc_icol_old);
169       result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
170     }
171 
172     /* Delete old array */
173     PetscCall(PetscFree(alloc_icol_old));
174 
175     /* Allocate new value-data, copy old contents */
176     PetscCall(PetscMalloc1(n_alloc, &result->alloc_val));
177     PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy));
178 
179     /* Update administration, Reset pointers to new arrays  */
180     result->n_alloc_val = n_alloc;
181     for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
182 
183     /* Delete old array */
184     PetscCall(PetscFree(alloc_val_old));
185   }
186 
187   /*********************************************************
188   4. Copy all the arrays to their proper places */
189   n_rescue = 0;
190   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
191   else {
192     i = *n_row_alloc_ok - 1;
193 
194     PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol + result->row_nnz[i], n_alloc_used));
195   }
196 
197   for (i = *n_row_alloc_ok; i < nrows; i++) {
198     PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol, &i_here));
199     i_last = i_here + result->row_nnz[i];
200 
201     result->icols[i]  = result->alloc_icol + *n_alloc_used;
202     result->values[i] = result->alloc_val + *n_alloc_used;
203 
204     if (result->row_nnz[i] > 0) {
205       if (*n_alloc_used > i_here || i_last > n_alloc) {
206         PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]));
207         PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i]));
208 
209         n_rescue += result->row_nnz[i];
210       } else {
211         for (j = 0; j < result->row_nnz[i]; j++) {
212           result->icols[i][j]  = result->alloc_icol[i_here + j];
213           result->values[i][j] = result->alloc_val[i_here + j];
214         }
215       }
216       if (i < i_row) *n_alloc_used += result->row_nnz[i];
217       else *n_alloc_used += max_row_nnz[i];
218     }
219   }
220 
221   /* Delete the rescue arrays */
222   PetscCall(PetscFree(icol_rescue));
223   PetscCall(PetscFree(val_rescue));
224 
225   *n_row_alloc_ok = i_row;
226   PetscFunctionReturn(PETSC_SUCCESS);
227 }
228 
229 /*
230   spbas_incomplete_cholesky:
231      incomplete Cholesky decomposition of a square, symmetric,
232      positive definite matrix.
233 
234      In case negative diagonals are encountered, function returns
235      NEGATIVE_DIAGONAL. When this happens, call this function again
236      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
237      droptol
238 */
spbas_incomplete_cholesky(Mat A,const PetscInt * rip,const PetscInt * riip,spbas_matrix pattern,PetscReal droptol,PetscReal epsdiag_in,spbas_matrix * matrix_L,PetscBool * success)239 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success)
240 {
241   PetscInt        jL;
242   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
243   PetscInt       *ai = a->i, *aj = a->j;
244   MatScalar      *aa = a->a;
245   PetscInt        nrows, ncols;
246   PetscInt       *max_row_nnz;
247   spbas_matrix    retval;
248   PetscScalar    *diag;
249   PetscScalar    *val;
250   PetscScalar    *lvec;
251   PetscScalar     epsdiag;
252   PetscInt        i, j, k;
253   const PetscBool do_values = PETSC_TRUE;
254   PetscInt       *r1_icol;
255   PetscScalar    *r1_val;
256   PetscInt       *r_icol;
257   PetscInt        r_nnz;
258   PetscScalar    *r_val;
259   PetscInt       *A_icol;
260   PetscInt        A_nnz;
261   PetscScalar    *A_val;
262   PetscInt       *p_icol;
263   PetscInt        p_nnz;
264   PetscInt        n_row_alloc_ok = 0; /* number of rows which have been stored   correctly in the matrix */
265   PetscInt        n_alloc_used   = 0; /* part of result->icols and result->values   which is currently being used */
266 
267   PetscFunctionBegin;
268   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
269   PetscCall(MatGetSize(A, &nrows, &ncols));
270   PetscCall(MatGetTrace(A, &epsdiag));
271 
272   epsdiag *= epsdiag_in / nrows;
273 
274   PetscCall(PetscInfo(NULL, "   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol));
275 
276   if (droptol < 1e-10) droptol = 1e-10;
277 
278   retval.nrows        = nrows;
279   retval.ncols        = nrows;
280   retval.nnz          = pattern.nnz / 10;
281   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
282   retval.block_data   = PETSC_TRUE;
283 
284   PetscCall(spbas_allocate_pattern(&retval, do_values));
285   PetscCall(PetscArrayzero(retval.row_nnz, nrows));
286   PetscCall(spbas_allocate_data(&retval));
287   retval.nnz = 0;
288 
289   PetscCall(PetscMalloc1(nrows, &diag));
290   PetscCall(PetscCalloc1(nrows, &val));
291   PetscCall(PetscCalloc1(nrows, &lvec));
292   PetscCall(PetscCalloc1(nrows, &max_row_nnz));
293 
294   /* Count the nonzeros on transpose of pattern */
295   for (i = 0; i < nrows; i++) {
296     p_nnz  = pattern.row_nnz[i];
297     p_icol = pattern.icols[i];
298     for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++;
299   }
300 
301   /* Calculate rows of L */
302   for (i = 0; i < nrows; i++) {
303     p_nnz  = pattern.row_nnz[i];
304     p_icol = pattern.icols[i];
305 
306     r_nnz  = retval.row_nnz[i];
307     r_icol = retval.icols[i];
308     r_val  = retval.values[i];
309     A_nnz  = ai[rip[i] + 1] - ai[rip[i]];
310     A_icol = &aj[ai[rip[i]]];
311     A_val  = &aa[ai[rip[i]]];
312 
313     /* Calculate  val += A(i,i:n)'; */
314     for (j = 0; j < A_nnz; j++) {
315       k = riip[A_icol[j]];
316       if (k >= i) val[k] = A_val[j];
317     }
318 
319     /*  Add regularization */
320     val[i] += epsdiag;
321 
322     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
323         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
324     for (j = 0; j < r_nnz; j++) {
325       k       = r_icol[j];
326       lvec[k] = diag[k] * r_val[j];
327       val[i] -= r_val[j] * lvec[k];
328     }
329 
330     /* Calculate the new diagonal */
331     diag[i] = val[i];
332     if (PetscRealPart(diag[i]) < droptol) {
333       PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n"));
334       PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1));
335 
336       /* Delete the whole matrix at once. */
337       PetscCall(spbas_delete(retval));
338       *success = PETSC_FALSE;
339       PetscFunctionReturn(PETSC_SUCCESS);
340     }
341 
342     /* If necessary, allocate arrays */
343     if (r_nnz == 0) {
344       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
345       PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed");
346       r_icol = retval.icols[i];
347       r_val  = retval.values[i];
348     }
349 
350     /* Now, fill in */
351     r_icol[r_nnz] = i;
352     r_val[r_nnz]  = 1.0;
353     r_nnz++;
354     retval.row_nnz[i]++;
355 
356     retval.nnz += r_nnz;
357 
358     /* Calculate
359         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
360     for (j = 1; j < p_nnz; j++) {
361       k       = i + p_icol[j];
362       r1_icol = retval.icols[k];
363       r1_val  = retval.values[k];
364       for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
365       val[k] /= diag[i];
366 
367       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) {
368         /* If necessary, allocate arrays */
369         if (!retval.row_nnz[k]) {
370           PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
371           if (!success) {
372             PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
373             flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
374             PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed");
375             r_icol = retval.icols[i];
376           }
377         }
378 
379         retval.icols[k][retval.row_nnz[k]]  = i;
380         retval.values[k][retval.row_nnz[k]] = val[k];
381         retval.row_nnz[k]++;
382       }
383       val[k] = 0;
384     }
385 
386     /* Erase the values used in the work arrays */
387     for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0;
388   }
389 
390   PetscCall(PetscFree(lvec));
391   PetscCall(PetscFree(val));
392 
393   PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
394   PetscCall(PetscFree(max_row_nnz));
395 
396   /* Place the inverse of the diagonals in the matrix */
397   for (i = 0; i < nrows; i++) {
398     r_nnz = retval.row_nnz[i];
399 
400     retval.values[i][r_nnz - 1] = 1.0 / diag[i];
401     for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1;
402   }
403   PetscCall(PetscFree(diag));
404   *matrix_L = retval;
405   *success  = PETSC_TRUE;
406   PetscFunctionReturn(PETSC_SUCCESS);
407 }
408