xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used) {
9   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
10   retval.values[k] = &retval.alloc_val[*n_alloc_used];
11   *n_alloc_used += r_nnz;
12   return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE;
13 }
14 
15 /*
16   spbas_cholesky_garbage_collect:
17      move the rows which have been calculated so far, as well as
18      those currently under construction, to the front of the
19      array, while putting them in the proper order.
20      When it seems necessary, enlarge the current arrays.
21 
22      NB: re-allocation is being simulated using
23          PetscMalloc, memcpy, PetscFree, because
24          PetscRealloc does not seem to exist.
25 
26 */
27 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
28                                                                                     Only the storage, not the contents of this matrix is changed in this function */
29                                               PetscInt      i_row,          /* I  : Number of rows for which the final contents are known */
30                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
31                                                                                     places in the arrays: they need not be moved any more */
32                                               PetscInt     *n_alloc_used,   /* I/O:  */
33                                               PetscInt     *max_row_nnz)        /* I  : Over-estimate of the number of nonzeros needed to store each row */
34 {
35   /* PSEUDO-CODE:
36   1. Choose the appropriate size for the arrays
37   2. Rescue the arrays which would be lost during garbage collection
38   3. Reallocate and correct administration
39   4. Move all arrays so that they are in proper order */
40 
41   PetscInt        i, j;
42   PetscInt        nrows          = result->nrows;
43   PetscInt        n_alloc_ok     = 0;
44   PetscInt        n_alloc_ok_max = 0;
45   PetscInt        need_already   = 0;
46   PetscInt        max_need_extra = 0;
47   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
48   PetscInt        n_alloc_now    = result->n_alloc_icol;
49   PetscInt       *alloc_icol_old = result->alloc_icol;
50   PetscScalar    *alloc_val_old  = result->alloc_val;
51   PetscInt       *icol_rescue;
52   PetscScalar    *val_rescue;
53   PetscInt        n_rescue;
54   PetscInt        i_here, i_last, n_copy;
55   const PetscReal xtra_perc = 20;
56 
57   PetscFunctionBegin;
58   /*********************************************************
59   1. Choose appropriate array size
60   Count number of rows and memory usage which is already final */
61   for (i = 0; i < i_row; i++) {
62     n_alloc_ok += result->row_nnz[i];
63     n_alloc_ok_max += max_row_nnz[i];
64   }
65 
66   /* Count currently needed memory usage and future memory requirements
67     (max, predicted)*/
68   for (i = i_row; i < nrows; i++) {
69     if (!result->row_nnz[i]) {
70       max_need_extra += max_row_nnz[i];
71     } else {
72       need_already += max_row_nnz[i];
73     }
74   }
75 
76   /* Make maximal and realistic memory requirement estimates */
77   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
78   n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max));
79 
80   /* Choose array sizes */
81   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
82   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
83   else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max;
84   else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0));
85 
86   /* If new estimate is less than what we already have,
87     don't reallocate, just garbage-collect */
88   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol;
89 
90   /* Motivate dimension choice */
91   PetscCall(PetscInfo(NULL, "   Allocating %" PetscInt_FMT " nonzeros: ", n_alloc));
92   if (n_alloc_max == n_alloc_est) {
93     PetscCall(PetscInfo(NULL, "this is the correct size\n"));
94   } else if (n_alloc_now >= n_alloc_est) {
95     PetscCall(PetscInfo(NULL, "the current size, which seems enough\n"));
96   } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) {
97     PetscCall(PetscInfo(NULL, "the maximum estimate\n"));
98   } else {
99     PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc));
100   }
101 
102   /**********************************************************
103   2. Rescue arrays which would be lost
104   Count how many rows and nonzeros will have to be rescued
105   when moving all arrays in place */
106   n_rescue = 0;
107   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
108   else {
109     i = *n_row_alloc_ok - 1;
110 
111     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
112   }
113 
114   for (i = *n_row_alloc_ok; i < nrows; i++) {
115     i_here = result->icols[i] - result->alloc_icol;
116     i_last = i_here + result->row_nnz[i];
117     if (result->row_nnz[i] > 0) {
118       if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i];
119 
120       if (i < i_row) *n_alloc_used += result->row_nnz[i];
121       else *n_alloc_used += max_row_nnz[i];
122     }
123   }
124 
125   /* Allocate rescue arrays */
126   PetscCall(PetscMalloc1(n_rescue, &icol_rescue));
127   PetscCall(PetscMalloc1(n_rescue, &val_rescue));
128 
129   /* Rescue the arrays which need rescuing */
130   n_rescue = 0;
131   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
132   else {
133     i             = *n_row_alloc_ok - 1;
134     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
135   }
136 
137   for (i = *n_row_alloc_ok; i < nrows; i++) {
138     i_here = result->icols[i] - result->alloc_icol;
139     i_last = i_here + result->row_nnz[i];
140     if (result->row_nnz[i] > 0) {
141       if (*n_alloc_used > i_here || i_last > n_alloc) {
142         PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]));
143         PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]));
144         n_rescue += result->row_nnz[i];
145       }
146 
147       if (i < i_row) *n_alloc_used += result->row_nnz[i];
148       else *n_alloc_used += max_row_nnz[i];
149     }
150   }
151 
152   /**********************************************************
153   3. Reallocate and correct administration */
154 
155   if (n_alloc != result->n_alloc_icol) {
156     n_copy = PetscMin(n_alloc, result->n_alloc_icol);
157 
158     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
159 
160         Allocate new icol-data, copy old contents */
161     PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol));
162     PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy));
163 
164     /* Update administration, Reset pointers to new arrays  */
165     result->n_alloc_icol = n_alloc;
166     for (i = 0; i < nrows; i++) {
167       result->icols[i]  = result->alloc_icol + (result->icols[i] - alloc_icol_old);
168       result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
169     }
170 
171     /* Delete old array */
172     PetscCall(PetscFree(alloc_icol_old));
173 
174     /* Allocate new value-data, copy old contents */
175     PetscCall(PetscMalloc1(n_alloc, &result->alloc_val));
176     PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy));
177 
178     /* Update administration, Reset pointers to new arrays  */
179     result->n_alloc_val = n_alloc;
180     for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
181 
182     /* Delete old array */
183     PetscCall(PetscFree(alloc_val_old));
184   }
185 
186   /*********************************************************
187   4. Copy all the arrays to their proper places */
188   n_rescue = 0;
189   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
190   else {
191     i = *n_row_alloc_ok - 1;
192 
193     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
194   }
195 
196   for (i = *n_row_alloc_ok; i < nrows; i++) {
197     i_here = result->icols[i] - result->alloc_icol;
198     i_last = i_here + result->row_nnz[i];
199 
200     result->icols[i]  = result->alloc_icol + *n_alloc_used;
201     result->values[i] = result->alloc_val + *n_alloc_used;
202 
203     if (result->row_nnz[i] > 0) {
204       if (*n_alloc_used > i_here || i_last > n_alloc) {
205         PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]));
206         PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i]));
207 
208         n_rescue += result->row_nnz[i];
209       } else {
210         for (j = 0; j < result->row_nnz[i]; j++) {
211           result->icols[i][j]  = result->alloc_icol[i_here + j];
212           result->values[i][j] = result->alloc_val[i_here + j];
213         }
214       }
215       if (i < i_row) *n_alloc_used += result->row_nnz[i];
216       else *n_alloc_used += max_row_nnz[i];
217     }
218   }
219 
220   /* Delete the rescue arrays */
221   PetscCall(PetscFree(icol_rescue));
222   PetscCall(PetscFree(val_rescue));
223 
224   *n_row_alloc_ok = i_row;
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229   spbas_incomplete_cholesky:
230      incomplete Cholesky decomposition of a square, symmetric,
231      positive definite matrix.
232 
233      In case negative diagonals are encountered, function returns
234      NEGATIVE_DIAGONAL. When this happens, call this function again
235      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
236      droptol
237 */
238 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success) {
239   PetscInt        jL;
240   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
241   PetscInt       *ai = a->i, *aj = a->j;
242   MatScalar      *aa = a->a;
243   PetscInt        nrows, ncols;
244   PetscInt       *max_row_nnz;
245   spbas_matrix    retval;
246   PetscScalar    *diag;
247   PetscScalar    *val;
248   PetscScalar    *lvec;
249   PetscScalar     epsdiag;
250   PetscInt        i, j, k;
251   const PetscBool do_values = PETSC_TRUE;
252   PetscInt       *r1_icol;
253   PetscScalar    *r1_val;
254   PetscInt       *r_icol;
255   PetscInt        r_nnz;
256   PetscScalar    *r_val;
257   PetscInt       *A_icol;
258   PetscInt        A_nnz;
259   PetscScalar    *A_val;
260   PetscInt       *p_icol;
261   PetscInt        p_nnz;
262   PetscInt        n_row_alloc_ok = 0; /* number of rows which have been stored   correctly in the matrix */
263   PetscInt        n_alloc_used   = 0; /* part of result->icols and result->values   which is currently being used */
264 
265   PetscFunctionBegin;
266   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
267   PetscCall(MatGetSize(A, &nrows, &ncols));
268   PetscCall(MatGetTrace(A, &epsdiag));
269 
270   epsdiag *= epsdiag_in / nrows;
271 
272   PetscCall(PetscInfo(NULL, "   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol));
273 
274   if (droptol < 1e-10) droptol = 1e-10;
275 
276   retval.nrows        = nrows;
277   retval.ncols        = nrows;
278   retval.nnz          = pattern.nnz / 10;
279   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
280   retval.block_data   = PETSC_TRUE;
281 
282   PetscCall(spbas_allocate_pattern(&retval, do_values));
283   PetscCall(PetscArrayzero(retval.row_nnz, nrows));
284   PetscCall(spbas_allocate_data(&retval));
285   retval.nnz = 0;
286 
287   PetscCall(PetscMalloc1(nrows, &diag));
288   PetscCall(PetscCalloc1(nrows, &val));
289   PetscCall(PetscCalloc1(nrows, &lvec));
290   PetscCall(PetscCalloc1(nrows, &max_row_nnz));
291 
292   /* Count the nonzeros on transpose of pattern */
293   for (i = 0; i < nrows; i++) {
294     p_nnz  = pattern.row_nnz[i];
295     p_icol = pattern.icols[i];
296     for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++;
297   }
298 
299   /* Calculate rows of L */
300   for (i = 0; i < nrows; i++) {
301     p_nnz  = pattern.row_nnz[i];
302     p_icol = pattern.icols[i];
303 
304     r_nnz  = retval.row_nnz[i];
305     r_icol = retval.icols[i];
306     r_val  = retval.values[i];
307     A_nnz  = ai[rip[i] + 1] - ai[rip[i]];
308     A_icol = &aj[ai[rip[i]]];
309     A_val  = &aa[ai[rip[i]]];
310 
311     /* Calculate  val += A(i,i:n)'; */
312     for (j = 0; j < A_nnz; j++) {
313       k = riip[A_icol[j]];
314       if (k >= i) val[k] = A_val[j];
315     }
316 
317     /*  Add regularization */
318     val[i] += epsdiag;
319 
320     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
321         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
322     for (j = 0; j < r_nnz; j++) {
323       k       = r_icol[j];
324       lvec[k] = diag[k] * r_val[j];
325       val[i] -= r_val[j] * lvec[k];
326     }
327 
328     /* Calculate the new diagonal */
329     diag[i] = val[i];
330     if (PetscRealPart(diag[i]) < droptol) {
331       PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n"));
332       PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1));
333 
334       /* Delete the whole matrix at once. */
335       PetscCall(spbas_delete(retval));
336       *success = PETSC_FALSE;
337       PetscFunctionReturn(0);
338     }
339 
340     /* If necessary, allocate arrays */
341     if (r_nnz == 0) {
342       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
343       PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed");
344       r_icol = retval.icols[i];
345       r_val  = retval.values[i];
346     }
347 
348     /* Now, fill in */
349     r_icol[r_nnz] = i;
350     r_val[r_nnz]  = 1.0;
351     r_nnz++;
352     retval.row_nnz[i]++;
353 
354     retval.nnz += r_nnz;
355 
356     /* Calculate
357         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
358     for (j = 1; j < p_nnz; j++) {
359       k       = i + p_icol[j];
360       r1_icol = retval.icols[k];
361       r1_val  = retval.values[k];
362       for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
363       val[k] /= diag[i];
364 
365       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) {
366         /* If necessary, allocate arrays */
367         if (!retval.row_nnz[k]) {
368           PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
369           if (!success) {
370             PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
371             flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
372             PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed");
373             r_icol = retval.icols[i];
374           }
375         }
376 
377         retval.icols[k][retval.row_nnz[k]]  = i;
378         retval.values[k][retval.row_nnz[k]] = val[k];
379         retval.row_nnz[k]++;
380       }
381       val[k] = 0;
382     }
383 
384     /* Erase the values used in the work arrays */
385     for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0;
386   }
387 
388   PetscCall(PetscFree(lvec));
389   PetscCall(PetscFree(val));
390 
391   PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
392   PetscCall(PetscFree(max_row_nnz));
393 
394   /* Place the inverse of the diagonals in the matrix */
395   for (i = 0; i < nrows; i++) {
396     r_nnz = retval.row_nnz[i];
397 
398     retval.values[i][r_nnz - 1] = 1.0 / diag[i];
399     for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1;
400   }
401   PetscCall(PetscFree(diag));
402   *matrix_L = retval;
403   *success  = PETSC_TRUE;
404   PetscFunctionReturn(0);
405 }
406