xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 58d68138c660dfb4e9f5b03334792cd4f2ffd7cc) !
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used) {
9   PetscFunctionBegin;
10   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
11   retval.values[k] = &retval.alloc_val[*n_alloc_used];
12   *n_alloc_used += r_nnz;
13   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_FALSE);
14   PetscFunctionReturn(PETSC_TRUE);
15 }
16 
17 /*
18   spbas_cholesky_garbage_collect:
19      move the rows which have been calculated so far, as well as
20      those currently under construction, to the front of the
21      array, while putting them in the proper order.
22      When it seems necessary, enlarge the current arrays.
23 
24      NB: re-allocation is being simulated using
25          PetscMalloc, memcpy, PetscFree, because
26          PetscRealloc does not seem to exist.
27 
28 */
29 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
30                                                                                     Only the storage, not the contents of this matrix is changed in this function */
31                                               PetscInt      i_row,          /* I  : Number of rows for which the final contents are known */
32                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
33                                                                                     places in the arrays: they need not be moved any more */
34                                               PetscInt     *n_alloc_used,   /* I/O:  */
35                                               PetscInt     *max_row_nnz)        /* I  : Over-estimate of the number of nonzeros needed to store each row */
36 {
37   /* PSEUDO-CODE:
38   1. Choose the appropriate size for the arrays
39   2. Rescue the arrays which would be lost during garbage collection
40   3. Reallocate and correct administration
41   4. Move all arrays so that they are in proper order */
42 
43   PetscInt        i, j;
44   PetscInt        nrows          = result->nrows;
45   PetscInt        n_alloc_ok     = 0;
46   PetscInt        n_alloc_ok_max = 0;
47   PetscInt        need_already   = 0;
48   PetscInt        max_need_extra = 0;
49   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
50   PetscInt        n_alloc_now    = result->n_alloc_icol;
51   PetscInt       *alloc_icol_old = result->alloc_icol;
52   PetscScalar    *alloc_val_old  = result->alloc_val;
53   PetscInt       *icol_rescue;
54   PetscScalar    *val_rescue;
55   PetscInt        n_rescue;
56   PetscInt        i_here, i_last, n_copy;
57   const PetscReal xtra_perc = 20;
58 
59   PetscFunctionBegin;
60   /*********************************************************
61   1. Choose appropriate array size
62   Count number of rows and memory usage which is already final */
63   for (i = 0; i < i_row; i++) {
64     n_alloc_ok += result->row_nnz[i];
65     n_alloc_ok_max += max_row_nnz[i];
66   }
67 
68   /* Count currently needed memory usage and future memory requirements
69     (max, predicted)*/
70   for (i = i_row; i < nrows; i++) {
71     if (!result->row_nnz[i]) {
72       max_need_extra += max_row_nnz[i];
73     } else {
74       need_already += max_row_nnz[i];
75     }
76   }
77 
78   /* Make maximal and realistic memory requirement estimates */
79   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
80   n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max));
81 
82   /* Choose array sizes */
83   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
84   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
85   else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max;
86   else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0));
87 
88   /* If new estimate is less than what we already have,
89     don't reallocate, just garbage-collect */
90   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { n_alloc = result->n_alloc_icol; }
91 
92   /* Motivate dimension choice */
93   PetscCall(PetscInfo(NULL, "   Allocating %" PetscInt_FMT " nonzeros: ", n_alloc));
94   if (n_alloc_max == n_alloc_est) {
95     PetscCall(PetscInfo(NULL, "this is the correct size\n"));
96   } else if (n_alloc_now >= n_alloc_est) {
97     PetscCall(PetscInfo(NULL, "the current size, which seems enough\n"));
98   } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) {
99     PetscCall(PetscInfo(NULL, "the maximum estimate\n"));
100   } else {
101     PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc));
102   }
103 
104   /**********************************************************
105   2. Rescue arrays which would be lost
106   Count how many rows and nonzeros will have to be rescued
107   when moving all arrays in place */
108   n_rescue = 0;
109   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
110   else {
111     i = *n_row_alloc_ok - 1;
112 
113     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
114   }
115 
116   for (i = *n_row_alloc_ok; i < nrows; i++) {
117     i_here = result->icols[i] - result->alloc_icol;
118     i_last = i_here + result->row_nnz[i];
119     if (result->row_nnz[i] > 0) {
120       if (*n_alloc_used > i_here || i_last > n_alloc) { n_rescue += result->row_nnz[i]; }
121 
122       if (i < i_row) *n_alloc_used += result->row_nnz[i];
123       else *n_alloc_used += max_row_nnz[i];
124     }
125   }
126 
127   /* Allocate rescue arrays */
128   PetscCall(PetscMalloc1(n_rescue, &icol_rescue));
129   PetscCall(PetscMalloc1(n_rescue, &val_rescue));
130 
131   /* Rescue the arrays which need rescuing */
132   n_rescue = 0;
133   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
134   else {
135     i             = *n_row_alloc_ok - 1;
136     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
137   }
138 
139   for (i = *n_row_alloc_ok; i < nrows; i++) {
140     i_here = result->icols[i] - result->alloc_icol;
141     i_last = i_here + result->row_nnz[i];
142     if (result->row_nnz[i] > 0) {
143       if (*n_alloc_used > i_here || i_last > n_alloc) {
144         PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]));
145         PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]));
146         n_rescue += result->row_nnz[i];
147       }
148 
149       if (i < i_row) *n_alloc_used += result->row_nnz[i];
150       else *n_alloc_used += max_row_nnz[i];
151     }
152   }
153 
154   /**********************************************************
155   3. Reallocate and correct administration */
156 
157   if (n_alloc != result->n_alloc_icol) {
158     n_copy = PetscMin(n_alloc, result->n_alloc_icol);
159 
160     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
161 
162         Allocate new icol-data, copy old contents */
163     PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol));
164     PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy));
165 
166     /* Update administration, Reset pointers to new arrays  */
167     result->n_alloc_icol = n_alloc;
168     for (i = 0; i < nrows; i++) {
169       result->icols[i]  = result->alloc_icol + (result->icols[i] - alloc_icol_old);
170       result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
171     }
172 
173     /* Delete old array */
174     PetscCall(PetscFree(alloc_icol_old));
175 
176     /* Allocate new value-data, copy old contents */
177     PetscCall(PetscMalloc1(n_alloc, &result->alloc_val));
178     PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy));
179 
180     /* Update administration, Reset pointers to new arrays  */
181     result->n_alloc_val = n_alloc;
182     for (i = 0; i < nrows; i++) { result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); }
183 
184     /* Delete old array */
185     PetscCall(PetscFree(alloc_val_old));
186   }
187 
188   /*********************************************************
189   4. Copy all the arrays to their proper places */
190   n_rescue = 0;
191   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
192   else {
193     i = *n_row_alloc_ok - 1;
194 
195     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
196   }
197 
198   for (i = *n_row_alloc_ok; i < nrows; i++) {
199     i_here = result->icols[i] - result->alloc_icol;
200     i_last = i_here + result->row_nnz[i];
201 
202     result->icols[i]  = result->alloc_icol + *n_alloc_used;
203     result->values[i] = result->alloc_val + *n_alloc_used;
204 
205     if (result->row_nnz[i] > 0) {
206       if (*n_alloc_used > i_here || i_last > n_alloc) {
207         PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]));
208         PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i]));
209 
210         n_rescue += result->row_nnz[i];
211       } else {
212         for (j = 0; j < result->row_nnz[i]; j++) {
213           result->icols[i][j]  = result->alloc_icol[i_here + j];
214           result->values[i][j] = result->alloc_val[i_here + j];
215         }
216       }
217       if (i < i_row) *n_alloc_used += result->row_nnz[i];
218       else *n_alloc_used += max_row_nnz[i];
219     }
220   }
221 
222   /* Delete the rescue arrays */
223   PetscCall(PetscFree(icol_rescue));
224   PetscCall(PetscFree(val_rescue));
225 
226   *n_row_alloc_ok = i_row;
227   PetscFunctionReturn(0);
228 }
229 
230 /*
231   spbas_incomplete_cholesky:
232      incomplete Cholesky decomposition of a square, symmetric,
233      positive definite matrix.
234 
235      In case negative diagonals are encountered, function returns
236      NEGATIVE_DIAGONAL. When this happens, call this function again
237      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
238      droptol
239 */
240 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success) {
241   PetscInt        jL;
242   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
243   PetscInt       *ai = a->i, *aj = a->j;
244   MatScalar      *aa = a->a;
245   PetscInt        nrows, ncols;
246   PetscInt       *max_row_nnz;
247   spbas_matrix    retval;
248   PetscScalar    *diag;
249   PetscScalar    *val;
250   PetscScalar    *lvec;
251   PetscScalar     epsdiag;
252   PetscInt        i, j, k;
253   const PetscBool do_values = PETSC_TRUE;
254   PetscInt       *r1_icol;
255   PetscScalar    *r1_val;
256   PetscInt       *r_icol;
257   PetscInt        r_nnz;
258   PetscScalar    *r_val;
259   PetscInt       *A_icol;
260   PetscInt        A_nnz;
261   PetscScalar    *A_val;
262   PetscInt       *p_icol;
263   PetscInt        p_nnz;
264   PetscInt        n_row_alloc_ok = 0; /* number of rows which have been stored   correctly in the matrix */
265   PetscInt        n_alloc_used   = 0; /* part of result->icols and result->values   which is currently being used */
266 
267   PetscFunctionBegin;
268   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
269   PetscCall(MatGetSize(A, &nrows, &ncols));
270   PetscCall(MatGetTrace(A, &epsdiag));
271 
272   epsdiag *= epsdiag_in / nrows;
273 
274   PetscCall(PetscInfo(NULL, "   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol));
275 
276   if (droptol < 1e-10) droptol = 1e-10;
277 
278   retval.nrows        = nrows;
279   retval.ncols        = nrows;
280   retval.nnz          = pattern.nnz / 10;
281   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
282   retval.block_data   = PETSC_TRUE;
283 
284   PetscCall(spbas_allocate_pattern(&retval, do_values));
285   PetscCall(PetscArrayzero(retval.row_nnz, nrows));
286   PetscCall(spbas_allocate_data(&retval));
287   retval.nnz = 0;
288 
289   PetscCall(PetscMalloc1(nrows, &diag));
290   PetscCall(PetscCalloc1(nrows, &val));
291   PetscCall(PetscCalloc1(nrows, &lvec));
292   PetscCall(PetscCalloc1(nrows, &max_row_nnz));
293 
294   /* Count the nonzeros on transpose of pattern */
295   for (i = 0; i < nrows; i++) {
296     p_nnz  = pattern.row_nnz[i];
297     p_icol = pattern.icols[i];
298     for (j = 0; j < p_nnz; j++) { max_row_nnz[i + p_icol[j]]++; }
299   }
300 
301   /* Calculate rows of L */
302   for (i = 0; i < nrows; i++) {
303     p_nnz  = pattern.row_nnz[i];
304     p_icol = pattern.icols[i];
305 
306     r_nnz  = retval.row_nnz[i];
307     r_icol = retval.icols[i];
308     r_val  = retval.values[i];
309     A_nnz  = ai[rip[i] + 1] - ai[rip[i]];
310     A_icol = &aj[ai[rip[i]]];
311     A_val  = &aa[ai[rip[i]]];
312 
313     /* Calculate  val += A(i,i:n)'; */
314     for (j = 0; j < A_nnz; j++) {
315       k = riip[A_icol[j]];
316       if (k >= i) val[k] = A_val[j];
317     }
318 
319     /*  Add regularization */
320     val[i] += epsdiag;
321 
322     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
323         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
324     for (j = 0; j < r_nnz; j++) {
325       k       = r_icol[j];
326       lvec[k] = diag[k] * r_val[j];
327       val[i] -= r_val[j] * lvec[k];
328     }
329 
330     /* Calculate the new diagonal */
331     diag[i] = val[i];
332     if (PetscRealPart(diag[i]) < droptol) {
333       PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n"));
334       PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1));
335 
336       /* Delete the whole matrix at once. */
337       PetscCall(spbas_delete(retval));
338       *success = PETSC_FALSE;
339       PetscFunctionReturn(0);
340     }
341 
342     /* If necessary, allocate arrays */
343     if (r_nnz == 0) {
344       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
345       PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed");
346       r_icol = retval.icols[i];
347       r_val  = retval.values[i];
348     }
349 
350     /* Now, fill in */
351     r_icol[r_nnz] = i;
352     r_val[r_nnz]  = 1.0;
353     r_nnz++;
354     retval.row_nnz[i]++;
355 
356     retval.nnz += r_nnz;
357 
358     /* Calculate
359         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
360     for (j = 1; j < p_nnz; j++) {
361       k       = i + p_icol[j];
362       r1_icol = retval.icols[k];
363       r1_val  = retval.values[k];
364       for (jL = 0; jL < retval.row_nnz[k]; jL++) { val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; }
365       val[k] /= diag[i];
366 
367       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) {
368         /* If necessary, allocate arrays */
369         if (!retval.row_nnz[k]) {
370           PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
371           if (!success) {
372             PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
373             flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
374             PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed");
375             r_icol = retval.icols[i];
376           }
377         }
378 
379         retval.icols[k][retval.row_nnz[k]]  = i;
380         retval.values[k][retval.row_nnz[k]] = val[k];
381         retval.row_nnz[k]++;
382       }
383       val[k] = 0;
384     }
385 
386     /* Erase the values used in the work arrays */
387     for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0;
388   }
389 
390   PetscCall(PetscFree(lvec));
391   PetscCall(PetscFree(val));
392 
393   PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
394   PetscCall(PetscFree(max_row_nnz));
395 
396   /* Place the inverse of the diagonals in the matrix */
397   for (i = 0; i < nrows; i++) {
398     r_nnz = retval.row_nnz[i];
399 
400     retval.values[i][r_nnz - 1] = 1.0 / diag[i];
401     for (j = 0; j < r_nnz - 1; j++) { retval.values[i][j] *= -1; }
402   }
403   PetscCall(PetscFree(diag));
404   *matrix_L = retval;
405   *success  = PETSC_TRUE;
406   PetscFunctionReturn(0);
407 }
408