1 #pragma once
2 /*
3 spbas_cholesky_row_alloc:
4 in the data arrays, find a place where another row may be stored.
5 Return PETSC_ERR_MEM if there is insufficient space to store the
6 row, so garbage collection and/or re-allocation may be done.
7 */
spbas_cholesky_row_alloc(spbas_matrix retval,PetscInt k,PetscInt r_nnz,PetscInt * n_alloc_used)8 static PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used)
9 {
10 retval.icols[k] = &retval.alloc_icol[*n_alloc_used];
11 retval.values[k] = &retval.alloc_val[*n_alloc_used];
12 *n_alloc_used += r_nnz;
13 return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE;
14 }
15
16 /*
17 spbas_cholesky_garbage_collect:
18 move the rows which have been calculated so far, as well as
19 those currently under construction, to the front of the
20 array, while putting them in the proper order.
21 When it seems necessary, enlarge the current arrays.
22
23 NB: re-allocation is being simulated using
24 PetscMalloc, memcpy, PetscFree, because
25 PetscRealloc does not seem to exist.
26
27 */
spbas_cholesky_garbage_collect(spbas_matrix * result,PetscInt i_row,PetscInt * n_row_alloc_ok,PetscInt * n_alloc_used,PetscInt * max_row_nnz)28 static PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
29 Only the storage, not the contents of this matrix is changed in this function */
30 PetscInt i_row, /* I : Number of rows for which the final contents are known */
31 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
32 places in the arrays: they need not be moved any more */
33 PetscInt *n_alloc_used, /* I/O: */
34 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */
35 {
36 /* PSEUDO-CODE:
37 1. Choose the appropriate size for the arrays
38 2. Rescue the arrays which would be lost during garbage collection
39 3. Reallocate and correct administration
40 4. Move all arrays so that they are in proper order */
41
42 PetscInt i, j;
43 PetscInt nrows = result->nrows;
44 PetscInt n_alloc_ok = 0;
45 PetscInt n_alloc_ok_max = 0;
46 PetscInt need_already = 0;
47 PetscInt max_need_extra = 0;
48 PetscInt n_alloc_max, n_alloc_est, n_alloc;
49 PetscInt n_alloc_now = result->n_alloc_icol;
50 PetscInt *alloc_icol_old = result->alloc_icol;
51 PetscScalar *alloc_val_old = result->alloc_val;
52 PetscInt *icol_rescue;
53 PetscScalar *val_rescue;
54 PetscInt n_rescue;
55 PetscInt i_here, i_last, n_copy;
56 const PetscReal xtra_perc = 20;
57
58 PetscFunctionBegin;
59 /*********************************************************
60 1. Choose appropriate array size
61 Count number of rows and memory usage which is already final */
62 for (i = 0; i < i_row; i++) {
63 n_alloc_ok += result->row_nnz[i];
64 n_alloc_ok_max += max_row_nnz[i];
65 }
66
67 /* Count currently needed memory usage and future memory requirements
68 (max, predicted)*/
69 for (i = i_row; i < nrows; i++) {
70 if (!result->row_nnz[i]) {
71 max_need_extra += max_row_nnz[i];
72 } else {
73 need_already += max_row_nnz[i];
74 }
75 }
76
77 /* Make maximal and realistic memory requirement estimates */
78 n_alloc_max = n_alloc_ok + need_already + max_need_extra;
79 n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max));
80
81 /* Choose array sizes */
82 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
83 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
84 else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max;
85 else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0));
86
87 /* If new estimate is less than what we already have,
88 don't reallocate, just garbage-collect */
89 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol;
90
91 /* Motivate dimension choice */
92 PetscCall(PetscInfo(NULL, " Allocating %" PetscInt_FMT " nonzeros: ", n_alloc)); /* checkbadSource \n */
93 if (n_alloc_max == n_alloc_est) {
94 PetscCall(PetscInfo(NULL, "this is the correct size\n"));
95 } else if (n_alloc_now >= n_alloc_est) {
96 PetscCall(PetscInfo(NULL, "the current size, which seems enough\n"));
97 } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) {
98 PetscCall(PetscInfo(NULL, "the maximum estimate\n"));
99 } else {
100 PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc));
101 }
102
103 /**********************************************************
104 2. Rescue arrays which would be lost
105 Count how many rows and nonzeros will have to be rescued
106 when moving all arrays in place */
107 n_rescue = 0;
108 if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
109 else {
110 i = *n_row_alloc_ok - 1;
111
112 PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol + result->row_nnz[i], n_alloc_used));
113 }
114
115 for (i = *n_row_alloc_ok; i < nrows; i++) {
116 PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol, &i_here));
117 i_last = i_here + result->row_nnz[i];
118 if (result->row_nnz[i] > 0) {
119 if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i];
120
121 if (i < i_row) *n_alloc_used += result->row_nnz[i];
122 else *n_alloc_used += max_row_nnz[i];
123 }
124 }
125
126 /* Allocate rescue arrays */
127 PetscCall(PetscMalloc1(n_rescue, &icol_rescue));
128 PetscCall(PetscMalloc1(n_rescue, &val_rescue));
129
130 /* Rescue the arrays which need rescuing */
131 n_rescue = 0;
132 if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
133 else {
134 i = *n_row_alloc_ok - 1;
135 PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol + result->row_nnz[i], n_alloc_used));
136 }
137
138 for (i = *n_row_alloc_ok; i < nrows; i++) {
139 PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol, &i_here));
140 i_last = i_here + result->row_nnz[i];
141 if (result->row_nnz[i] > 0) {
142 if (*n_alloc_used > i_here || i_last > n_alloc) {
143 PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]));
144 PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]));
145 n_rescue += result->row_nnz[i];
146 }
147
148 if (i < i_row) *n_alloc_used += result->row_nnz[i];
149 else *n_alloc_used += max_row_nnz[i];
150 }
151 }
152
153 /**********************************************************
154 3. Reallocate and correct administration */
155
156 if (n_alloc != result->n_alloc_icol) {
157 n_copy = PetscMin(n_alloc, result->n_alloc_icol);
158
159 /* PETSc knows no REALLOC, so we'll REALLOC ourselves.
160
161 Allocate new icol-data, copy old contents */
162 PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol));
163 PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy));
164
165 /* Update administration, Reset pointers to new arrays */
166 result->n_alloc_icol = n_alloc;
167 for (i = 0; i < nrows; i++) {
168 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old);
169 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
170 }
171
172 /* Delete old array */
173 PetscCall(PetscFree(alloc_icol_old));
174
175 /* Allocate new value-data, copy old contents */
176 PetscCall(PetscMalloc1(n_alloc, &result->alloc_val));
177 PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy));
178
179 /* Update administration, Reset pointers to new arrays */
180 result->n_alloc_val = n_alloc;
181 for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
182
183 /* Delete old array */
184 PetscCall(PetscFree(alloc_val_old));
185 }
186
187 /*********************************************************
188 4. Copy all the arrays to their proper places */
189 n_rescue = 0;
190 if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
191 else {
192 i = *n_row_alloc_ok - 1;
193
194 PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol + result->row_nnz[i], n_alloc_used));
195 }
196
197 for (i = *n_row_alloc_ok; i < nrows; i++) {
198 PetscCall(PetscIntCast(result->icols[i] - result->alloc_icol, &i_here));
199 i_last = i_here + result->row_nnz[i];
200
201 result->icols[i] = result->alloc_icol + *n_alloc_used;
202 result->values[i] = result->alloc_val + *n_alloc_used;
203
204 if (result->row_nnz[i] > 0) {
205 if (*n_alloc_used > i_here || i_last > n_alloc) {
206 PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]));
207 PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i]));
208
209 n_rescue += result->row_nnz[i];
210 } else {
211 for (j = 0; j < result->row_nnz[i]; j++) {
212 result->icols[i][j] = result->alloc_icol[i_here + j];
213 result->values[i][j] = result->alloc_val[i_here + j];
214 }
215 }
216 if (i < i_row) *n_alloc_used += result->row_nnz[i];
217 else *n_alloc_used += max_row_nnz[i];
218 }
219 }
220
221 /* Delete the rescue arrays */
222 PetscCall(PetscFree(icol_rescue));
223 PetscCall(PetscFree(val_rescue));
224
225 *n_row_alloc_ok = i_row;
226 PetscFunctionReturn(PETSC_SUCCESS);
227 }
228
229 /*
230 spbas_incomplete_cholesky:
231 incomplete Cholesky decomposition of a square, symmetric,
232 positive definite matrix.
233
234 In case negative diagonals are encountered, function returns
235 NEGATIVE_DIAGONAL. When this happens, call this function again
236 with a larger epsdiag_in, a less sparse pattern, and/or a smaller
237 droptol
238 */
spbas_incomplete_cholesky(Mat A,const PetscInt * rip,const PetscInt * riip,spbas_matrix pattern,PetscReal droptol,PetscReal epsdiag_in,spbas_matrix * matrix_L,PetscBool * success)239 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success)
240 {
241 PetscInt jL;
242 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
243 PetscInt *ai = a->i, *aj = a->j;
244 MatScalar *aa = a->a;
245 PetscInt nrows, ncols;
246 PetscInt *max_row_nnz;
247 spbas_matrix retval;
248 PetscScalar *diag;
249 PetscScalar *val;
250 PetscScalar *lvec;
251 PetscScalar epsdiag;
252 PetscInt i, j, k;
253 const PetscBool do_values = PETSC_TRUE;
254 PetscInt *r1_icol;
255 PetscScalar *r1_val;
256 PetscInt *r_icol;
257 PetscInt r_nnz;
258 PetscScalar *r_val;
259 PetscInt *A_icol;
260 PetscInt A_nnz;
261 PetscScalar *A_val;
262 PetscInt *p_icol;
263 PetscInt p_nnz;
264 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */
265 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */
266
267 PetscFunctionBegin;
268 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */
269 PetscCall(MatGetSize(A, &nrows, &ncols));
270 PetscCall(MatGetTrace(A, &epsdiag));
271
272 epsdiag *= epsdiag_in / nrows;
273
274 PetscCall(PetscInfo(NULL, " Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol));
275
276 if (droptol < 1e-10) droptol = 1e-10;
277
278 retval.nrows = nrows;
279 retval.ncols = nrows;
280 retval.nnz = pattern.nnz / 10;
281 retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
282 retval.block_data = PETSC_TRUE;
283
284 PetscCall(spbas_allocate_pattern(&retval, do_values));
285 PetscCall(PetscArrayzero(retval.row_nnz, nrows));
286 PetscCall(spbas_allocate_data(&retval));
287 retval.nnz = 0;
288
289 PetscCall(PetscMalloc1(nrows, &diag));
290 PetscCall(PetscCalloc1(nrows, &val));
291 PetscCall(PetscCalloc1(nrows, &lvec));
292 PetscCall(PetscCalloc1(nrows, &max_row_nnz));
293
294 /* Count the nonzeros on transpose of pattern */
295 for (i = 0; i < nrows; i++) {
296 p_nnz = pattern.row_nnz[i];
297 p_icol = pattern.icols[i];
298 for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++;
299 }
300
301 /* Calculate rows of L */
302 for (i = 0; i < nrows; i++) {
303 p_nnz = pattern.row_nnz[i];
304 p_icol = pattern.icols[i];
305
306 r_nnz = retval.row_nnz[i];
307 r_icol = retval.icols[i];
308 r_val = retval.values[i];
309 A_nnz = ai[rip[i] + 1] - ai[rip[i]];
310 A_icol = &aj[ai[rip[i]]];
311 A_val = &aa[ai[rip[i]]];
312
313 /* Calculate val += A(i,i:n)'; */
314 for (j = 0; j < A_nnz; j++) {
315 k = riip[A_icol[j]];
316 if (k >= i) val[k] = A_val[j];
317 }
318
319 /* Add regularization */
320 val[i] += epsdiag;
321
322 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i);
323 val(i) = A(i,i) - L(0:i-1,i)' * lvec */
324 for (j = 0; j < r_nnz; j++) {
325 k = r_icol[j];
326 lvec[k] = diag[k] * r_val[j];
327 val[i] -= r_val[j] * lvec[k];
328 }
329
330 /* Calculate the new diagonal */
331 diag[i] = val[i];
332 if (PetscRealPart(diag[i]) < droptol) {
333 PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n"));
334 PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1));
335
336 /* Delete the whole matrix at once. */
337 PetscCall(spbas_delete(retval));
338 *success = PETSC_FALSE;
339 PetscFunctionReturn(PETSC_SUCCESS);
340 }
341
342 /* If necessary, allocate arrays */
343 if (r_nnz == 0) {
344 PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
345 PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed");
346 r_icol = retval.icols[i];
347 r_val = retval.values[i];
348 }
349
350 /* Now, fill in */
351 r_icol[r_nnz] = i;
352 r_val[r_nnz] = 1.0;
353 r_nnz++;
354 retval.row_nnz[i]++;
355
356 retval.nnz += r_nnz;
357
358 /* Calculate
359 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
360 for (j = 1; j < p_nnz; j++) {
361 k = i + p_icol[j];
362 r1_icol = retval.icols[k];
363 r1_val = retval.values[k];
364 for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
365 val[k] /= diag[i];
366
367 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) {
368 /* If necessary, allocate arrays */
369 if (!retval.row_nnz[k]) {
370 PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
371 if (!success) {
372 PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
373 flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
374 PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed");
375 r_icol = retval.icols[i];
376 }
377 }
378
379 retval.icols[k][retval.row_nnz[k]] = i;
380 retval.values[k][retval.row_nnz[k]] = val[k];
381 retval.row_nnz[k]++;
382 }
383 val[k] = 0;
384 }
385
386 /* Erase the values used in the work arrays */
387 for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0;
388 }
389
390 PetscCall(PetscFree(lvec));
391 PetscCall(PetscFree(val));
392
393 PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
394 PetscCall(PetscFree(max_row_nnz));
395
396 /* Place the inverse of the diagonals in the matrix */
397 for (i = 0; i < nrows; i++) {
398 r_nnz = retval.row_nnz[i];
399
400 retval.values[i][r_nnz - 1] = 1.0 / diag[i];
401 for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1;
402 }
403 PetscCall(PetscFree(diag));
404 *matrix_L = retval;
405 *success = PETSC_TRUE;
406 PetscFunctionReturn(PETSC_SUCCESS);
407 }
408