xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision fdf6c4e30aafdbc795e4f76379caa977fd5cdf5a)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9 {
10   PetscFunctionBegin;
11   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
12   retval.values[k] = &retval.alloc_val[*n_alloc_used];
13   *n_alloc_used   += r_nnz;
14   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_FALSE);
15   PetscFunctionReturn(PETSC_TRUE);
16 }
17 
18 /*
19   spbas_cholesky_garbage_collect:
20      move the rows which have been calculated so far, as well as
21      those currently under construction, to the front of the
22      array, while putting them in the proper order.
23      When it seems necessary, enlarge the current arrays.
24 
25      NB: re-allocation is being simulated using
26          PetscMalloc, memcpy, PetscFree, because
27          PetscRealloc does not seem to exist.
28 
29 */
30 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
31                                                                                     Only the storage, not the contents of this matrix is changed in this function */
32                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
33                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
34                                                                                     places in the arrays: they need not be moved any more */
35                                               PetscInt     *n_alloc_used,   /* I/O:  */
36                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
37 {
38 /* PSEUDO-CODE:
39   1. Choose the appropriate size for the arrays
40   2. Rescue the arrays which would be lost during garbage collection
41   3. Reallocate and correct administration
42   4. Move all arrays so that they are in proper order */
43 
44   PetscInt        i,j;
45   PetscInt        nrows         = result->nrows;
46   PetscInt        n_alloc_ok    =0;
47   PetscInt        n_alloc_ok_max=0;
48   PetscInt        need_already  = 0;
49   PetscInt        max_need_extra= 0;
50   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
51   PetscInt        n_alloc_now     = result->n_alloc_icol;
52   PetscInt        *alloc_icol_old = result->alloc_icol;
53   PetscScalar     *alloc_val_old  = result->alloc_val;
54   PetscInt        *icol_rescue;
55   PetscScalar     *val_rescue;
56   PetscInt        n_rescue;
57   PetscInt        i_here, i_last, n_copy;
58   const PetscReal xtra_perc = 20;
59 
60   PetscFunctionBegin;
61   /*********************************************************
62   1. Choose appropriate array size
63   Count number of rows and memory usage which is already final */
64   for (i=0; i<i_row; i++)  {
65     n_alloc_ok     += result->row_nnz[i];
66     n_alloc_ok_max += max_row_nnz[i];
67   }
68 
69   /* Count currently needed memory usage and future memory requirements
70     (max, predicted)*/
71   for (i=i_row; i<nrows; i++) {
72     if (!result->row_nnz[i]) {
73       max_need_extra += max_row_nnz[i];
74     } else {
75       need_already += max_row_nnz[i];
76     }
77   }
78 
79   /* Make maximal and realistic memory requirement estimates */
80   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
81   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
82 
83   /* Choose array sizes */
84   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
85   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
86   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
87   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
88 
89   /* If new estimate is less than what we already have,
90     don't reallocate, just garbage-collect */
91   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
92     n_alloc = result->n_alloc_icol;
93   }
94 
95   /* Motivate dimension choice */
96   PetscCall(PetscInfo(NULL,"   Allocating %" PetscInt_FMT " nonzeros: ",n_alloc));
97   if (n_alloc_max == n_alloc_est) {
98     PetscCall(PetscInfo(NULL,"this is the correct size\n"));
99   } else if (n_alloc_now >= n_alloc_est) {
100     PetscCall(PetscInfo(NULL,"the current size, which seems enough\n"));
101   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
102     PetscCall(PetscInfo(NULL,"the maximum estimate\n"));
103   } else {
104     PetscCall(PetscInfo(NULL,"%6.2f %% more than the estimate\n",(double)xtra_perc));
105   }
106 
107   /**********************************************************
108   2. Rescue arrays which would be lost
109   Count how many rows and nonzeros will have to be rescued
110   when moving all arrays in place */
111   n_rescue = 0;
112   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
113   else {
114     i = *n_row_alloc_ok - 1;
115 
116     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
117   }
118 
119   for (i=*n_row_alloc_ok; i<nrows; i++) {
120     i_here = result->icols[i]-result->alloc_icol;
121     i_last = i_here + result->row_nnz[i];
122     if (result->row_nnz[i]>0) {
123       if (*n_alloc_used > i_here || i_last > n_alloc) {
124         n_rescue += result->row_nnz[i];
125       }
126 
127       if (i<i_row) *n_alloc_used += result->row_nnz[i];
128       else         *n_alloc_used += max_row_nnz[i];
129     }
130   }
131 
132   /* Allocate rescue arrays */
133   PetscCall(PetscMalloc1(n_rescue, &icol_rescue));
134   PetscCall(PetscMalloc1(n_rescue, &val_rescue));
135 
136   /* Rescue the arrays which need rescuing */
137   n_rescue = 0;
138   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
139   else {
140     i             = *n_row_alloc_ok - 1;
141     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
142   }
143 
144   for (i=*n_row_alloc_ok; i<nrows; i++) {
145     i_here = result->icols[i]-result->alloc_icol;
146     i_last = i_here + result->row_nnz[i];
147     if (result->row_nnz[i]>0) {
148       if (*n_alloc_used > i_here || i_last > n_alloc) {
149         PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]));
150         PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]));
151         n_rescue += result->row_nnz[i];
152       }
153 
154       if (i<i_row) *n_alloc_used += result->row_nnz[i];
155       else         *n_alloc_used += max_row_nnz[i];
156     }
157   }
158 
159   /**********************************************************
160   3. Reallocate and correct administration */
161 
162   if (n_alloc != result->n_alloc_icol) {
163     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
164 
165     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
166 
167         Allocate new icol-data, copy old contents */
168     PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol));
169     PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy));
170 
171     /* Update administration, Reset pointers to new arrays  */
172     result->n_alloc_icol = n_alloc;
173     for (i=0; i<nrows; i++) {
174       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
175       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
176     }
177 
178     /* Delete old array */
179     PetscCall(PetscFree(alloc_icol_old));
180 
181     /* Allocate new value-data, copy old contents */
182     PetscCall(PetscMalloc1(n_alloc, &result->alloc_val));
183     PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy));
184 
185     /* Update administration, Reset pointers to new arrays  */
186     result->n_alloc_val = n_alloc;
187     for (i=0; i<nrows; i++) {
188       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
189     }
190 
191     /* Delete old array */
192     PetscCall(PetscFree(alloc_val_old));
193   }
194 
195   /*********************************************************
196   4. Copy all the arrays to their proper places */
197   n_rescue = 0;
198   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
199   else {
200     i = *n_row_alloc_ok - 1;
201 
202     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
203   }
204 
205   for (i=*n_row_alloc_ok; i<nrows; i++) {
206     i_here = result->icols[i]-result->alloc_icol;
207     i_last = i_here + result->row_nnz[i];
208 
209     result->icols[i] = result->alloc_icol + *n_alloc_used;
210     result->values[i]= result->alloc_val  + *n_alloc_used;
211 
212     if (result->row_nnz[i]>0) {
213       if (*n_alloc_used > i_here || i_last > n_alloc) {
214         PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]));
215         PetscCall(PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]));
216 
217         n_rescue += result->row_nnz[i];
218       } else {
219         for (j=0; j<result->row_nnz[i]; j++) {
220           result->icols[i][j]  = result->alloc_icol[i_here+j];
221           result->values[i][j] = result->alloc_val[i_here+j];
222         }
223       }
224       if (i<i_row) *n_alloc_used += result->row_nnz[i];
225       else         *n_alloc_used += max_row_nnz[i];
226     }
227   }
228 
229   /* Delete the rescue arrays */
230   PetscCall(PetscFree(icol_rescue));
231   PetscCall(PetscFree(val_rescue));
232 
233   *n_row_alloc_ok = i_row;
234   PetscFunctionReturn(0);
235 }
236 
237 /*
238   spbas_incomplete_cholesky:
239      incomplete Cholesky decomposition of a square, symmetric,
240      positive definite matrix.
241 
242      In case negative diagonals are encountered, function returns
243      NEGATIVE_DIAGONAL. When this happens, call this function again
244      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
245      droptol
246 */
247 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L,PetscBool *success)
248 {
249   PetscInt        jL;
250   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
251   PetscInt        *ai=a->i,*aj=a->j;
252   MatScalar       *aa=a->a;
253   PetscInt        nrows, ncols;
254   PetscInt        *max_row_nnz;
255   spbas_matrix    retval;
256   PetscScalar     *diag;
257   PetscScalar     *val;
258   PetscScalar     *lvec;
259   PetscScalar     epsdiag;
260   PetscInt        i,j,k;
261   const PetscBool do_values = PETSC_TRUE;
262   PetscInt        *r1_icol;
263   PetscScalar     *r1_val;
264   PetscInt        *r_icol;
265   PetscInt        r_nnz;
266   PetscScalar     *r_val;
267   PetscInt        *A_icol;
268   PetscInt        A_nnz;
269   PetscScalar     *A_val;
270   PetscInt        *p_icol;
271   PetscInt        p_nnz;
272   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
273   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */
274 
275   PetscFunctionBegin;
276   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
277   PetscCall(MatGetSize(A, &nrows, &ncols));
278   PetscCall(MatGetTrace(A, &epsdiag));
279 
280   epsdiag *= epsdiag_in / nrows;
281 
282   PetscCall(PetscInfo(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol));
283 
284   if (droptol<1e-10) droptol=1e-10;
285 
286   retval.nrows        = nrows;
287   retval.ncols        = nrows;
288   retval.nnz          = pattern.nnz/10;
289   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
290   retval.block_data   = PETSC_TRUE;
291 
292   PetscCall(spbas_allocate_pattern(&retval, do_values));
293   PetscCall(PetscArrayzero(retval.row_nnz, nrows));
294   PetscCall(spbas_allocate_data(&retval));
295   retval.nnz = 0;
296 
297   PetscCall(PetscMalloc1(nrows, &diag));
298   PetscCall(PetscCalloc1(nrows, &val));
299   PetscCall(PetscCalloc1(nrows, &lvec));
300   PetscCall(PetscCalloc1(nrows, &max_row_nnz));
301 
302   /* Count the nonzeros on transpose of pattern */
303   for (i = 0; i<nrows; i++)  {
304     p_nnz  = pattern.row_nnz[i];
305     p_icol = pattern.icols[i];
306     for (j=0; j<p_nnz; j++)  {
307       max_row_nnz[i+p_icol[j]]++;
308     }
309   }
310 
311   /* Calculate rows of L */
312   for (i = 0; i<nrows; i++)  {
313     p_nnz  = pattern.row_nnz[i];
314     p_icol = pattern.icols[i];
315 
316     r_nnz  = retval.row_nnz[i];
317     r_icol = retval.icols[i];
318     r_val  = retval.values[i];
319     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
320     A_icol = &aj[ai[rip[i]]];
321     A_val  = &aa[ai[rip[i]]];
322 
323     /* Calculate  val += A(i,i:n)'; */
324     for (j=0; j<A_nnz; j++) {
325       k = riip[A_icol[j]];
326       if (k>=i) val[k] = A_val[j];
327     }
328 
329     /*  Add regularization */
330     val[i] += epsdiag;
331 
332     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
333         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
334     for (j=0; j<r_nnz; j++)  {
335       k       = r_icol[j];
336       lvec[k] = diag[k] * r_val[j];
337       val[i] -= r_val[j] * lvec[k];
338     }
339 
340     /* Calculate the new diagonal */
341     diag[i] = val[i];
342     if (PetscRealPart(diag[i])<droptol) {
343       PetscCall(PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n"));
344       PetscCall(PetscInfo(NULL,"Negative diagonal in row %" PetscInt_FMT "\n",i+1));
345 
346       /* Delete the whole matrix at once. */
347       PetscCall(spbas_delete(retval));
348       *success = PETSC_FALSE;
349       PetscFunctionReturn(0);
350     }
351 
352     /* If necessary, allocate arrays */
353     if (r_nnz==0) {
354       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
355       PetscCheck(success,PETSC_COMM_SELF,PETSC_ERR_MEM,"spbas_cholesky_row_alloc() failed");
356       r_icol = retval.icols[i];
357       r_val  = retval.values[i];
358     }
359 
360     /* Now, fill in */
361     r_icol[r_nnz] = i;
362     r_val [r_nnz] = 1.0;
363     r_nnz++;
364     retval.row_nnz[i]++;
365 
366     retval.nnz += r_nnz;
367 
368     /* Calculate
369         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
370     for (j=1; j<p_nnz; j++)  {
371       k       = i+p_icol[j];
372       r1_icol = retval.icols[k];
373       r1_val  = retval.values[k];
374       for (jL=0; jL<retval.row_nnz[k]; jL++) {
375         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
376       }
377       val[k] /= diag[i];
378 
379       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
380         /* If necessary, allocate arrays */
381         if (!retval.row_nnz[k]) {
382           PetscBool flag,success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
383           if (!success) {
384             PetscCall(spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
385             flag   = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
386             PetscCheck(flag,PETSC_COMM_SELF,PETSC_ERR_MEM,"Allocation in spbas_cholesky_row_alloc() failed");
387             r_icol = retval.icols[i];
388           }
389         }
390 
391         retval.icols[k][retval.row_nnz[k]]  = i;
392         retval.values[k][retval.row_nnz[k]] = val[k];
393         retval.row_nnz[k]++;
394       }
395       val[k] = 0;
396     }
397 
398     /* Erase the values used in the work arrays */
399     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
400   }
401 
402   PetscCall(PetscFree(lvec));
403   PetscCall(PetscFree(val));
404 
405   PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
406   PetscCall(PetscFree(max_row_nnz));
407 
408   /* Place the inverse of the diagonals in the matrix */
409   for (i=0; i<nrows; i++) {
410     r_nnz = retval.row_nnz[i];
411 
412     retval.values[i][r_nnz-1] = 1.0 / diag[i];
413     for (j=0; j<r_nnz-1; j++) {
414       retval.values[i][j] *= -1;
415     }
416   }
417   PetscCall(PetscFree(diag));
418   *matrix_L = retval;
419   *success  = PETSC_TRUE;
420   PetscFunctionReturn(0);
421 }
422