xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 2475b7ca256cea2a4b7cbf2d8babcda14e5fa36e)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9 {
10   PetscFunctionBegin;
11   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
12   retval.values[k] = &retval.alloc_val[*n_alloc_used];
13   *n_alloc_used   += r_nnz;
14   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
15   PetscFunctionReturn(0);
16 }
17 
18 
19 /*
20   spbas_cholesky_garbage_collect:
21      move the rows which have been calculated so far, as well as
22      those currently under construction, to the front of the
23      array, while putting them in the proper order.
24      When it seems necessary, enlarge the current arrays.
25 
26      NB: re-allocation is being simulated using
27          PetscMalloc, memcpy, PetscFree, because
28          PetscRealloc does not seem to exist.
29 
30 */
31 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
32                                                                                     Only the storage, not the contents of this matrix is changed in this function */
33                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
34                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
35                                                                                     places in the arrays: they need not be moved any more */
36                                               PetscInt     *n_alloc_used,   /* I/O:  */
37                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
38 {
39 /* PSEUDO-CODE:
40   1. Choose the appropriate size for the arrays
41   2. Rescue the arrays which would be lost during garbage collection
42   3. Reallocate and correct administration
43   4. Move all arrays so that they are in proper order */
44 
45   PetscInt        i,j;
46   PetscInt        nrows         = result->nrows;
47   PetscInt        n_alloc_ok    =0;
48   PetscInt        n_alloc_ok_max=0;
49   PetscErrorCode  ierr;
50   PetscInt        need_already  = 0;
51   PetscInt        n_rows_ahead  =0;
52   PetscInt        max_need_extra= 0;
53   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
54   PetscInt        n_alloc_now     = result->n_alloc_icol;
55   PetscInt        *alloc_icol_old = result->alloc_icol;
56   PetscScalar     *alloc_val_old  = result->alloc_val;
57   PetscInt        *icol_rescue;
58   PetscScalar     *val_rescue;
59   PetscInt        n_rescue;
60   PetscInt        n_row_rescue;
61   PetscInt        i_here, i_last, n_copy;
62   const PetscReal xtra_perc = 20;
63 
64   PetscFunctionBegin;
65   /*********************************************************
66   1. Choose appropriate array size
67   Count number of rows and memory usage which is already final */
68   for (i=0; i<i_row; i++)  {
69     n_alloc_ok     += result->row_nnz[i];
70     n_alloc_ok_max += max_row_nnz[i];
71   }
72 
73   /* Count currently needed memory usage and future memory requirements
74     (max, predicted)*/
75   for (i=i_row; i<nrows; i++) {
76     if (!result->row_nnz[i]) {
77       max_need_extra += max_row_nnz[i];
78     } else {
79       need_already += max_row_nnz[i];
80       n_rows_ahead++;
81     }
82   }
83 
84   /* Make maximal and realistic memory requirement estimates */
85   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
86   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
87 
88   /* Choose array sizes */
89   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
90   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
91   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
92   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
93 
94   /* If new estimate is less than what we already have,
95     don't reallocate, just garbage-collect */
96   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
97     n_alloc = result->n_alloc_icol;
98   }
99 
100   /* Motivate dimension choice */
101   ierr = PetscInfo1(NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
102   if (n_alloc_max == n_alloc_est) {
103     ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr);
104   } else if (n_alloc_now >= n_alloc_est) {
105     ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr);
106   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
107     ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr);
108   } else {
109     ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr);
110   }
111 
112 
113   /**********************************************************
114   2. Rescue arrays which would be lost
115   Count how many rows and nonzeros will have to be rescued
116   when moving all arrays in place */
117   n_row_rescue = 0; n_rescue = 0;
118   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
119   else {
120     i = *n_row_alloc_ok - 1;
121 
122     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
123   }
124 
125   for (i=*n_row_alloc_ok; i<nrows; i++) {
126     i_here = result->icols[i]-result->alloc_icol;
127     i_last = i_here + result->row_nnz[i];
128     if (result->row_nnz[i]>0) {
129       if (*n_alloc_used > i_here || i_last > n_alloc) {
130         n_rescue += result->row_nnz[i];
131         n_row_rescue++;
132       }
133 
134       if (i<i_row) *n_alloc_used += result->row_nnz[i];
135       else         *n_alloc_used += max_row_nnz[i];
136     }
137   }
138 
139   /* Allocate rescue arrays */
140   ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr);
141   ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr);
142 
143   /* Rescue the arrays which need rescuing */
144   n_row_rescue = 0; n_rescue = 0;
145   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
146   else {
147     i             = *n_row_alloc_ok - 1;
148     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
149   }
150 
151   for (i=*n_row_alloc_ok; i<nrows; i++) {
152     i_here = result->icols[i]-result->alloc_icol;
153     i_last = i_here + result->row_nnz[i];
154     if (result->row_nnz[i]>0) {
155       if (*n_alloc_used > i_here || i_last > n_alloc) {
156         ierr = PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]);CHKERRQ(ierr);
157         ierr = PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]);CHKERRQ(ierr);
158         n_rescue += result->row_nnz[i];
159         n_row_rescue++;
160       }
161 
162       if (i<i_row) *n_alloc_used += result->row_nnz[i];
163       else         *n_alloc_used += max_row_nnz[i];
164     }
165   }
166 
167   /**********************************************************
168   3. Reallocate and correct administration */
169 
170   if (n_alloc != result->n_alloc_icol) {
171     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
172 
173     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
174 
175         Allocate new icol-data, copy old contents */
176     ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr);
177     ierr = PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy);CHKERRQ(ierr);
178 
179     /* Update administration, Reset pointers to new arrays  */
180     result->n_alloc_icol = n_alloc;
181     for (i=0; i<nrows; i++) {
182       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
183       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
184     }
185 
186     /* Delete old array */
187     ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
188 
189     /* Allocate new value-data, copy old contents */
190     ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr);
191     ierr = PetscArraycpy(result->alloc_val, alloc_val_old, n_copy);CHKERRQ(ierr);
192 
193     /* Update administration, Reset pointers to new arrays  */
194     result->n_alloc_val = n_alloc;
195     for (i=0; i<nrows; i++) {
196       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
197     }
198 
199     /* Delete old array */
200     ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
201   }
202 
203 
204   /*********************************************************
205   4. Copy all the arrays to their proper places */
206   n_row_rescue = 0; n_rescue = 0;
207   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
208   else {
209     i = *n_row_alloc_ok - 1;
210 
211     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
212   }
213 
214   for (i=*n_row_alloc_ok; i<nrows; i++) {
215     i_here = result->icols[i]-result->alloc_icol;
216     i_last = i_here + result->row_nnz[i];
217 
218     result->icols[i] = result->alloc_icol + *n_alloc_used;
219     result->values[i]= result->alloc_val  + *n_alloc_used;
220 
221     if (result->row_nnz[i]>0) {
222       if (*n_alloc_used > i_here || i_last > n_alloc) {
223         ierr = PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]);CHKERRQ(ierr);
224         ierr = PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]);CHKERRQ(ierr);
225 
226         n_rescue += result->row_nnz[i];
227         n_row_rescue++;
228       } else {
229         for (j=0; j<result->row_nnz[i]; j++) {
230           result->icols[i][j]  = result->alloc_icol[i_here+j];
231           result->values[i][j] = result->alloc_val[i_here+j];
232         }
233       }
234       if (i<i_row) *n_alloc_used += result->row_nnz[i];
235       else         *n_alloc_used += max_row_nnz[i];
236     }
237   }
238 
239   /* Delete the rescue arrays */
240   ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
241   ierr = PetscFree(val_rescue);CHKERRQ(ierr);
242 
243   *n_row_alloc_ok = i_row;
244   PetscFunctionReturn(0);
245 }
246 
247 /*
248   spbas_incomplete_cholesky:
249      incomplete Cholesky decomposition of a square, symmetric,
250      positive definite matrix.
251 
252      In case negative diagonals are encountered, function returns
253      NEGATIVE_DIAGONAL. When this happens, call this function again
254      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
255      droptol
256 */
257 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
258 {
259   PetscInt        jL;
260   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
261   PetscInt        *ai=a->i,*aj=a->j;
262   MatScalar       *aa=a->a;
263   PetscInt        nrows, ncols;
264   PetscInt        *max_row_nnz;
265   PetscErrorCode  ierr;
266   spbas_matrix    retval;
267   PetscScalar     *diag;
268   PetscScalar     *val;
269   PetscScalar     *lvec;
270   PetscScalar     epsdiag;
271   PetscInt        i,j,k;
272   const PetscBool do_values = PETSC_TRUE;
273   PetscInt        *r1_icol;
274   PetscScalar     *r1_val;
275   PetscInt        *r_icol;
276   PetscInt        r_nnz;
277   PetscScalar     *r_val;
278   PetscInt        *A_icol;
279   PetscInt        A_nnz;
280   PetscScalar     *A_val;
281   PetscInt        *p_icol;
282   PetscInt        p_nnz;
283   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
284   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */
285 
286   PetscFunctionBegin;
287   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
288   ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
289   ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
290 
291   epsdiag *= epsdiag_in / nrows;
292 
293   ierr = PetscInfo2(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr);
294 
295   if (droptol<1e-10) droptol=1e-10;
296 
297   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
298 
299   retval.nrows        = nrows;
300   retval.ncols        = nrows;
301   retval.nnz          = pattern.nnz/10;
302   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
303   retval.block_data   = PETSC_TRUE;
304 
305   ierr       = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
306   ierr       = PetscArrayzero(retval.row_nnz, nrows);CHKERRQ(ierr);
307   ierr       = spbas_allocate_data(&retval);CHKERRQ(ierr);
308   retval.nnz = 0;
309 
310   ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr);
311   ierr = PetscCalloc1(nrows, &val);CHKERRQ(ierr);
312   ierr = PetscCalloc1(nrows, &lvec);CHKERRQ(ierr);
313   ierr = PetscCalloc1(nrows, &max_row_nnz);CHKERRQ(ierr);
314 
315   /* Check correct format of sparseness pattern */
316   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
317 
318   /* Count the nonzeros on transpose of pattern */
319   for (i = 0; i<nrows; i++)  {
320     p_nnz  = pattern.row_nnz[i];
321     p_icol = pattern.icols[i];
322     for (j=0; j<p_nnz; j++)  {
323       max_row_nnz[i+p_icol[j]]++;
324     }
325   }
326 
327   /* Calculate rows of L */
328   for (i = 0; i<nrows; i++)  {
329     p_nnz  = pattern.row_nnz[i];
330     p_icol = pattern.icols[i];
331 
332     r_nnz  = retval.row_nnz[i];
333     r_icol = retval.icols[i];
334     r_val  = retval.values[i];
335     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
336     A_icol = &aj[ai[rip[i]]];
337     A_val  = &aa[ai[rip[i]]];
338 
339     /* Calculate  val += A(i,i:n)'; */
340     for (j=0; j<A_nnz; j++) {
341       k = riip[A_icol[j]];
342       if (k>=i) val[k] = A_val[j];
343     }
344 
345     /*  Add regularization */
346     val[i] += epsdiag;
347 
348     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
349         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
350     for (j=0; j<r_nnz; j++)  {
351       k       = r_icol[j];
352       lvec[k] = diag[k] * r_val[j];
353       val[i] -= r_val[j] * lvec[k];
354     }
355 
356     /* Calculate the new diagonal */
357     diag[i] = val[i];
358     if (PetscRealPart(diag[i])<droptol) {
359       ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr);
360       ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr);
361 
362       /* Delete the whole matrix at once. */
363       ierr = spbas_delete(retval);CHKERRQ(ierr);
364       return NEGATIVE_DIAGONAL;
365     }
366 
367     /* If necessary, allocate arrays */
368     if (r_nnz==0) {
369       ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
370       if (ierr == PETSC_ERR_MEM) {
371         ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
372         ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
373       } else CHKERRQ(ierr);
374       r_icol = retval.icols[i];
375       r_val  = retval.values[i];
376     }
377 
378     /* Now, fill in */
379     r_icol[r_nnz] = i;
380     r_val [r_nnz] = 1.0;
381     r_nnz++;
382     retval.row_nnz[i]++;
383 
384     retval.nnz += r_nnz;
385 
386     /* Calculate
387         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
388     for (j=1; j<p_nnz; j++)  {
389       k       = i+p_icol[j];
390       r1_icol = retval.icols[k];
391       r1_val  = retval.values[k];
392       for (jL=0; jL<retval.row_nnz[k]; jL++) {
393         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
394       }
395       val[k] /= diag[i];
396 
397       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
398         /* If necessary, allocate arrays */
399         if (retval.row_nnz[k]==0) {
400           ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
401           if (ierr == PETSC_ERR_MEM) {
402             ierr   = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
403             ierr   = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
404             r_icol = retval.icols[i];
405           } else CHKERRQ(ierr);
406         }
407 
408         retval.icols[k][retval.row_nnz[k]]  = i;
409         retval.values[k][retval.row_nnz[k]] = val[k];
410         retval.row_nnz[k]++;
411       }
412       val[k] = 0;
413     }
414 
415     /* Erase the values used in the work arrays */
416     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
417   }
418 
419   ierr=PetscFree(lvec);CHKERRQ(ierr);
420   ierr=PetscFree(val);CHKERRQ(ierr);
421 
422   ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
423   ierr = PetscFree(max_row_nnz);CHKERRQ(ierr);
424 
425   /* Place the inverse of the diagonals in the matrix */
426   for (i=0; i<nrows; i++) {
427     r_nnz = retval.row_nnz[i];
428 
429     retval.values[i][r_nnz-1] = 1.0 / diag[i];
430     for (j=0; j<r_nnz-1; j++) {
431       retval.values[i][j] *= -1;
432     }
433   }
434   ierr      = PetscFree(diag);CHKERRQ(ierr);
435   *matrix_L = retval;
436   PetscFunctionReturn(0);
437 }
438 
439