xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 #undef __FUNCT__
9 #define __FUNCT__ "spbas_cholesky_row_alloc"
10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11 {
12   PetscFunctionBegin;
13   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14   retval.values[k] = &retval.alloc_val[*n_alloc_used];
15   *n_alloc_used   += r_nnz;
16   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
17   PetscFunctionReturn(0);
18 }
19 
20 
21 /*
22   spbas_cholesky_garbage_collect:
23      move the rows which have been calculated so far, as well as
24      those currently under construction, to the front of the
25      array, while putting them in the proper order.
26      When it seems necessary, enlarge the current arrays.
27 
28      NB: re-allocation is being simulated using
29          PetscMalloc, memcpy, PetscFree, because
30          PetscRealloc does not seem to exist.
31 
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "spbas_cholesky_garbage_collect"
35 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
36                                                                                     Only the storage, not the contents of this matrix is changed in this function */
37                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
38                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
39                                                                                     places in the arrays: they need not be moved any more */
40                                               PetscInt     *n_alloc_used,   /* I/O:  */
41                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
42 {
43 /* PSEUDO-CODE:
44   1. Choose the appropriate size for the arrays
45   2. Rescue the arrays which would be lost during garbage collection
46   3. Reallocate and correct administration
47   4. Move all arrays so that they are in proper order */
48 
49   PetscInt        i,j;
50   PetscInt        nrows         = result->nrows;
51   PetscInt        n_alloc_ok    =0;
52   PetscInt        n_alloc_ok_max=0;
53   PetscErrorCode  ierr;
54   PetscInt        need_already  = 0;
55   PetscInt        n_rows_ahead  =0;
56   PetscInt        max_need_extra= 0;
57   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
58   PetscInt        n_alloc_now     = result->n_alloc_icol;
59   PetscInt        *alloc_icol_old = result->alloc_icol;
60   PetscScalar     *alloc_val_old  = result->alloc_val;
61   PetscInt        *icol_rescue;
62   PetscScalar     *val_rescue;
63   PetscInt        n_rescue;
64   PetscInt        n_row_rescue;
65   PetscInt        i_here, i_last, n_copy;
66   const PetscReal xtra_perc = 20;
67 
68   PetscFunctionBegin;
69   /*********************************************************
70   1. Choose appropriate array size
71   Count number of rows and memory usage which is already final */
72   for (i=0; i<i_row; i++)  {
73     n_alloc_ok     += result->row_nnz[i];
74     n_alloc_ok_max += max_row_nnz[i];
75   }
76 
77   /* Count currently needed memory usage and future memory requirements
78     (max, predicted)*/
79   for (i=i_row; i<nrows; i++) {
80     if (!result->row_nnz[i]) {
81       max_need_extra += max_row_nnz[i];
82     } else {
83       need_already += max_row_nnz[i];
84       n_rows_ahead++;
85     }
86   }
87 
88   /* Make maximal and realistic memory requirement estimates */
89   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
90   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
91 
92   /* Choose array sizes */
93   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
94   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
95   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
96   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
97 
98   /* If new estimate is less than what we already have,
99     don't reallocate, just garbage-collect */
100   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
101     n_alloc = result->n_alloc_icol;
102   }
103 
104   /* Motivate dimension choice */
105   ierr = PetscInfo1(NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
106   if (n_alloc_max == n_alloc_est) {
107     ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr);
108   } else if (n_alloc_now >= n_alloc_est) {
109     ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr);
110   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
111     ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr);
112   } else {
113     ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr);
114   }
115 
116 
117   /**********************************************************
118   2. Rescue arrays which would be lost
119   Count how many rows and nonzeros will have to be rescued
120   when moving all arrays in place */
121   n_row_rescue = 0; n_rescue = 0;
122   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
123   else {
124     i = *n_row_alloc_ok - 1;
125 
126     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
127   }
128 
129   for (i=*n_row_alloc_ok; i<nrows; i++) {
130     i_here = result->icols[i]-result->alloc_icol;
131     i_last = i_here + result->row_nnz[i];
132     if (result->row_nnz[i]>0) {
133       if (*n_alloc_used > i_here || i_last > n_alloc) {
134         n_rescue += result->row_nnz[i];
135         n_row_rescue++;
136       }
137 
138       if (i<i_row) *n_alloc_used += result->row_nnz[i];
139       else         *n_alloc_used += max_row_nnz[i];
140     }
141   }
142 
143   /* Allocate rescue arrays */
144   ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr);
145   ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr);
146 
147   /* Rescue the arrays which need rescuing */
148   n_row_rescue = 0; n_rescue = 0;
149   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
150   else {
151     i             = *n_row_alloc_ok - 1;
152     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
153   }
154 
155   for (i=*n_row_alloc_ok; i<nrows; i++) {
156     i_here = result->icols[i]-result->alloc_icol;
157     i_last = i_here + result->row_nnz[i];
158     if (result->row_nnz[i]>0) {
159       if (*n_alloc_used > i_here || i_last > n_alloc) {
160         ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
161         ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
162 
163         n_rescue += result->row_nnz[i];
164         n_row_rescue++;
165       }
166 
167       if (i<i_row) *n_alloc_used += result->row_nnz[i];
168       else         *n_alloc_used += max_row_nnz[i];
169     }
170   }
171 
172   /**********************************************************
173   3. Reallocate and correct administration */
174 
175   if (n_alloc != result->n_alloc_icol) {
176     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
177 
178     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
179 
180         Allocate new icol-data, copy old contents */
181     ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr);
182     ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr);
183 
184     /* Update administration, Reset pointers to new arrays  */
185     result->n_alloc_icol = n_alloc;
186     for (i=0; i<nrows; i++) {
187       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
188       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
189     }
190 
191     /* Delete old array */
192     ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
193 
194     /* Allocate new value-data, copy old contents */
195     ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr);
196     ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr);
197 
198     /* Update administration, Reset pointers to new arrays  */
199     result->n_alloc_val = n_alloc;
200     for (i=0; i<nrows; i++) {
201       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
202     }
203 
204     /* Delete old array */
205     ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
206   }
207 
208 
209   /*********************************************************
210   4. Copy all the arrays to their proper places */
211   n_row_rescue = 0; n_rescue = 0;
212   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
213   else {
214     i = *n_row_alloc_ok - 1;
215 
216     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
217   }
218 
219   for (i=*n_row_alloc_ok; i<nrows; i++) {
220     i_here = result->icols[i]-result->alloc_icol;
221     i_last = i_here + result->row_nnz[i];
222 
223     result->icols[i] = result->alloc_icol + *n_alloc_used;
224     result->values[i]= result->alloc_val  + *n_alloc_used;
225 
226     if (result->row_nnz[i]>0) {
227       if (*n_alloc_used > i_here || i_last > n_alloc) {
228         ierr = PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
229         ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
230 
231         n_rescue += result->row_nnz[i];
232         n_row_rescue++;
233       } else {
234         for (j=0; j<result->row_nnz[i]; j++) {
235           result->icols[i][j]  = result->alloc_icol[i_here+j];
236           result->values[i][j] = result->alloc_val[i_here+j];
237         }
238       }
239       if (i<i_row) *n_alloc_used += result->row_nnz[i];
240       else         *n_alloc_used += max_row_nnz[i];
241     }
242   }
243 
244   /* Delete the rescue arrays */
245   ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
246   ierr = PetscFree(val_rescue);CHKERRQ(ierr);
247 
248   *n_row_alloc_ok = i_row;
249   PetscFunctionReturn(0);
250 }
251 
252 /*
253   spbas_incomplete_cholesky:
254      incomplete Cholesky decomposition of a square, symmetric,
255      positive definite matrix.
256 
257      In case negative diagonals are encountered, function returns
258      NEGATIVE_DIAGONAL. When this happens, call this function again
259      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
260      droptol
261 */
262 #undef __FUNCT__
263 #define __FUNCT__ "spbas_incomplete_cholesky"
264 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
265 {
266   PetscInt        jL;
267   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
268   PetscInt        *ai=a->i,*aj=a->j;
269   MatScalar       *aa=a->a;
270   PetscInt        nrows, ncols;
271   PetscInt        *max_row_nnz;
272   PetscErrorCode  ierr;
273   spbas_matrix    retval;
274   PetscScalar     *diag;
275   PetscScalar     *val;
276   PetscScalar     *lvec;
277   PetscScalar     epsdiag;
278   PetscInt        i,j,k;
279   const PetscBool do_values = PETSC_TRUE;
280   PetscInt        *r1_icol;
281   PetscScalar     *r1_val;
282   PetscInt        *r_icol;
283   PetscInt        r_nnz;
284   PetscScalar     *r_val;
285   PetscInt        *A_icol;
286   PetscInt        A_nnz;
287   PetscScalar     *A_val;
288   PetscInt        *p_icol;
289   PetscInt        p_nnz;
290   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
291   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */
292 
293   PetscFunctionBegin;
294   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
295   ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
296   ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
297 
298   epsdiag *= epsdiag_in / nrows;
299 
300   ierr = PetscInfo2(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr);
301 
302   if (droptol<1e-10) droptol=1e-10;
303 
304   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
305 
306   retval.nrows        = nrows;
307   retval.ncols        = nrows;
308   retval.nnz          = pattern.nnz/10;
309   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
310   retval.block_data   = PETSC_TRUE;
311 
312   ierr       = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
313   ierr       = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
314   ierr       = spbas_allocate_data(&retval);CHKERRQ(ierr);
315   retval.nnz = 0;
316 
317   ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr);
318   ierr = PetscMalloc1(nrows, &val);CHKERRQ(ierr);
319   ierr = PetscMalloc1(nrows, &lvec);CHKERRQ(ierr);
320   ierr = PetscMalloc1(nrows, &max_row_nnz);CHKERRQ(ierr);
321   if (!(diag && val && lvec && max_row_nnz)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");
322 
323   ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
324   ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
325   ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
326 
327   /* Check correct format of sparseness pattern */
328   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
329 
330   /* Count the nonzeros on transpose of pattern */
331   for (i = 0; i<nrows; i++)  {
332     p_nnz  = pattern.row_nnz[i];
333     p_icol = pattern.icols[i];
334     for (j=0; j<p_nnz; j++)  {
335       max_row_nnz[i+p_icol[j]]++;
336     }
337   }
338 
339   /* Calculate rows of L */
340   for (i = 0; i<nrows; i++)  {
341     p_nnz  = pattern.row_nnz[i];
342     p_icol = pattern.icols[i];
343 
344     r_nnz  = retval.row_nnz[i];
345     r_icol = retval.icols[i];
346     r_val  = retval.values[i];
347     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
348     A_icol = &aj[ai[rip[i]]];
349     A_val  = &aa[ai[rip[i]]];
350 
351     /* Calculate  val += A(i,i:n)'; */
352     for (j=0; j<A_nnz; j++) {
353       k = riip[A_icol[j]];
354       if (k>=i) val[k] = A_val[j];
355     }
356 
357     /*  Add regularization */
358     val[i] += epsdiag;
359 
360     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
361         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
362     for (j=0; j<r_nnz; j++)  {
363       k       = r_icol[j];
364       lvec[k] = diag[k] * r_val[j];
365       val[i] -= r_val[j] * lvec[k];
366     }
367 
368     /* Calculate the new diagonal */
369     diag[i] = val[i];
370     if (PetscRealPart(diag[i])<droptol) {
371       ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr);
372       ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr);
373 
374       /* Delete the whole matrix at once. */
375       ierr = spbas_delete(retval);CHKERRQ(ierr);
376       return NEGATIVE_DIAGONAL;
377     }
378 
379     /* If necessary, allocate arrays */
380     if (r_nnz==0) {
381       ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
382       if (ierr == PETSC_ERR_MEM) {
383         ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
384         ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
385       } else CHKERRQ(ierr);
386       r_icol = retval.icols[i];
387       r_val  = retval.values[i];
388     }
389 
390     /* Now, fill in */
391     r_icol[r_nnz] = i;
392     r_val [r_nnz] = 1.0;
393     r_nnz++;
394     retval.row_nnz[i]++;
395 
396     retval.nnz += r_nnz;
397 
398     /* Calculate
399         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
400     for (j=1; j<p_nnz; j++)  {
401       k       = i+p_icol[j];
402       r1_icol = retval.icols[k];
403       r1_val  = retval.values[k];
404       for (jL=0; jL<retval.row_nnz[k]; jL++) {
405         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
406       }
407       val[k] /= diag[i];
408 
409       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
410         /* If necessary, allocate arrays */
411         if (retval.row_nnz[k]==0) {
412           ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
413           if (ierr == PETSC_ERR_MEM) {
414             ierr   = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
415             ierr   = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
416             r_icol = retval.icols[i];
417             r_val  = retval.values[i];
418           } else CHKERRQ(ierr);
419         }
420 
421         retval.icols[k][retval.row_nnz[k]]  = i;
422         retval.values[k][retval.row_nnz[k]] = val[k];
423         retval.row_nnz[k]++;
424       }
425       val[k] = 0;
426     }
427 
428     /* Erase the values used in the work arrays */
429     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
430   }
431 
432   ierr=PetscFree(lvec);CHKERRQ(ierr);
433   ierr=PetscFree(val);CHKERRQ(ierr);
434 
435   ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
436   ierr = PetscFree(max_row_nnz);CHKERRQ(ierr);
437 
438   /* Place the inverse of the diagonals in the matrix */
439   for (i=0; i<nrows; i++) {
440     r_nnz = retval.row_nnz[i];
441 
442     retval.values[i][r_nnz-1] = 1.0 / diag[i];
443     for (j=0; j<r_nnz-1; j++) {
444       retval.values[i][j] *= -1;
445     }
446   }
447   ierr      = PetscFree(diag);CHKERRQ(ierr);
448   *matrix_L = retval;
449   PetscFunctionReturn(0);
450 }
451 
452