xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 5e71baeff2f3138f93cd4f5927dfd596eb8325cc)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9 {
10   PetscFunctionBegin;
11   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
12   retval.values[k] = &retval.alloc_val[*n_alloc_used];
13   *n_alloc_used   += r_nnz;
14   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
15   PetscFunctionReturn(0);
16 }
17 
18 
19 /*
20   spbas_cholesky_garbage_collect:
21      move the rows which have been calculated so far, as well as
22      those currently under construction, to the front of the
23      array, while putting them in the proper order.
24      When it seems necessary, enlarge the current arrays.
25 
26      NB: re-allocation is being simulated using
27          PetscMalloc, memcpy, PetscFree, because
28          PetscRealloc does not seem to exist.
29 
30 */
31 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
32                                                                                     Only the storage, not the contents of this matrix is changed in this function */
33                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
34                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
35                                                                                     places in the arrays: they need not be moved any more */
36                                               PetscInt     *n_alloc_used,   /* I/O:  */
37                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
38 {
39 /* PSEUDO-CODE:
40   1. Choose the appropriate size for the arrays
41   2. Rescue the arrays which would be lost during garbage collection
42   3. Reallocate and correct administration
43   4. Move all arrays so that they are in proper order */
44 
45   PetscInt        i,j;
46   PetscInt        nrows         = result->nrows;
47   PetscInt        n_alloc_ok    =0;
48   PetscInt        n_alloc_ok_max=0;
49   PetscErrorCode  ierr;
50   PetscInt        need_already  = 0;
51   PetscInt        n_rows_ahead  =0;
52   PetscInt        max_need_extra= 0;
53   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
54   PetscInt        n_alloc_now     = result->n_alloc_icol;
55   PetscInt        *alloc_icol_old = result->alloc_icol;
56   PetscScalar     *alloc_val_old  = result->alloc_val;
57   PetscInt        *icol_rescue;
58   PetscScalar     *val_rescue;
59   PetscInt        n_rescue;
60   PetscInt        n_row_rescue;
61   PetscInt        i_here, i_last, n_copy;
62   const PetscReal xtra_perc = 20;
63 
64   PetscFunctionBegin;
65   /*********************************************************
66   1. Choose appropriate array size
67   Count number of rows and memory usage which is already final */
68   for (i=0; i<i_row; i++)  {
69     n_alloc_ok     += result->row_nnz[i];
70     n_alloc_ok_max += max_row_nnz[i];
71   }
72 
73   /* Count currently needed memory usage and future memory requirements
74     (max, predicted)*/
75   for (i=i_row; i<nrows; i++) {
76     if (!result->row_nnz[i]) {
77       max_need_extra += max_row_nnz[i];
78     } else {
79       need_already += max_row_nnz[i];
80       n_rows_ahead++;
81     }
82   }
83 
84   /* Make maximal and realistic memory requirement estimates */
85   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
86   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
87 
88   /* Choose array sizes */
89   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
90   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
91   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
92   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
93 
94   /* If new estimate is less than what we already have,
95     don't reallocate, just garbage-collect */
96   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
97     n_alloc = result->n_alloc_icol;
98   }
99 
100   /* Motivate dimension choice */
101   ierr = PetscInfo1(NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
102   if (n_alloc_max == n_alloc_est) {
103     ierr = PetscInfo(NULL,"this is the correct size\n");CHKERRQ(ierr);
104   } else if (n_alloc_now >= n_alloc_est) {
105     ierr = PetscInfo(NULL,"the current size, which seems enough\n");CHKERRQ(ierr);
106   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
107     ierr = PetscInfo(NULL,"the maximum estimate\n");CHKERRQ(ierr);
108   } else {
109     ierr = PetscInfo1(NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr);
110   }
111 
112 
113   /**********************************************************
114   2. Rescue arrays which would be lost
115   Count how many rows and nonzeros will have to be rescued
116   when moving all arrays in place */
117   n_row_rescue = 0; n_rescue = 0;
118   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
119   else {
120     i = *n_row_alloc_ok - 1;
121 
122     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
123   }
124 
125   for (i=*n_row_alloc_ok; i<nrows; i++) {
126     i_here = result->icols[i]-result->alloc_icol;
127     i_last = i_here + result->row_nnz[i];
128     if (result->row_nnz[i]>0) {
129       if (*n_alloc_used > i_here || i_last > n_alloc) {
130         n_rescue += result->row_nnz[i];
131         n_row_rescue++;
132       }
133 
134       if (i<i_row) *n_alloc_used += result->row_nnz[i];
135       else         *n_alloc_used += max_row_nnz[i];
136     }
137   }
138 
139   /* Allocate rescue arrays */
140   ierr = PetscMalloc1(n_rescue, &icol_rescue);CHKERRQ(ierr);
141   ierr = PetscMalloc1(n_rescue, &val_rescue);CHKERRQ(ierr);
142 
143   /* Rescue the arrays which need rescuing */
144   n_row_rescue = 0; n_rescue = 0;
145   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
146   else {
147     i             = *n_row_alloc_ok - 1;
148     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
149   }
150 
151   for (i=*n_row_alloc_ok; i<nrows; i++) {
152     i_here = result->icols[i]-result->alloc_icol;
153     i_last = i_here + result->row_nnz[i];
154     if (result->row_nnz[i]>0) {
155       if (*n_alloc_used > i_here || i_last > n_alloc) {
156         ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(PetscInt));CHKERRQ(ierr);
157         ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
158 
159         n_rescue += result->row_nnz[i];
160         n_row_rescue++;
161       }
162 
163       if (i<i_row) *n_alloc_used += result->row_nnz[i];
164       else         *n_alloc_used += max_row_nnz[i];
165     }
166   }
167 
168   /**********************************************************
169   3. Reallocate and correct administration */
170 
171   if (n_alloc != result->n_alloc_icol) {
172     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
173 
174     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
175 
176         Allocate new icol-data, copy old contents */
177     ierr = PetscMalloc1(n_alloc, &result->alloc_icol);CHKERRQ(ierr);
178     ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(PetscInt));CHKERRQ(ierr);
179 
180     /* Update administration, Reset pointers to new arrays  */
181     result->n_alloc_icol = n_alloc;
182     for (i=0; i<nrows; i++) {
183       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
184       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
185     }
186 
187     /* Delete old array */
188     ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
189 
190     /* Allocate new value-data, copy old contents */
191     ierr = PetscMalloc1(n_alloc, &result->alloc_val);CHKERRQ(ierr);
192     ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr);
193 
194     /* Update administration, Reset pointers to new arrays  */
195     result->n_alloc_val = n_alloc;
196     for (i=0; i<nrows; i++) {
197       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
198     }
199 
200     /* Delete old array */
201     ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
202   }
203 
204 
205   /*********************************************************
206   4. Copy all the arrays to their proper places */
207   n_row_rescue = 0; n_rescue = 0;
208   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
209   else {
210     i = *n_row_alloc_ok - 1;
211 
212     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
213   }
214 
215   for (i=*n_row_alloc_ok; i<nrows; i++) {
216     i_here = result->icols[i]-result->alloc_icol;
217     i_last = i_here + result->row_nnz[i];
218 
219     result->icols[i] = result->alloc_icol + *n_alloc_used;
220     result->values[i]= result->alloc_val  + *n_alloc_used;
221 
222     if (result->row_nnz[i]>0) {
223       if (*n_alloc_used > i_here || i_last > n_alloc) {
224         ierr = PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(PetscInt));CHKERRQ(ierr);
225         ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
226 
227         n_rescue += result->row_nnz[i];
228         n_row_rescue++;
229       } else {
230         for (j=0; j<result->row_nnz[i]; j++) {
231           result->icols[i][j]  = result->alloc_icol[i_here+j];
232           result->values[i][j] = result->alloc_val[i_here+j];
233         }
234       }
235       if (i<i_row) *n_alloc_used += result->row_nnz[i];
236       else         *n_alloc_used += max_row_nnz[i];
237     }
238   }
239 
240   /* Delete the rescue arrays */
241   ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
242   ierr = PetscFree(val_rescue);CHKERRQ(ierr);
243 
244   *n_row_alloc_ok = i_row;
245   PetscFunctionReturn(0);
246 }
247 
248 /*
249   spbas_incomplete_cholesky:
250      incomplete Cholesky decomposition of a square, symmetric,
251      positive definite matrix.
252 
253      In case negative diagonals are encountered, function returns
254      NEGATIVE_DIAGONAL. When this happens, call this function again
255      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
256      droptol
257 */
258 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
259 {
260   PetscInt        jL;
261   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
262   PetscInt        *ai=a->i,*aj=a->j;
263   MatScalar       *aa=a->a;
264   PetscInt        nrows, ncols;
265   PetscInt        *max_row_nnz;
266   PetscErrorCode  ierr;
267   spbas_matrix    retval;
268   PetscScalar     *diag;
269   PetscScalar     *val;
270   PetscScalar     *lvec;
271   PetscScalar     epsdiag;
272   PetscInt        i,j,k;
273   const PetscBool do_values = PETSC_TRUE;
274   PetscInt        *r1_icol;
275   PetscScalar     *r1_val;
276   PetscInt        *r_icol;
277   PetscInt        r_nnz;
278   PetscScalar     *r_val;
279   PetscInt        *A_icol;
280   PetscInt        A_nnz;
281   PetscScalar     *A_val;
282   PetscInt        *p_icol;
283   PetscInt        p_nnz;
284   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
285   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */
286 
287   PetscFunctionBegin;
288   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
289   ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
290   ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
291 
292   epsdiag *= epsdiag_in / nrows;
293 
294   ierr = PetscInfo2(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol);CHKERRQ(ierr);
295 
296   if (droptol<1e-10) droptol=1e-10;
297 
298   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
299 
300   retval.nrows        = nrows;
301   retval.ncols        = nrows;
302   retval.nnz          = pattern.nnz/10;
303   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
304   retval.block_data   = PETSC_TRUE;
305 
306   ierr       = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
307   ierr       = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(PetscInt));CHKERRQ(ierr);
308   ierr       = spbas_allocate_data(&retval);CHKERRQ(ierr);
309   retval.nnz = 0;
310 
311   ierr = PetscMalloc1(nrows, &diag);CHKERRQ(ierr);
312   ierr = PetscMalloc1(nrows, &val);CHKERRQ(ierr);
313   ierr = PetscMalloc1(nrows, &lvec);CHKERRQ(ierr);
314   ierr = PetscMalloc1(nrows, &max_row_nnz);CHKERRQ(ierr);
315 
316   ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
317   ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
318   ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(PetscInt));CHKERRQ(ierr);
319 
320   /* Check correct format of sparseness pattern */
321   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
322 
323   /* Count the nonzeros on transpose of pattern */
324   for (i = 0; i<nrows; i++)  {
325     p_nnz  = pattern.row_nnz[i];
326     p_icol = pattern.icols[i];
327     for (j=0; j<p_nnz; j++)  {
328       max_row_nnz[i+p_icol[j]]++;
329     }
330   }
331 
332   /* Calculate rows of L */
333   for (i = 0; i<nrows; i++)  {
334     p_nnz  = pattern.row_nnz[i];
335     p_icol = pattern.icols[i];
336 
337     r_nnz  = retval.row_nnz[i];
338     r_icol = retval.icols[i];
339     r_val  = retval.values[i];
340     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
341     A_icol = &aj[ai[rip[i]]];
342     A_val  = &aa[ai[rip[i]]];
343 
344     /* Calculate  val += A(i,i:n)'; */
345     for (j=0; j<A_nnz; j++) {
346       k = riip[A_icol[j]];
347       if (k>=i) val[k] = A_val[j];
348     }
349 
350     /*  Add regularization */
351     val[i] += epsdiag;
352 
353     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
354         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
355     for (j=0; j<r_nnz; j++)  {
356       k       = r_icol[j];
357       lvec[k] = diag[k] * r_val[j];
358       val[i] -= r_val[j] * lvec[k];
359     }
360 
361     /* Calculate the new diagonal */
362     diag[i] = val[i];
363     if (PetscRealPart(diag[i])<droptol) {
364       ierr = PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n");CHKERRQ(ierr);
365       ierr = PetscInfo1(NULL,"Negative diagonal in row %d\n",i+1);CHKERRQ(ierr);
366 
367       /* Delete the whole matrix at once. */
368       ierr = spbas_delete(retval);CHKERRQ(ierr);
369       return NEGATIVE_DIAGONAL;
370     }
371 
372     /* If necessary, allocate arrays */
373     if (r_nnz==0) {
374       ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
375       if (ierr == PETSC_ERR_MEM) {
376         ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
377         ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
378       } else CHKERRQ(ierr);
379       r_icol = retval.icols[i];
380       r_val  = retval.values[i];
381     }
382 
383     /* Now, fill in */
384     r_icol[r_nnz] = i;
385     r_val [r_nnz] = 1.0;
386     r_nnz++;
387     retval.row_nnz[i]++;
388 
389     retval.nnz += r_nnz;
390 
391     /* Calculate
392         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
393     for (j=1; j<p_nnz; j++)  {
394       k       = i+p_icol[j];
395       r1_icol = retval.icols[k];
396       r1_val  = retval.values[k];
397       for (jL=0; jL<retval.row_nnz[k]; jL++) {
398         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
399       }
400       val[k] /= diag[i];
401 
402       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
403         /* If necessary, allocate arrays */
404         if (retval.row_nnz[k]==0) {
405           ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
406           if (ierr == PETSC_ERR_MEM) {
407             ierr   = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
408             ierr   = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
409             r_icol = retval.icols[i];
410           } else CHKERRQ(ierr);
411         }
412 
413         retval.icols[k][retval.row_nnz[k]]  = i;
414         retval.values[k][retval.row_nnz[k]] = val[k];
415         retval.row_nnz[k]++;
416       }
417       val[k] = 0;
418     }
419 
420     /* Erase the values used in the work arrays */
421     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
422   }
423 
424   ierr=PetscFree(lvec);CHKERRQ(ierr);
425   ierr=PetscFree(val);CHKERRQ(ierr);
426 
427   ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
428   ierr = PetscFree(max_row_nnz);CHKERRQ(ierr);
429 
430   /* Place the inverse of the diagonals in the matrix */
431   for (i=0; i<nrows; i++) {
432     r_nnz = retval.row_nnz[i];
433 
434     retval.values[i][r_nnz-1] = 1.0 / diag[i];
435     for (j=0; j<r_nnz-1; j++) {
436       retval.values[i][j] *= -1;
437     }
438   }
439   ierr      = PetscFree(diag);CHKERRQ(ierr);
440   *matrix_L = retval;
441   PetscFunctionReturn(0);
442 }
443 
444