xref: /petsc/src/ksp/ksp/impls/gmres/dgmres/dgmres.c (revision f6714f7e2f29a00d39a12212bd9dea2f7b8ed983)
1 /*
2     Implements deflated GMRES.
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h> /*I  "petscksp.h"  I*/
6 
7 PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
8 
9 static PetscErrorCode KSPDGMRESGetNewVectors(KSP, PetscInt);
10 static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP, PetscInt, PetscBool, PetscReal *);
11 static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
12 
KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)13 static PetscErrorCode KSPDGMRESSetEigen(KSP ksp, PetscInt nb_eig)
14 {
15   PetscFunctionBegin;
16   PetscTryMethod(ksp, "KSPDGMRESSetEigen_C", (KSP, PetscInt), (ksp, nb_eig));
17   PetscFunctionReturn(PETSC_SUCCESS);
18 }
KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)19 static PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp, PetscInt max_neig)
20 {
21   PetscFunctionBegin;
22   PetscTryMethod(ksp, "KSPDGMRESSetMaxEigen_C", (KSP, PetscInt), (ksp, max_neig));
23   PetscFunctionReturn(PETSC_SUCCESS);
24 }
KSPDGMRESComputeSchurForm(KSP ksp,PetscInt * neig)25 static PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp, PetscInt *neig)
26 {
27   PetscFunctionBegin;
28   PetscUseMethod(ksp, "KSPDGMRESComputeSchurForm_C", (KSP, PetscInt *), (ksp, neig));
29   PetscFunctionReturn(PETSC_SUCCESS);
30 }
KSPDGMRESComputeDeflationData(KSP ksp,PetscInt * curneigh)31 PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp, PetscInt *curneigh)
32 {
33   PetscFunctionBegin;
34   PetscUseMethod(ksp, "KSPDGMRESComputeDeflationData_C", (KSP, PetscInt *), (ksp, curneigh));
35   PetscFunctionReturn(PETSC_SUCCESS);
36 }
KSPDGMRESApplyDeflation(KSP ksp,Vec x,Vec y)37 static PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
38 {
39   PetscFunctionBegin;
40   PetscUseMethod(ksp, "KSPDGMRESApplyDeflation_C", (KSP, Vec, Vec), (ksp, x, y));
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
KSPDGMRESImproveEig(KSP ksp,PetscInt neig)44 static PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
45 {
46   PetscFunctionBegin;
47   PetscUseMethod(ksp, "KSPDGMRESImproveEig_C", (KSP, PetscInt), (ksp, neig));
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
KSPSetUp_DGMRES(KSP ksp)51 PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
52 {
53   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
54   PetscInt    neig   = dgmres->neig + EIG_OFFSET;
55   PetscInt    max_k  = dgmres->max_k + 1;
56 
57   PetscFunctionBegin;
58   PetscCall(KSPSetUp_GMRES(ksp));
59   if (!dgmres->neig) PetscFunctionReturn(PETSC_SUCCESS);
60 
61   /* Allocate workspace for the Schur vectors*/
62   PetscCall(PetscMalloc1(neig * max_k, &SR));
63   dgmres->wr    = NULL;
64   dgmres->wi    = NULL;
65   dgmres->perm  = NULL;
66   dgmres->modul = NULL;
67   dgmres->Q     = NULL;
68   dgmres->Z     = NULL;
69 
70   UU   = NULL;
71   XX   = NULL;
72   MX   = NULL;
73   AUU  = NULL;
74   XMX  = NULL;
75   XMU  = NULL;
76   UMX  = NULL;
77   AUAU = NULL;
78   TT   = NULL;
79   TTF  = NULL;
80   INVP = NULL;
81   X1   = NULL;
82   X2   = NULL;
83   MU   = NULL;
84   PetscFunctionReturn(PETSC_SUCCESS);
85 }
86 
87 /*
88  Run GMRES, possibly with restart.  Return residual history if requested.
89  input parameters:
90 
91  .       gmres  - structure containing parameters and work areas
92 
93  output parameters:
94  .        nres    - residuals (from preconditioned system) at each step.
95  If restarting, consider passing nres+it.  If null,
96  ignored
97  .        itcount - number of iterations used.  nres[0] to nres[itcount]
98  are defined.  If null, ignored.
99 
100  Notes:
101  On entry, the value in vector VEC_VV(0) should be the initial residual
102  (this allows shortcuts where the initial preconditioned residual is 0).
103  */
KSPDGMRESCycle(PetscInt * itcount,KSP ksp)104 static PetscErrorCode KSPDGMRESCycle(PetscInt *itcount, KSP ksp)
105 {
106   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
107   PetscReal   res_norm, res, hapbnd, tt;
108   PetscInt    it     = 0;
109   PetscInt    max_k  = dgmres->max_k;
110   PetscBool   hapend = PETSC_FALSE;
111   PetscReal   res_old;
112   PetscInt    test = 0;
113 
114   PetscFunctionBegin;
115   PetscCall(VecNormalize(VEC_VV(0), &res_norm));
116   KSPCheckNorm(ksp, res_norm);
117   res     = res_norm;
118   *GRS(0) = res_norm;
119 
120   /* check for the convergence */
121   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
122   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
123   else ksp->rnorm = 0.0;
124   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
125   dgmres->it = (it - 1);
126   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
127   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
128   if (!res) {
129     if (itcount) *itcount = 0;
130     ksp->reason = KSP_CONVERGED_ATOL;
131     PetscCall(PetscInfo(ksp, "Converged due to zero residual norm on entry\n"));
132     PetscFunctionReturn(PETSC_SUCCESS);
133   }
134   /* record the residual norm to test if deflation is needed */
135   res_old = res;
136 
137   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
138   while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
139     if (it) {
140       PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
141       PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
142     }
143     dgmres->it = (it - 1);
144     if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) PetscCall(KSPDGMRESGetNewVectors(ksp, it + 1));
145     if (dgmres->r > 0) {
146       if (ksp->pc_side == PC_LEFT) {
147         /* Apply the first preconditioner */
148         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_TEMP, VEC_TEMP_MATOP));
149         /* Then apply Deflation as a preconditioner */
150         PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1 + it)));
151       } else if (ksp->pc_side == PC_RIGHT) {
152         PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP));
153         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1 + it), VEC_TEMP_MATOP));
154       }
155     } else {
156       PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_VV(1 + it), VEC_TEMP_MATOP));
157     }
158     dgmres->matvecs += 1;
159     /* update Hessenberg matrix and do Gram-Schmidt */
160     PetscCall((*dgmres->orthog)(ksp, it));
161 
162     /* vv(i+1) . vv(i+1) */
163     PetscCall(VecNormalize(VEC_VV(it + 1), &tt));
164     /* save the magnitude */
165     *HH(it + 1, it)  = tt;
166     *HES(it + 1, it) = tt;
167 
168     /* check for the happy breakdown */
169     hapbnd = PetscAbsScalar(tt / *GRS(it));
170     if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
171     if (tt < hapbnd) {
172       PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %g tt = %g\n", (double)hapbnd, (double)tt));
173       hapend = PETSC_TRUE;
174     }
175     PetscCall(KSPDGMRESUpdateHessenberg(ksp, it, hapend, &res));
176 
177     it++;
178     dgmres->it = (it - 1); /* For converged */
179     ksp->its++;
180     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
181     else ksp->rnorm = 0.0;
182     if (ksp->reason) break;
183 
184     PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
185 
186     /* Catch error in happy breakdown and signal convergence and break from loop */
187     if (hapend) {
188       if (!ksp->reason) {
189         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res);
190         ksp->reason = KSP_DIVERGED_BREAKDOWN;
191         break;
192       }
193     }
194   }
195 
196   if (itcount) *itcount = it;
197 
198   /*
199    Down here we have to solve for the "best" coefficients of the Krylov
200    columns, add the solution values together, and possibly unwind the
201    preconditioning from the solution
202    */
203   /* Form the solution (or the solution so far) */
204   PetscCall(KSPDGMRESBuildSoln(GRS(0), ksp->vec_sol, ksp->vec_sol, ksp, it - 1));
205 
206   /* Monitor if we know that we will not return for a restart */
207   if (ksp->reason == KSP_CONVERGED_ITERATING && ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
208   if (it && ksp->reason) {
209     PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
210     PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
211   }
212 
213   /* Compute data for the deflation to be used during the next restart */
214   if (!ksp->reason && ksp->its < ksp->max_it) {
215     test = max_k * PetscLogReal(ksp->rtol / res) / PetscLogReal(res / res_old);
216     /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed  */
217     if ((test > dgmres->smv * (ksp->max_it - ksp->its)) || dgmres->force) PetscCall(KSPDGMRESComputeDeflationData(ksp, NULL));
218   }
219   PetscFunctionReturn(PETSC_SUCCESS);
220 }
221 
KSPSolve_DGMRES(KSP ksp)222 PetscErrorCode KSPSolve_DGMRES(KSP ksp)
223 {
224   PetscInt    i, its = 0, itcount;
225   KSP_DGMRES *dgmres     = (KSP_DGMRES *)ksp->data;
226   PetscBool   guess_zero = ksp->guess_zero;
227 
228   PetscFunctionBegin;
229   PetscCheck(!ksp->calc_sings || dgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
230 
231   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
232   ksp->its        = 0;
233   dgmres->matvecs = 0;
234   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
235 
236   itcount = 0;
237   while (!ksp->reason) {
238     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
239     if (ksp->pc_side == PC_LEFT) {
240       dgmres->matvecs += 1;
241       if (dgmres->r > 0) {
242         PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP));
243         PetscCall(VecCopy(VEC_TEMP, VEC_VV(0)));
244       }
245     }
246 
247     PetscCall(KSPDGMRESCycle(&its, ksp));
248     itcount += its;
249     if (itcount >= ksp->max_it) {
250       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
251       break;
252     }
253     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
254   }
255   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
256 
257   for (i = 0; i < dgmres->r; i++) PetscCall(VecViewFromOptions(UU[i], (PetscObject)ksp, "-ksp_dgmres_view_deflation_vecs"));
258   PetscFunctionReturn(PETSC_SUCCESS);
259 }
260 
KSPDestroy_DGMRES(KSP ksp)261 PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
262 {
263   KSP_DGMRES *dgmres   = (KSP_DGMRES *)ksp->data;
264   PetscInt    neig1    = dgmres->neig + EIG_OFFSET;
265   PetscInt    max_neig = dgmres->max_neig;
266 
267   PetscFunctionBegin;
268   if (dgmres->r) {
269     PetscCall(VecDestroyVecs(max_neig, &UU));
270     PetscCall(VecDestroyVecs(max_neig, &MU));
271     if (XX) {
272       PetscCall(VecDestroyVecs(neig1, &XX));
273       PetscCall(VecDestroyVecs(neig1, &MX));
274     }
275     PetscCall(PetscFree(TT));
276     PetscCall(PetscFree(TTF));
277     PetscCall(PetscFree(INVP));
278     PetscCall(PetscFree(XMX));
279     PetscCall(PetscFree(UMX));
280     PetscCall(PetscFree(XMU));
281     PetscCall(PetscFree(X1));
282     PetscCall(PetscFree(X2));
283     PetscCall(PetscFree(dgmres->work));
284     PetscCall(PetscFree(dgmres->iwork));
285     PetscCall(PetscFree(dgmres->wr));
286     PetscCall(PetscFree(dgmres->wi));
287     PetscCall(PetscFree(dgmres->modul));
288     PetscCall(PetscFree(dgmres->Q));
289     PetscCall(PetscFree(ORTH));
290     PetscCall(PetscFree(AUAU));
291     PetscCall(PetscFree(AUU));
292     PetscCall(PetscFree(SR2));
293   }
294   PetscCall(PetscFree(SR));
295   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", NULL));
296   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", NULL));
297   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", NULL));
298   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", NULL));
299   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", NULL));
300   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", NULL));
301   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", NULL));
302   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", NULL));
303   PetscCall(KSPDestroy_GMRES(ksp));
304   PetscFunctionReturn(PETSC_SUCCESS);
305 }
306 
307 /*
308  KSPDGMRESBuildSoln - create the solution from the starting vector and the
309  current iterates.
310 
311  Input parameters:
312  nrs - work area of size it + 1.
313  vs  - index of initial guess
314  vdest - index of result.  Note that vs may == vdest (replace
315  guess with the solution).
316 
317  This is an internal routine that knows about the GMRES internals.
318  */
KSPDGMRESBuildSoln(PetscScalar * nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)319 static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs, Vec vs, Vec vdest, KSP ksp, PetscInt it)
320 {
321   PetscScalar tt;
322   PetscInt    ii, k, j;
323   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
324 
325   /* Solve for solution vector that minimizes the residual */
326 
327   PetscFunctionBegin;
328   /* If it is < 0, no gmres steps have been performed */
329   if (it < 0) {
330     PetscCall(VecCopy(vs, vdest)); /* VecCopy() is smart, exists immediately if vguess == vdest */
331     PetscFunctionReturn(PETSC_SUCCESS);
332   }
333   PetscCheck(*HH(it, it) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %" PetscInt_FMT " GRS(it) = %g", it, (double)PetscAbsScalar(*GRS(it)));
334   if (*HH(it, it) != 0.0) nrs[it] = *GRS(it) / *HH(it, it);
335   else nrs[it] = 0.0;
336 
337   for (ii = 1; ii <= it; ii++) {
338     k  = it - ii;
339     tt = *GRS(k);
340     for (j = k + 1; j <= it; j++) tt = tt - *HH(k, j) * nrs[j];
341     PetscCheck(*HH(k, k) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is singular. HH(k,k) is identically zero; it = %" PetscInt_FMT " k = %" PetscInt_FMT, it, k);
342     nrs[k] = tt / *HH(k, k);
343   }
344 
345   /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
346   PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &VEC_VV(0)));
347 
348   /* Apply deflation */
349   if (ksp->pc_side == PC_RIGHT && dgmres->r > 0) {
350     PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP));
351     PetscCall(VecCopy(VEC_TEMP_MATOP, VEC_TEMP));
352   }
353   PetscCall(KSPUnwindPreconditioner(ksp, VEC_TEMP, VEC_TEMP_MATOP));
354 
355   /* add solution to previous solution */
356   if (vdest != vs) PetscCall(VecCopy(vs, vdest));
357   PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
358   PetscFunctionReturn(PETSC_SUCCESS);
359 }
360 
361 /*
362  Do the scalar work for the orthogonalization.  Return new residual norm.
363  */
KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal * res)364 static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool hapend, PetscReal *res)
365 {
366   PetscScalar *hh, *cc, *ss, tt;
367   PetscInt     j;
368   KSP_DGMRES  *dgmres = (KSP_DGMRES *)ksp->data;
369 
370   PetscFunctionBegin;
371   hh = HH(0, it);
372   cc = CC(0);
373   ss = SS(0);
374 
375   /* Apply all the previously computed plane rotations to the new column
376    of the Hessenberg matrix */
377   for (j = 1; j <= it; j++) {
378     tt  = *hh;
379     *hh = PetscConj(*cc) * tt + *ss * *(hh + 1);
380     hh++;
381     *hh = *cc++ * *hh - (*ss++ * tt);
382   }
383 
384   /*
385    compute the new plane rotation, and apply it to:
386    1) the right-hand side of the Hessenberg system
387    2) the new column of the Hessenberg matrix
388    thus obtaining the updated value of the residual
389    */
390   if (!hapend) {
391     tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh + 1)) * *(hh + 1));
392     if (tt == 0.0) {
393       ksp->reason = KSP_DIVERGED_NULL;
394       PetscFunctionReturn(PETSC_SUCCESS);
395     }
396     *cc          = *hh / tt;
397     *ss          = *(hh + 1) / tt;
398     *GRS(it + 1) = -(*ss * *GRS(it));
399     *GRS(it)     = PetscConj(*cc) * *GRS(it);
400     *hh          = PetscConj(*cc) * *hh + *ss * *(hh + 1);
401     *res         = PetscAbsScalar(*GRS(it + 1));
402   } else {
403     /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
404      another rotation matrix (so RH doesn't change).  The new residual is
405      always the new sine term times the residual from last time (GRS(it)),
406      but now the new sine rotation would be zero...so the residual should
407      be zero...so we will multiply "zero" by the last residual.  This might
408      not be exactly what we want to do here -could just return "zero". */
409     *res = 0.0;
410   }
411   PetscFunctionReturn(PETSC_SUCCESS);
412 }
413 
414 /*
415   Allocates more work vectors, starting from VEC_VV(it).
416 */
KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)417 static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp, PetscInt it)
418 {
419   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
420   PetscInt    nwork  = dgmres->nwork_alloc, k, nalloc;
421 
422   PetscFunctionBegin;
423   nalloc = PetscMin(ksp->max_it, dgmres->delta_allocate);
424   /* Adjust the number to allocate to make sure that we don't exceed the
425    number of available slots */
426   if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
427   if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS);
428 
429   dgmres->vv_allocated += nalloc;
430 
431   PetscCall(KSPCreateVecs(ksp, nalloc, &dgmres->user_work[nwork], 0, NULL));
432 
433   dgmres->mwork_alloc[nwork] = nalloc;
434   for (k = 0; k < nalloc; k++) dgmres->vecs[it + VEC_OFFSET + k] = dgmres->user_work[nwork][k];
435   dgmres->nwork_alloc++;
436   PetscFunctionReturn(PETSC_SUCCESS);
437 }
438 
KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec * result)439 PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp, Vec ptr, Vec *result)
440 {
441   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
442 
443   PetscFunctionBegin;
444   if (!ptr) {
445     if (!dgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &dgmres->sol_temp));
446     ptr = dgmres->sol_temp;
447   }
448   if (!dgmres->nrs) {
449     /* allocate the work area */
450     PetscCall(PetscMalloc1(dgmres->max_k, &dgmres->nrs));
451   }
452   PetscCall(KSPDGMRESBuildSoln(dgmres->nrs, ksp->vec_sol, ptr, ksp, dgmres->it));
453   if (result) *result = ptr;
454   PetscFunctionReturn(PETSC_SUCCESS);
455 }
456 
KSPView_DGMRES(KSP ksp,PetscViewer viewer)457 static PetscErrorCode KSPView_DGMRES(KSP ksp, PetscViewer viewer)
458 {
459   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
460   PetscBool   isascii, isharmonic;
461 
462   PetscFunctionBegin;
463   PetscCall(KSPView_GMRES(ksp, viewer));
464   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
465   if (isascii) {
466     PetscCall(PetscViewerASCIIPrintf(viewer, "    Adaptive strategy is used: %s\n", PetscBools[dgmres->force]));
467     PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic));
468     if (isharmonic) {
469       PetscCall(PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %" PetscInt_FMT " using Harmonic Ritz values \n", dgmres->neig));
470     } else {
471       PetscCall(PetscViewerASCIIPrintf(viewer, "   Frequency of extracted eigenvalues = %" PetscInt_FMT " using Ritz values \n", dgmres->neig));
472     }
473     PetscCall(PetscViewerASCIIPrintf(viewer, "   Total number of extracted eigenvalues = %" PetscInt_FMT "\n", dgmres->r));
474     PetscCall(PetscViewerASCIIPrintf(viewer, "   Maximum number of eigenvalues set to be extracted = %" PetscInt_FMT "\n", dgmres->max_neig));
475     PetscCall(PetscViewerASCIIPrintf(viewer, "   relaxation parameter for the adaptive strategy(smv)  = %g\n", (double)dgmres->smv));
476     PetscCall(PetscViewerASCIIPrintf(viewer, "   Number of matvecs : %" PetscInt_FMT "\n", dgmres->matvecs));
477   }
478   PetscFunctionReturn(PETSC_SUCCESS);
479 }
480 
KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)481 PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp, PetscInt neig)
482 {
483   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
484 
485   PetscFunctionBegin;
486   PetscCheck(neig >= 0 && neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of neig must be positive and less than the restart value ");
487   dgmres->neig = neig;
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
490 
KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)491 static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp, PetscInt max_neig)
492 {
493   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
494 
495   PetscFunctionBegin;
496   PetscCheck(max_neig >= 0 && max_neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of max_neig must be positive and less than the restart value ");
497   dgmres->max_neig = max_neig;
498   PetscFunctionReturn(PETSC_SUCCESS);
499 }
500 
KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)501 static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp, PetscReal ratio)
502 {
503   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
504 
505   PetscFunctionBegin;
506   PetscCheck(ratio > 0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The relaxation parameter value must be positive");
507   dgmres->smv = ratio;
508   PetscFunctionReturn(PETSC_SUCCESS);
509 }
510 
KSPDGMRESForce_DGMRES(KSP ksp,PetscBool force)511 static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp, PetscBool force)
512 {
513   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
514 
515   PetscFunctionBegin;
516   dgmres->force = force;
517   PetscFunctionReturn(PETSC_SUCCESS);
518 }
519 
KSPSetFromOptions_DGMRES(KSP ksp,PetscOptionItems PetscOptionsObject)520 PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp, PetscOptionItems PetscOptionsObject)
521 {
522   PetscInt    neig;
523   PetscInt    max_neig;
524   KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
525   PetscBool   flg;
526 
527   PetscFunctionBegin;
528   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
529   PetscOptionsHeadBegin(PetscOptionsObject, "KSP DGMRES Options");
530   PetscCall(PetscOptionsInt("-ksp_dgmres_eigen", "Number of smallest eigenvalues to extract at each restart", "KSPDGMRESSetEigen", dgmres->neig, &neig, &flg));
531   if (flg) PetscCall(KSPDGMRESSetEigen(ksp, neig));
532   PetscCall(PetscOptionsInt("-ksp_dgmres_max_eigen", "Maximum Number of smallest eigenvalues to extract ", "KSPDGMRESSetMaxEigen", dgmres->max_neig, &max_neig, &flg));
533   if (flg) PetscCall(KSPDGMRESSetMaxEigen(ksp, max_neig));
534   PetscCall(PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &dgmres->smv, NULL));
535   PetscCall(PetscOptionsBool("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &dgmres->improve, NULL));
536   PetscCall(PetscOptionsBool("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &dgmres->force, NULL));
537   PetscOptionsHeadEnd();
538   PetscFunctionReturn(PETSC_SUCCESS);
539 }
540 
KSPDGMRESComputeDeflationData_DGMRES(KSP ksp,PetscInt * ExtrNeig)541 PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
542 {
543   KSP_DGMRES  *dgmres = (KSP_DGMRES *)ksp->data;
544   PetscInt     i, j, k;
545   PetscBLASInt nr, bmax;
546   PetscInt     r = dgmres->r;
547   PetscInt     neig;                                 /* number of eigenvalues to extract at each restart */
548   PetscInt     neig1    = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
549   PetscInt     max_neig = dgmres->max_neig;          /* Max number of eigenvalues to extract during the iterative process */
550   PetscInt     N        = dgmres->max_k + 1;
551   PetscInt     n        = dgmres->it + 1;
552   PetscReal    alpha;
553 
554   PetscFunctionBegin;
555   PetscCall(PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
556   if (dgmres->neig == 0 || (max_neig < (r + neig1) && !dgmres->improve)) {
557     PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
558     PetscFunctionReturn(PETSC_SUCCESS);
559   }
560 
561   PetscCall(KSPDGMRESComputeSchurForm(ksp, &neig));
562   /* Form the extended Schur vectors X=VV*Sr */
563   if (!XX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &XX));
564   for (j = 0; j < neig; j++) PetscCall(VecMAXPBY(XX[j], n, &SR[j * N], 0, &VEC_VV(0)));
565 
566   /* Orthogonalize X against U */
567   if (!ORTH) PetscCall(PetscMalloc1(max_neig, &ORTH));
568   if (r > 0) {
569     /* modified Gram-Schmidt */
570     for (j = 0; j < neig; j++) {
571       for (i = 0; i < r; i++) {
572         /* First, compute U'*X[j] */
573         PetscCall(VecDot(XX[j], UU[i], &alpha));
574         /* Then, compute X(j)=X(j)-U*U'*X(j) */
575         PetscCall(VecAXPY(XX[j], -alpha, UU[i]));
576       }
577     }
578   }
579   /* Compute MX = M^{-1}*A*X */
580   if (!MX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &MX));
581   for (j = 0; j < neig; j++) PetscCall(KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP));
582   dgmres->matvecs += neig;
583 
584   if ((r + neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- expensive to do this */
585     PetscCall(KSPDGMRESImproveEig(ksp, neig));
586     PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
587     PetscFunctionReturn(PETSC_SUCCESS); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
588   }
589 
590   /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
591   if (!XMX) PetscCall(PetscMalloc1(neig1 * neig1, &XMX));
592   for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, XX, &XMX[j * neig1]));
593 
594   if (r > 0) {
595     /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
596     if (!UMX) PetscCall(PetscMalloc1(max_neig * neig1, &UMX));
597     for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, UU, &UMX[j * max_neig]));
598     /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
599     if (!XMU) PetscCall(PetscMalloc1(max_neig * neig1, &XMU));
600     for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, XX, &XMU[j * neig1]));
601   }
602 
603   /* Form the new matrix T = [T UMX; XMU XMX]; */
604   if (!TT) PetscCall(PetscMalloc1(max_neig * max_neig, &TT));
605   if (r > 0) {
606     /* Add XMU to T */
607     for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&TT[max_neig * j + r], &XMU[neig1 * j], neig));
608     /* Add [UMX; XMX] to T */
609     for (j = 0; j < neig; j++) {
610       k = r + j;
611       PetscCall(PetscArraycpy(&TT[max_neig * k], &UMX[max_neig * j], r));
612       PetscCall(PetscArraycpy(&TT[max_neig * k + r], &XMX[neig1 * j], neig));
613     }
614   } else { /* Add XMX to T */
615     for (j = 0; j < neig; j++) PetscCall(PetscArraycpy(&TT[max_neig * j], &XMX[neig1 * j], neig));
616   }
617 
618   dgmres->r += neig;
619   r = dgmres->r;
620   PetscCall(PetscBLASIntCast(r, &nr));
621   /*LU Factorize T with Lapack xgetrf routine */
622 
623   PetscCall(PetscBLASIntCast(max_neig, &bmax));
624   if (!TTF) PetscCall(PetscMalloc1(bmax * bmax, &TTF));
625   PetscCall(PetscArraycpy(TTF, TT, bmax * r));
626   if (!INVP) PetscCall(PetscMalloc1(bmax, &INVP));
627   {
628     PetscBLASInt info;
629     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
630     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%" PetscBLASInt_FMT, info);
631   }
632 
633   /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
634   if (!UU) {
635     PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &UU));
636     PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &MU));
637   }
638   for (j = 0; j < neig; j++) {
639     PetscCall(VecCopy(XX[j], UU[r - neig + j]));
640     PetscCall(VecCopy(MX[j], MU[r - neig + j]));
641   }
642   PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
643   PetscFunctionReturn(PETSC_SUCCESS);
644 }
645 
KSPDGMRESComputeSchurForm_DGMRES(KSP ksp,PetscInt * neig)646 PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
647 {
648   KSP_DGMRES   *dgmres = (KSP_DGMRES *)ksp->data;
649   PetscInt      N = dgmres->max_k + 1, n = dgmres->it + 1;
650   PetscBLASInt  bn;
651   PetscReal    *A;
652   PetscBLASInt  ihi;
653   PetscBLASInt  ldA = 0; /* leading dimension of A */
654   PetscBLASInt  ldQ;     /* leading dimension of Q */
655   PetscReal    *Q;       /*  orthogonal matrix of  (left) Schur vectors */
656   PetscReal    *work;    /* working vector */
657   PetscBLASInt  lwork;   /* size of the working vector */
658   PetscInt     *perm;    /* Permutation vector to sort eigenvalues */
659   PetscInt      i, j;
660   PetscBLASInt  NbrEig;          /* Number of eigenvalues really extracted */
661   PetscReal    *wr, *wi, *modul; /* Real and imaginary part and modulus of the eigenvalues of A */
662   PetscBLASInt *select;
663   PetscBLASInt *iwork;
664   PetscBLASInt  liwork;
665   PetscScalar  *Ht;   /* Transpose of the Hessenberg matrix */
666   PetscScalar  *t;    /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
667   PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
668   PetscBool     flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
669 
670   PetscFunctionBegin;
671   PetscCall(PetscBLASIntCast(n, &bn));
672   PetscCall(PetscBLASIntCast(N, &ldA));
673   ihi = ldQ = bn;
674   PetscCall(PetscBLASIntCast(5 * N, &lwork));
675 
676 #if defined(PETSC_USE_COMPLEX)
677   SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "No support for complex numbers.");
678 #endif
679 
680   PetscCall(PetscMalloc1(ldA * ldA, &A));
681   PetscCall(PetscMalloc1(ldQ * n, &Q));
682   PetscCall(PetscMalloc1(lwork, &work));
683   if (!dgmres->wr) {
684     PetscCall(PetscMalloc1(n, &dgmres->wr));
685     PetscCall(PetscMalloc1(n, &dgmres->wi));
686   }
687   wr = dgmres->wr;
688   wi = dgmres->wi;
689   PetscCall(PetscMalloc1(n, &modul));
690   PetscCall(PetscMalloc1(n, &perm));
691   /* copy the Hessenberg matrix to work space */
692   PetscCall(PetscArraycpy(A, dgmres->hes_origin, ldA * ldA));
693   PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag));
694   if (flag) {
695     /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
696     /* Transpose the Hessenberg matrix */
697     PetscCall(PetscMalloc1(bn * bn, &Ht));
698     for (i = 0; i < bn; i++) {
699       for (j = 0; j < bn; j++) Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
700     }
701 
702     /* Solve the system H^T*t = h_{m+1,m}e_m */
703     PetscCall(PetscCalloc1(bn, &t));
704     t[bn - 1] = dgmres->hes_origin[(bn - 1) * ldA + bn]; /* Pick the last element H(m+1,m) */
705     PetscCall(PetscMalloc1(bn, &ipiv));
706     /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
707     {
708       PetscBLASInt info;
709       PetscBLASInt nrhs = 1;
710       PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
711       PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error while calling the Lapack routine DGESV");
712     }
713     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
714     for (i = 0; i < bn; i++) A[(bn - 1) * bn + i] += t[i];
715     PetscCall(PetscFree(t));
716     PetscCall(PetscFree(Ht));
717   }
718   /* Compute eigenvalues with the Schur form */
719   {
720     PetscBLASInt info = 0;
721     PetscBLASInt ilo  = 1;
722     PetscCallBLAS("LAPACKhseqr", LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
723     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XHSEQR %" PetscBLASInt_FMT, info);
724   }
725   PetscCall(PetscFree(work));
726 
727   /* sort the eigenvalues */
728   for (i = 0; i < n; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
729   for (i = 0; i < n; i++) perm[i] = i;
730 
731   PetscCall(PetscSortRealWithPermutation(n, modul, perm));
732   /* save the complex modulus of the largest eigenvalue in magnitude */
733   if (dgmres->lambdaN < modul[perm[n - 1]]) dgmres->lambdaN = modul[perm[n - 1]];
734   /* count the number of extracted eigenvalues (with complex conjugates) */
735   NbrEig = 0;
736   while (NbrEig < dgmres->neig) {
737     if (wi[perm[NbrEig]] != 0) NbrEig += 2;
738     else NbrEig += 1;
739   }
740   /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
741 
742   PetscCall(PetscCalloc1(n, &select));
743 
744   if (!dgmres->GreatestEig) {
745     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
746   } else {
747     for (j = 0; j < NbrEig; j++) select[perm[n - j - 1]] = 1;
748   }
749   /* call Lapack dtrsen */
750   lwork  = PetscMax(1, 4 * NbrEig * (bn - NbrEig));
751   liwork = PetscMax(1, 2 * NbrEig * (bn - NbrEig));
752   PetscCall(PetscMalloc1(lwork, &work));
753   PetscCall(PetscMalloc1(liwork, &iwork));
754   {
755     PetscBLASInt info = 0;
756     PetscReal    CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
757     PetscReal    CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
758     PetscCallBLAS("LAPACKtrsen", LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
759     PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Unable to reorder the eigenvalues with the LAPACK routine: ILL-CONDITIONED PROBLEM");
760   }
761   PetscCall(PetscFree(select));
762 
763   /* Extract the Schur vectors */
764   for (j = 0; j < NbrEig; j++) PetscCall(PetscArraycpy(&SR[j * N], &Q[j * ldQ], n));
765   *neig = NbrEig;
766   PetscCall(PetscFree(A));
767   PetscCall(PetscFree(work));
768   PetscCall(PetscFree(perm));
769   PetscCall(PetscFree(work));
770   PetscCall(PetscFree(iwork));
771   PetscCall(PetscFree(modul));
772   PetscCall(PetscFree(Q));
773   PetscFunctionReturn(PETSC_SUCCESS);
774 }
775 
KSPDGMRESApplyDeflation_DGMRES(KSP ksp,Vec x,Vec y)776 PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
777 {
778   KSP_DGMRES  *dgmres = (KSP_DGMRES *)ksp->data;
779   PetscInt     i, r = dgmres->r;
780   PetscReal    alpha    = 1.0;
781   PetscInt     max_neig = dgmres->max_neig;
782   PetscBLASInt br, bmax;
783   PetscReal    lambda = dgmres->lambdaN;
784 
785   PetscFunctionBegin;
786   PetscCall(PetscBLASIntCast(r, &br));
787   PetscCall(PetscBLASIntCast(max_neig, &bmax));
788   PetscCall(PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
789   if (!r) {
790     PetscCall(VecCopy(x, y));
791     PetscFunctionReturn(PETSC_SUCCESS);
792   }
793   /* Compute U'*x */
794   if (!X1) {
795     PetscCall(PetscMalloc1(bmax, &X1));
796     PetscCall(PetscMalloc1(bmax, &X2));
797   }
798   PetscCall(VecMDot(x, r, UU, X1));
799 
800   /* Solve T*X1=X2 for X1*/
801   PetscCall(PetscArraycpy(X2, X1, br));
802   {
803     PetscBLASInt info;
804     PetscBLASInt nrhs = 1;
805     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
806     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRS %" PetscBLASInt_FMT, info);
807   }
808   /* Iterative refinement -- is it really necessary ?? */
809   if (!WORK) {
810     PetscCall(PetscMalloc1(3 * bmax, &WORK));
811     PetscCall(PetscMalloc1(bmax, &IWORK));
812   }
813   {
814     PetscBLASInt info;
815     PetscReal    berr, ferr;
816     PetscBLASInt nrhs = 1;
817     PetscCallBLAS("LAPACKgerfs", LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax, X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
818     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGERFS %" PetscBLASInt_FMT, info);
819   }
820 
821   for (i = 0; i < r; i++) X2[i] = X1[i] / lambda - X2[i];
822 
823   /* Compute X2=U*X2 */
824   PetscCall(VecMAXPBY(y, r, X2, 0, UU));
825   PetscCall(VecAXPY(y, alpha, x));
826 
827   PetscCall(PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
828   PetscFunctionReturn(PETSC_SUCCESS);
829 }
830 
KSPDGMRESImproveEig_DGMRES(KSP ksp,PetscInt neig)831 static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
832 {
833   KSP_DGMRES   *dgmres = (KSP_DGMRES *)ksp->data;
834   PetscInt      j, r_old, r = dgmres->r;
835   PetscBLASInt  i     = 0;
836   PetscInt      neig1 = dgmres->neig + EIG_OFFSET;
837   PetscInt      bmax  = dgmres->max_neig;
838   PetscInt      aug   = r + neig;       /* actual size of the augmented invariant basis */
839   PetscInt      aug1  = bmax + neig1;   /* maximum size of the augmented invariant basis */
840   PetscBLASInt  ldA;                    /* leading dimension of AUAU and AUU*/
841   PetscBLASInt  N;                      /* size of AUAU */
842   PetscReal    *Q;                      /*  orthogonal matrix of  (left) schur vectors */
843   PetscReal    *Z;                      /*  orthogonal matrix of  (right) schur vectors */
844   PetscReal    *work;                   /* working vector */
845   PetscBLASInt  lwork;                  /* size of the working vector */
846   PetscInt     *perm;                   /* Permutation vector to sort eigenvalues */
847   PetscReal    *wr, *wi, *beta, *modul; /* Real and imaginary part and modulus of the eigenvalues of A*/
848   PetscBLASInt  NbrEig = 0, nr, bm;
849   PetscBLASInt *select;
850   PetscBLASInt  liwork, *iwork;
851 
852   PetscFunctionBegin;
853   /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
854   if (!AUU) {
855     PetscCall(PetscMalloc1(aug1 * aug1, &AUU));
856     PetscCall(PetscMalloc1(aug1 * aug1, &AUAU));
857   }
858   /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
859    * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
860   /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
861   for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], r, MU, &AUU[j * aug1]));
862   /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
863   for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], r, MU, &AUU[(r + j) * aug1]));
864   /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
865   for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], neig, MX, &AUU[j * aug1 + r]));
866   /* (MX)'*X size (neig neig) --  store in AUU from the column <r> and the row <r>*/
867   for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], neig, MX, &AUU[(r + j) * aug1 + r]));
868 
869   /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
870   /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
871   for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, MU, &AUAU[j * aug1]));
872   /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
873   for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, MU, &AUAU[(r + j) * aug1]));
874   /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
875   for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, MX, &AUAU[j * aug1 + r]));
876   /* (MX)'*MX size (neig neig) --  store in AUAU from the column <r> and the row <r>*/
877   for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, MX, &AUAU[(r + j) * aug1 + r]));
878 
879   /* Computation of the eigenvectors */
880   PetscCall(PetscBLASIntCast(aug1, &ldA));
881   PetscCall(PetscBLASIntCast(aug, &N));
882   lwork = 8 * N + 20; /* sizeof the working space */
883   PetscCall(PetscMalloc1(N, &wr));
884   PetscCall(PetscMalloc1(N, &wi));
885   PetscCall(PetscMalloc1(N, &beta));
886   PetscCall(PetscMalloc1(N, &modul));
887   PetscCall(PetscMalloc1(N, &perm));
888   PetscCall(PetscMalloc1(N * N, &Q));
889   PetscCall(PetscMalloc1(N * N, &Z));
890   PetscCall(PetscMalloc1(lwork, &work));
891   {
892     PetscBLASInt info = 0;
893     PetscCallBLAS("LAPACKgges", LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
894     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGGES %" PetscBLASInt_FMT, info);
895   }
896   for (i = 0; i < N; i++) {
897     if (beta[i] != 0.0) {
898       wr[i] /= beta[i];
899       wi[i] /= beta[i];
900     }
901   }
902   /* sort the eigenvalues */
903   for (i = 0; i < N; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
904   for (i = 0; i < N; i++) perm[i] = i;
905   PetscCall(PetscSortRealWithPermutation(N, modul, perm));
906   /* Save the norm of the largest eigenvalue */
907   if (dgmres->lambdaN < modul[perm[N - 1]]) dgmres->lambdaN = modul[perm[N - 1]];
908   /* Allocate space to extract the first r schur vectors   */
909   if (!SR2) PetscCall(PetscMalloc1(aug1 * bmax, &SR2));
910   /* count the number of extracted eigenvalues (complex conjugates count as 2) */
911   while (NbrEig < bmax) {
912     if (wi[perm[NbrEig]] == 0) NbrEig += 1;
913     else NbrEig += 2;
914   }
915   if (NbrEig > bmax) PetscCall(PetscBLASIntCast(bmax - 1, &NbrEig));
916   r_old     = r; /* previous size of r */
917   dgmres->r = r = NbrEig;
918 
919   /* Select the eigenvalues to reorder */
920   PetscCall(PetscCalloc1(N, &select));
921   if (!dgmres->GreatestEig) {
922     for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
923   } else {
924     for (j = 0; j < NbrEig; j++) select[perm[N - j - 1]] = 1;
925   }
926   /* Reorder and extract the new <r> schur vectors */
927   lwork  = PetscMax(4 * N + 16, 2 * NbrEig * (N - NbrEig));
928   liwork = PetscMax(N + 6, 2 * NbrEig * (N - NbrEig));
929   PetscCall(PetscFree(work));
930   PetscCall(PetscMalloc1(lwork, &work));
931   PetscCall(PetscMalloc1(liwork, &iwork));
932   {
933     PetscBLASInt info = 0;
934     PetscReal    Dif[2];
935     PetscBLASInt ijob  = 2;
936     PetscBLASInt wantQ = 1, wantZ = 1;
937     PetscCallBLAS("LAPACKtgsen", LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &Dif[0], work, &lwork, iwork, &liwork, &info));
938     PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Unable to reorder the eigenvalues with the LAPACK routine: ill-conditioned problem.");
939   }
940   PetscCall(PetscFree(select));
941 
942   for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&SR2[j * aug1], &Z[j * N], N));
943 
944   /* Multiply the Schur vectors SR2 by U (and X)  to get a new U
945    -- save it temporarily in MU */
946   for (j = 0; j < r; j++) {
947     PetscCall(VecMAXPBY(MU[j], r_old, &SR2[j * aug1], 0, UU));
948     PetscCall(VecMAXPY(MU[j], neig, &SR2[j * aug1 + r_old], XX));
949   }
950   /* Form T = U'*MU*U */
951   for (j = 0; j < r; j++) {
952     PetscCall(VecCopy(MU[j], UU[j]));
953     PetscCall(KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP));
954   }
955   dgmres->matvecs += r;
956   for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, UU, &TT[j * bmax]));
957   /* Factorize T */
958   PetscCall(PetscArraycpy(TTF, TT, bmax * r));
959   PetscCall(PetscBLASIntCast(r, &nr));
960   PetscCall(PetscBLASIntCast(bmax, &bm));
961   {
962     PetscBLASInt info;
963     PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
964     PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%" PetscBLASInt_FMT, info);
965   }
966   /* Free Memory */
967   PetscCall(PetscFree(wr));
968   PetscCall(PetscFree(wi));
969   PetscCall(PetscFree(beta));
970   PetscCall(PetscFree(modul));
971   PetscCall(PetscFree(perm));
972   PetscCall(PetscFree(Q));
973   PetscCall(PetscFree(Z));
974   PetscCall(PetscFree(work));
975   PetscCall(PetscFree(iwork));
976   PetscFunctionReturn(PETSC_SUCCESS);
977 }
978 
979 /*MC
980    KSPDGMRES - Implements the deflated GMRES as defined in {cite}`erhel1996restarted` and {cite}`wakam2013memory`
981 
982    Options Database Keys:
983    GMRES Options (inherited):
984 +   -ksp_gmres_restart <restart>                                                - the number of Krylov directions to orthogonalize against
985 .   -ksp_gmres_haptol <tol>                                                     - sets the tolerance for "happy ending" (exact convergence)
986 .   -ksp_gmres_preallocate                                                      - preallocate all the Krylov search directions initially
987                                                                                   (otherwise groups of vectors are allocated as needed)
988 .   -ksp_gmres_classicalgramschmidt                                             - use classical (unmodified) Gram-Schmidt to orthogonalize against
989                                                                                   the Krylov space (fast) (the default)
990 .   -ksp_gmres_modifiedgramschmidt                                              - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
991 .   -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
992                                                                                   stability of the classical Gram-Schmidt orthogonalization.
993 -   -ksp_gmres_krylov_monitor                                                   - plot the Krylov space generated
994 
995    DGMRES Options Database Keys:
996 +   -ksp_dgmres_eigen <neig>                     - number of smallest eigenvalues to extract at each restart
997 .   -ksp_dgmres_max_eigen <max_neig>             - maximum number of eigenvalues that can be extracted during the iterative process
998 .   -ksp_dgmres_force                            - use the deflation at each restart; switch off the adaptive strategy.
999 -   -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1000                                                    parsed by `PetscOptionsCreateViewer()`.  If neig > 1, viewerspec should
1001                                                    end with ":append".  No vectors will be viewed if the adaptive
1002                                                    strategy chooses not to deflate, so -ksp_dgmres_force should also
1003                                                    be given.
1004                                                    The deflation vectors span a subspace that may be a good
1005                                                    approximation of the subspace of smallest eigenvectors of the
1006                                                    preconditioned operator, so this option can aid in understanding
1007                                                    the performance of a preconditioner.
1008 
1009    Level: beginner
1010 
1011    Notes:
1012    Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not supported
1013 
1014    In this implementation, the adaptive strategy allows switching to deflated GMRES when the stagnation occurs.
1015 
1016    Contributed by:
1017    Desire NUENTSA WAKAM, INRIA
1018 
1019 .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPFGMRES`, `KSPLGMRES`,
1020            `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
1021            `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
1022            `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`, `KSPSetPCSide()`
1023  M*/
1024 
KSPCreate_DGMRES(KSP ksp)1025 PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1026 {
1027   KSP_DGMRES *dgmres;
1028 
1029   PetscFunctionBegin;
1030   PetscCall(PetscNew(&dgmres));
1031   ksp->data = (void *)dgmres;
1032 
1033   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
1034   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
1035   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
1036 
1037   ksp->ops->buildsolution                = KSPBuildSolution_DGMRES;
1038   ksp->ops->setup                        = KSPSetUp_DGMRES;
1039   ksp->ops->solve                        = KSPSolve_DGMRES;
1040   ksp->ops->destroy                      = KSPDestroy_DGMRES;
1041   ksp->ops->view                         = KSPView_DGMRES;
1042   ksp->ops->setfromoptions               = KSPSetFromOptions_DGMRES;
1043   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1044   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
1045 
1046   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
1047   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES));
1048   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
1049   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES));
1050   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES));
1051   /* -- New functions defined in DGMRES -- */
1052   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", KSPDGMRESSetEigen_DGMRES));
1053   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", KSPDGMRESSetMaxEigen_DGMRES));
1054   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", KSPDGMRESSetRatio_DGMRES));
1055   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", KSPDGMRESForce_DGMRES));
1056   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", KSPDGMRESComputeSchurForm_DGMRES));
1057   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", KSPDGMRESComputeDeflationData_DGMRES));
1058   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", KSPDGMRESApplyDeflation_DGMRES));
1059   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES));
1060 
1061   PetscCall(PetscLogEventRegister("DGMRESCompDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData));
1062   PetscCall(PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation));
1063 
1064   dgmres->haptol         = 1.0e-30;
1065   dgmres->q_preallocate  = PETSC_FALSE;
1066   dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1067   dgmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
1068   dgmres->nrs            = NULL;
1069   dgmres->sol_temp       = NULL;
1070   dgmres->max_k          = GMRES_DEFAULT_MAXK;
1071   dgmres->Rsvd           = NULL;
1072   dgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
1073   dgmres->orthogwork     = NULL;
1074 
1075   /* Default values for the deflation */
1076   dgmres->r           = 0;
1077   dgmres->neig        = DGMRES_DEFAULT_EIG;
1078   dgmres->max_neig    = DGMRES_DEFAULT_MAXEIG - 1;
1079   dgmres->lambdaN     = 0.0;
1080   dgmres->smv         = SMV;
1081   dgmres->matvecs     = 0;
1082   dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1083   dgmres->HasSchur    = PETSC_FALSE;
1084   PetscFunctionReturn(PETSC_SUCCESS);
1085 }
1086