1 /*
2 Implements deflated GMRES.
3 */
4
5 #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h> /*I "petscksp.h" I*/
6
7 PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
8
9 static PetscErrorCode KSPDGMRESGetNewVectors(KSP, PetscInt);
10 static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP, PetscInt, PetscBool, PetscReal *);
11 static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
12
KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)13 static PetscErrorCode KSPDGMRESSetEigen(KSP ksp, PetscInt nb_eig)
14 {
15 PetscFunctionBegin;
16 PetscTryMethod(ksp, "KSPDGMRESSetEigen_C", (KSP, PetscInt), (ksp, nb_eig));
17 PetscFunctionReturn(PETSC_SUCCESS);
18 }
KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)19 static PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp, PetscInt max_neig)
20 {
21 PetscFunctionBegin;
22 PetscTryMethod(ksp, "KSPDGMRESSetMaxEigen_C", (KSP, PetscInt), (ksp, max_neig));
23 PetscFunctionReturn(PETSC_SUCCESS);
24 }
KSPDGMRESComputeSchurForm(KSP ksp,PetscInt * neig)25 static PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp, PetscInt *neig)
26 {
27 PetscFunctionBegin;
28 PetscUseMethod(ksp, "KSPDGMRESComputeSchurForm_C", (KSP, PetscInt *), (ksp, neig));
29 PetscFunctionReturn(PETSC_SUCCESS);
30 }
KSPDGMRESComputeDeflationData(KSP ksp,PetscInt * curneigh)31 PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp, PetscInt *curneigh)
32 {
33 PetscFunctionBegin;
34 PetscUseMethod(ksp, "KSPDGMRESComputeDeflationData_C", (KSP, PetscInt *), (ksp, curneigh));
35 PetscFunctionReturn(PETSC_SUCCESS);
36 }
KSPDGMRESApplyDeflation(KSP ksp,Vec x,Vec y)37 static PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
38 {
39 PetscFunctionBegin;
40 PetscUseMethod(ksp, "KSPDGMRESApplyDeflation_C", (KSP, Vec, Vec), (ksp, x, y));
41 PetscFunctionReturn(PETSC_SUCCESS);
42 }
43
KSPDGMRESImproveEig(KSP ksp,PetscInt neig)44 static PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
45 {
46 PetscFunctionBegin;
47 PetscUseMethod(ksp, "KSPDGMRESImproveEig_C", (KSP, PetscInt), (ksp, neig));
48 PetscFunctionReturn(PETSC_SUCCESS);
49 }
50
KSPSetUp_DGMRES(KSP ksp)51 PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
52 {
53 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
54 PetscInt neig = dgmres->neig + EIG_OFFSET;
55 PetscInt max_k = dgmres->max_k + 1;
56
57 PetscFunctionBegin;
58 PetscCall(KSPSetUp_GMRES(ksp));
59 if (!dgmres->neig) PetscFunctionReturn(PETSC_SUCCESS);
60
61 /* Allocate workspace for the Schur vectors*/
62 PetscCall(PetscMalloc1(neig * max_k, &SR));
63 dgmres->wr = NULL;
64 dgmres->wi = NULL;
65 dgmres->perm = NULL;
66 dgmres->modul = NULL;
67 dgmres->Q = NULL;
68 dgmres->Z = NULL;
69
70 UU = NULL;
71 XX = NULL;
72 MX = NULL;
73 AUU = NULL;
74 XMX = NULL;
75 XMU = NULL;
76 UMX = NULL;
77 AUAU = NULL;
78 TT = NULL;
79 TTF = NULL;
80 INVP = NULL;
81 X1 = NULL;
82 X2 = NULL;
83 MU = NULL;
84 PetscFunctionReturn(PETSC_SUCCESS);
85 }
86
87 /*
88 Run GMRES, possibly with restart. Return residual history if requested.
89 input parameters:
90
91 . gmres - structure containing parameters and work areas
92
93 output parameters:
94 . nres - residuals (from preconditioned system) at each step.
95 If restarting, consider passing nres+it. If null,
96 ignored
97 . itcount - number of iterations used. nres[0] to nres[itcount]
98 are defined. If null, ignored.
99
100 Notes:
101 On entry, the value in vector VEC_VV(0) should be the initial residual
102 (this allows shortcuts where the initial preconditioned residual is 0).
103 */
KSPDGMRESCycle(PetscInt * itcount,KSP ksp)104 static PetscErrorCode KSPDGMRESCycle(PetscInt *itcount, KSP ksp)
105 {
106 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
107 PetscReal res_norm, res, hapbnd, tt;
108 PetscInt it = 0;
109 PetscInt max_k = dgmres->max_k;
110 PetscBool hapend = PETSC_FALSE;
111 PetscReal res_old;
112 PetscInt test = 0;
113
114 PetscFunctionBegin;
115 PetscCall(VecNormalize(VEC_VV(0), &res_norm));
116 KSPCheckNorm(ksp, res_norm);
117 res = res_norm;
118 *GRS(0) = res_norm;
119
120 /* check for the convergence */
121 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
122 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
123 else ksp->rnorm = 0.0;
124 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
125 dgmres->it = (it - 1);
126 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
127 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
128 if (!res) {
129 if (itcount) *itcount = 0;
130 ksp->reason = KSP_CONVERGED_ATOL;
131 PetscCall(PetscInfo(ksp, "Converged due to zero residual norm on entry\n"));
132 PetscFunctionReturn(PETSC_SUCCESS);
133 }
134 /* record the residual norm to test if deflation is needed */
135 res_old = res;
136
137 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
138 while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
139 if (it) {
140 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
141 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
142 }
143 dgmres->it = (it - 1);
144 if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) PetscCall(KSPDGMRESGetNewVectors(ksp, it + 1));
145 if (dgmres->r > 0) {
146 if (ksp->pc_side == PC_LEFT) {
147 /* Apply the first preconditioner */
148 PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_TEMP, VEC_TEMP_MATOP));
149 /* Then apply Deflation as a preconditioner */
150 PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1 + it)));
151 } else if (ksp->pc_side == PC_RIGHT) {
152 PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP));
153 PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1 + it), VEC_TEMP_MATOP));
154 }
155 } else {
156 PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_VV(1 + it), VEC_TEMP_MATOP));
157 }
158 dgmres->matvecs += 1;
159 /* update Hessenberg matrix and do Gram-Schmidt */
160 PetscCall((*dgmres->orthog)(ksp, it));
161
162 /* vv(i+1) . vv(i+1) */
163 PetscCall(VecNormalize(VEC_VV(it + 1), &tt));
164 /* save the magnitude */
165 *HH(it + 1, it) = tt;
166 *HES(it + 1, it) = tt;
167
168 /* check for the happy breakdown */
169 hapbnd = PetscAbsScalar(tt / *GRS(it));
170 if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
171 if (tt < hapbnd) {
172 PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %g tt = %g\n", (double)hapbnd, (double)tt));
173 hapend = PETSC_TRUE;
174 }
175 PetscCall(KSPDGMRESUpdateHessenberg(ksp, it, hapend, &res));
176
177 it++;
178 dgmres->it = (it - 1); /* For converged */
179 ksp->its++;
180 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
181 else ksp->rnorm = 0.0;
182 if (ksp->reason) break;
183
184 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
185
186 /* Catch error in happy breakdown and signal convergence and break from loop */
187 if (hapend) {
188 if (!ksp->reason) {
189 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res);
190 ksp->reason = KSP_DIVERGED_BREAKDOWN;
191 break;
192 }
193 }
194 }
195
196 if (itcount) *itcount = it;
197
198 /*
199 Down here we have to solve for the "best" coefficients of the Krylov
200 columns, add the solution values together, and possibly unwind the
201 preconditioning from the solution
202 */
203 /* Form the solution (or the solution so far) */
204 PetscCall(KSPDGMRESBuildSoln(GRS(0), ksp->vec_sol, ksp->vec_sol, ksp, it - 1));
205
206 /* Monitor if we know that we will not return for a restart */
207 if (ksp->reason == KSP_CONVERGED_ITERATING && ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
208 if (it && ksp->reason) {
209 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
210 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
211 }
212
213 /* Compute data for the deflation to be used during the next restart */
214 if (!ksp->reason && ksp->its < ksp->max_it) {
215 test = max_k * PetscLogReal(ksp->rtol / res) / PetscLogReal(res / res_old);
216 /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
217 if ((test > dgmres->smv * (ksp->max_it - ksp->its)) || dgmres->force) PetscCall(KSPDGMRESComputeDeflationData(ksp, NULL));
218 }
219 PetscFunctionReturn(PETSC_SUCCESS);
220 }
221
KSPSolve_DGMRES(KSP ksp)222 PetscErrorCode KSPSolve_DGMRES(KSP ksp)
223 {
224 PetscInt i, its = 0, itcount;
225 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
226 PetscBool guess_zero = ksp->guess_zero;
227
228 PetscFunctionBegin;
229 PetscCheck(!ksp->calc_sings || dgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
230
231 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
232 ksp->its = 0;
233 dgmres->matvecs = 0;
234 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
235
236 itcount = 0;
237 while (!ksp->reason) {
238 PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
239 if (ksp->pc_side == PC_LEFT) {
240 dgmres->matvecs += 1;
241 if (dgmres->r > 0) {
242 PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP));
243 PetscCall(VecCopy(VEC_TEMP, VEC_VV(0)));
244 }
245 }
246
247 PetscCall(KSPDGMRESCycle(&its, ksp));
248 itcount += its;
249 if (itcount >= ksp->max_it) {
250 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
251 break;
252 }
253 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
254 }
255 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
256
257 for (i = 0; i < dgmres->r; i++) PetscCall(VecViewFromOptions(UU[i], (PetscObject)ksp, "-ksp_dgmres_view_deflation_vecs"));
258 PetscFunctionReturn(PETSC_SUCCESS);
259 }
260
KSPDestroy_DGMRES(KSP ksp)261 PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
262 {
263 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
264 PetscInt neig1 = dgmres->neig + EIG_OFFSET;
265 PetscInt max_neig = dgmres->max_neig;
266
267 PetscFunctionBegin;
268 if (dgmres->r) {
269 PetscCall(VecDestroyVecs(max_neig, &UU));
270 PetscCall(VecDestroyVecs(max_neig, &MU));
271 if (XX) {
272 PetscCall(VecDestroyVecs(neig1, &XX));
273 PetscCall(VecDestroyVecs(neig1, &MX));
274 }
275 PetscCall(PetscFree(TT));
276 PetscCall(PetscFree(TTF));
277 PetscCall(PetscFree(INVP));
278 PetscCall(PetscFree(XMX));
279 PetscCall(PetscFree(UMX));
280 PetscCall(PetscFree(XMU));
281 PetscCall(PetscFree(X1));
282 PetscCall(PetscFree(X2));
283 PetscCall(PetscFree(dgmres->work));
284 PetscCall(PetscFree(dgmres->iwork));
285 PetscCall(PetscFree(dgmres->wr));
286 PetscCall(PetscFree(dgmres->wi));
287 PetscCall(PetscFree(dgmres->modul));
288 PetscCall(PetscFree(dgmres->Q));
289 PetscCall(PetscFree(ORTH));
290 PetscCall(PetscFree(AUAU));
291 PetscCall(PetscFree(AUU));
292 PetscCall(PetscFree(SR2));
293 }
294 PetscCall(PetscFree(SR));
295 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", NULL));
296 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", NULL));
297 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", NULL));
298 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", NULL));
299 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", NULL));
300 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", NULL));
301 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", NULL));
302 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", NULL));
303 PetscCall(KSPDestroy_GMRES(ksp));
304 PetscFunctionReturn(PETSC_SUCCESS);
305 }
306
307 /*
308 KSPDGMRESBuildSoln - create the solution from the starting vector and the
309 current iterates.
310
311 Input parameters:
312 nrs - work area of size it + 1.
313 vs - index of initial guess
314 vdest - index of result. Note that vs may == vdest (replace
315 guess with the solution).
316
317 This is an internal routine that knows about the GMRES internals.
318 */
KSPDGMRESBuildSoln(PetscScalar * nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)319 static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs, Vec vs, Vec vdest, KSP ksp, PetscInt it)
320 {
321 PetscScalar tt;
322 PetscInt ii, k, j;
323 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
324
325 /* Solve for solution vector that minimizes the residual */
326
327 PetscFunctionBegin;
328 /* If it is < 0, no gmres steps have been performed */
329 if (it < 0) {
330 PetscCall(VecCopy(vs, vdest)); /* VecCopy() is smart, exists immediately if vguess == vdest */
331 PetscFunctionReturn(PETSC_SUCCESS);
332 }
333 PetscCheck(*HH(it, it) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %" PetscInt_FMT " GRS(it) = %g", it, (double)PetscAbsScalar(*GRS(it)));
334 if (*HH(it, it) != 0.0) nrs[it] = *GRS(it) / *HH(it, it);
335 else nrs[it] = 0.0;
336
337 for (ii = 1; ii <= it; ii++) {
338 k = it - ii;
339 tt = *GRS(k);
340 for (j = k + 1; j <= it; j++) tt = tt - *HH(k, j) * nrs[j];
341 PetscCheck(*HH(k, k) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is singular. HH(k,k) is identically zero; it = %" PetscInt_FMT " k = %" PetscInt_FMT, it, k);
342 nrs[k] = tt / *HH(k, k);
343 }
344
345 /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
346 PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &VEC_VV(0)));
347
348 /* Apply deflation */
349 if (ksp->pc_side == PC_RIGHT && dgmres->r > 0) {
350 PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP));
351 PetscCall(VecCopy(VEC_TEMP_MATOP, VEC_TEMP));
352 }
353 PetscCall(KSPUnwindPreconditioner(ksp, VEC_TEMP, VEC_TEMP_MATOP));
354
355 /* add solution to previous solution */
356 if (vdest != vs) PetscCall(VecCopy(vs, vdest));
357 PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
358 PetscFunctionReturn(PETSC_SUCCESS);
359 }
360
361 /*
362 Do the scalar work for the orthogonalization. Return new residual norm.
363 */
KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal * res)364 static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool hapend, PetscReal *res)
365 {
366 PetscScalar *hh, *cc, *ss, tt;
367 PetscInt j;
368 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
369
370 PetscFunctionBegin;
371 hh = HH(0, it);
372 cc = CC(0);
373 ss = SS(0);
374
375 /* Apply all the previously computed plane rotations to the new column
376 of the Hessenberg matrix */
377 for (j = 1; j <= it; j++) {
378 tt = *hh;
379 *hh = PetscConj(*cc) * tt + *ss * *(hh + 1);
380 hh++;
381 *hh = *cc++ * *hh - (*ss++ * tt);
382 }
383
384 /*
385 compute the new plane rotation, and apply it to:
386 1) the right-hand side of the Hessenberg system
387 2) the new column of the Hessenberg matrix
388 thus obtaining the updated value of the residual
389 */
390 if (!hapend) {
391 tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh + 1)) * *(hh + 1));
392 if (tt == 0.0) {
393 ksp->reason = KSP_DIVERGED_NULL;
394 PetscFunctionReturn(PETSC_SUCCESS);
395 }
396 *cc = *hh / tt;
397 *ss = *(hh + 1) / tt;
398 *GRS(it + 1) = -(*ss * *GRS(it));
399 *GRS(it) = PetscConj(*cc) * *GRS(it);
400 *hh = PetscConj(*cc) * *hh + *ss * *(hh + 1);
401 *res = PetscAbsScalar(*GRS(it + 1));
402 } else {
403 /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
404 another rotation matrix (so RH doesn't change). The new residual is
405 always the new sine term times the residual from last time (GRS(it)),
406 but now the new sine rotation would be zero...so the residual should
407 be zero...so we will multiply "zero" by the last residual. This might
408 not be exactly what we want to do here -could just return "zero". */
409 *res = 0.0;
410 }
411 PetscFunctionReturn(PETSC_SUCCESS);
412 }
413
414 /*
415 Allocates more work vectors, starting from VEC_VV(it).
416 */
KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)417 static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp, PetscInt it)
418 {
419 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
420 PetscInt nwork = dgmres->nwork_alloc, k, nalloc;
421
422 PetscFunctionBegin;
423 nalloc = PetscMin(ksp->max_it, dgmres->delta_allocate);
424 /* Adjust the number to allocate to make sure that we don't exceed the
425 number of available slots */
426 if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
427 if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS);
428
429 dgmres->vv_allocated += nalloc;
430
431 PetscCall(KSPCreateVecs(ksp, nalloc, &dgmres->user_work[nwork], 0, NULL));
432
433 dgmres->mwork_alloc[nwork] = nalloc;
434 for (k = 0; k < nalloc; k++) dgmres->vecs[it + VEC_OFFSET + k] = dgmres->user_work[nwork][k];
435 dgmres->nwork_alloc++;
436 PetscFunctionReturn(PETSC_SUCCESS);
437 }
438
KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec * result)439 PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp, Vec ptr, Vec *result)
440 {
441 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
442
443 PetscFunctionBegin;
444 if (!ptr) {
445 if (!dgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &dgmres->sol_temp));
446 ptr = dgmres->sol_temp;
447 }
448 if (!dgmres->nrs) {
449 /* allocate the work area */
450 PetscCall(PetscMalloc1(dgmres->max_k, &dgmres->nrs));
451 }
452 PetscCall(KSPDGMRESBuildSoln(dgmres->nrs, ksp->vec_sol, ptr, ksp, dgmres->it));
453 if (result) *result = ptr;
454 PetscFunctionReturn(PETSC_SUCCESS);
455 }
456
KSPView_DGMRES(KSP ksp,PetscViewer viewer)457 static PetscErrorCode KSPView_DGMRES(KSP ksp, PetscViewer viewer)
458 {
459 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
460 PetscBool isascii, isharmonic;
461
462 PetscFunctionBegin;
463 PetscCall(KSPView_GMRES(ksp, viewer));
464 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
465 if (isascii) {
466 PetscCall(PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: %s\n", PetscBools[dgmres->force]));
467 PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic));
468 if (isharmonic) {
469 PetscCall(PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %" PetscInt_FMT " using Harmonic Ritz values \n", dgmres->neig));
470 } else {
471 PetscCall(PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %" PetscInt_FMT " using Ritz values \n", dgmres->neig));
472 }
473 PetscCall(PetscViewerASCIIPrintf(viewer, " Total number of extracted eigenvalues = %" PetscInt_FMT "\n", dgmres->r));
474 PetscCall(PetscViewerASCIIPrintf(viewer, " Maximum number of eigenvalues set to be extracted = %" PetscInt_FMT "\n", dgmres->max_neig));
475 PetscCall(PetscViewerASCIIPrintf(viewer, " relaxation parameter for the adaptive strategy(smv) = %g\n", (double)dgmres->smv));
476 PetscCall(PetscViewerASCIIPrintf(viewer, " Number of matvecs : %" PetscInt_FMT "\n", dgmres->matvecs));
477 }
478 PetscFunctionReturn(PETSC_SUCCESS);
479 }
480
KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)481 PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp, PetscInt neig)
482 {
483 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
484
485 PetscFunctionBegin;
486 PetscCheck(neig >= 0 && neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of neig must be positive and less than the restart value ");
487 dgmres->neig = neig;
488 PetscFunctionReturn(PETSC_SUCCESS);
489 }
490
KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)491 static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp, PetscInt max_neig)
492 {
493 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
494
495 PetscFunctionBegin;
496 PetscCheck(max_neig >= 0 && max_neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of max_neig must be positive and less than the restart value ");
497 dgmres->max_neig = max_neig;
498 PetscFunctionReturn(PETSC_SUCCESS);
499 }
500
KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)501 static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp, PetscReal ratio)
502 {
503 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
504
505 PetscFunctionBegin;
506 PetscCheck(ratio > 0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The relaxation parameter value must be positive");
507 dgmres->smv = ratio;
508 PetscFunctionReturn(PETSC_SUCCESS);
509 }
510
KSPDGMRESForce_DGMRES(KSP ksp,PetscBool force)511 static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp, PetscBool force)
512 {
513 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
514
515 PetscFunctionBegin;
516 dgmres->force = force;
517 PetscFunctionReturn(PETSC_SUCCESS);
518 }
519
KSPSetFromOptions_DGMRES(KSP ksp,PetscOptionItems PetscOptionsObject)520 PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp, PetscOptionItems PetscOptionsObject)
521 {
522 PetscInt neig;
523 PetscInt max_neig;
524 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
525 PetscBool flg;
526
527 PetscFunctionBegin;
528 PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
529 PetscOptionsHeadBegin(PetscOptionsObject, "KSP DGMRES Options");
530 PetscCall(PetscOptionsInt("-ksp_dgmres_eigen", "Number of smallest eigenvalues to extract at each restart", "KSPDGMRESSetEigen", dgmres->neig, &neig, &flg));
531 if (flg) PetscCall(KSPDGMRESSetEigen(ksp, neig));
532 PetscCall(PetscOptionsInt("-ksp_dgmres_max_eigen", "Maximum Number of smallest eigenvalues to extract ", "KSPDGMRESSetMaxEigen", dgmres->max_neig, &max_neig, &flg));
533 if (flg) PetscCall(KSPDGMRESSetMaxEigen(ksp, max_neig));
534 PetscCall(PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &dgmres->smv, NULL));
535 PetscCall(PetscOptionsBool("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &dgmres->improve, NULL));
536 PetscCall(PetscOptionsBool("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &dgmres->force, NULL));
537 PetscOptionsHeadEnd();
538 PetscFunctionReturn(PETSC_SUCCESS);
539 }
540
KSPDGMRESComputeDeflationData_DGMRES(KSP ksp,PetscInt * ExtrNeig)541 PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
542 {
543 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
544 PetscInt i, j, k;
545 PetscBLASInt nr, bmax;
546 PetscInt r = dgmres->r;
547 PetscInt neig; /* number of eigenvalues to extract at each restart */
548 PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
549 PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
550 PetscInt N = dgmres->max_k + 1;
551 PetscInt n = dgmres->it + 1;
552 PetscReal alpha;
553
554 PetscFunctionBegin;
555 PetscCall(PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
556 if (dgmres->neig == 0 || (max_neig < (r + neig1) && !dgmres->improve)) {
557 PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
558 PetscFunctionReturn(PETSC_SUCCESS);
559 }
560
561 PetscCall(KSPDGMRESComputeSchurForm(ksp, &neig));
562 /* Form the extended Schur vectors X=VV*Sr */
563 if (!XX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &XX));
564 for (j = 0; j < neig; j++) PetscCall(VecMAXPBY(XX[j], n, &SR[j * N], 0, &VEC_VV(0)));
565
566 /* Orthogonalize X against U */
567 if (!ORTH) PetscCall(PetscMalloc1(max_neig, &ORTH));
568 if (r > 0) {
569 /* modified Gram-Schmidt */
570 for (j = 0; j < neig; j++) {
571 for (i = 0; i < r; i++) {
572 /* First, compute U'*X[j] */
573 PetscCall(VecDot(XX[j], UU[i], &alpha));
574 /* Then, compute X(j)=X(j)-U*U'*X(j) */
575 PetscCall(VecAXPY(XX[j], -alpha, UU[i]));
576 }
577 }
578 }
579 /* Compute MX = M^{-1}*A*X */
580 if (!MX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &MX));
581 for (j = 0; j < neig; j++) PetscCall(KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP));
582 dgmres->matvecs += neig;
583
584 if ((r + neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- expensive to do this */
585 PetscCall(KSPDGMRESImproveEig(ksp, neig));
586 PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
587 PetscFunctionReturn(PETSC_SUCCESS); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
588 }
589
590 /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
591 if (!XMX) PetscCall(PetscMalloc1(neig1 * neig1, &XMX));
592 for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, XX, &XMX[j * neig1]));
593
594 if (r > 0) {
595 /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
596 if (!UMX) PetscCall(PetscMalloc1(max_neig * neig1, &UMX));
597 for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, UU, &UMX[j * max_neig]));
598 /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
599 if (!XMU) PetscCall(PetscMalloc1(max_neig * neig1, &XMU));
600 for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, XX, &XMU[j * neig1]));
601 }
602
603 /* Form the new matrix T = [T UMX; XMU XMX]; */
604 if (!TT) PetscCall(PetscMalloc1(max_neig * max_neig, &TT));
605 if (r > 0) {
606 /* Add XMU to T */
607 for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&TT[max_neig * j + r], &XMU[neig1 * j], neig));
608 /* Add [UMX; XMX] to T */
609 for (j = 0; j < neig; j++) {
610 k = r + j;
611 PetscCall(PetscArraycpy(&TT[max_neig * k], &UMX[max_neig * j], r));
612 PetscCall(PetscArraycpy(&TT[max_neig * k + r], &XMX[neig1 * j], neig));
613 }
614 } else { /* Add XMX to T */
615 for (j = 0; j < neig; j++) PetscCall(PetscArraycpy(&TT[max_neig * j], &XMX[neig1 * j], neig));
616 }
617
618 dgmres->r += neig;
619 r = dgmres->r;
620 PetscCall(PetscBLASIntCast(r, &nr));
621 /*LU Factorize T with Lapack xgetrf routine */
622
623 PetscCall(PetscBLASIntCast(max_neig, &bmax));
624 if (!TTF) PetscCall(PetscMalloc1(bmax * bmax, &TTF));
625 PetscCall(PetscArraycpy(TTF, TT, bmax * r));
626 if (!INVP) PetscCall(PetscMalloc1(bmax, &INVP));
627 {
628 PetscBLASInt info;
629 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
630 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%" PetscBLASInt_FMT, info);
631 }
632
633 /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
634 if (!UU) {
635 PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &UU));
636 PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &MU));
637 }
638 for (j = 0; j < neig; j++) {
639 PetscCall(VecCopy(XX[j], UU[r - neig + j]));
640 PetscCall(VecCopy(MX[j], MU[r - neig + j]));
641 }
642 PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
643 PetscFunctionReturn(PETSC_SUCCESS);
644 }
645
KSPDGMRESComputeSchurForm_DGMRES(KSP ksp,PetscInt * neig)646 PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
647 {
648 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
649 PetscInt N = dgmres->max_k + 1, n = dgmres->it + 1;
650 PetscBLASInt bn;
651 PetscReal *A;
652 PetscBLASInt ihi;
653 PetscBLASInt ldA = 0; /* leading dimension of A */
654 PetscBLASInt ldQ; /* leading dimension of Q */
655 PetscReal *Q; /* orthogonal matrix of (left) Schur vectors */
656 PetscReal *work; /* working vector */
657 PetscBLASInt lwork; /* size of the working vector */
658 PetscInt *perm; /* Permutation vector to sort eigenvalues */
659 PetscInt i, j;
660 PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
661 PetscReal *wr, *wi, *modul; /* Real and imaginary part and modulus of the eigenvalues of A */
662 PetscBLASInt *select;
663 PetscBLASInt *iwork;
664 PetscBLASInt liwork;
665 PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
666 PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
667 PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
668 PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
669
670 PetscFunctionBegin;
671 PetscCall(PetscBLASIntCast(n, &bn));
672 PetscCall(PetscBLASIntCast(N, &ldA));
673 ihi = ldQ = bn;
674 PetscCall(PetscBLASIntCast(5 * N, &lwork));
675
676 #if defined(PETSC_USE_COMPLEX)
677 SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "No support for complex numbers.");
678 #endif
679
680 PetscCall(PetscMalloc1(ldA * ldA, &A));
681 PetscCall(PetscMalloc1(ldQ * n, &Q));
682 PetscCall(PetscMalloc1(lwork, &work));
683 if (!dgmres->wr) {
684 PetscCall(PetscMalloc1(n, &dgmres->wr));
685 PetscCall(PetscMalloc1(n, &dgmres->wi));
686 }
687 wr = dgmres->wr;
688 wi = dgmres->wi;
689 PetscCall(PetscMalloc1(n, &modul));
690 PetscCall(PetscMalloc1(n, &perm));
691 /* copy the Hessenberg matrix to work space */
692 PetscCall(PetscArraycpy(A, dgmres->hes_origin, ldA * ldA));
693 PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag));
694 if (flag) {
695 /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
696 /* Transpose the Hessenberg matrix */
697 PetscCall(PetscMalloc1(bn * bn, &Ht));
698 for (i = 0; i < bn; i++) {
699 for (j = 0; j < bn; j++) Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
700 }
701
702 /* Solve the system H^T*t = h_{m+1,m}e_m */
703 PetscCall(PetscCalloc1(bn, &t));
704 t[bn - 1] = dgmres->hes_origin[(bn - 1) * ldA + bn]; /* Pick the last element H(m+1,m) */
705 PetscCall(PetscMalloc1(bn, &ipiv));
706 /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
707 {
708 PetscBLASInt info;
709 PetscBLASInt nrhs = 1;
710 PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
711 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error while calling the Lapack routine DGESV");
712 }
713 /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
714 for (i = 0; i < bn; i++) A[(bn - 1) * bn + i] += t[i];
715 PetscCall(PetscFree(t));
716 PetscCall(PetscFree(Ht));
717 }
718 /* Compute eigenvalues with the Schur form */
719 {
720 PetscBLASInt info = 0;
721 PetscBLASInt ilo = 1;
722 PetscCallBLAS("LAPACKhseqr", LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
723 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XHSEQR %" PetscBLASInt_FMT, info);
724 }
725 PetscCall(PetscFree(work));
726
727 /* sort the eigenvalues */
728 for (i = 0; i < n; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
729 for (i = 0; i < n; i++) perm[i] = i;
730
731 PetscCall(PetscSortRealWithPermutation(n, modul, perm));
732 /* save the complex modulus of the largest eigenvalue in magnitude */
733 if (dgmres->lambdaN < modul[perm[n - 1]]) dgmres->lambdaN = modul[perm[n - 1]];
734 /* count the number of extracted eigenvalues (with complex conjugates) */
735 NbrEig = 0;
736 while (NbrEig < dgmres->neig) {
737 if (wi[perm[NbrEig]] != 0) NbrEig += 2;
738 else NbrEig += 1;
739 }
740 /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
741
742 PetscCall(PetscCalloc1(n, &select));
743
744 if (!dgmres->GreatestEig) {
745 for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
746 } else {
747 for (j = 0; j < NbrEig; j++) select[perm[n - j - 1]] = 1;
748 }
749 /* call Lapack dtrsen */
750 lwork = PetscMax(1, 4 * NbrEig * (bn - NbrEig));
751 liwork = PetscMax(1, 2 * NbrEig * (bn - NbrEig));
752 PetscCall(PetscMalloc1(lwork, &work));
753 PetscCall(PetscMalloc1(liwork, &iwork));
754 {
755 PetscBLASInt info = 0;
756 PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
757 PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
758 PetscCallBLAS("LAPACKtrsen", LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
759 PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Unable to reorder the eigenvalues with the LAPACK routine: ILL-CONDITIONED PROBLEM");
760 }
761 PetscCall(PetscFree(select));
762
763 /* Extract the Schur vectors */
764 for (j = 0; j < NbrEig; j++) PetscCall(PetscArraycpy(&SR[j * N], &Q[j * ldQ], n));
765 *neig = NbrEig;
766 PetscCall(PetscFree(A));
767 PetscCall(PetscFree(work));
768 PetscCall(PetscFree(perm));
769 PetscCall(PetscFree(work));
770 PetscCall(PetscFree(iwork));
771 PetscCall(PetscFree(modul));
772 PetscCall(PetscFree(Q));
773 PetscFunctionReturn(PETSC_SUCCESS);
774 }
775
KSPDGMRESApplyDeflation_DGMRES(KSP ksp,Vec x,Vec y)776 PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
777 {
778 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
779 PetscInt i, r = dgmres->r;
780 PetscReal alpha = 1.0;
781 PetscInt max_neig = dgmres->max_neig;
782 PetscBLASInt br, bmax;
783 PetscReal lambda = dgmres->lambdaN;
784
785 PetscFunctionBegin;
786 PetscCall(PetscBLASIntCast(r, &br));
787 PetscCall(PetscBLASIntCast(max_neig, &bmax));
788 PetscCall(PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
789 if (!r) {
790 PetscCall(VecCopy(x, y));
791 PetscFunctionReturn(PETSC_SUCCESS);
792 }
793 /* Compute U'*x */
794 if (!X1) {
795 PetscCall(PetscMalloc1(bmax, &X1));
796 PetscCall(PetscMalloc1(bmax, &X2));
797 }
798 PetscCall(VecMDot(x, r, UU, X1));
799
800 /* Solve T*X1=X2 for X1*/
801 PetscCall(PetscArraycpy(X2, X1, br));
802 {
803 PetscBLASInt info;
804 PetscBLASInt nrhs = 1;
805 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
806 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRS %" PetscBLASInt_FMT, info);
807 }
808 /* Iterative refinement -- is it really necessary ?? */
809 if (!WORK) {
810 PetscCall(PetscMalloc1(3 * bmax, &WORK));
811 PetscCall(PetscMalloc1(bmax, &IWORK));
812 }
813 {
814 PetscBLASInt info;
815 PetscReal berr, ferr;
816 PetscBLASInt nrhs = 1;
817 PetscCallBLAS("LAPACKgerfs", LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax, X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
818 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGERFS %" PetscBLASInt_FMT, info);
819 }
820
821 for (i = 0; i < r; i++) X2[i] = X1[i] / lambda - X2[i];
822
823 /* Compute X2=U*X2 */
824 PetscCall(VecMAXPBY(y, r, X2, 0, UU));
825 PetscCall(VecAXPY(y, alpha, x));
826
827 PetscCall(PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
828 PetscFunctionReturn(PETSC_SUCCESS);
829 }
830
KSPDGMRESImproveEig_DGMRES(KSP ksp,PetscInt neig)831 static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
832 {
833 KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
834 PetscInt j, r_old, r = dgmres->r;
835 PetscBLASInt i = 0;
836 PetscInt neig1 = dgmres->neig + EIG_OFFSET;
837 PetscInt bmax = dgmres->max_neig;
838 PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
839 PetscInt aug1 = bmax + neig1; /* maximum size of the augmented invariant basis */
840 PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
841 PetscBLASInt N; /* size of AUAU */
842 PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
843 PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
844 PetscReal *work; /* working vector */
845 PetscBLASInt lwork; /* size of the working vector */
846 PetscInt *perm; /* Permutation vector to sort eigenvalues */
847 PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modulus of the eigenvalues of A*/
848 PetscBLASInt NbrEig = 0, nr, bm;
849 PetscBLASInt *select;
850 PetscBLASInt liwork, *iwork;
851
852 PetscFunctionBegin;
853 /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
854 if (!AUU) {
855 PetscCall(PetscMalloc1(aug1 * aug1, &AUU));
856 PetscCall(PetscMalloc1(aug1 * aug1, &AUAU));
857 }
858 /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
859 * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
860 /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
861 for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], r, MU, &AUU[j * aug1]));
862 /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
863 for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], r, MU, &AUU[(r + j) * aug1]));
864 /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
865 for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], neig, MX, &AUU[j * aug1 + r]));
866 /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
867 for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], neig, MX, &AUU[(r + j) * aug1 + r]));
868
869 /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
870 /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
871 for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, MU, &AUAU[j * aug1]));
872 /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
873 for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, MU, &AUAU[(r + j) * aug1]));
874 /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
875 for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, MX, &AUAU[j * aug1 + r]));
876 /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
877 for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, MX, &AUAU[(r + j) * aug1 + r]));
878
879 /* Computation of the eigenvectors */
880 PetscCall(PetscBLASIntCast(aug1, &ldA));
881 PetscCall(PetscBLASIntCast(aug, &N));
882 lwork = 8 * N + 20; /* sizeof the working space */
883 PetscCall(PetscMalloc1(N, &wr));
884 PetscCall(PetscMalloc1(N, &wi));
885 PetscCall(PetscMalloc1(N, &beta));
886 PetscCall(PetscMalloc1(N, &modul));
887 PetscCall(PetscMalloc1(N, &perm));
888 PetscCall(PetscMalloc1(N * N, &Q));
889 PetscCall(PetscMalloc1(N * N, &Z));
890 PetscCall(PetscMalloc1(lwork, &work));
891 {
892 PetscBLASInt info = 0;
893 PetscCallBLAS("LAPACKgges", LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
894 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGGES %" PetscBLASInt_FMT, info);
895 }
896 for (i = 0; i < N; i++) {
897 if (beta[i] != 0.0) {
898 wr[i] /= beta[i];
899 wi[i] /= beta[i];
900 }
901 }
902 /* sort the eigenvalues */
903 for (i = 0; i < N; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
904 for (i = 0; i < N; i++) perm[i] = i;
905 PetscCall(PetscSortRealWithPermutation(N, modul, perm));
906 /* Save the norm of the largest eigenvalue */
907 if (dgmres->lambdaN < modul[perm[N - 1]]) dgmres->lambdaN = modul[perm[N - 1]];
908 /* Allocate space to extract the first r schur vectors */
909 if (!SR2) PetscCall(PetscMalloc1(aug1 * bmax, &SR2));
910 /* count the number of extracted eigenvalues (complex conjugates count as 2) */
911 while (NbrEig < bmax) {
912 if (wi[perm[NbrEig]] == 0) NbrEig += 1;
913 else NbrEig += 2;
914 }
915 if (NbrEig > bmax) PetscCall(PetscBLASIntCast(bmax - 1, &NbrEig));
916 r_old = r; /* previous size of r */
917 dgmres->r = r = NbrEig;
918
919 /* Select the eigenvalues to reorder */
920 PetscCall(PetscCalloc1(N, &select));
921 if (!dgmres->GreatestEig) {
922 for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
923 } else {
924 for (j = 0; j < NbrEig; j++) select[perm[N - j - 1]] = 1;
925 }
926 /* Reorder and extract the new <r> schur vectors */
927 lwork = PetscMax(4 * N + 16, 2 * NbrEig * (N - NbrEig));
928 liwork = PetscMax(N + 6, 2 * NbrEig * (N - NbrEig));
929 PetscCall(PetscFree(work));
930 PetscCall(PetscMalloc1(lwork, &work));
931 PetscCall(PetscMalloc1(liwork, &iwork));
932 {
933 PetscBLASInt info = 0;
934 PetscReal Dif[2];
935 PetscBLASInt ijob = 2;
936 PetscBLASInt wantQ = 1, wantZ = 1;
937 PetscCallBLAS("LAPACKtgsen", LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &Dif[0], work, &lwork, iwork, &liwork, &info));
938 PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Unable to reorder the eigenvalues with the LAPACK routine: ill-conditioned problem.");
939 }
940 PetscCall(PetscFree(select));
941
942 for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&SR2[j * aug1], &Z[j * N], N));
943
944 /* Multiply the Schur vectors SR2 by U (and X) to get a new U
945 -- save it temporarily in MU */
946 for (j = 0; j < r; j++) {
947 PetscCall(VecMAXPBY(MU[j], r_old, &SR2[j * aug1], 0, UU));
948 PetscCall(VecMAXPY(MU[j], neig, &SR2[j * aug1 + r_old], XX));
949 }
950 /* Form T = U'*MU*U */
951 for (j = 0; j < r; j++) {
952 PetscCall(VecCopy(MU[j], UU[j]));
953 PetscCall(KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP));
954 }
955 dgmres->matvecs += r;
956 for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, UU, &TT[j * bmax]));
957 /* Factorize T */
958 PetscCall(PetscArraycpy(TTF, TT, bmax * r));
959 PetscCall(PetscBLASIntCast(r, &nr));
960 PetscCall(PetscBLASIntCast(bmax, &bm));
961 {
962 PetscBLASInt info;
963 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
964 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%" PetscBLASInt_FMT, info);
965 }
966 /* Free Memory */
967 PetscCall(PetscFree(wr));
968 PetscCall(PetscFree(wi));
969 PetscCall(PetscFree(beta));
970 PetscCall(PetscFree(modul));
971 PetscCall(PetscFree(perm));
972 PetscCall(PetscFree(Q));
973 PetscCall(PetscFree(Z));
974 PetscCall(PetscFree(work));
975 PetscCall(PetscFree(iwork));
976 PetscFunctionReturn(PETSC_SUCCESS);
977 }
978
979 /*MC
980 KSPDGMRES - Implements the deflated GMRES as defined in {cite}`erhel1996restarted` and {cite}`wakam2013memory`
981
982 Options Database Keys:
983 GMRES Options (inherited):
984 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
985 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
986 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially
987 (otherwise groups of vectors are allocated as needed)
988 . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against
989 the Krylov space (fast) (the default)
990 . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
991 . -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
992 stability of the classical Gram-Schmidt orthogonalization.
993 - -ksp_gmres_krylov_monitor - plot the Krylov space generated
994
995 DGMRES Options Database Keys:
996 + -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
997 . -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative process
998 . -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
999 - -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1000 parsed by `PetscOptionsCreateViewer()`. If neig > 1, viewerspec should
1001 end with ":append". No vectors will be viewed if the adaptive
1002 strategy chooses not to deflate, so -ksp_dgmres_force should also
1003 be given.
1004 The deflation vectors span a subspace that may be a good
1005 approximation of the subspace of smallest eigenvectors of the
1006 preconditioned operator, so this option can aid in understanding
1007 the performance of a preconditioner.
1008
1009 Level: beginner
1010
1011 Notes:
1012 Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not supported
1013
1014 In this implementation, the adaptive strategy allows switching to deflated GMRES when the stagnation occurs.
1015
1016 Contributed by:
1017 Desire NUENTSA WAKAM, INRIA
1018
1019 .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPFGMRES`, `KSPLGMRES`,
1020 `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
1021 `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
1022 `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`, `KSPSetPCSide()`
1023 M*/
1024
KSPCreate_DGMRES(KSP ksp)1025 PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1026 {
1027 KSP_DGMRES *dgmres;
1028
1029 PetscFunctionBegin;
1030 PetscCall(PetscNew(&dgmres));
1031 ksp->data = (void *)dgmres;
1032
1033 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
1034 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
1035 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
1036
1037 ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1038 ksp->ops->setup = KSPSetUp_DGMRES;
1039 ksp->ops->solve = KSPSolve_DGMRES;
1040 ksp->ops->destroy = KSPDestroy_DGMRES;
1041 ksp->ops->view = KSPView_DGMRES;
1042 ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1043 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1044 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1045
1046 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
1047 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES));
1048 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
1049 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES));
1050 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES));
1051 /* -- New functions defined in DGMRES -- */
1052 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", KSPDGMRESSetEigen_DGMRES));
1053 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", KSPDGMRESSetMaxEigen_DGMRES));
1054 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", KSPDGMRESSetRatio_DGMRES));
1055 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", KSPDGMRESForce_DGMRES));
1056 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", KSPDGMRESComputeSchurForm_DGMRES));
1057 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", KSPDGMRESComputeDeflationData_DGMRES));
1058 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", KSPDGMRESApplyDeflation_DGMRES));
1059 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES));
1060
1061 PetscCall(PetscLogEventRegister("DGMRESCompDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData));
1062 PetscCall(PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation));
1063
1064 dgmres->haptol = 1.0e-30;
1065 dgmres->q_preallocate = PETSC_FALSE;
1066 dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1067 dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1068 dgmres->nrs = NULL;
1069 dgmres->sol_temp = NULL;
1070 dgmres->max_k = GMRES_DEFAULT_MAXK;
1071 dgmres->Rsvd = NULL;
1072 dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1073 dgmres->orthogwork = NULL;
1074
1075 /* Default values for the deflation */
1076 dgmres->r = 0;
1077 dgmres->neig = DGMRES_DEFAULT_EIG;
1078 dgmres->max_neig = DGMRES_DEFAULT_MAXEIG - 1;
1079 dgmres->lambdaN = 0.0;
1080 dgmres->smv = SMV;
1081 dgmres->matvecs = 0;
1082 dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1083 dgmres->HasSchur = PETSC_FALSE;
1084 PetscFunctionReturn(PETSC_SUCCESS);
1085 }
1086