1 /*
2 This file computes data for the deflated restarting in the Newton-basis GMRES.
3 At each restart (or at each detected stagnation in the adaptive strategy), a basis of an
4 (approximated)invariant subspace corresponding to the smallest eigenvalues is extracted from the Krylov subspace.
5 It is then used to augment the Newton basis.
6 */
7
8 #include <../src/ksp/ksp/impls/gmres/agmres/agmresimpl.h>
9
10 /* Quicksort algorithm to sort the eigenvalues in increasing orders
11 val_r - real part of eigenvalues, unchanged on exit.
12 val_i - Imaginary part of eigenvalues unchanged on exit.
13 size - Number of eigenvalues (with complex conjugates)
14 perm - contains on exit the permutation vector to reorder the vectors val_r and val_i.
15 */
16 #define DEPTH 500
KSPAGMRESQuickSort(PetscScalar * val_r,PetscScalar * val_i,PetscInt size,PetscInt * perm)17 static PetscErrorCode KSPAGMRESQuickSort(PetscScalar *val_r, PetscScalar *val_i, PetscInt size, PetscInt *perm)
18 {
19 PetscInt deb[DEPTH], fin[DEPTH];
20 PetscInt ipivot;
21 PetscScalar pivot_r, pivot_i;
22 PetscInt i, L, R, j;
23 PetscScalar abs_pivot;
24 PetscScalar abs_val;
25
26 PetscFunctionBegin;
27 /* initialize perm vector */
28 for (j = 0; j < size; j++) perm[j] = j;
29
30 deb[0] = 0;
31 fin[0] = size;
32 i = 0;
33 while (i >= 0) {
34 L = deb[i];
35 R = fin[i] - 1;
36 if (L < R) {
37 pivot_r = val_r[L];
38 pivot_i = val_i[L];
39 abs_pivot = PetscSqrtReal(pivot_r * pivot_r + pivot_i * pivot_i);
40 ipivot = perm[L];
41 PetscCheck(i != DEPTH - 1, PETSC_COMM_SELF, PETSC_ERR_MEM, "Could cause stack overflow: Try to increase the value of DEPTH ");
42 while (L < R) {
43 abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
44 while (abs_val >= abs_pivot && L < R) {
45 R--;
46 abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
47 }
48 if (L < R) {
49 val_r[L] = val_r[R];
50 val_i[L] = val_i[R];
51 perm[L] = perm[R];
52 L += 1;
53 }
54 abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
55 while (abs_val <= abs_pivot && L < R) {
56 L++;
57 abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
58 }
59 if (L < R) {
60 val_r[R] = val_r[L];
61 val_i[R] = val_i[L];
62 perm[R] = perm[L];
63 R -= 1;
64 }
65 }
66 val_r[L] = pivot_r;
67 val_i[L] = pivot_i;
68 perm[L] = ipivot;
69 deb[i + 1] = L + 1;
70 fin[i + 1] = fin[i];
71 fin[i] = L;
72 i += 1;
73 PetscCheck(i != DEPTH - 1, PETSC_COMM_SELF, PETSC_ERR_MEM, "Could cause stack overflow: Try to increase the value of DEPTH ");
74 } else i--;
75 }
76 PetscFunctionReturn(PETSC_SUCCESS);
77 }
78
79 /*
80 Compute the Schur vectors from the generalized eigenvalue problem A.u =\lambda.B.u
81 KspSize - rank of the matrices A and B, size of the current Krylov basis
82 A - Left matrix
83 B - Right matrix
84 ldA - first dimension of A as declared in the calling program
85 ldB - first dimension of B as declared in the calling program
86 IsReduced - specifies if the matrices are already in the reduced form,
87 i.e A is a Hessenberg matrix and B is upper triangular.
88 Sr - on exit, the extracted Schur vectors corresponding
89 the smallest eigenvalues (with complex conjugates)
90 CurNeig - Number of extracted eigenvalues
91 */
KSPAGMRESSchurForm(KSP ksp,PetscBLASInt KspSize,PetscScalar * A,PetscBLASInt ldA,PetscScalar * B,PetscBLASInt ldB,PetscBool IsReduced,PetscScalar * Sr,PetscInt * CurNeig)92 static PetscErrorCode KSPAGMRESSchurForm(KSP ksp, PetscBLASInt KspSize, PetscScalar *A, PetscBLASInt ldA, PetscScalar *B, PetscBLASInt ldB, PetscBool IsReduced, PetscScalar *Sr, PetscInt *CurNeig)
93 {
94 KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
95 PetscInt max_k = agmres->max_k;
96 PetscBLASInt r;
97 PetscInt neig = agmres->neig;
98 PetscScalar *wr = agmres->wr;
99 PetscScalar *wi = agmres->wi;
100 PetscScalar *beta = agmres->beta;
101 PetscScalar *Q = agmres->Q;
102 PetscScalar *Z = agmres->Z;
103 PetscScalar *work = agmres->work;
104 PetscBLASInt *select = agmres->select;
105 PetscInt *perm = agmres->perm;
106 PetscBLASInt sdim = 0;
107 PetscInt i, j;
108 PetscBLASInt info;
109 PetscBLASInt *iwork = agmres->iwork;
110 PetscBLASInt N;
111 PetscBLASInt lwork, liwork;
112 PetscBLASInt ilo;
113 PetscBLASInt ijob, wantQ, wantZ;
114 PetscScalar Dif[2];
115
116 PetscFunctionBegin;
117 ijob = 2;
118 wantQ = 1;
119 wantZ = 1;
120 PetscCall(PetscBLASIntCast(MAXKSPSIZE, &N));
121 PetscCall(PetscBLASIntCast(PetscMax(8 * N + 16, 4 * neig * (N - neig)), &lwork));
122 PetscCall(PetscBLASIntCast(2 * N * neig, &liwork));
123 ilo = 1;
124
125 /* Compute the Schur form */
126 if (IsReduced) { /* The eigenvalue problem is already in reduced form, meaning that A is upper Hessenberg and B is triangular */
127 PetscCallBLAS("LAPACKhgeqz", LAPACKhgeqz_("S", "I", "I", &KspSize, &ilo, &KspSize, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, work, &lwork, &info));
128 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Error while calling LAPACK routine xhgeqz_");
129 } else {
130 PetscCallBLAS("LAPACKgges", LAPACKgges_("V", "V", "N", NULL, &KspSize, A, &ldA, B, &ldB, &sdim, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
131 PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "Error while calling LAPACK routine xgges_");
132 }
133
134 /* We should avoid computing these ratio... */
135 for (i = 0; i < KspSize; i++) {
136 if (beta[i] != 0.0) {
137 wr[i] /= beta[i];
138 wi[i] /= beta[i];
139 }
140 }
141
142 /* Sort the eigenvalues to extract the smallest ones */
143 PetscCall(KSPAGMRESQuickSort(wr, wi, KspSize, perm));
144
145 /* Count the number of extracted eigenvalues (with complex conjugates) */
146 r = 0;
147 while (r < neig) {
148 if (wi[r] != 0) r += 2;
149 else r += 1;
150 }
151 /* Reorder the Schur decomposition so that the cluster of smallest/largest eigenvalues appears in the leading diagonal blocks of A (and B)*/
152 PetscCall(PetscArrayzero(select, N));
153 if (!agmres->GreatestEig) {
154 for (j = 0; j < r; j++) select[perm[j]] = 1;
155 } else {
156 for (j = 0; j < r; j++) select[perm[KspSize - j - 1]] = 1;
157 }
158 PetscCallBLAS("LAPACKtgsen", LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &KspSize, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, &r, NULL, NULL, &Dif[0], work, &lwork, iwork, &liwork, &info));
159 PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
160 /* Extract the Schur vectors associated to the r smallest eigenvalues */
161 PetscCall(PetscArrayzero(Sr, (N + 1) * r));
162 for (j = 0; j < r; j++) {
163 for (i = 0; i < KspSize; i++) Sr[j * (N + 1) + i] = Z[j * N + i];
164 }
165
166 /* Broadcast Sr to all other processes to have consistent data;
167 * FIXME should investigate how to get unique Schur vectors (unique QR factorization, probably the sign of rotations) */
168 PetscCallMPI(MPI_Bcast(Sr, (N + 1) * r, MPIU_SCALAR, agmres->First, PetscObjectComm((PetscObject)ksp)));
169 /* Update the Shift values for the Newton basis. This is surely necessary when applying the DeflationPrecond */
170 if (agmres->DeflPrecond) PetscCall(KSPAGMRESLejaOrdering(wr, wi, agmres->Rshift, agmres->Ishift, max_k));
171 *CurNeig = r; /* Number of extracted eigenvalues */
172 PetscFunctionReturn(PETSC_SUCCESS);
173 }
174
175 /*
176 Forms the matrices for the generalized eigenvalue problem,
177 it then compute the Schur vectors needed to augment the Newton basis.
178 */
KSPAGMRESComputeDeflationData(KSP ksp)179 PetscErrorCode KSPAGMRESComputeDeflationData(KSP ksp)
180 {
181 KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
182 Vec *U = agmres->U;
183 Vec *TmpU = agmres->TmpU;
184 PetscScalar *MatEigL = agmres->MatEigL;
185 PetscScalar *MatEigR = agmres->MatEigR;
186 PetscScalar *Sr = agmres->Sr;
187 PetscScalar alpha, beta;
188 PetscInt i, j;
189 PetscInt max_k = agmres->max_k; /* size of the non - augmented subspace */
190 PetscInt CurNeig; /* Current number of extracted eigenvalues */
191 PetscInt N = MAXKSPSIZE;
192 PetscBLASInt bN, iKspSize;
193 PetscInt lC = N + 1;
194 PetscInt KspSize = KSPSIZE;
195 PetscBLASInt blC, bKspSize;
196 PetscInt PrevNeig = agmres->r;
197
198 PetscFunctionBegin;
199 PetscCall(PetscLogEventBegin(KSP_AGMRESComputeDeflationData, ksp, 0, 0, 0));
200 if (agmres->neig <= 1) PetscFunctionReturn(PETSC_SUCCESS);
201 /* Explicitly form MatEigL = H^T*H, It can also be formed as H^T+h_{N+1,N}H^-1e^T */
202 alpha = 1.0;
203 beta = 0.0;
204 PetscCall(PetscBLASIntCast(KspSize, &bKspSize));
205 PetscCall(PetscBLASIntCast(lC, &blC));
206 PetscCall(PetscBLASIntCast(N, &bN));
207 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &bKspSize, &bKspSize, &blC, &alpha, agmres->hes_origin, &blC, agmres->hes_origin, &blC, &beta, MatEigL, &bN));
208 if (!agmres->ritz) {
209 /* Form TmpU = V*H where V is the Newton basis orthogonalized with roddec*/
210 for (j = 0; j < KspSize; j++) {
211 /* Apply the elementary reflectors (stored in Qloc) on H */
212 PetscCall(KSPAGMRESRodvec(ksp, KspSize + 1, &agmres->hes_origin[j * lC], TmpU[j]));
213 }
214 /* Now form MatEigR = TmpU^T*W where W is [VEC_V(1:max_k); U] */
215 for (j = 0; j < max_k; j++) PetscCall(VecMDot(VEC_V(j), KspSize, TmpU, &MatEigR[j * N]));
216 for (j = max_k; j < KspSize; j++) PetscCall(VecMDot(U[j - max_k], KspSize, TmpU, &MatEigR[j * N]));
217 } else { /* Form H^T */
218 for (j = 0; j < N; j++) {
219 for (i = 0; i < N; i++) MatEigR[j * N + i] = agmres->hes_origin[i * lC + j];
220 }
221 }
222 /* Obtain the Schur form of the generalized eigenvalue problem MatEigL*y = \lambda*MatEigR*y */
223 PetscCall(PetscBLASIntCast(KspSize, &iKspSize));
224 if (agmres->DeflPrecond) {
225 PetscCall(KSPAGMRESSchurForm(ksp, iKspSize, agmres->hes_origin, blC, agmres->Rloc, blC, PETSC_TRUE, Sr, &CurNeig));
226 } else {
227 PetscCall(KSPAGMRESSchurForm(ksp, iKspSize, MatEigL, bN, MatEigR, bN, PETSC_FALSE, Sr, &CurNeig));
228 }
229
230 if (agmres->DeflPrecond) { /* Switch to DGMRES to improve the basis of the invariant subspace associated to the deflation */
231 agmres->HasSchur = PETSC_TRUE;
232 PetscCall(KSPDGMRESComputeDeflationData(ksp, &CurNeig));
233 PetscFunctionReturn(PETSC_SUCCESS);
234 }
235 /* Form the Schur vectors in the entire subspace: U = W * Sr where W = [VEC_V(1:max_k); U]*/
236 for (j = 0; j < PrevNeig; j++) { /* First, copy U to a temporary place */
237 PetscCall(VecCopy(U[j], TmpU[j]));
238 }
239
240 for (j = 0; j < CurNeig; j++) {
241 PetscCall(VecMAXPBY(U[j], max_k, &Sr[j * (N + 1)], 0, &VEC_V(0)));
242 PetscCall(VecMAXPY(U[j], PrevNeig, &Sr[j * (N + 1) + max_k], TmpU));
243 }
244 agmres->r = CurNeig;
245 PetscCall(PetscLogEventEnd(KSP_AGMRESComputeDeflationData, ksp, 0, 0, 0));
246 PetscFunctionReturn(PETSC_SUCCESS);
247 }
248