xref: /petsc/src/ksp/ksp/utils/lmvm/tests/ex1.c (revision 4e8208cbcbc709572b8abe32f33c78b69c819375)
1 const char help[] = "Test correctness of MatLMVM implementations";
2 
3 #include <petscksp.h>
4 
MatSolveHermitianTranspose(Mat B,Vec x,Vec y)5 static PetscErrorCode MatSolveHermitianTranspose(Mat B, Vec x, Vec y)
6 {
7   Vec x_conj = x;
8 
9   PetscFunctionBegin;
10   if (PetscDefined(USE_COMPLEX)) {
11     PetscCall(VecDuplicate(x, &x_conj));
12     PetscCall(VecCopy(x, x_conj));
13     PetscCall(VecConjugate(x_conj));
14   }
15   PetscCall(MatSolveTranspose(B, x_conj, y));
16   if (PetscDefined(USE_COMPLEX)) {
17     PetscCall(VecDestroy(&x_conj));
18     PetscCall(VecConjugate(y));
19   }
20   PetscFunctionReturn(PETSC_SUCCESS);
21 }
22 
HermitianTransposeTest(Mat B,PetscRandom rand,PetscBool inverse)23 static PetscErrorCode HermitianTransposeTest(Mat B, PetscRandom rand, PetscBool inverse)
24 {
25   Vec         x, f, Bx, Bhf;
26   PetscScalar dot_a, dot_b;
27   PetscReal   x_norm, Bhf_norm, Bx_norm, f_norm;
28   PetscReal   err;
29   PetscReal   scale;
30 
31   PetscFunctionBegin;
32   PetscCall(MatCreateVecs(B, &x, &f));
33   PetscCall(VecSetRandom(x, rand));
34   PetscCall(VecSetRandom(f, rand));
35   PetscCall(MatCreateVecs(B, &Bhf, &Bx));
36   PetscCall((inverse ? MatSolve : MatMult)(B, x, Bx));
37   PetscCall((inverse ? MatSolveHermitianTranspose : MatMultHermitianTranspose)(B, f, Bhf));
38   PetscCall(VecNorm(x, NORM_2, &x_norm));
39   PetscCall(VecNorm(Bhf, NORM_2, &Bhf_norm));
40   PetscCall(VecNorm(Bx, NORM_2, &Bx_norm));
41   PetscCall(VecNorm(f, NORM_2, &f_norm));
42   PetscCall(VecDot(x, Bhf, &dot_a));
43   PetscCall(VecDot(Bx, f, &dot_b));
44   err   = PetscAbsScalar(dot_a - dot_b);
45   scale = PetscMax(x_norm * Bhf_norm, Bx_norm * f_norm);
46   PetscCall(PetscInfo((PetscObject)B, "Hermitian transpose error %g, relative error %g \n", (double)err, (double)(err / scale)));
47   PetscCheck(err <= PETSC_SMALL * scale, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Hermitian transpose error %g", (double)err);
48   PetscCall(VecDestroy(&x));
49   PetscCall(VecDestroy(&f));
50   PetscCall(VecDestroy(&Bx));
51   PetscCall(VecDestroy(&Bhf));
52   PetscFunctionReturn(PETSC_SUCCESS);
53 }
54 
InverseTest(Mat B,PetscRandom rand)55 static PetscErrorCode InverseTest(Mat B, PetscRandom rand)
56 {
57   Vec       x, Bx, BinvBx;
58   PetscReal x_norm, Bx_norm, err;
59   PetscReal scale;
60 
61   PetscFunctionBegin;
62   PetscCall(MatCreateVecs(B, &x, &Bx));
63   PetscCall(VecDuplicate(x, &BinvBx));
64   PetscCall(VecSetRandom(x, rand));
65   PetscCall(MatMult(B, x, Bx));
66   PetscCall(MatSolve(B, Bx, BinvBx));
67   PetscCall(VecNorm(x, NORM_2, &x_norm));
68   PetscCall(VecNorm(Bx, NORM_2, &Bx_norm));
69   PetscCall(VecAXPY(BinvBx, -1.0, x));
70   PetscCall(VecNorm(BinvBx, NORM_2, &err));
71   scale = PetscMax(x_norm, Bx_norm);
72   PetscCall(PetscInfo((PetscObject)B, "Inverse error %g, relative error %g\n", (double)err, (double)(err / scale)));
73   PetscCheck(err <= 100.0 * PETSC_SMALL * scale, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Inverse error %g", (double)err);
74   PetscCall(VecDestroy(&BinvBx));
75   PetscCall(VecDestroy(&Bx));
76   PetscCall(VecDestroy(&x));
77   PetscFunctionReturn(PETSC_SUCCESS);
78 }
79 
IsHermitianTest(Mat B,PetscRandom rand,PetscBool inverse)80 static PetscErrorCode IsHermitianTest(Mat B, PetscRandom rand, PetscBool inverse)
81 {
82   Vec         x, y, Bx, By;
83   PetscScalar dot_a, dot_b;
84   PetscReal   x_norm, By_norm, Bx_norm, y_norm;
85   PetscReal   err;
86   PetscReal   scale;
87 
88   PetscFunctionBegin;
89   PetscCall(MatCreateVecs(B, &x, &y));
90   PetscCall(VecDuplicate(x, &By));
91   PetscCall(VecDuplicate(y, &Bx));
92   PetscCall(VecSetRandom(x, rand));
93   PetscCall(VecSetRandom(y, rand));
94   PetscCall((inverse ? MatSolve : MatMult)(B, x, Bx));
95   PetscCall((inverse ? MatSolve : MatMult)(B, y, By));
96   PetscCall(VecNorm(x, NORM_2, &x_norm));
97   PetscCall(VecNorm(By, NORM_2, &By_norm));
98   PetscCall(VecNorm(Bx, NORM_2, &Bx_norm));
99   PetscCall(VecNorm(y, NORM_2, &y_norm));
100   PetscCall(VecDot(x, By, &dot_a));
101   PetscCall(VecDot(Bx, y, &dot_b));
102   err   = PetscAbsScalar(dot_a - dot_b);
103   scale = PetscMax(x_norm * By_norm, Bx_norm * y_norm);
104   PetscCall(PetscInfo((PetscObject)B, "Is Hermitian error %g, relative error %g\n", (double)err, (double)(err / scale)));
105   PetscCheck(err <= PETSC_SMALL * scale, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Is Hermitian error %g", (double)err);
106   PetscCall(VecDestroy(&By));
107   PetscCall(VecDestroy(&Bx));
108   PetscCall(VecDestroy(&y));
109   PetscCall(VecDestroy(&x));
110   PetscFunctionReturn(PETSC_SUCCESS);
111 }
112 
SecantTest(Mat B,Vec dx,Vec df,PetscBool is_hermitian,PetscBool test_inverse)113 static PetscErrorCode SecantTest(Mat B, Vec dx, Vec df, PetscBool is_hermitian, PetscBool test_inverse)
114 {
115   Vec       B_x;
116   PetscReal err, scale;
117 
118   PetscFunctionBegin;
119   if (!test_inverse) {
120     PetscCall(VecDuplicate(df, &B_x));
121     PetscCall(MatMult(B, dx, B_x));
122     PetscCall(VecAXPY(B_x, -1.0, df));
123     PetscCall(VecNorm(B_x, NORM_2, &err));
124     PetscCall(VecNorm(df, NORM_2, &scale));
125     PetscCall(PetscInfo((PetscObject)B, "Secant error %g, relative error %g\n", (double)err, (double)(err / scale)));
126     PetscCheck(err <= 10.0 * PETSC_SMALL * scale, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Secant error %g", (double)err);
127 
128     if (is_hermitian) {
129       PetscCall(MatMultHermitianTranspose(B, dx, B_x));
130       PetscCall(VecAXPY(B_x, -1.0, df));
131       PetscCall(VecNorm(B_x, NORM_2, &err));
132       PetscCall(PetscInfo((PetscObject)B, "Adjoint secant error %g, relative error %g\n", (double)err, (double)(err / scale)));
133       PetscCheck(err <= 10.0 * PETSC_SMALL * scale, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Adjoint secant error %g", (double)err);
134     }
135   } else {
136     PetscCall(VecDuplicate(df, &B_x));
137     PetscCall(MatSolve(B, df, B_x));
138     PetscCall(VecAXPY(B_x, -1.0, dx));
139 
140     PetscCall(VecNorm(B_x, NORM_2, &err));
141     PetscCall(VecNorm(dx, NORM_2, &scale));
142     PetscCall(PetscInfo((PetscObject)B, "Inverse secant error %g, relative error %g\n", (double)err, (double)(err / scale)));
143     PetscCheck(err <= PETSC_SMALL * scale, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Inverse secant error %g", (double)err);
144 
145     if (is_hermitian) {
146       PetscCall(MatSolveHermitianTranspose(B, df, B_x));
147       PetscCall(VecAXPY(B_x, -1.0, dx));
148       PetscCall(VecNorm(B_x, NORM_2, &err));
149       PetscCall(PetscInfo((PetscObject)B, "Adjoint inverse secant error %g, relative error %g\n", (double)err, (double)(err / scale)));
150       PetscCheck(err <= PETSC_SMALL * scale, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Adjoint inverse secant error %g", (double)err);
151     }
152   }
153   PetscCall(VecDestroy(&B_x));
154   PetscFunctionReturn(PETSC_SUCCESS);
155 }
156 
RankOneAXPY(Mat C,PetscScalar alpha,Vec x,Vec y)157 static PetscErrorCode RankOneAXPY(Mat C, PetscScalar alpha, Vec x, Vec y)
158 {
159   PetscInt           m, n, M, N;
160   Mat                col_mat, row_mat;
161   const PetscScalar *x_a, *y_a;
162   PetscScalar       *x_mat_a, *y_mat_a;
163   Mat                outer_product;
164 
165   PetscFunctionBegin;
166   PetscCall(MatGetSize(C, &M, &N));
167   PetscCall(MatGetLocalSize(C, &m, &n));
168 
169   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)C), m, PETSC_DECIDE, M, 1, NULL, &col_mat));
170   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)C), n, PETSC_DECIDE, N, 1, NULL, &row_mat));
171 
172   PetscCall(VecGetArrayRead(x, &x_a));
173   PetscCall(VecGetArrayRead(y, &y_a));
174 
175   PetscCall(MatDenseGetColumn(col_mat, 0, &x_mat_a));
176   PetscCall(MatDenseGetColumn(row_mat, 0, &y_mat_a));
177 
178   PetscCall(PetscArraycpy(x_mat_a, x_a, m));
179   PetscCall(PetscArraycpy(y_mat_a, y_a, n));
180 
181   PetscCall(MatDenseRestoreColumn(row_mat, &y_mat_a));
182   PetscCall(MatDenseRestoreColumn(col_mat, &x_mat_a));
183 
184   PetscCall(VecRestoreArrayRead(y, &y_a));
185   PetscCall(VecRestoreArrayRead(x, &x_a));
186 
187   PetscCall(MatConjugate(row_mat));
188   PetscCall(MatMatTransposeMult(col_mat, row_mat, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &outer_product));
189 
190   PetscCall(MatAXPY(C, alpha, outer_product, SAME_NONZERO_PATTERN));
191 
192   PetscCall(MatDestroy(&outer_product));
193   PetscCall(MatDestroy(&row_mat));
194   PetscCall(MatDestroy(&col_mat));
195   PetscFunctionReturn(PETSC_SUCCESS);
196 }
197 
BroydenUpdate_Explicit(Mat B,PetscReal unused_,Vec s,Vec y)198 static PetscErrorCode BroydenUpdate_Explicit(Mat B, PetscReal unused_, Vec s, Vec y)
199 {
200   PetscScalar sts;
201   Vec         ymBs;
202 
203   PetscFunctionBegin;
204   PetscCall(VecDot(s, s, &sts));
205   PetscCall(VecDuplicate(y, &ymBs));
206   PetscCall(MatMult(B, s, ymBs));
207   PetscCall(VecAYPX(ymBs, -1.0, y));
208   PetscCall(RankOneAXPY(B, 1.0 / sts, ymBs, s));
209   PetscCall(VecDestroy(&ymBs));
210   PetscFunctionReturn(PETSC_SUCCESS);
211 }
212 
BadBroydenUpdate_Explicit(Mat B,PetscReal unused_,Vec s,Vec y)213 static PetscErrorCode BadBroydenUpdate_Explicit(Mat B, PetscReal unused_, Vec s, Vec y)
214 {
215   PetscScalar ytBs;
216   Vec         Bty, ymBs;
217 
218   PetscFunctionBegin;
219   PetscCall(VecDuplicate(y, &ymBs));
220   PetscCall(VecDuplicate(s, &Bty));
221   PetscCall(MatMult(B, s, ymBs));
222   PetscCall(VecDot(ymBs, y, &ytBs));
223   PetscCall(VecAYPX(ymBs, -1.0, y));
224   PetscCall(MatMultHermitianTranspose(B, y, Bty));
225   PetscCall(RankOneAXPY(B, 1.0 / ytBs, ymBs, Bty));
226   PetscCall(VecDestroy(&Bty));
227   PetscCall(VecDestroy(&ymBs));
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
SymmetricBroydenUpdate_Explicit(Mat B,PetscReal phi,Vec s,Vec y)231 static PetscErrorCode SymmetricBroydenUpdate_Explicit(Mat B, PetscReal phi, Vec s, Vec y)
232 {
233   Vec         Bs;
234   PetscScalar stBs, yts;
235 
236   PetscFunctionBegin;
237   PetscCall(VecDuplicate(y, &Bs));
238   PetscCall(MatMult(B, s, Bs));
239   PetscCall(VecDot(s, Bs, &stBs));
240   PetscCall(VecDot(s, y, &yts));
241   PetscCall(RankOneAXPY(B, (yts + phi * stBs) / (yts * yts), y, y));
242   PetscCall(RankOneAXPY(B, -phi / yts, y, Bs));
243   PetscCall(RankOneAXPY(B, -phi / yts, Bs, y));
244   PetscCall(RankOneAXPY(B, (phi - 1.0) / stBs, Bs, Bs));
245   PetscCall(VecDestroy(&Bs));
246   PetscFunctionReturn(PETSC_SUCCESS);
247 }
248 
BFGSUpdate_Explicit(Mat B,PetscReal unused_,Vec s,Vec y)249 static PetscErrorCode BFGSUpdate_Explicit(Mat B, PetscReal unused_, Vec s, Vec y)
250 {
251   PetscFunctionBegin;
252   PetscCall(SymmetricBroydenUpdate_Explicit(B, 0.0, s, y));
253   PetscFunctionReturn(PETSC_SUCCESS);
254 }
255 
DFPUpdate_Explicit(Mat B,PetscReal unused_,Vec s,Vec y)256 static PetscErrorCode DFPUpdate_Explicit(Mat B, PetscReal unused_, Vec s, Vec y)
257 {
258   PetscFunctionBegin;
259   PetscCall(SymmetricBroydenUpdate_Explicit(B, 1.0, s, y));
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
SR1Update_Explicit(Mat B,PetscReal unused_,Vec s,Vec y)263 static PetscErrorCode SR1Update_Explicit(Mat B, PetscReal unused_, Vec s, Vec y)
264 {
265   PetscScalar ymBsts;
266   Vec         Bty, ymBs;
267 
268   PetscFunctionBegin;
269   PetscCall(VecDuplicate(y, &ymBs));
270   PetscCall(VecDuplicate(s, &Bty));
271   PetscCall(MatMult(B, s, ymBs));
272   PetscCall(VecAYPX(ymBs, -1.0, y));
273   PetscCall(VecDot(s, ymBs, &ymBsts));
274   PetscCall(RankOneAXPY(B, 1.0 / ymBsts, ymBs, ymBs));
275   PetscCall(VecDestroy(&Bty));
276   PetscCall(VecDestroy(&ymBs));
277   PetscFunctionReturn(PETSC_SUCCESS);
278 }
279 
MatMult_Solve(Mat A,Vec x,Vec y)280 static PetscErrorCode MatMult_Solve(Mat A, Vec x, Vec y)
281 {
282   Mat B;
283 
284   PetscFunctionBegin;
285   PetscCall(MatShellGetContext(A, &B));
286   PetscCall(MatSolve(B, x, y));
287   PetscFunctionReturn(PETSC_SUCCESS);
288 }
289 
MatMult_J0Solve(Mat A,Vec x,Vec y)290 static PetscErrorCode MatMult_J0Solve(Mat A, Vec x, Vec y)
291 {
292   Mat B;
293 
294   PetscFunctionBegin;
295   PetscCall(MatShellGetContext(A, &B));
296   PetscCall(MatLMVMApplyJ0Inv(B, x, y));
297   PetscFunctionReturn(PETSC_SUCCESS);
298 }
299 
MatComputeInverseOperator(Mat B,Mat * B_k,PetscBool use_J0)300 static PetscErrorCode MatComputeInverseOperator(Mat B, Mat *B_k, PetscBool use_J0)
301 {
302   Mat      Binv;
303   PetscInt m, n, M, N;
304 
305   PetscFunctionBegin;
306   PetscCall(MatGetSize(B, &M, &N));
307   PetscCall(MatGetLocalSize(B, &m, &n));
308   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)B), m, n, M, N, (void *)B, &Binv));
309   PetscCall(MatShellSetOperation(Binv, MATOP_MULT, (PetscErrorCodeFn *)(use_J0 ? MatMult_J0Solve : MatMult_Solve)));
310   PetscCall(MatComputeOperator(Binv, MATDENSE, B_k));
311   PetscCall(MatDestroy(&Binv));
312   PetscFunctionReturn(PETSC_SUCCESS);
313 }
314 
TestUpdate(Mat B,PetscInt iter,PetscRandom rand,PetscBool is_hermitian,Vec dxs[],Vec dfs[],Mat B_0,Mat H_0,PetscErrorCode (* B_update)(Mat,PetscReal,Vec,Vec),PetscErrorCode (* H_update)(Mat,PetscReal,Vec,Vec),PetscReal phi)315 static PetscErrorCode TestUpdate(Mat B, PetscInt iter, PetscRandom rand, PetscBool is_hermitian, Vec dxs[], Vec dfs[], Mat B_0, Mat H_0, PetscErrorCode (*B_update)(Mat, PetscReal, Vec, Vec), PetscErrorCode (*H_update)(Mat, PetscReal, Vec, Vec), PetscReal phi)
316 {
317   PetscLayout rmap, cmap;
318   PetscBool   is_invertible;
319   Mat         J_0;
320   Vec         x, dx, f, x_prev, f_prev, df;
321   PetscInt    m;
322   PetscScalar rho;
323 
324   PetscFunctionBegin;
325   PetscCall(MatLMVMGetHistorySize(B, &m));
326   PetscCall(MatGetLayouts(B, &rmap, &cmap));
327   PetscCall(PetscLayoutCompare(rmap, cmap, &is_invertible));
328 
329   PetscCall(MatLMVMGetJ0(B, &J_0));
330 
331   dx = dxs[iter];
332   df = dfs[iter];
333 
334   PetscCall(MatLMVMGetLastUpdate(B, &x_prev, &f_prev));
335   PetscCall(VecDuplicate(x_prev, &x));
336   PetscCall(VecDuplicate(f_prev, &f));
337   PetscCall(VecSetRandom(dx, rand));
338   PetscCall(VecSetRandom(df, rand));
339 
340   if (is_hermitian) {
341     PetscCall(VecDot(dx, df, &rho));
342     PetscCall(VecScale(dx, PetscAbsScalar(rho) / rho));
343   } else {
344     Vec Bdx;
345 
346     PetscCall(VecDuplicate(df, &Bdx));
347     PetscCall(MatMult(B, dx, Bdx));
348     PetscCall(VecDot(Bdx, df, &rho));
349     PetscCall(VecScale(dx, PetscAbsScalar(rho) / rho));
350     PetscCall(VecDestroy(&Bdx));
351   }
352   PetscCall(VecWAXPY(x, 1.0, x_prev, dx));
353   PetscCall(VecWAXPY(f, 1.0, f_prev, df));
354   PetscCall(MatLMVMUpdate(B, x, f));
355   PetscCall(VecDestroy(&x));
356 
357   PetscCall(HermitianTransposeTest(B, rand, PETSC_FALSE));
358   if (is_hermitian) PetscCall(IsHermitianTest(B, rand, PETSC_FALSE));
359   if (is_invertible) {
360     PetscCall(InverseTest(B, rand));
361     PetscCall(HermitianTransposeTest(B, rand, PETSC_TRUE));
362     if (is_hermitian) PetscCall(IsHermitianTest(B, rand, PETSC_TRUE));
363   }
364 
365   if (iter < m) {
366     PetscCall(SecantTest(B, dx, df, is_hermitian, PETSC_FALSE));
367     if (is_invertible) PetscCall(SecantTest(B, dx, df, is_hermitian, PETSC_TRUE));
368   }
369 
370   if (is_invertible) {
371     // some implementations use internal caching to compute the product Hf: double check that this is working
372     Vec       f_copy, Hf, Hf_copy;
373     PetscReal norm, err;
374 
375     PetscCall(VecDuplicate(f, &f_copy));
376     PetscCall(VecCopy(f, f_copy));
377     PetscCall(VecDuplicate(x_prev, &Hf));
378     PetscCall(VecDuplicate(x_prev, &Hf_copy));
379     PetscCall(MatSolve(B, f, Hf));
380     PetscCall(MatSolve(B, f_copy, Hf_copy));
381     PetscCall(VecNorm(Hf_copy, NORM_2, &norm));
382     PetscCall(VecAXPY(Hf, -1.0, Hf_copy));
383     PetscCall(VecNorm(Hf, NORM_2, &err));
384     PetscCheck(err <= PETSC_SMALL * norm, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Gradient solve error %g", (double)err);
385 
386     PetscCall(VecDestroy(&Hf_copy));
387     PetscCall(VecDestroy(&Hf));
388     PetscCall(VecDestroy(&f_copy));
389   }
390 
391   PetscCall(VecDestroy(&f));
392 
393   if (B_update) {
394     PetscInt  oldest, next;
395     Mat       B_k_exp;
396     Mat       B_k;
397     PetscReal norm, err;
398 
399     oldest = PetscMax(0, iter + 1 - m);
400     next   = iter + 1;
401 
402     PetscCall(MatComputeOperator(B, MATDENSE, &B_k));
403     PetscCall(MatDuplicate(B_0, MAT_COPY_VALUES, &B_k_exp));
404 
405     for (PetscInt i = oldest; i < next; i++) PetscCall((*B_update)(B_k_exp, phi, dxs[i], dfs[i]));
406     PetscCall(MatNorm(B_k_exp, NORM_FROBENIUS, &norm));
407     PetscCall(MatAXPY(B_k_exp, -1.0, B_k, SAME_NONZERO_PATTERN));
408     PetscCall(MatNorm(B_k_exp, NORM_FROBENIUS, &err));
409     PetscCall(PetscInfo((PetscObject)B_k, "Forward update error %g, relative error %g\n", (double)err, (double)(err / norm)));
410     PetscCheck(err <= PETSC_SMALL * norm, PetscObjectComm((PetscObject)B_k), PETSC_ERR_PLIB, "Forward update error %g", (double)err);
411 
412     PetscCall(MatDestroy(&B_k_exp));
413     PetscCall(MatDestroy(&B_k));
414   }
415   if (H_update) {
416     PetscInt  oldest, next;
417     Mat       H_k;
418     Mat       H_k_exp;
419     PetscReal norm, err;
420 
421     oldest = PetscMax(0, iter + 1 - m);
422     next   = iter + 1;
423 
424     PetscCall(MatComputeInverseOperator(B, &H_k, PETSC_FALSE));
425     PetscCall(MatDuplicate(H_0, MAT_COPY_VALUES, &H_k_exp));
426     for (PetscInt i = oldest; i < next; i++) PetscCall((*H_update)(H_k_exp, phi, dfs[i], dxs[i]));
427     PetscCall(MatNorm(H_k_exp, NORM_FROBENIUS, &norm));
428     PetscCall(MatAXPY(H_k_exp, -1.0, H_k, SAME_NONZERO_PATTERN));
429     PetscCall(MatNorm(H_k_exp, NORM_FROBENIUS, &err));
430     PetscCall(PetscInfo((PetscObject)H_k, "Forward update error %g, relative error %g\n", (double)err, (double)(err / norm)));
431     PetscCheck(err <= PETSC_SMALL * norm, PetscObjectComm((PetscObject)H_k), PETSC_ERR_PLIB, "Inverse update error %g", (double)err);
432 
433     PetscCall(MatDestroy(&H_k_exp));
434     PetscCall(MatDestroy(&H_k));
435   }
436   PetscFunctionReturn(PETSC_SUCCESS);
437 }
438 
MatSetRandomWithShift(Mat J0,PetscRandom rand,PetscBool is_hermitian,PetscBool is_square)439 static PetscErrorCode MatSetRandomWithShift(Mat J0, PetscRandom rand, PetscBool is_hermitian, PetscBool is_square)
440 {
441   PetscFunctionBegin;
442   PetscCall(MatSetRandom(J0, rand));
443   if (is_hermitian) {
444     Mat J0H;
445 
446     PetscCall(MatHermitianTranspose(J0, MAT_INITIAL_MATRIX, &J0H));
447     PetscCall(MatAXPY(J0, 1.0, J0H, SAME_NONZERO_PATTERN));
448     PetscCall(MatDestroy(&J0H));
449   }
450   if (is_square) {
451     MPI_Comm    comm;
452     PetscInt    N;
453     PetscMPIInt count;
454     Mat         J0copy;
455     PetscReal  *real_eig, *imag_eig;
456     KSP         kspeig;
457     PC          pceig;
458     PetscReal   shift;
459 
460     PetscCall(PetscObjectGetComm((PetscObject)J0, &comm));
461     PetscCall(MatGetSize(J0, &N, NULL));
462     PetscCall(MatDuplicate(J0, MAT_COPY_VALUES, &J0copy));
463     PetscCall(PetscMalloc2(N, &real_eig, N, &imag_eig));
464     PetscCall(KSPCreate(comm, &kspeig));
465     if (is_hermitian) PetscCall(KSPSetType(kspeig, KSPMINRES));
466     else PetscCall(KSPSetType(kspeig, KSPGMRES));
467     PetscCall(KSPSetPCSide(kspeig, PC_LEFT));
468     PetscCall(KSPGetPC(kspeig, &pceig));
469     PetscCall(PCSetType(pceig, PCNONE));
470     PetscCall(KSPSetOperators(kspeig, J0copy, J0copy));
471     PetscCall(KSPComputeEigenvaluesExplicitly(kspeig, N, real_eig, imag_eig));
472     PetscCall(PetscMPIIntCast(N, &count));
473     PetscCallMPI(MPI_Bcast(real_eig, count, MPIU_REAL, 0, comm));
474     PetscCall(PetscSortReal(N, real_eig));
475     shift = PetscMax(2 * PetscAbsReal(real_eig[N - 1]), 2 * PetscAbsReal(real_eig[0]));
476     PetscCall(MatShift(J0, shift));
477     PetscCall(MatAssemblyBegin(J0, MAT_FINAL_ASSEMBLY));
478     PetscCall(MatAssemblyEnd(J0, MAT_FINAL_ASSEMBLY));
479     PetscCall(PetscFree2(real_eig, imag_eig));
480     PetscCall(KSPDestroy(&kspeig));
481     PetscCall(MatDestroy(&J0copy));
482   }
483   PetscFunctionReturn(PETSC_SUCCESS);
484 }
485 
main(int argc,char ** argv)486 int main(int argc, char **argv)
487 {
488   PetscInt    M = 15, N = 15, hist_size = 5, n_iter = 3 * hist_size, n_variable = n_iter / 2;
489   PetscReal   phi = 0.0;
490   MPI_Comm    comm;
491   Mat         B, B_0, J_0, H_0 = NULL;
492   PetscBool   B_is_h, B_is_h_known, is_hermitian, is_square, has_solve;
493   PetscBool   is_brdn, is_badbrdn, is_dfp, is_bfgs, is_sr1, is_symbrdn, is_symbadbrdn, is_dbfgs, is_ddfp;
494   PetscRandom rand;
495   Vec         x, f;
496   PetscLayout rmap, cmap;
497   Vec        *dxs, *dfs;
498   PetscErrorCode (*B_update)(Mat, PetscReal, Vec, Vec) = NULL;
499   PetscErrorCode (*H_update)(Mat, PetscReal, Vec, Vec) = NULL;
500 
501   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
502   comm = PETSC_COMM_WORLD;
503   PetscOptionsBegin(comm, NULL, help, NULL);
504   PetscCall(PetscOptionsInt("-m", "# Matrix rows", NULL, M, &M, NULL));
505   PetscCall(PetscOptionsInt("-n", "# Matrix columns", NULL, N, &N, NULL));
506   PetscCall(PetscOptionsInt("-n_iter", "# test iterations", NULL, n_iter, &n_iter, NULL));
507   PetscCall(PetscOptionsInt("-n_variable", "# test iterations where J0 changeschange J0 every iteration", NULL, n_variable, &n_variable, NULL));
508   PetscOptionsEnd();
509 
510   PetscCall(PetscRandomCreate(comm, &rand));
511   if (PetscDefined(USE_COMPLEX)) PetscCall(PetscRandomSetInterval(rand, -1.0 - PetscSqrtScalar(-1.0), 1.0 + PetscSqrtScalar(-1.0)));
512   else PetscCall(PetscRandomSetInterval(rand, -1.0, 1.0));
513 
514   PetscCall(VecCreate(comm, &x));
515   PetscCall(VecSetSizes(x, PETSC_DECIDE, N));
516   PetscCall(VecSetFromOptions(x));
517   PetscCall(VecSetRandom(x, rand));
518   PetscCall(VecCreate(comm, &f));
519   PetscCall(VecSetFromOptions(f));
520   PetscCall(VecSetSizes(f, PETSC_DECIDE, M));
521   PetscCall(VecSetRandom(f, rand));
522 
523   PetscCall(MatCreate(comm, &B));
524   PetscCall(MatSetSizes(B, PETSC_DECIDE, PETSC_DECIDE, M, N));
525   PetscCall(MatSetOptionsPrefix(B, "B_"));
526   PetscCall(KSPInitializePackage());
527   PetscCall(MatSetType(B, MATLMVMBROYDEN));
528   PetscCall(MatLMVMSetHistorySize(B, hist_size));
529   PetscCall(MatLMVMAllocate(B, x, f));
530   PetscCall(MatSetFromOptions(B));
531   PetscCall(MatSetUp(B));
532 
533   PetscCall(MatIsHermitianKnown(B, &B_is_h_known, &B_is_h));
534   is_hermitian = (B_is_h_known && B_is_h) ? PETSC_TRUE : PETSC_FALSE;
535   PetscCall(MatGetLayouts(B, &rmap, &cmap));
536   PetscCall(PetscLayoutCompare(rmap, cmap, &is_square));
537 
538   PetscCall(MatLMVMGetJ0(B, &J_0));
539   PetscCall(MatSetRandomWithShift(J_0, rand, is_hermitian, is_square));
540 
541   PetscCall(PetscObjectTypeCompareAny((PetscObject)J_0, &has_solve, MATCONSTANTDIAGONAL, MATDIAGONAL, ""));
542   if (is_square && !has_solve) {
543     KSP ksp;
544     PC  pc;
545 
546     PetscCall(MatLMVMGetJ0KSP(B, &ksp));
547     if (is_hermitian) {
548       PetscCall(KSPSetType(ksp, KSPCG));
549       PetscCall(KSPCGSetType(ksp, KSP_CG_HERMITIAN));
550     } else PetscCall(KSPSetType(ksp, KSPGMRES));
551     PetscCall(KSPSetPCSide(ksp, PC_LEFT));
552     PetscCall(KSPSetNormType(ksp, KSP_NORM_NONE));
553     PetscCall(KSPSetTolerances(ksp, 0.0, 0.0, 0.0, N));
554     PetscCall(KSPGetPC(ksp, &pc));
555     PetscCall(PCSetType(pc, PCNONE));
556     PetscCall(MatLMVMSetJ0KSP(B, ksp));
557   }
558 
559   PetscCall(MatViewFromOptions(B, NULL, "-view"));
560   PetscCall(MatViewFromOptions(J_0, NULL, "-view"));
561 
562   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMBROYDEN, &is_brdn));
563   if (is_brdn) {
564     B_update = BroydenUpdate_Explicit;
565     if (is_square) H_update = BadBroydenUpdate_Explicit;
566   }
567 
568   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMBADBROYDEN, &is_badbrdn));
569   if (is_badbrdn) {
570     B_update = BadBroydenUpdate_Explicit;
571     if (is_square) H_update = BroydenUpdate_Explicit;
572   }
573 
574   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDFP, &is_dfp));
575   if (is_dfp) {
576     B_update = DFPUpdate_Explicit;
577     H_update = BFGSUpdate_Explicit;
578   }
579 
580   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMBFGS, &is_bfgs));
581   if (is_bfgs) {
582     B_update = BFGSUpdate_Explicit;
583     H_update = DFPUpdate_Explicit;
584   }
585 
586   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMSYMBROYDEN, &is_symbrdn));
587   if (is_symbrdn) {
588     B_update = SymmetricBroydenUpdate_Explicit;
589     PetscCall(MatLMVMSymBroydenGetPhi(B, &phi));
590   }
591 
592   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMSYMBADBROYDEN, &is_symbadbrdn));
593   if (is_symbadbrdn) {
594     H_update = SymmetricBroydenUpdate_Explicit;
595     PetscCall(MatLMVMSymBadBroydenGetPsi(B, &phi));
596   }
597 
598   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMSR1, &is_sr1));
599   if (is_sr1) {
600     B_update = SR1Update_Explicit;
601     H_update = SR1Update_Explicit;
602   }
603 
604   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
605   if (is_dbfgs) {
606     B_update = BFGSUpdate_Explicit;
607     H_update = DFPUpdate_Explicit;
608   }
609 
610   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
611   if (is_ddfp) {
612     B_update = DFPUpdate_Explicit;
613     H_update = BFGSUpdate_Explicit;
614   }
615 
616   PetscCall(MatComputeOperator(J_0, MATDENSE, &B_0));
617   if (is_square) PetscCall(MatComputeInverseOperator(B, &H_0, PETSC_TRUE));
618 
619   // Initialize with the first location
620   PetscCall(MatLMVMUpdate(B, x, f));
621   PetscCall(VecDestroy(&x));
622   PetscCall(VecDestroy(&f));
623 
624   PetscCall(HermitianTransposeTest(B, rand, PETSC_FALSE));
625   if (is_hermitian) PetscCall(IsHermitianTest(B, rand, PETSC_FALSE));
626   if (is_square) {
627     PetscCall(InverseTest(B, rand));
628     if (is_hermitian) PetscCall(IsHermitianTest(B, rand, PETSC_TRUE));
629     PetscCall(HermitianTransposeTest(B, rand, PETSC_TRUE));
630   }
631 
632   PetscCall(PetscCalloc2(n_iter, &dxs, n_iter, &dfs));
633 
634   for (PetscInt i = 0; i < n_iter; i++) PetscCall(MatCreateVecs(B, &dxs[i], &dfs[i]));
635 
636   for (PetscInt i = 0; i < n_iter; i++) {
637     PetscCall(TestUpdate(B, i, rand, is_hermitian, dxs, dfs, B_0, H_0, B_update, H_update, phi));
638     if (i + n_variable >= n_iter) {
639       PetscCall(MatSetRandomWithShift(J_0, rand, is_hermitian, is_square));
640       PetscCall(MatLMVMSetJ0(B, J_0));
641       PetscCall(MatDestroy(&B_0));
642       PetscCall(MatDestroy(&H_0));
643       PetscCall(MatComputeOperator(J_0, MATDENSE, &B_0));
644       if (is_square) PetscCall(MatComputeInverseOperator(B, &H_0, PETSC_TRUE));
645     }
646   }
647 
648   for (PetscInt i = 0; i < n_iter; i++) {
649     PetscCall(VecDestroy(&dxs[i]));
650     PetscCall(VecDestroy(&dfs[i]));
651   }
652 
653   PetscCall(PetscFree2(dxs, dfs));
654 
655   PetscCall(PetscRandomDestroy(&rand));
656 
657   PetscCall(MatDestroy(&H_0));
658   PetscCall(MatDestroy(&B_0));
659   PetscCall(MatDestroy(&B));
660   PetscCall(PetscFinalize());
661   return 0;
662 }
663 
664 /*TEST
665 
666   # TODO: enable hip complex if `#undef PETSC_HAVE_COMPLEX` can be removed from src/vec/is/sf/impls/basic/cupm/hip/sfcupm.hip.hpp
667 
668   # rectangular tests
669   testset:
670     requires: !single
671     nsize: 2
672     output_file: output/empty.out
673     args: -m 15 -n 10 -B_mat_lmvm_J0_mat_type dense -B_mat_lmvm_mult_algorithm {{recursive dense compact_dense}}
674     test:
675       suffix: broyden_rectangular
676       args: -B_mat_type lmvmbroyden
677     test:
678       suffix: badbroyden_rectangular
679       args: -B_mat_type lmvmbadbroyden
680     test:
681       suffix: broyden_rectangular_cuda
682       requires: cuda
683       args: -B_mat_type lmvmbroyden -vec_type cuda -B_mat_lmvm_J0_mat_type densecuda
684     test:
685       suffix: badbroyden_rectangular_cuda
686       requires: cuda
687       args: -B_mat_type lmvmbadbroyden -vec_type cuda -B_mat_lmvm_J0_mat_type densecuda
688     test:
689       suffix: broyden_rectangular_hip
690       requires: hip !complex
691       args: -B_mat_type lmvmbroyden -vec_type hip -B_mat_lmvm_J0_mat_type densehip
692     test:
693       suffix: badbroyden_rectangular_hip
694       requires: hip !complex
695       args: -B_mat_type lmvmbadbroyden -vec_type hip -B_mat_lmvm_J0_mat_type densehip
696 
697   # square tests where compact_dense != dense
698   testset:
699     requires: !single
700     nsize: 2
701     output_file: output/empty.out
702     args: -m 15 -n 15 -B_mat_lmvm_J0_mat_type {{constantdiagonal diagonal}} -B_mat_lmvm_mult_algorithm {{recursive dense compact_dense}} -B_mat_lmvm_cache_J0_products {{false true}}
703     test:
704       suffix: broyden_square
705       args: -B_mat_type lmvmbroyden
706     test:
707       suffix: badbroyden_square
708       args: -B_mat_type lmvmbadbroyden
709     test:
710       suffix: broyden_square_cuda
711       requires: cuda
712       args: -B_mat_type lmvmbroyden -vec_type cuda
713     test:
714       suffix: badbroyden_square_cuda
715       requires: cuda
716       args: -B_mat_type lmvmbadbroyden -vec_type cuda
717     test:
718       suffix: broyden_square_hip
719       requires: hip !complex
720       args: -B_mat_type lmvmbroyden -vec_type hip
721     test:
722       suffix: badbroyden_square_hip
723       requires: hip !complex
724       args: -B_mat_type lmvmbadbroyden -vec_type hip
725 
726   # square tests where compact_dense == dense
727   testset:
728     requires: !single
729     nsize: 2
730     output_file: output/empty.out
731     args: -m 15 -n 15 -B_mat_lmvm_J0_mat_type dense -B_mat_lmvm_mult_algorithm {{recursive dense}}
732     test:
733       output_file: output/ex1_sr1.out
734       suffix: sr1
735       args: -B_mat_type lmvmsr1 -B_mat_lmvm_debug -B_view -B_mat_lmvm_cache_J0_products {{false true}}
736       filter: grep -v "variant HERMITIAN"
737     test:
738       suffix: symbroyden
739       args: -B_mat_lmvm_scale_type user -B_mat_type lmvmsymbroyden -B_mat_lmvm_phi {{0.0 0.6 1.0}}
740     test:
741       suffix: symbadbroyden
742       args: -B_mat_lmvm_scale_type user -B_mat_type lmvmsymbadbroyden -B_mat_lmvm_psi {{0.0 0.6 1.0}}
743     test:
744       suffix: sr1_cuda
745       requires: cuda
746       args: -B_mat_type lmvmsr1 -vec_type cuda -B_mat_lmvm_J0_mat_type densecuda
747     test:
748       suffix: symbroyden_cuda
749       requires: cuda
750       args: -B_mat_lmvm_scale_type user -B_mat_type lmvmsymbroyden -B_mat_lmvm_phi {{0.0 0.6 1.0}} -vec_type cuda -B_mat_lmvm_J0_mat_type densecuda
751     test:
752       suffix: symbadbroyden_cuda
753       requires: cuda
754       args: -B_mat_lmvm_scale_type user -B_mat_type lmvmsymbadbroyden -B_mat_lmvm_psi {{0.0 0.6 1.0}} -vec_type cuda -B_mat_lmvm_J0_mat_type densecuda
755     test:
756       suffix: sr1_hip
757       requires: hip !complex
758       args: -B_mat_type lmvmsr1 -vec_type hip -B_mat_lmvm_J0_mat_type densehip
759     test:
760       suffix: symbroyden_hip
761       requires: hip !complex
762       args: -B_mat_lmvm_scale_type user -B_mat_type lmvmsymbroyden -B_mat_lmvm_phi {{0.0 0.6 1.0}} -vec_type hip -B_mat_lmvm_J0_mat_type densehip
763     test:
764       suffix: symbadbroyden_hip
765       requires: hip !complex
766       args: -B_mat_lmvm_scale_type user -B_mat_type lmvmsymbadbroyden -B_mat_lmvm_psi {{0.0 0.6 1.0}} -vec_type hip -B_mat_lmvm_J0_mat_type densehip
767 
768   testset:
769     requires: !single
770     nsize: 2
771     output_file: output/empty.out
772     args: -m 15 -n 15 -B_mat_lmvm_J0_mat_type {{constantdiagonal diagonal}} -B_mat_lmvm_mult_algorithm {{recursive dense compact_dense}} -B_mat_lmvm_scale_type user
773     test:
774       suffix: bfgs
775       args: -B_mat_type lmvmbfgs
776     test:
777       suffix: dfp
778       args: -B_mat_type lmvmdfp
779     test:
780       suffix: bfgs_cuda
781       requires: cuda
782       args: -B_mat_type lmvmbfgs -vec_type cuda
783     test:
784       suffix: dfp_cuda
785       requires: cuda
786       args: -B_mat_type lmvmdfp -vec_type cuda
787     test:
788       suffix: bfgs_hip
789       requires: hip !complex
790       args: -B_mat_type lmvmbfgs -vec_type hip
791     test:
792       suffix: dfp_hip
793       requires: hip !complex
794       args: -B_mat_type lmvmdfp -vec_type hip
795 
796   testset:
797     requires: !single
798     nsize: 2
799     output_file: output/empty.out
800     args: -m 15 -n 15 -B_mat_lmvm_J0_mat_type diagonal -B_mat_lmvm_scale_type user
801     test:
802       suffix: dbfgs
803       args: -B_mat_type lmvmdbfgs -B_mat_lbfgs_recursive {{0 1}}
804     test:
805       suffix: ddfp
806       args: -B_mat_type lmvmddfp -B_mat_ldfp_recursive {{0 1}}
807     test:
808       requires: cuda
809       suffix: dbfgs_cuda
810       args: -B_mat_type lmvmdbfgs -B_mat_lbfgs_recursive {{0 1}} -vec_type cuda
811     test:
812       requires: cuda
813       suffix: ddfp_cuda
814       args: -B_mat_type lmvmddfp -B_mat_ldfp_recursive {{0 1}} -vec_type cuda
815     test:
816       requires: hip !complex
817       suffix: dbfgs_hip
818       args: -B_mat_type lmvmdbfgs -B_mat_lbfgs_recursive {{0 1}} -vec_type hip
819     test:
820       requires: hip !complex
821       suffix: ddfp_hip
822       args: -B_mat_type lmvmddfp -B_mat_ldfp_recursive {{0 1}} -vec_type hip
823 
824 TEST*/
825