1 #include <../src/tao/leastsquares/impls/pounders/pounders.h>
2
pounders_h(Tao subtao,Vec v,Mat H,Mat Hpre,PetscCtx ctx)3 static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, PetscCtx ctx)
4 {
5 PetscFunctionBegin;
6 PetscFunctionReturn(PETSC_SUCCESS);
7 }
8
pounders_fg(Tao subtao,Vec x,PetscReal * f,Vec g,PetscCtx ctx)9 static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, PetscCtx ctx)
10 {
11 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx;
12 PetscReal d1, d2;
13
14 PetscFunctionBegin;
15 /* g = A*x (add b later)*/
16 PetscCall(MatMult(mfqP->subH, x, g));
17
18 /* f = 1/2 * x'*(Ax) + b'*x */
19 PetscCall(VecDot(x, g, &d1));
20 PetscCall(VecDot(mfqP->subb, x, &d2));
21 *f = 0.5 * d1 + d2;
22
23 /* now g = g + b */
24 PetscCall(VecAXPY(g, 1.0, mfqP->subb));
25 PetscFunctionReturn(PETSC_SUCCESS);
26 }
27
pounders_feval(Tao tao,Vec x,Vec F,PetscReal * fsum)28 static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum)
29 {
30 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
31 PetscInt i, row, col;
32 PetscReal fr, fc;
33
34 PetscFunctionBegin;
35 PetscCall(TaoComputeResidual(tao, x, F));
36 if (tao->res_weights_v) {
37 PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F));
38 PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum));
39 } else if (tao->res_weights_w) {
40 *fsum = 0;
41 for (i = 0; i < tao->res_weights_n; i++) {
42 row = tao->res_weights_rows[i];
43 col = tao->res_weights_cols[i];
44 PetscCall(VecGetValues(F, 1, &row, &fr));
45 PetscCall(VecGetValues(F, 1, &col, &fc));
46 *fsum += tao->res_weights_w[i] * fc * fr;
47 }
48 } else {
49 PetscCall(VecDot(F, F, fsum));
50 }
51 PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum));
52 PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated infinity or NaN");
53 PetscFunctionReturn(PETSC_SUCCESS);
54 }
55
gqtwrap(Tao tao,PetscReal * gnorm,PetscReal * qmin)56 static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin)
57 {
58 #if defined(PETSC_USE_REAL_SINGLE)
59 PetscReal atol = 1.0e-5;
60 #else
61 PetscReal atol = 1.0e-10;
62 #endif
63 PetscInt info, its;
64 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
65
66 PetscFunctionBegin;
67 if (!mfqP->usegqt) {
68 PetscReal maxval;
69 PetscInt i, j;
70
71 PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
72 PetscCall(VecAssemblyBegin(mfqP->subb));
73 PetscCall(VecAssemblyEnd(mfqP->subb));
74
75 PetscCall(VecSet(mfqP->subx, 0.0));
76
77 PetscCall(VecSet(mfqP->subndel, -1.0));
78 PetscCall(VecSet(mfqP->subpdel, +1.0));
79
80 /* Complete the lower triangle of the Hessian matrix */
81 for (i = 0; i < mfqP->n; i++) {
82 for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i];
83 }
84 PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES));
85 PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY));
86 PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY));
87
88 PetscCall(TaoResetStatistics(mfqP->subtao));
89 /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_CURRENT)); */
90 /* enforce bound constraints -- experimental */
91 if (tao->XU && tao->XL) {
92 PetscCall(VecCopy(tao->XU, mfqP->subxu));
93 PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution));
94 PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta));
95 PetscCall(VecCopy(tao->XL, mfqP->subxl));
96 PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution));
97 PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta));
98
99 PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel));
100 PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel));
101 } else {
102 PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu));
103 PetscCall(VecCopy(mfqP->subndel, mfqP->subxl));
104 }
105 /* Make sure xu > xl */
106 PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
107 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
108 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
109 PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem");
110 /* Make sure xu > tao->solution > xl */
111 PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
112 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
113 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
114 PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem");
115
116 PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
117 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
118 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
119 PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem");
120
121 PetscCall(TaoSolve(mfqP->subtao));
122 PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL));
123
124 /* test bounds post-solution*/
125 PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
126 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
127 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
128 if (maxval > 1e-5) {
129 PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n"));
130 tao->reason = TAO_DIVERGED_TR_REDUCTION;
131 }
132
133 PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
134 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
135 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
136 if (maxval > 1e-5) {
137 PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n"));
138 tao->reason = TAO_DIVERGED_TR_REDUCTION;
139 }
140 } else {
141 PetscCall(gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3));
142 }
143 *qmin *= -1;
144 PetscFunctionReturn(PETSC_SUCCESS);
145 }
146
pounders_update_res(Tao tao)147 static PetscErrorCode pounders_update_res(Tao tao)
148 {
149 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
150 PetscInt i, row, col;
151 PetscBLASInt blasn, blasn2, blasm, ione = 1;
152 PetscReal zero = 0.0, one = 1.0, wii, factor;
153
154 PetscFunctionBegin;
155 PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
156 PetscCall(PetscBLASIntCast(mfqP->m, &blasm));
157 PetscCall(PetscBLASIntCast(mfqP->n * mfqP->n, &blasn2));
158 for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0;
159 for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0;
160
161 /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */
162 if (tao->res_weights_v) {
163 /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */
164 for (i = 0; i < mfqP->m; i++) {
165 PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor));
166 factor = factor * mfqP->C[i];
167 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione));
168 }
169
170 /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
171 /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/
172 for (i = 0; i < mfqP->m; i++) {
173 PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii));
174 if (tao->niter > 1) {
175 factor = wii * mfqP->C[i];
176 /* add wii * ci * Hi */
177 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
178 }
179 /* add wii * gi * gi' */
180 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn));
181 }
182 } else if (tao->res_weights_w) {
183 /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */
184 for (i = 0; i < tao->res_weights_n; i++) {
185 row = tao->res_weights_rows[i];
186 col = tao->res_weights_cols[i];
187
188 factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
189 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione));
190 factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
191 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione));
192 }
193
194 /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
195 /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
196 for (i = 0; i < tao->res_weights_n; i++) {
197 row = tao->res_weights_rows[i];
198 col = tao->res_weights_cols[i];
199 factor = tao->res_weights_w[i] / 2.0;
200 /* add wij * gi gj' + wij * gj gi' */
201 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn));
202 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn));
203 }
204 if (tao->niter > 1) {
205 for (i = 0; i < tao->res_weights_n; i++) {
206 row = tao->res_weights_rows[i];
207 col = tao->res_weights_cols[i];
208
209 /* add wij*cj*Hi */
210 factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
211 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione));
212
213 /* add wij*ci*Hj */
214 factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
215 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione));
216 }
217 }
218 } else {
219 /* Default: Gres= sum_i[cigi] = G*c' */
220 PetscCall(PetscInfo(tao, "Identity weights\n"));
221 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione));
222
223 /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
224 /* Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)} */
225 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn));
226
227 /* sum(F(xkin,i)*H(:,:,i)) */
228 if (tao->niter > 1) {
229 for (i = 0; i < mfqP->m; i++) {
230 factor = mfqP->C[i];
231 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
232 }
233 }
234 }
235 PetscFunctionReturn(PETSC_SUCCESS);
236 }
237
phi2eval(PetscReal * x,PetscInt n,PetscReal * phi)238 static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi)
239 {
240 /* Phi = .5*[x(1)^2 sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */
241 PetscInt i, j, k;
242 PetscReal sqrt2 = PetscSqrtReal(2.0);
243
244 PetscFunctionBegin;
245 j = 0;
246 for (i = 0; i < n; i++) {
247 phi[j] = 0.5 * x[i] * x[i];
248 j++;
249 for (k = i + 1; k < n; k++) {
250 phi[j] = x[i] * x[k] / sqrt2;
251 j++;
252 }
253 }
254 PetscFunctionReturn(PETSC_SUCCESS);
255 }
256
getquadpounders(TAO_POUNDERS * mfqP)257 static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP)
258 {
259 /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x'
260 that satisfies the interpolation conditions Q(X[:,j]) = f(j)
261 for j=1,...,m and with a Hessian matrix of least Frobenius norm */
262
263 /* NB --we are ignoring c */
264 PetscInt i, j, k, num, np = mfqP->nmodelpoints;
265 PetscReal one = 1.0, zero = 0.0, negone = -1.0;
266 PetscBLASInt blasnpmax, blasnplus1, blasnp, blasint, blasint2;
267 PetscBLASInt info, ione = 1;
268 PetscReal sqrt2 = PetscSqrtReal(2.0);
269
270 PetscFunctionBegin;
271 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
272 PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1));
273 PetscCall(PetscBLASIntCast(np, &blasnp));
274 PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint));
275 PetscCall(PetscBLASIntCast(np - mfqP->n - 1, &blasint2));
276 for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0;
277 for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0;
278
279 /* factor M */
280 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info));
281 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info);
282
283 if (np == mfqP->n + 1) {
284 for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0;
285 for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0;
286 } else {
287 /* Let Ltmp = (L'*L) */
288 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint));
289
290 /* factor Ltmp */
291 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info));
292 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info);
293 }
294
295 for (k = 0; k < mfqP->m; k++) {
296 if (np != mfqP->n + 1) {
297 /* Solve L'*L*Omega = Z' * RESk*/
298 PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione));
299 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info));
300 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info);
301
302 /* Beta = L*Omega */
303 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione));
304 }
305
306 /* solve M'*Alpha = RESk - N'*Beta */
307 PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione));
308 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info));
309 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info);
310
311 /* Gdel(:,k) = Alpha(2:n+1) */
312 for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1];
313
314 /* Set Hdels */
315 num = 0;
316 for (i = 0; i < mfqP->n; i++) {
317 /* H[i,i,k] = Beta(num) */
318 mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num];
319 num++;
320 for (j = i + 1; j < mfqP->n; j++) {
321 /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */
322 mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
323 mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
324 num++;
325 }
326 }
327 }
328 PetscFunctionReturn(PETSC_SUCCESS);
329 }
330
morepoints(TAO_POUNDERS * mfqP)331 static PetscErrorCode morepoints(TAO_POUNDERS *mfqP)
332 {
333 /* Assumes mfqP->model_indices[0] is minimum index
334 Finishes adding points to mfqP->model_indices (up to npmax)
335 Computes L,Z,M,N
336 np is actual number of points in model (should equal npmax?) */
337 PetscInt point, i, j, offset;
338 PetscInt reject;
339 PetscBLASInt blasn, blasnpmax, blasnplus1, info, blasnmax, blasint, blasint2, blasnp, blasmaxmn;
340 const PetscReal *x;
341 PetscReal normd;
342
343 PetscFunctionBegin;
344 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
345 PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
346 PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax));
347 PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1));
348 PetscCall(PetscBLASIntCast(mfqP->n, &blasnp));
349 /* Initialize M,N */
350 for (i = 0; i < mfqP->n + 1; i++) {
351 PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
352 mfqP->M[(mfqP->n + 1) * i] = 1.0;
353 for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
354 PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
355 PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i]));
356 }
357
358 /* Now we add points until we have npmax starting with the most recent ones */
359 point = mfqP->nHist - 1;
360 mfqP->nmodelpoints = mfqP->n + 1;
361 while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) {
362 /* Reject any points already in the model */
363 reject = 0;
364 for (j = 0; j < mfqP->n + 1; j++) {
365 if (point == mfqP->model_indices[j]) {
366 reject = 1;
367 break;
368 }
369 }
370
371 /* Reject if norm(d) >c2 */
372 if (!reject) {
373 PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec));
374 PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex]));
375 PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd));
376 normd /= mfqP->delta;
377 if (normd > mfqP->c2) reject = 1;
378 }
379 if (reject) {
380 point--;
381 continue;
382 }
383
384 PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x));
385 mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0;
386 for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
387 PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x));
388 PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)]));
389
390 /* Update QR factorization */
391 /* Copy M' to Q_tmp */
392 for (i = 0; i < mfqP->n + 1; i++) {
393 for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j];
394 }
395 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints + 1, &blasnp));
396 /* Q_tmp,R = qr(M') */
397 PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n + 1), &blasmaxmn));
398 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info));
399 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info);
400
401 /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */
402 /* L = N*Qtmp */
403 PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint2));
404 /* Copy N to L_tmp */
405 for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i];
406 /* Copy L_save to L_tmp */
407
408 /* L_tmp = N*Qtmp' */
409 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info));
410 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);
411
412 /* Copy L_tmp to L_save */
413 for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i];
414
415 /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */
416 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints - mfqP->n, &blasint));
417 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
418 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info));
419 PetscCall(PetscFPTrapPop());
420 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info);
421
422 if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) {
423 /* accept point */
424 mfqP->model_indices[mfqP->nmodelpoints] = point;
425 /* Copy Q_tmp to Q */
426 for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i];
427 for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i];
428 mfqP->nmodelpoints++;
429 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp));
430
431 /* Copy L_save to L */
432 for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i];
433 }
434 point--;
435 }
436
437 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp));
438 /* Copy Q(:,n+2:np) to Z */
439 /* First set Q_tmp to I */
440 for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0;
441 for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0;
442
443 /* Q_tmp = I * Q */
444 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
445 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);
446
447 /* Copy Q_tmp(:,n+2:np) to Z) */
448 offset = mfqP->npmax * (mfqP->n + 1);
449 for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i];
450
451 if (mfqP->nmodelpoints == mfqP->n + 1) {
452 /* Set L to I_{n+1} */
453 for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0;
454 for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0;
455 }
456 PetscFunctionReturn(PETSC_SUCCESS);
457 }
458
459 /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */
addpoint(Tao tao,TAO_POUNDERS * mfqP,PetscInt index)460 static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index)
461 {
462 PetscFunctionBegin;
463 /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/
464 PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist]));
465 PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES));
466 PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
467 PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));
468 PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex]));
469
470 /* Project into feasible region */
471 if (tao->XU && tao->XL) PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist]));
472
473 /* Compute value of new vector */
474 PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist]));
475 CHKMEMQ;
476 PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));
477
478 /* Add new vector to model */
479 mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist;
480 mfqP->nmodelpoints++;
481 mfqP->nHist++;
482 PetscFunctionReturn(PETSC_SUCCESS);
483 }
484
modelimprove(Tao tao,TAO_POUNDERS * mfqP,PetscInt addallpoints)485 static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints)
486 {
487 /* modeld = Q(:,np+1:n)' */
488 PetscInt i, j, minindex = 0;
489 PetscReal dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY;
490 PetscBLASInt blasn, blasnpmax, blask, info;
491 PetscBLASInt blas1 = 1, blasnmax;
492
493 PetscFunctionBegin;
494 PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
495 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
496 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask));
497 PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax));
498
499 /* Qtmp = I(n x n) */
500 for (i = 0; i < mfqP->n; i++) {
501 for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0;
502 }
503 for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0;
504
505 /* Qtmp = Q * I */
506 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
507
508 for (i = mfqP->nmodelpoints; i < mfqP->n; i++) {
509 PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1));
510 if (dp > 0.0) { /* Model says use the other direction! */
511 for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1;
512 }
513 /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */
514 for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j];
515 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1));
516 PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1));
517 if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) {
518 minindex = i;
519 minvalue = mfqP->work[i];
520 }
521 if (addallpoints != 0) PetscCall(addpoint(tao, mfqP, i));
522 }
523 if (!addallpoints) PetscCall(addpoint(tao, mfqP, minindex));
524 PetscFunctionReturn(PETSC_SUCCESS);
525 }
526
affpoints(TAO_POUNDERS * mfqP,PetscReal * xmin,PetscReal c)527 static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c)
528 {
529 PetscInt i, j;
530 PetscBLASInt blasm, blasj, blask, blasn, ione = 1, info;
531 PetscBLASInt blasnpmax, blasmaxmn;
532 PetscReal proj, normd;
533 const PetscReal *x;
534
535 PetscFunctionBegin;
536 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
537 PetscCall(PetscBLASIntCast(mfqP->m, &blasm));
538 PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
539 for (i = mfqP->nHist - 1; i >= 0; i--) {
540 PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x));
541 for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta;
542 PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x));
543 PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione));
544 PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione));
545 if (normd <= c) {
546 PetscCall(PetscBLASIntCast(PetscMax(mfqP->n - mfqP->nmodelpoints, 0), &blasj));
547 if (!mfqP->q_is_I) {
548 /* project D onto null */
549 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask));
550 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info));
551 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info);
552 }
553 PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione));
554
555 if (proj >= mfqP->theta1) { /* add this index to model */
556 mfqP->model_indices[mfqP->nmodelpoints] = i;
557 mfqP->nmodelpoints++;
558 PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione));
559 PetscCall(PetscBLASIntCast(mfqP->npmax * (mfqP->nmodelpoints), &blask));
560 PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione));
561 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask));
562 PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n), &blasmaxmn));
563 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info));
564 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info);
565 mfqP->q_is_I = 0;
566 }
567 if (mfqP->nmodelpoints == mfqP->n) break;
568 }
569 }
570 PetscFunctionReturn(PETSC_SUCCESS);
571 }
572
TaoSolve_POUNDERS(Tao tao)573 static PetscErrorCode TaoSolve_POUNDERS(Tao tao)
574 {
575 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
576 PetscInt i, ii, j, k, l;
577 PetscReal step = 1.0;
578 PetscInt low, high;
579 PetscReal minnorm;
580 PetscReal *x, *f;
581 const PetscReal *xmint, *fmin;
582 PetscReal deltaold;
583 PetscReal gnorm;
584 PetscBLASInt info, ione = 1, iblas;
585 PetscBool valid, same;
586 PetscReal mdec, rho, normxsp;
587 PetscReal one = 1.0, zero = 0.0, ratio;
588 PetscBLASInt blasm, blasn, blasncopy, blasnpmax;
589 static PetscBool set = PETSC_FALSE;
590
591 /* n = # of parameters
592 m = dimension (components) of function */
593 PetscFunctionBegin;
594 PetscCall(PetscCitationsRegister("@article{UNEDF0,\n"
595 "title = {Nuclear energy density optimization},\n"
596 "author = {Kortelainen, M. and Lesinski, T. and Mor\'e, J. and Nazarewicz, W.\n"
597 " and Sarich, J. and Schunck, N. and Stoitsov, M. V. and Wild, S. },\n"
598 "journal = {Phys. Rev. C},\n"
599 "volume = {82},\n"
600 "number = {2},\n"
601 "pages = {024313},\n"
602 "numpages = {18},\n"
603 "year = {2010},\n"
604 "month = {Aug},\n"
605 "doi = {10.1103/PhysRevC.82.024313}\n}\n",
606 &set));
607 tao->niter = 0;
608 if (tao->XL && tao->XU) {
609 /* Check x0 <= XU */
610 PetscReal val;
611
612 PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));
613 PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
614 PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
615 PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound");
616
617 /* Check x0 >= xl */
618 PetscCall(VecCopy(tao->XL, mfqP->Xhist[0]));
619 PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution));
620 PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
621 PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound");
622
623 /* Check x0 + delta < XU -- should be able to get around this eventually */
624
625 PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta));
626 PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution));
627 PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
628 PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
629 PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound");
630 }
631
632 PetscCall(PetscBLASIntCast(mfqP->m, &blasm));
633 PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
634 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
635 for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0;
636
637 PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));
638
639 /* This provides enough information to approximate the gradient of the objective */
640 /* using a forward difference scheme. */
641
642 PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta));
643 PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0]));
644 mfqP->minindex = 0;
645 minnorm = mfqP->Fres[0];
646
647 PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high));
648 for (i = 1; i < mfqP->n + 1; ++i) {
649 PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i]));
650
651 if (i - 1 >= low && i - 1 < high) {
652 PetscCall(VecGetArray(mfqP->Xhist[i], &x));
653 x[i - 1 - low] += mfqP->delta;
654 PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));
655 }
656 CHKMEMQ;
657 PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i]));
658 if (mfqP->Fres[i] < minnorm) {
659 mfqP->minindex = i;
660 minnorm = mfqP->Fres[i];
661 }
662 }
663 PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
664 PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
665 PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm));
666
667 /* Gather mpi vecs to one big local vec */
668
669 /* Begin serial code */
670
671 /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
672 /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
673 /* (Column oriented for blas calls) */
674 ii = 0;
675
676 PetscCall(PetscInfo(tao, "Build matrix: %d\n", mfqP->size));
677 if (1 == mfqP->size) {
678 PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
679 for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
680 PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
681 PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
682 for (i = 0; i < mfqP->n + 1; i++) {
683 if (i == mfqP->minindex) continue;
684
685 PetscCall(VecGetArray(mfqP->Xhist[i], &x));
686 for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
687 PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));
688
689 PetscCall(VecGetArray(mfqP->Fhist[i], &f));
690 for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
691 PetscCall(VecRestoreArray(mfqP->Fhist[i], &f));
692
693 mfqP->model_indices[ii++] = i;
694 }
695 for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
696 PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
697 } else {
698 PetscCall(VecSet(mfqP->localxmin, 0));
699 PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));
700 PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));
701
702 PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint));
703 for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
704 PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint));
705
706 PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
707 PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
708 PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin));
709 for (i = 0; i < mfqP->n + 1; i++) {
710 if (i == mfqP->minindex) continue;
711
712 PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
713 PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
714 PetscCall(VecGetArray(mfqP->localx, &x));
715 for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
716 PetscCall(VecRestoreArray(mfqP->localx, &x));
717
718 PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
719 PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
720 PetscCall(VecGetArray(mfqP->localf, &f));
721 for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
722 PetscCall(VecRestoreArray(mfqP->localf, &f));
723
724 mfqP->model_indices[ii++] = i;
725 }
726 for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
727 PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin));
728 }
729
730 /* Determine the initial quadratic models */
731 /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */
732 /* D (nxn) Fdiff (nxm) => G (nxm) */
733 blasncopy = blasn;
734 PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info));
735 PetscCall(PetscInfo(tao, "Linear solve return: %" PetscBLASInt_FMT "\n", info));
736
737 PetscCall(pounders_update_res(tao));
738
739 valid = PETSC_TRUE;
740
741 PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
742 PetscCall(VecAssemblyBegin(tao->gradient));
743 PetscCall(VecAssemblyEnd(tao->gradient));
744 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
745 gnorm *= mfqP->delta;
746 PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
747
748 tao->reason = TAO_CONTINUE_ITERATING;
749 PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
750 PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
751 PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
752
753 mfqP->nHist = mfqP->n + 1;
754 mfqP->nmodelpoints = mfqP->n + 1;
755 PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm));
756
757 while (tao->reason == TAO_CONTINUE_ITERATING) {
758 PetscReal gnm = 1e-4;
759 /* Call general purpose update function */
760 PetscTryTypeMethod(tao, update, tao->niter, tao->user_update);
761 tao->niter++;
762 /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */
763 PetscCall(gqtwrap(tao, &gnm, &mdec));
764 /* Evaluate the function at the new point */
765
766 for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i];
767 PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist]));
768 PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist]));
769 PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES));
770 PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
771 PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));
772
773 PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));
774
775 rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec;
776 mfqP->nHist++;
777
778 /* Update the center */
779 if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) {
780 /* Update model to reflect new base point */
781 for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta;
782 for (j = 0; j < mfqP->m; j++) {
783 /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work';
784 G(:,j) = G(:,j) + H(:,:,j)*work' */
785 for (k = 0; k < mfqP->n; k++) {
786 mfqP->work2[k] = 0.0;
787 for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l];
788 }
789 for (i = 0; i < mfqP->n; i++) {
790 mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]);
791 mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i];
792 }
793 }
794 /* Cres += work*Gres + .5*work*Hres*work';
795 Gres += Hres*work'; */
796
797 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione));
798 for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i];
799 mfqP->minindex = mfqP->nHist - 1;
800 minnorm = mfqP->Fres[mfqP->minindex];
801 PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
802 /* Change current center */
803 PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
804 for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
805 PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
806 }
807
808 /* Evaluate at a model-improving point if necessary */
809 if (valid == PETSC_FALSE) {
810 mfqP->q_is_I = 1;
811 mfqP->nmodelpoints = 0;
812 PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
813 if (mfqP->nmodelpoints < mfqP->n) {
814 PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n"));
815 PetscCall(modelimprove(tao, mfqP, 1));
816 }
817 }
818
819 /* Update the trust region radius */
820 deltaold = mfqP->delta;
821 normxsp = 0;
822 for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i];
823 normxsp = PetscSqrtReal(normxsp);
824 if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) {
825 mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax);
826 } else if (valid == PETSC_TRUE) {
827 mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin);
828 }
829
830 /* Compute the next interpolation set */
831 mfqP->q_is_I = 1;
832 mfqP->nmodelpoints = 0;
833 PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1));
834 PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
835 if (mfqP->nmodelpoints == mfqP->n) {
836 valid = PETSC_TRUE;
837 } else {
838 valid = PETSC_FALSE;
839 PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2));
840 PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2));
841 if (mfqP->n > mfqP->nmodelpoints) {
842 PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n"));
843 PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints));
844 }
845 }
846 for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1];
847 mfqP->nmodelpoints++;
848 mfqP->model_indices[0] = mfqP->minindex;
849 PetscCall(morepoints(mfqP));
850 for (i = 0; i < mfqP->nmodelpoints; i++) {
851 PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
852 for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold;
853 PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
854 PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
855 for (j = 0; j < mfqP->m; j++) {
856 for (k = 0; k < mfqP->n; k++) {
857 mfqP->work[k] = 0.0;
858 for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l];
859 }
860 PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]);
861 }
862 PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
863 }
864
865 /* Update the quadratic model */
866 PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints));
867 PetscCall(getquadpounders(mfqP));
868 PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
869 PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione));
870 /* G = G*(delta/deltaold) + Gdel */
871 ratio = mfqP->delta / deltaold;
872 iblas = blasm * blasn;
873 PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione));
874 PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione));
875 /* H = H*(delta/deltaold)^2 + Hdel */
876 iblas = blasm * blasn * blasn;
877 ratio *= ratio;
878 PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione));
879 PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione));
880
881 /* Get residuals */
882 PetscCall(pounders_update_res(tao));
883
884 /* Export solution and gradient residual to TAO */
885 PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
886 PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
887 PetscCall(VecAssemblyBegin(tao->gradient));
888 PetscCall(VecAssemblyEnd(tao->gradient));
889 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
890 gnorm *= mfqP->delta;
891 /* final criticality test */
892 PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
893 PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
894 PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
895 /* test for repeated model */
896 if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) {
897 same = PETSC_TRUE;
898 } else {
899 same = PETSC_FALSE;
900 }
901 for (i = 0; i < mfqP->nmodelpoints; i++) {
902 if (same) {
903 if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) {
904 same = PETSC_TRUE;
905 } else {
906 same = PETSC_FALSE;
907 }
908 }
909 mfqP->last_model_indices[i] = mfqP->model_indices[i];
910 }
911 mfqP->last_nmodelpoints = mfqP->nmodelpoints;
912 if (same && mfqP->delta == deltaold) {
913 PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n"));
914 tao->reason = TAO_CONVERGED_STEPTOL;
915 }
916 }
917 PetscFunctionReturn(PETSC_SUCCESS);
918 }
919
TaoSetUp_POUNDERS(Tao tao)920 static PetscErrorCode TaoSetUp_POUNDERS(Tao tao)
921 {
922 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
923 PetscInt i, j;
924 IS isfloc, isfglob, isxloc, isxglob;
925
926 PetscFunctionBegin;
927 if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
928 if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
929 PetscCall(VecGetSize(tao->solution, &mfqP->n));
930 PetscCall(VecGetSize(tao->ls_res, &mfqP->m));
931 mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n);
932 if (mfqP->npmax == PETSC_CURRENT) mfqP->npmax = 2 * mfqP->n + 1;
933 mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax);
934 mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2);
935
936 PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist));
937 PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist));
938 for (i = 0; i < mfqP->n + 1; i++) {
939 PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i]));
940 PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i]));
941 }
942 PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec));
943 PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec));
944 mfqP->nHist = 0;
945
946 PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres));
947 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES));
948 PetscCall(PetscMalloc1(mfqP->n, &mfqP->work));
949 PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2));
950 PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3));
951 PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork));
952 PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega));
953 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta));
954 PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha));
955
956 PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H));
957 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q));
958 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp));
959 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L));
960 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp));
961 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save));
962 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N));
963 PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M));
964 PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z));
965 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau));
966 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp));
967 mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2);
968 PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork));
969 PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork));
970 PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin));
971 PetscCall(PetscMalloc1(mfqP->m, &mfqP->C));
972 PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff));
973 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp));
974 PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres));
975 PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres));
976 PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints));
977 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices));
978 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices));
979 PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem));
980 PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel));
981 PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel));
982 PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices));
983 PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork));
984 PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w));
985 for (i = 0; i < mfqP->m; i++) {
986 for (j = 0; j < mfqP->m; j++) {
987 if (i == j) {
988 mfqP->w[i + mfqP->m * j] = 1.0;
989 } else {
990 mfqP->w[i + mfqP->m * j] = 0.0;
991 }
992 }
993 }
994 for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i;
995 PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size));
996 if (mfqP->size > 1) {
997 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx));
998 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin));
999 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf));
1000 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin));
1001 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc));
1002 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob));
1003 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc));
1004 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob));
1005
1006 PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx));
1007 PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf));
1008
1009 PetscCall(ISDestroy(&isxloc));
1010 PetscCall(ISDestroy(&isxglob));
1011 PetscCall(ISDestroy(&isfloc));
1012 PetscCall(ISDestroy(&isfglob));
1013 }
1014
1015 if (!mfqP->usegqt) {
1016 KSP ksp;
1017 PC pc;
1018 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx));
1019 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl));
1020 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb));
1021 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu));
1022 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel));
1023 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel));
1024 PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao));
1025 PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1));
1026 PetscCall(TaoSetType(mfqP->subtao, TAOBNTR));
1027 PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_"));
1028 PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx));
1029 PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP));
1030 PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits));
1031 PetscCall(TaoSetFromOptions(mfqP->subtao));
1032 PetscCall(TaoGetKSP(mfqP->subtao, &ksp));
1033 if (ksp) {
1034 PetscCall(KSPGetPC(ksp, &pc));
1035 PetscCall(PCSetType(pc, PCNONE));
1036 }
1037 PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu));
1038 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH));
1039 PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP));
1040 }
1041 PetscFunctionReturn(PETSC_SUCCESS);
1042 }
1043
TaoDestroy_POUNDERS(Tao tao)1044 static PetscErrorCode TaoDestroy_POUNDERS(Tao tao)
1045 {
1046 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1047 PetscInt i;
1048
1049 PetscFunctionBegin;
1050 if (!mfqP->usegqt) {
1051 PetscCall(TaoDestroy(&mfqP->subtao));
1052 PetscCall(VecDestroy(&mfqP->subx));
1053 PetscCall(VecDestroy(&mfqP->subxl));
1054 PetscCall(VecDestroy(&mfqP->subxu));
1055 PetscCall(VecDestroy(&mfqP->subb));
1056 PetscCall(VecDestroy(&mfqP->subpdel));
1057 PetscCall(VecDestroy(&mfqP->subndel));
1058 PetscCall(MatDestroy(&mfqP->subH));
1059 }
1060 PetscCall(PetscFree(mfqP->Fres));
1061 PetscCall(PetscFree(mfqP->RES));
1062 PetscCall(PetscFree(mfqP->work));
1063 PetscCall(PetscFree(mfqP->work2));
1064 PetscCall(PetscFree(mfqP->work3));
1065 PetscCall(PetscFree(mfqP->mwork));
1066 PetscCall(PetscFree(mfqP->omega));
1067 PetscCall(PetscFree(mfqP->beta));
1068 PetscCall(PetscFree(mfqP->alpha));
1069 PetscCall(PetscFree(mfqP->H));
1070 PetscCall(PetscFree(mfqP->Q));
1071 PetscCall(PetscFree(mfqP->Q_tmp));
1072 PetscCall(PetscFree(mfqP->L));
1073 PetscCall(PetscFree(mfqP->L_tmp));
1074 PetscCall(PetscFree(mfqP->L_save));
1075 PetscCall(PetscFree(mfqP->N));
1076 PetscCall(PetscFree(mfqP->M));
1077 PetscCall(PetscFree(mfqP->Z));
1078 PetscCall(PetscFree(mfqP->tau));
1079 PetscCall(PetscFree(mfqP->tau_tmp));
1080 PetscCall(PetscFree(mfqP->npmaxwork));
1081 PetscCall(PetscFree(mfqP->npmaxiwork));
1082 PetscCall(PetscFree(mfqP->xmin));
1083 PetscCall(PetscFree(mfqP->C));
1084 PetscCall(PetscFree(mfqP->Fdiff));
1085 PetscCall(PetscFree(mfqP->Disp));
1086 PetscCall(PetscFree(mfqP->Gres));
1087 PetscCall(PetscFree(mfqP->Hres));
1088 PetscCall(PetscFree(mfqP->Gpoints));
1089 PetscCall(PetscFree(mfqP->model_indices));
1090 PetscCall(PetscFree(mfqP->last_model_indices));
1091 PetscCall(PetscFree(mfqP->Xsubproblem));
1092 PetscCall(PetscFree(mfqP->Gdel));
1093 PetscCall(PetscFree(mfqP->Hdel));
1094 PetscCall(PetscFree(mfqP->indices));
1095 PetscCall(PetscFree(mfqP->iwork));
1096 PetscCall(PetscFree(mfqP->w));
1097 for (i = 0; i < mfqP->nHist; i++) {
1098 PetscCall(VecDestroy(&mfqP->Xhist[i]));
1099 PetscCall(VecDestroy(&mfqP->Fhist[i]));
1100 }
1101 PetscCall(VecDestroy(&mfqP->workxvec));
1102 PetscCall(VecDestroy(&mfqP->workfvec));
1103 PetscCall(PetscFree(mfqP->Xhist));
1104 PetscCall(PetscFree(mfqP->Fhist));
1105
1106 if (mfqP->size > 1) {
1107 PetscCall(VecDestroy(&mfqP->localx));
1108 PetscCall(VecDestroy(&mfqP->localxmin));
1109 PetscCall(VecDestroy(&mfqP->localf));
1110 PetscCall(VecDestroy(&mfqP->localfmin));
1111 }
1112 PetscCall(PetscFree(tao->data));
1113 PetscFunctionReturn(PETSC_SUCCESS);
1114 }
1115
TaoSetFromOptions_POUNDERS(Tao tao,PetscOptionItems PetscOptionsObject)1116 static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems PetscOptionsObject)
1117 {
1118 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1119
1120 PetscFunctionBegin;
1121 PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization");
1122 PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL));
1123 mfqP->delta = mfqP->delta0;
1124 PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL));
1125 PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL));
1126 PetscOptionsHeadEnd();
1127 PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129
TaoView_POUNDERS(Tao tao,PetscViewer viewer)1130 static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer)
1131 {
1132 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1133 PetscBool isascii;
1134
1135 PetscFunctionBegin;
1136 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1137 if (isascii) {
1138 PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0));
1139 PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta));
1140 PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints));
1141 if (mfqP->usegqt) {
1142 PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n"));
1143 } else {
1144 PetscCall(TaoView(mfqP->subtao, viewer));
1145 }
1146 }
1147 PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149 /*MC
1150 TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares
1151
1152 Options Database Keys:
1153 + -tao_pounders_delta - initial step length
1154 . -tao_pounders_npmax - maximum number of points in model
1155 - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON
1156
1157 Level: beginner
1158
1159 M*/
1160
TaoCreate_POUNDERS(Tao tao)1161 PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao)
1162 {
1163 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1164
1165 PetscFunctionBegin;
1166 tao->ops->setup = TaoSetUp_POUNDERS;
1167 tao->ops->solve = TaoSolve_POUNDERS;
1168 tao->ops->view = TaoView_POUNDERS;
1169 tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS;
1170 tao->ops->destroy = TaoDestroy_POUNDERS;
1171
1172 PetscCall(PetscNew(&mfqP));
1173 tao->data = (void *)mfqP;
1174
1175 /* Override default settings (unless already changed) */
1176 PetscCall(TaoParametersInitialize(tao));
1177 PetscObjectParameterSetDefault(tao, max_it, 2000);
1178 PetscObjectParameterSetDefault(tao, max_funcs, 4000);
1179
1180 mfqP->npmax = PETSC_CURRENT;
1181 mfqP->delta0 = 0.1;
1182 mfqP->delta = 0.1;
1183 mfqP->deltamax = 1e3;
1184 mfqP->deltamin = 1e-6;
1185 mfqP->c2 = 10.0;
1186 mfqP->theta1 = 1e-5;
1187 mfqP->theta2 = 1e-4;
1188 mfqP->gamma0 = 0.5;
1189 mfqP->gamma1 = 2.0;
1190 mfqP->eta0 = 0.0;
1191 mfqP->eta1 = 0.1;
1192 mfqP->usegqt = PETSC_FALSE;
1193 mfqP->gqt_rtol = 0.001;
1194 mfqP->gqt_maxits = 50;
1195 mfqP->workxvec = NULL;
1196 PetscFunctionReturn(PETSC_SUCCESS);
1197 }
1198