1 #include <petsc/private/taoimpl.h> 2 3 /*T 4 Concepts: TAO^Solving a system of nonlinear equations, nonlinear least squares 5 Routines: TaoCreate(); 6 Routines: TaoSetType(); 7 Routines: TaoSetSolution(); 8 Routines: TaoSetObjective(); 9 Routines: TaoSetGradient(); 10 Routines: TaoSetConstraintsRoutine(); 11 Routines: TaoSetJacobianStateRoutine(); 12 Routines: TaoSetJacobianDesignRoutine(); 13 Routines: TaoSetStateDesignIS(); 14 Routines: TaoSetFromOptions(); 15 Routines: TaoSolve(); 16 Routines: TaoDestroy(); 17 Processors: n 18 T*/ 19 20 typedef struct { 21 PetscInt n; /* Number of total variables */ 22 PetscInt m; /* Number of constraints */ 23 PetscInt nstate; 24 PetscInt ndesign; 25 PetscInt mx; /* grid points in each direction */ 26 PetscInt ns; /* Number of data samples (1<=ns<=8) 27 Currently only ns=1 is supported */ 28 PetscInt ndata; /* Number of data points per sample */ 29 IS s_is; 30 IS d_is; 31 32 VecScatter state_scatter; 33 VecScatter design_scatter; 34 VecScatter *yi_scatter, *di_scatter; 35 Vec suby,subq,subd; 36 Mat Js,Jd,JsPrec,JsInv,JsBlock; 37 38 PetscReal alpha; /* Regularization parameter */ 39 PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */ 40 PetscReal noise; /* Amount of noise to add to data */ 41 PetscReal *ones; 42 Mat Q; 43 Mat MQ; 44 Mat L; 45 46 Mat Grad; 47 Mat Av,Avwork; 48 Mat Div, Divwork; 49 Mat DSG; 50 Mat Diag,Ones; 51 52 Vec q; 53 Vec ur; /* reference */ 54 55 Vec d; 56 Vec dwork; 57 58 Vec x; /* super vec of y,u */ 59 60 Vec y; /* state variables */ 61 Vec ywork; 62 63 Vec ytrue; 64 65 Vec u; /* design variables */ 66 Vec uwork; 67 68 Vec utrue; 69 70 Vec js_diag; 71 72 Vec c; /* constraint vector */ 73 Vec cwork; 74 75 Vec lwork; 76 Vec S; 77 Vec Swork,Twork,Sdiag,Ywork; 78 Vec Av_u; 79 80 KSP solver; 81 PC prec; 82 83 PetscReal tola,tolb,tolc,told; 84 PetscInt ksp_its; 85 PetscInt ksp_its_initial; 86 PetscLogStage stages[10]; 87 PetscBool use_ptap; 88 PetscBool use_lrc; 89 } AppCtx; 90 91 PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*); 92 PetscErrorCode FormGradient(Tao, Vec, Vec, void*); 93 PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*); 94 PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*); 95 PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*); 96 PetscErrorCode FormConstraints(Tao, Vec, Vec, void*); 97 PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*); 98 PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter); 99 PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter); 100 PetscErrorCode EllipticInitialize(AppCtx*); 101 PetscErrorCode EllipticDestroy(AppCtx*); 102 PetscErrorCode EllipticMonitor(Tao, void*); 103 104 PetscErrorCode StateBlockMatMult(Mat,Vec,Vec); 105 PetscErrorCode StateMatMult(Mat,Vec,Vec); 106 107 PetscErrorCode StateInvMatMult(Mat,Vec,Vec); 108 PetscErrorCode DesignMatMult(Mat,Vec,Vec); 109 PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec); 110 111 PetscErrorCode QMatMult(Mat,Vec,Vec); 112 PetscErrorCode QMatMultTranspose(Mat,Vec,Vec); 113 114 static char help[]=""; 115 116 int main(int argc, char **argv) 117 { 118 PetscErrorCode ierr; 119 Vec x0; 120 Tao tao; 121 AppCtx user; 122 PetscInt ntests = 1; 123 PetscInt i; 124 125 PetscCall(PetscInitialize(&argc, &argv, (char*)0,help)); 126 user.mx = 8; 127 ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"elliptic example",NULL);PetscCall(ierr); 128 PetscCall(PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL)); 129 user.ns = 6; 130 PetscCall(PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,NULL)); 131 user.ndata = 64; 132 PetscCall(PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL)); 133 user.alpha = 0.1; 134 PetscCall(PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL)); 135 user.beta = 0.00001; 136 PetscCall(PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL)); 137 user.noise = 0.01; 138 PetscCall(PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL)); 139 140 user.use_ptap = PETSC_FALSE; 141 PetscCall(PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",user.use_ptap,&user.use_ptap,NULL)); 142 user.use_lrc = PETSC_FALSE; 143 PetscCall(PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",user.use_lrc,&user.use_lrc,NULL)); 144 PetscCall(PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL)); 145 ierr = PetscOptionsEnd();PetscCall(ierr); 146 147 user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */ 148 user.nstate = user.m; 149 user.ndesign = user.mx*user.mx*user.mx; 150 user.n = user.nstate + user.ndesign; /* number of variables */ 151 152 /* Create TAO solver and set desired solution method */ 153 PetscCall(TaoCreate(PETSC_COMM_WORLD,&tao)); 154 PetscCall(TaoSetType(tao,TAOLCL)); 155 156 /* Set up initial vectors and matrices */ 157 PetscCall(EllipticInitialize(&user)); 158 159 PetscCall(Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter)); 160 PetscCall(VecDuplicate(user.x,&x0)); 161 PetscCall(VecCopy(user.x,x0)); 162 163 /* Set solution vector with an initial guess */ 164 PetscCall(TaoSetSolution(tao,user.x)); 165 PetscCall(TaoSetObjective(tao, FormFunction, &user)); 166 PetscCall(TaoSetGradient(tao, NULL, FormGradient, &user)); 167 PetscCall(TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user)); 168 169 PetscCall(TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user)); 170 PetscCall(TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user)); 171 172 PetscCall(TaoSetStateDesignIS(tao,user.s_is,user.d_is)); 173 PetscCall(TaoSetFromOptions(tao)); 174 175 /* SOLVE THE APPLICATION */ 176 PetscCall(PetscLogStageRegister("Trials",&user.stages[1])); 177 PetscCall(PetscLogStagePush(user.stages[1])); 178 for (i=0; i<ntests; i++) { 179 PetscCall(TaoSolve(tao)); 180 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its)); 181 PetscCall(VecCopy(x0,user.x)); 182 } 183 PetscCall(PetscLogStagePop()); 184 PetscCall(PetscBarrier((PetscObject)user.x)); 185 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ")); 186 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial)); 187 188 PetscCall(TaoDestroy(&tao)); 189 PetscCall(VecDestroy(&x0)); 190 PetscCall(EllipticDestroy(&user)); 191 PetscCall(PetscFinalize()); 192 return 0; 193 } 194 /* ------------------------------------------------------------------- */ 195 /* 196 dwork = Qy - d 197 lwork = L*(u-ur) 198 f = 1/2 * (dwork.dwork + alpha*lwork.lwork) 199 */ 200 PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr) 201 { 202 PetscReal d1=0,d2=0; 203 AppCtx *user = (AppCtx*)ptr; 204 205 PetscFunctionBegin; 206 PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter)); 207 PetscCall(MatMult(user->MQ,user->y,user->dwork)); 208 PetscCall(VecAXPY(user->dwork,-1.0,user->d)); 209 PetscCall(VecDot(user->dwork,user->dwork,&d1)); 210 PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u)); 211 PetscCall(MatMult(user->L,user->uwork,user->lwork)); 212 PetscCall(VecDot(user->lwork,user->lwork,&d2)); 213 *f = 0.5 * (d1 + user->alpha*d2); 214 PetscFunctionReturn(0); 215 } 216 217 /* ------------------------------------------------------------------- */ 218 /* 219 state: g_s = Q' *(Qy - d) 220 design: g_d = alpha*L'*L*(u-ur) 221 */ 222 PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr) 223 { 224 AppCtx *user = (AppCtx*)ptr; 225 226 PetscFunctionBegin; 227 PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter)); 228 PetscCall(MatMult(user->MQ,user->y,user->dwork)); 229 PetscCall(VecAXPY(user->dwork,-1.0,user->d)); 230 PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork)); 231 PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u)); 232 PetscCall(MatMult(user->L,user->uwork,user->lwork)); 233 PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork)); 234 PetscCall(VecScale(user->uwork, user->alpha)); 235 PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter)); 236 PetscFunctionReturn(0); 237 } 238 239 PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr) 240 { 241 PetscReal d1,d2; 242 AppCtx *user = (AppCtx*)ptr; 243 244 PetscFunctionBegin; 245 PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter)); 246 PetscCall(MatMult(user->MQ,user->y,user->dwork)); 247 PetscCall(VecAXPY(user->dwork,-1.0,user->d)); 248 PetscCall(VecDot(user->dwork,user->dwork,&d1)); 249 PetscCall(MatMultTranspose(user->MQ,user->dwork,user->ywork)); 250 251 PetscCall(VecWAXPY(user->uwork,-1.0,user->ur,user->u)); 252 PetscCall(MatMult(user->L,user->uwork,user->lwork)); 253 PetscCall(VecDot(user->lwork,user->lwork,&d2)); 254 PetscCall(MatMultTranspose(user->L,user->lwork,user->uwork)); 255 PetscCall(VecScale(user->uwork, user->alpha)); 256 *f = 0.5 * (d1 + user->alpha*d2); 257 PetscCall(Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter)); 258 PetscFunctionReturn(0); 259 } 260 261 /* ------------------------------------------------------------------- */ 262 /* A 263 MatShell object 264 */ 265 PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr) 266 { 267 AppCtx *user = (AppCtx*)ptr; 268 269 PetscFunctionBegin; 270 PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter)); 271 /* DSG = Div * (1/Av_u) * Grad */ 272 PetscCall(VecSet(user->uwork,0)); 273 PetscCall(VecAXPY(user->uwork,-1.0,user->u)); 274 PetscCall(VecExp(user->uwork)); 275 PetscCall(MatMult(user->Av,user->uwork,user->Av_u)); 276 PetscCall(VecCopy(user->Av_u,user->Swork)); 277 PetscCall(VecReciprocal(user->Swork)); 278 if (user->use_ptap) { 279 PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES)); 280 PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG)); 281 } else { 282 PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN)); 283 PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork)); 284 PetscCall(MatProductNumeric(user->DSG)); 285 } 286 PetscFunctionReturn(0); 287 } 288 /* ------------------------------------------------------------------- */ 289 /* B */ 290 PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr) 291 { 292 AppCtx *user = (AppCtx*)ptr; 293 294 PetscFunctionBegin; 295 PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter)); 296 PetscFunctionReturn(0); 297 } 298 299 PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y) 300 { 301 PetscReal sum; 302 AppCtx *user; 303 304 PetscFunctionBegin; 305 PetscCall(MatShellGetContext(J_shell,&user)); 306 PetscCall(MatMult(user->DSG,X,Y)); 307 PetscCall(VecSum(X,&sum)); 308 sum /= user->ndesign; 309 PetscCall(VecShift(Y,sum)); 310 PetscFunctionReturn(0); 311 } 312 313 PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y) 314 { 315 PetscInt i; 316 AppCtx *user; 317 318 PetscFunctionBegin; 319 PetscCall(MatShellGetContext(J_shell,&user)); 320 if (user->ns == 1) { 321 PetscCall(MatMult(user->JsBlock,X,Y)); 322 } else { 323 for (i=0;i<user->ns;i++) { 324 PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0)); 325 PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0)); 326 PetscCall(MatMult(user->JsBlock,user->subq,user->suby)); 327 PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0)); 328 } 329 } 330 PetscFunctionReturn(0); 331 } 332 333 PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y) 334 { 335 PetscInt its,i; 336 AppCtx *user; 337 338 PetscFunctionBegin; 339 PetscCall(MatShellGetContext(J_shell,&user)); 340 PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG)); 341 if (Y == user->ytrue) { 342 /* First solve is done using true solution to set up problem */ 343 PetscCall(KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT)); 344 } else { 345 PetscCall(KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT)); 346 } 347 if (user->ns == 1) { 348 PetscCall(KSPSolve(user->solver,X,Y)); 349 PetscCall(KSPGetIterationNumber(user->solver,&its)); 350 user->ksp_its+=its; 351 } else { 352 for (i=0;i<user->ns;i++) { 353 PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0)); 354 PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0)); 355 PetscCall(KSPSolve(user->solver,user->subq,user->suby)); 356 PetscCall(KSPGetIterationNumber(user->solver,&its)); 357 user->ksp_its+=its; 358 PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0)); 359 } 360 } 361 PetscFunctionReturn(0); 362 } 363 PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y) 364 { 365 AppCtx *user; 366 PetscInt i; 367 368 PetscFunctionBegin; 369 PetscCall(MatShellGetContext(J_shell,&user)); 370 if (user->ns == 1) { 371 PetscCall(MatMult(user->Q,X,Y)); 372 } else { 373 for (i=0;i<user->ns;i++) { 374 PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0)); 375 PetscCall(Scatter(Y,user->subd,user->di_scatter[i],0,0)); 376 PetscCall(MatMult(user->Q,user->subq,user->subd)); 377 PetscCall(Gather(Y,user->subd,user->di_scatter[i],0,0)); 378 } 379 } 380 PetscFunctionReturn(0); 381 } 382 383 PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y) 384 { 385 AppCtx *user; 386 PetscInt i; 387 388 PetscFunctionBegin; 389 PetscCall(MatShellGetContext(J_shell,&user)); 390 if (user->ns == 1) { 391 PetscCall(MatMultTranspose(user->Q,X,Y)); 392 } else { 393 for (i=0;i<user->ns;i++) { 394 PetscCall(Scatter(X,user->subd,user->di_scatter[i],0,0)); 395 PetscCall(Scatter(Y,user->suby,user->yi_scatter[i],0,0)); 396 PetscCall(MatMultTranspose(user->Q,user->subd,user->suby)); 397 PetscCall(Gather(Y,user->suby,user->yi_scatter[i],0,0)); 398 } 399 } 400 PetscFunctionReturn(0); 401 } 402 403 PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y) 404 { 405 PetscInt i; 406 AppCtx *user; 407 408 PetscFunctionBegin; 409 PetscCall(MatShellGetContext(J_shell,&user)); 410 411 /* sdiag(1./v) */ 412 PetscCall(VecSet(user->uwork,0)); 413 PetscCall(VecAXPY(user->uwork,-1.0,user->u)); 414 PetscCall(VecExp(user->uwork)); 415 416 /* sdiag(1./((Av*(1./v)).^2)) */ 417 PetscCall(MatMult(user->Av,user->uwork,user->Swork)); 418 PetscCall(VecPointwiseMult(user->Swork,user->Swork,user->Swork)); 419 PetscCall(VecReciprocal(user->Swork)); 420 421 /* (Av * (sdiag(1./v) * b)) */ 422 PetscCall(VecPointwiseMult(user->uwork,user->uwork,X)); 423 PetscCall(MatMult(user->Av,user->uwork,user->Twork)); 424 425 /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */ 426 PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork)); 427 428 if (user->ns == 1) { 429 /* (sdiag(Grad*y(:,i)) */ 430 PetscCall(MatMult(user->Grad,user->y,user->Twork)); 431 432 /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */ 433 PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Swork)); 434 PetscCall(MatMultTranspose(user->Grad,user->Swork,Y)); 435 } else { 436 for (i=0;i<user->ns;i++) { 437 PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0)); 438 PetscCall(Scatter(Y,user->subq,user->yi_scatter[i],0,0)); 439 440 PetscCall(MatMult(user->Grad,user->suby,user->Twork)); 441 PetscCall(VecPointwiseMult(user->Twork,user->Twork,user->Swork)); 442 PetscCall(MatMultTranspose(user->Grad,user->Twork,user->subq)); 443 PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0)); 444 PetscCall(Gather(Y,user->subq,user->yi_scatter[i],0,0)); 445 } 446 } 447 PetscFunctionReturn(0); 448 } 449 450 PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y) 451 { 452 PetscInt i; 453 AppCtx *user; 454 455 PetscFunctionBegin; 456 PetscCall(MatShellGetContext(J_shell,&user)); 457 PetscCall(VecZeroEntries(Y)); 458 459 /* Sdiag = 1./((Av*(1./v)).^2) */ 460 PetscCall(VecSet(user->uwork,0)); 461 PetscCall(VecAXPY(user->uwork,-1.0,user->u)); 462 PetscCall(VecExp(user->uwork)); 463 PetscCall(MatMult(user->Av,user->uwork,user->Swork)); 464 PetscCall(VecPointwiseMult(user->Sdiag,user->Swork,user->Swork)); 465 PetscCall(VecReciprocal(user->Sdiag)); 466 467 for (i=0;i<user->ns;i++) { 468 PetscCall(Scatter(X,user->subq,user->yi_scatter[i],0,0)); 469 PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0)); 470 471 /* Swork = (Div' * b(:,i)) */ 472 PetscCall(MatMult(user->Grad,user->subq,user->Swork)); 473 474 /* Twork = Grad*y(:,i) */ 475 PetscCall(MatMult(user->Grad,user->suby,user->Twork)); 476 477 /* Twork = sdiag(Twork) * Swork */ 478 PetscCall(VecPointwiseMult(user->Twork,user->Swork,user->Twork)); 479 480 /* Swork = pointwisemult(Sdiag,Twork) */ 481 PetscCall(VecPointwiseMult(user->Swork,user->Twork,user->Sdiag)); 482 483 /* Ywork = Av' * Swork */ 484 PetscCall(MatMultTranspose(user->Av,user->Swork,user->Ywork)); 485 486 /* Ywork = pointwisemult(uwork,Ywork) */ 487 PetscCall(VecPointwiseMult(user->Ywork,user->uwork,user->Ywork)); 488 PetscCall(VecAXPY(Y,1.0,user->Ywork)); 489 PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0)); 490 } 491 PetscFunctionReturn(0); 492 } 493 494 PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr) 495 { 496 /* C=Ay - q A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */ 497 PetscReal sum; 498 PetscInt i; 499 AppCtx *user = (AppCtx*)ptr; 500 501 PetscFunctionBegin; 502 PetscCall(Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter)); 503 if (user->ns == 1) { 504 PetscCall(MatMult(user->Grad,user->y,user->Swork)); 505 PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u)); 506 PetscCall(MatMultTranspose(user->Grad,user->Swork,C)); 507 PetscCall(VecSum(user->y,&sum)); 508 sum /= user->ndesign; 509 PetscCall(VecShift(C,sum)); 510 } else { 511 for (i=0;i<user->ns;i++) { 512 PetscCall(Scatter(user->y,user->suby,user->yi_scatter[i],0,0)); 513 PetscCall(Scatter(C,user->subq,user->yi_scatter[i],0,0)); 514 PetscCall(MatMult(user->Grad,user->suby,user->Swork)); 515 PetscCall(VecPointwiseDivide(user->Swork,user->Swork,user->Av_u)); 516 PetscCall(MatMultTranspose(user->Grad,user->Swork,user->subq)); 517 518 PetscCall(VecSum(user->suby,&sum)); 519 sum /= user->ndesign; 520 PetscCall(VecShift(user->subq,sum)); 521 522 PetscCall(Gather(user->y,user->suby,user->yi_scatter[i],0,0)); 523 PetscCall(Gather(C,user->subq,user->yi_scatter[i],0,0)); 524 } 525 } 526 PetscCall(VecAXPY(C,-1.0,user->q)); 527 PetscFunctionReturn(0); 528 } 529 530 PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2) 531 { 532 PetscFunctionBegin; 533 PetscCall(VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD)); 534 PetscCall(VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD)); 535 if (sub2) { 536 PetscCall(VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD)); 537 PetscCall(VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD)); 538 } 539 PetscFunctionReturn(0); 540 } 541 542 PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2) 543 { 544 PetscFunctionBegin; 545 PetscCall(VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE)); 546 PetscCall(VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE)); 547 if (sub2) { 548 PetscCall(VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE)); 549 PetscCall(VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE)); 550 } 551 PetscFunctionReturn(0); 552 } 553 554 PetscErrorCode EllipticInitialize(AppCtx *user) 555 { 556 PetscInt m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock; 557 Vec XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork; 558 PetscReal *x,*y,*z; 559 PetscReal h,meanut; 560 PetscScalar hinv,neg_hinv,half = 0.5,sqrt_beta; 561 PetscInt im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz; 562 IS is_alldesign,is_allstate; 563 IS is_from_d; 564 IS is_from_y; 565 PetscInt lo,hi,hi2,lo2,ysubnlocal,dsubnlocal; 566 const PetscInt *ranges, *subranges; 567 PetscMPIInt size; 568 PetscReal xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz; 569 PetscScalar v,vx,vy,vz; 570 PetscInt offset,subindex,subvec,nrank,kk; 571 572 PetscScalar xr[64] = {0.4970, 0.8498, 0.7814, 0.6268, 0.7782, 0.6402, 0.3617, 0.3160, 573 0.3610, 0.5298, 0.6987, 0.3331, 0.7962, 0.5596, 0.3866, 0.6774, 574 0.5407, 0.4518, 0.6702, 0.6061, 0.7580, 0.8997, 0.5198, 0.8326, 575 0.2138, 0.9198, 0.3000, 0.2833, 0.8288, 0.7076, 0.1820, 0.0728, 576 0.8447, 0.2367, 0.3239, 0.6413, 0.3114, 0.4731, 0.1192, 0.9273, 577 0.5724, 0.4331, 0.5136, 0.3547, 0.4413, 0.2602, 0.5698, 0.7278, 578 0.5261, 0.6230, 0.2454, 0.3948, 0.7479, 0.6582, 0.4660, 0.5594, 579 0.7574, 0.1143, 0.5900, 0.1065, 0.4260, 0.3294, 0.8276, 0.0756}; 580 581 PetscScalar yr[64] = {0.7345, 0.9120, 0.9288, 0.7528, 0.4463, 0.4985, 0.2497, 0.6256, 582 0.3425, 0.9026, 0.6983, 0.4230, 0.7140, 0.2970, 0.4474, 0.8792, 583 0.6604, 0.2485, 0.7968, 0.6127, 0.1796, 0.2437, 0.5938, 0.6137, 584 0.3867, 0.5658, 0.4575, 0.1009, 0.0863, 0.3361, 0.0738, 0.3985, 585 0.6602, 0.1437, 0.0934, 0.5983, 0.5950, 0.0763, 0.0768, 0.2288, 586 0.5761, 0.1129, 0.3841, 0.6150, 0.6904, 0.6686, 0.1361, 0.4601, 587 0.4491, 0.3716, 0.1969, 0.6537, 0.6743, 0.6991, 0.4811, 0.5480, 588 0.1684, 0.4569, 0.6889, 0.8437, 0.3015, 0.2854, 0.8199, 0.2658}; 589 590 PetscScalar zr[64] = {0.7668, 0.8573, 0.2654, 0.2719, 0.1060, 0.1311, 0.6232, 0.2295, 591 0.8009, 0.2147, 0.2119, 0.9325, 0.4473, 0.3600, 0.3374, 0.3819, 592 0.4066, 0.5801, 0.1673, 0.0959, 0.4638, 0.8236, 0.8800, 0.2939, 593 0.2028, 0.8262, 0.2706, 0.6276, 0.9085, 0.6443, 0.8241, 0.0712, 594 0.1824, 0.7789, 0.4389, 0.8415, 0.7055, 0.6639, 0.3653, 0.2078, 595 0.1987, 0.2297, 0.4321, 0.8115, 0.4915, 0.7764, 0.4657, 0.4627, 596 0.4569, 0.4232, 0.8514, 0.0674, 0.3227, 0.1055, 0.6690, 0.6313, 597 0.9226, 0.5461, 0.4126, 0.2364, 0.6096, 0.7042, 0.3914, 0.0711}; 598 599 PetscFunctionBegin; 600 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 601 PetscCall(PetscLogStageRegister("Elliptic Setup",&user->stages[0])); 602 PetscCall(PetscLogStagePush(user->stages[0])); 603 604 /* Create u,y,c,x */ 605 PetscCall(VecCreate(PETSC_COMM_WORLD,&user->u)); 606 PetscCall(VecCreate(PETSC_COMM_WORLD,&user->y)); 607 PetscCall(VecCreate(PETSC_COMM_WORLD,&user->c)); 608 PetscCall(VecSetSizes(user->u,PETSC_DECIDE,user->ndesign)); 609 PetscCall(VecSetFromOptions(user->u)); 610 PetscCall(VecGetLocalSize(user->u,&ysubnlocal)); 611 PetscCall(VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate)); 612 PetscCall(VecSetSizes(user->c,ysubnlocal*user->ns,user->m)); 613 PetscCall(VecSetFromOptions(user->y)); 614 PetscCall(VecSetFromOptions(user->c)); 615 616 /* 617 ******************************* 618 Create scatters for x <-> y,u 619 ******************************* 620 621 If the state vector y and design vector u are partitioned as 622 [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors), 623 then the solution vector x is organized as 624 [y_1; u_1; y_2; u_2; ...; y_np; u_np]. 625 The index sets user->s_is and user->d_is correspond to the indices of the 626 state and design variables owned by the current processor. 627 */ 628 PetscCall(VecCreate(PETSC_COMM_WORLD,&user->x)); 629 630 PetscCall(VecGetOwnershipRange(user->y,&lo,&hi)); 631 PetscCall(VecGetOwnershipRange(user->u,&lo2,&hi2)); 632 633 PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate)); 634 PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is)); 635 PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign)); 636 PetscCall(ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is)); 637 638 PetscCall(VecSetSizes(user->x,hi-lo+hi2-lo2,user->n)); 639 PetscCall(VecSetFromOptions(user->x)); 640 641 PetscCall(VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter)); 642 PetscCall(VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter)); 643 PetscCall(ISDestroy(&is_alldesign)); 644 PetscCall(ISDestroy(&is_allstate)); 645 /* 646 ******************************* 647 Create scatter from y to y_1,y_2,...,y_ns 648 ******************************* 649 */ 650 PetscCall(PetscMalloc1(user->ns,&user->yi_scatter)); 651 PetscCall(VecDuplicate(user->u,&user->suby)); 652 PetscCall(VecDuplicate(user->u,&user->subq)); 653 654 PetscCall(VecGetOwnershipRange(user->y,&lo2,&hi2)); 655 istart = 0; 656 for (i=0; i<user->ns; i++) { 657 PetscCall(VecGetOwnershipRange(user->suby,&lo,&hi)); 658 PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y)); 659 PetscCall(VecScatterCreate(user->y,is_from_y,user->suby,NULL,&user->yi_scatter[i])); 660 istart = istart + hi-lo; 661 PetscCall(ISDestroy(&is_from_y)); 662 } 663 /* 664 ******************************* 665 Create scatter from d to d_1,d_2,...,d_ns 666 ******************************* 667 */ 668 PetscCall(VecCreate(PETSC_COMM_WORLD,&user->subd)); 669 PetscCall(VecSetSizes(user->subd,PETSC_DECIDE,user->ndata)); 670 PetscCall(VecSetFromOptions(user->subd)); 671 PetscCall(VecCreate(PETSC_COMM_WORLD,&user->d)); 672 PetscCall(VecGetLocalSize(user->subd,&dsubnlocal)); 673 PetscCall(VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns)); 674 PetscCall(VecSetFromOptions(user->d)); 675 PetscCall(PetscMalloc1(user->ns,&user->di_scatter)); 676 677 PetscCall(VecGetOwnershipRange(user->d,&lo2,&hi2)); 678 istart = 0; 679 for (i=0; i<user->ns; i++) { 680 PetscCall(VecGetOwnershipRange(user->subd,&lo,&hi)); 681 PetscCall(ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d)); 682 PetscCall(VecScatterCreate(user->d,is_from_d,user->subd,NULL,&user->di_scatter[i])); 683 istart = istart + hi-lo; 684 PetscCall(ISDestroy(&is_from_d)); 685 } 686 687 PetscCall(PetscMalloc1(user->mx,&x)); 688 PetscCall(PetscMalloc1(user->mx,&y)); 689 PetscCall(PetscMalloc1(user->mx,&z)); 690 691 user->ksp_its = 0; 692 user->ksp_its_initial = 0; 693 694 n = user->mx * user->mx * user->mx; 695 m = 3 * user->mx * user->mx * (user->mx-1); 696 sqrt_beta = PetscSqrtScalar(user->beta); 697 698 PetscCall(VecCreate(PETSC_COMM_WORLD,&XX)); 699 PetscCall(VecCreate(PETSC_COMM_WORLD,&user->q)); 700 PetscCall(VecSetSizes(XX,ysubnlocal,n)); 701 PetscCall(VecSetSizes(user->q,ysubnlocal*user->ns,user->m)); 702 PetscCall(VecSetFromOptions(XX)); 703 PetscCall(VecSetFromOptions(user->q)); 704 705 PetscCall(VecDuplicate(XX,&YY)); 706 PetscCall(VecDuplicate(XX,&ZZ)); 707 PetscCall(VecDuplicate(XX,&XXwork)); 708 PetscCall(VecDuplicate(XX,&YYwork)); 709 PetscCall(VecDuplicate(XX,&ZZwork)); 710 PetscCall(VecDuplicate(XX,&UTwork)); 711 PetscCall(VecDuplicate(XX,&user->utrue)); 712 713 /* map for striding q */ 714 PetscCall(VecGetOwnershipRanges(user->q,&ranges)); 715 PetscCall(VecGetOwnershipRanges(user->u,&subranges)); 716 717 PetscCall(VecGetOwnershipRange(user->q,&lo2,&hi2)); 718 PetscCall(VecGetOwnershipRange(user->u,&lo,&hi)); 719 /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */ 720 h = 1.0/user->mx; 721 hinv = user->mx; 722 neg_hinv = -hinv; 723 724 PetscCall(VecGetOwnershipRange(XX,&istart,&iend)); 725 for (linear_index=istart; linear_index<iend; linear_index++) { 726 i = linear_index % user->mx; 727 j = ((linear_index-i)/user->mx) % user->mx; 728 k = ((linear_index-i)/user->mx-j) / user->mx; 729 vx = h*(i+0.5); 730 vy = h*(j+0.5); 731 vz = h*(k+0.5); 732 PetscCall(VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES)); 733 PetscCall(VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES)); 734 PetscCall(VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES)); 735 for (is=0; is<2; is++) { 736 for (js=0; js<2; js++) { 737 for (ks=0; ks<2; ks++) { 738 ls = is*4 + js*2 + ks; 739 if (ls<user->ns) { 740 l =ls*n + linear_index; 741 /* remap */ 742 subindex = l%n; 743 subvec = l/n; 744 nrank=0; 745 while (subindex >= subranges[nrank+1]) nrank++; 746 offset = subindex - subranges[nrank]; 747 istart=0; 748 for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]); 749 istart += (subranges[nrank+1]-subranges[nrank])*subvec; 750 l = istart+offset; 751 v = 100*PetscSinScalar(2*PETSC_PI*(vx+0.25*is))*PetscSinScalar(2*PETSC_PI*(vy+0.25*js))*PetscSinScalar(2*PETSC_PI*(vz+0.25*ks)); 752 PetscCall(VecSetValues(user->q,1,&l,&v,INSERT_VALUES)); 753 } 754 } 755 } 756 } 757 } 758 759 PetscCall(VecAssemblyBegin(XX)); 760 PetscCall(VecAssemblyEnd(XX)); 761 PetscCall(VecAssemblyBegin(YY)); 762 PetscCall(VecAssemblyEnd(YY)); 763 PetscCall(VecAssemblyBegin(ZZ)); 764 PetscCall(VecAssemblyEnd(ZZ)); 765 PetscCall(VecAssemblyBegin(user->q)); 766 PetscCall(VecAssemblyEnd(user->q)); 767 768 /* Compute true parameter function 769 ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */ 770 PetscCall(VecCopy(XX,XXwork)); 771 PetscCall(VecCopy(YY,YYwork)); 772 PetscCall(VecCopy(ZZ,ZZwork)); 773 774 PetscCall(VecShift(XXwork,-0.25)); 775 PetscCall(VecShift(YYwork,-0.25)); 776 PetscCall(VecShift(ZZwork,-0.25)); 777 778 PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork)); 779 PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork)); 780 PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork)); 781 782 PetscCall(VecCopy(XXwork,UTwork)); 783 PetscCall(VecAXPY(UTwork,1.0,YYwork)); 784 PetscCall(VecAXPY(UTwork,1.0,ZZwork)); 785 PetscCall(VecScale(UTwork,-20.0)); 786 PetscCall(VecExp(UTwork)); 787 PetscCall(VecCopy(UTwork,user->utrue)); 788 789 PetscCall(VecCopy(XX,XXwork)); 790 PetscCall(VecCopy(YY,YYwork)); 791 PetscCall(VecCopy(ZZ,ZZwork)); 792 793 PetscCall(VecShift(XXwork,-0.75)); 794 PetscCall(VecShift(YYwork,-0.75)); 795 PetscCall(VecShift(ZZwork,-0.75)); 796 797 PetscCall(VecPointwiseMult(XXwork,XXwork,XXwork)); 798 PetscCall(VecPointwiseMult(YYwork,YYwork,YYwork)); 799 PetscCall(VecPointwiseMult(ZZwork,ZZwork,ZZwork)); 800 801 PetscCall(VecCopy(XXwork,UTwork)); 802 PetscCall(VecAXPY(UTwork,1.0,YYwork)); 803 PetscCall(VecAXPY(UTwork,1.0,ZZwork)); 804 PetscCall(VecScale(UTwork,-20.0)); 805 PetscCall(VecExp(UTwork)); 806 807 PetscCall(VecAXPY(user->utrue,-1.0,UTwork)); 808 809 PetscCall(VecDestroy(&XX)); 810 PetscCall(VecDestroy(&YY)); 811 PetscCall(VecDestroy(&ZZ)); 812 PetscCall(VecDestroy(&XXwork)); 813 PetscCall(VecDestroy(&YYwork)); 814 PetscCall(VecDestroy(&ZZwork)); 815 PetscCall(VecDestroy(&UTwork)); 816 817 /* Initial guess and reference model */ 818 PetscCall(VecDuplicate(user->utrue,&user->ur)); 819 PetscCall(VecSum(user->utrue,&meanut)); 820 meanut = meanut / n; 821 PetscCall(VecSet(user->ur,meanut)); 822 PetscCall(VecCopy(user->ur,user->u)); 823 824 /* Generate Grad matrix */ 825 PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Grad)); 826 PetscCall(MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n)); 827 PetscCall(MatSetFromOptions(user->Grad)); 828 PetscCall(MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL)); 829 PetscCall(MatSeqAIJSetPreallocation(user->Grad,2,NULL)); 830 PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend)); 831 832 for (i=istart; i<iend; i++) { 833 if (i<m/3) { 834 iblock = i / (user->mx-1); 835 j = iblock*user->mx + (i % (user->mx-1)); 836 PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES)); 837 j = j+1; 838 PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES)); 839 } 840 if (i>=m/3 && i<2*m/3) { 841 iblock = (i-m/3) / (user->mx*(user->mx-1)); 842 j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1))); 843 PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES)); 844 j = j + user->mx; 845 PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES)); 846 } 847 if (i>=2*m/3) { 848 j = i-2*m/3; 849 PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES)); 850 j = j + user->mx*user->mx; 851 PetscCall(MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES)); 852 } 853 } 854 855 PetscCall(MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY)); 856 PetscCall(MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY)); 857 858 /* Generate arithmetic averaging matrix Av */ 859 PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Av)); 860 PetscCall(MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n)); 861 PetscCall(MatSetFromOptions(user->Av)); 862 PetscCall(MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL)); 863 PetscCall(MatSeqAIJSetPreallocation(user->Av,2,NULL)); 864 PetscCall(MatGetOwnershipRange(user->Av,&istart,&iend)); 865 866 for (i=istart; i<iend; i++) { 867 if (i<m/3) { 868 iblock = i / (user->mx-1); 869 j = iblock*user->mx + (i % (user->mx-1)); 870 PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES)); 871 j = j+1; 872 PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES)); 873 } 874 if (i>=m/3 && i<2*m/3) { 875 iblock = (i-m/3) / (user->mx*(user->mx-1)); 876 j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1))); 877 PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES)); 878 j = j + user->mx; 879 PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES)); 880 } 881 if (i>=2*m/3) { 882 j = i-2*m/3; 883 PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES)); 884 j = j + user->mx*user->mx; 885 PetscCall(MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES)); 886 } 887 } 888 889 PetscCall(MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY)); 890 PetscCall(MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY)); 891 892 PetscCall(MatCreate(PETSC_COMM_WORLD,&user->L)); 893 PetscCall(MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n)); 894 PetscCall(MatSetFromOptions(user->L)); 895 PetscCall(MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL)); 896 PetscCall(MatSeqAIJSetPreallocation(user->L,2,NULL)); 897 PetscCall(MatGetOwnershipRange(user->L,&istart,&iend)); 898 899 for (i=istart; i<iend; i++) { 900 if (i<m/3) { 901 iblock = i / (user->mx-1); 902 j = iblock*user->mx + (i % (user->mx-1)); 903 PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES)); 904 j = j+1; 905 PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES)); 906 } 907 if (i>=m/3 && i<2*m/3) { 908 iblock = (i-m/3) / (user->mx*(user->mx-1)); 909 j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1))); 910 PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES)); 911 j = j + user->mx; 912 PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES)); 913 } 914 if (i>=2*m/3 && i<m) { 915 j = i-2*m/3; 916 PetscCall(MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES)); 917 j = j + user->mx*user->mx; 918 PetscCall(MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES)); 919 } 920 if (i>=m) { 921 j = i - m; 922 PetscCall(MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES)); 923 } 924 } 925 PetscCall(MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY)); 926 PetscCall(MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY)); 927 PetscCall(MatScale(user->L,PetscPowScalar(h,1.5))); 928 929 /* Generate Div matrix */ 930 if (!user->use_ptap) { 931 /* Generate Div matrix */ 932 PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Div)); 933 PetscCall(MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m)); 934 PetscCall(MatSetFromOptions(user->Div)); 935 PetscCall(MatMPIAIJSetPreallocation(user->Div,4,NULL,4,NULL)); 936 PetscCall(MatSeqAIJSetPreallocation(user->Div,6,NULL)); 937 PetscCall(MatGetOwnershipRange(user->Grad,&istart,&iend)); 938 939 for (i=istart; i<iend; i++) { 940 if (i<m/3) { 941 iblock = i / (user->mx-1); 942 j = iblock*user->mx + (i % (user->mx-1)); 943 PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES)); 944 j = j+1; 945 PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES)); 946 } 947 if (i>=m/3 && i<2*m/3) { 948 iblock = (i-m/3) / (user->mx*(user->mx-1)); 949 j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1))); 950 PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES)); 951 j = j + user->mx; 952 PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES)); 953 } 954 if (i>=2*m/3) { 955 j = i-2*m/3; 956 PetscCall(MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES)); 957 j = j + user->mx*user->mx; 958 PetscCall(MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES)); 959 } 960 } 961 962 PetscCall(MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY)); 963 PetscCall(MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY)); 964 PetscCall(MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork)); 965 } else { 966 PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Diag)); 967 PetscCall(MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m)); 968 PetscCall(MatSetFromOptions(user->Diag)); 969 PetscCall(MatMPIAIJSetPreallocation(user->Diag,1,NULL,0,NULL)); 970 PetscCall(MatSeqAIJSetPreallocation(user->Diag,1,NULL)); 971 } 972 973 /* Build work vectors and matrices */ 974 PetscCall(VecCreate(PETSC_COMM_WORLD,&user->S)); 975 PetscCall(VecSetSizes(user->S, PETSC_DECIDE, m)); 976 PetscCall(VecSetFromOptions(user->S)); 977 978 PetscCall(VecCreate(PETSC_COMM_WORLD,&user->lwork)); 979 PetscCall(VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx)); 980 PetscCall(VecSetFromOptions(user->lwork)); 981 982 PetscCall(MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork)); 983 984 PetscCall(VecDuplicate(user->S,&user->Swork)); 985 PetscCall(VecDuplicate(user->S,&user->Sdiag)); 986 PetscCall(VecDuplicate(user->S,&user->Av_u)); 987 PetscCall(VecDuplicate(user->S,&user->Twork)); 988 PetscCall(VecDuplicate(user->y,&user->ywork)); 989 PetscCall(VecDuplicate(user->u,&user->Ywork)); 990 PetscCall(VecDuplicate(user->u,&user->uwork)); 991 PetscCall(VecDuplicate(user->u,&user->js_diag)); 992 PetscCall(VecDuplicate(user->c,&user->cwork)); 993 PetscCall(VecDuplicate(user->d,&user->dwork)); 994 995 /* Create a matrix-free shell user->Jd for computing B*x */ 996 PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd)); 997 PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult)); 998 PetscCall(MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose)); 999 1000 /* Compute true state function ytrue given utrue */ 1001 PetscCall(VecDuplicate(user->y,&user->ytrue)); 1002 1003 /* First compute Av_u = Av*exp(-u) */ 1004 PetscCall(VecSet(user->uwork, 0)); 1005 PetscCall(VecAXPY(user->uwork,-1.0,user->utrue)); /* Note: user->utrue */ 1006 PetscCall(VecExp(user->uwork)); 1007 PetscCall(MatMult(user->Av,user->uwork,user->Av_u)); 1008 1009 /* Next form DSG = Div*S*Grad */ 1010 PetscCall(VecCopy(user->Av_u,user->Swork)); 1011 PetscCall(VecReciprocal(user->Swork)); 1012 if (user->use_ptap) { 1013 PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES)); 1014 PetscCall(MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG)); 1015 } else { 1016 PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN)); 1017 PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Swork)); 1018 1019 PetscCall(MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG)); 1020 } 1021 1022 PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE)); 1023 PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE)); 1024 1025 if (user->use_lrc == PETSC_TRUE) { 1026 v=PetscSqrtReal(1.0 /user->ndesign); 1027 PetscCall(PetscMalloc1(user->ndesign,&user->ones)); 1028 1029 for (i=0;i<user->ndesign;i++) { 1030 user->ones[i]=v; 1031 } 1032 PetscCall(MatCreateDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones)); 1033 PetscCall(MatAssemblyBegin(user->Ones, MAT_FINAL_ASSEMBLY)); 1034 PetscCall(MatAssemblyEnd(user->Ones, MAT_FINAL_ASSEMBLY)); 1035 PetscCall(MatCreateLRC(user->DSG,user->Ones,NULL,user->Ones,&user->JsBlock)); 1036 PetscCall(MatSetUp(user->JsBlock)); 1037 } else { 1038 /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */ 1039 PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock)); 1040 PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult)); 1041 PetscCall(MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult)); 1042 } 1043 PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE)); 1044 PetscCall(MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE)); 1045 PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js)); 1046 PetscCall(MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult)); 1047 PetscCall(MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult)); 1048 PetscCall(MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE)); 1049 PetscCall(MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE)); 1050 1051 PetscCall(MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv)); 1052 PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult)); 1053 PetscCall(MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult)); 1054 PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE)); 1055 PetscCall(MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE)); 1056 1057 PetscCall(MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE)); 1058 PetscCall(MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE)); 1059 /* Now solve for ytrue */ 1060 PetscCall(KSPCreate(PETSC_COMM_WORLD,&user->solver)); 1061 PetscCall(KSPSetFromOptions(user->solver)); 1062 1063 PetscCall(KSPSetOperators(user->solver,user->JsBlock,user->DSG)); 1064 1065 PetscCall(MatMult(user->JsInv,user->q,user->ytrue)); 1066 /* First compute Av_u = Av*exp(-u) */ 1067 PetscCall(VecSet(user->uwork,0)); 1068 PetscCall(VecAXPY(user->uwork,-1.0,user->u)); /* Note: user->u */ 1069 PetscCall(VecExp(user->uwork)); 1070 PetscCall(MatMult(user->Av,user->uwork,user->Av_u)); 1071 1072 /* Next update DSG = Div*S*Grad with user->u */ 1073 PetscCall(VecCopy(user->Av_u,user->Swork)); 1074 PetscCall(VecReciprocal(user->Swork)); 1075 if (user->use_ptap) { 1076 PetscCall(MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES)); 1077 PetscCall(MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG)); 1078 } else { 1079 PetscCall(MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN)); 1080 PetscCall(MatDiagonalScale(user->Divwork,NULL,user->Av_u)); 1081 PetscCall(MatProductNumeric(user->DSG)); 1082 } 1083 1084 /* Now solve for y */ 1085 1086 PetscCall(MatMult(user->JsInv,user->q,user->y)); 1087 1088 user->ksp_its_initial = user->ksp_its; 1089 user->ksp_its = 0; 1090 /* Construct projection matrix Q (blocks) */ 1091 PetscCall(MatCreate(PETSC_COMM_WORLD,&user->Q)); 1092 PetscCall(MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign)); 1093 PetscCall(MatSetFromOptions(user->Q)); 1094 PetscCall(MatMPIAIJSetPreallocation(user->Q,8,NULL,8,NULL)); 1095 PetscCall(MatSeqAIJSetPreallocation(user->Q,8,NULL)); 1096 1097 for (i=0; i<user->mx; i++) { 1098 x[i] = h*(i+0.5); 1099 y[i] = h*(i+0.5); 1100 z[i] = h*(i+0.5); 1101 } 1102 PetscCall(MatGetOwnershipRange(user->Q,&istart,&iend)); 1103 1104 nx = user->mx; ny = user->mx; nz = user->mx; 1105 for (i=istart; i<iend; i++) { 1106 1107 xri = xr[i]; 1108 im = 0; 1109 xim = x[im]; 1110 while (xri>xim && im<nx) { 1111 im = im+1; 1112 xim = x[im]; 1113 } 1114 indx1 = im-1; 1115 indx2 = im; 1116 dx1 = xri - x[indx1]; 1117 dx2 = x[indx2] - xri; 1118 1119 yri = yr[i]; 1120 im = 0; 1121 yim = y[im]; 1122 while (yri>yim && im<ny) { 1123 im = im+1; 1124 yim = y[im]; 1125 } 1126 indy1 = im-1; 1127 indy2 = im; 1128 dy1 = yri - y[indy1]; 1129 dy2 = y[indy2] - yri; 1130 1131 zri = zr[i]; 1132 im = 0; 1133 zim = z[im]; 1134 while (zri>zim && im<nz) { 1135 im = im+1; 1136 zim = z[im]; 1137 } 1138 indz1 = im-1; 1139 indz2 = im; 1140 dz1 = zri - z[indz1]; 1141 dz2 = z[indz2] - zri; 1142 1143 Dx = x[indx2] - x[indx1]; 1144 Dy = y[indy2] - y[indy1]; 1145 Dz = z[indz2] - z[indz1]; 1146 1147 j = indx1 + indy1*nx + indz1*nx*ny; 1148 v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz); 1149 PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES)); 1150 1151 j = indx1 + indy1*nx + indz2*nx*ny; 1152 v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz); 1153 PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES)); 1154 1155 j = indx1 + indy2*nx + indz1*nx*ny; 1156 v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz); 1157 PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES)); 1158 1159 j = indx1 + indy2*nx + indz2*nx*ny; 1160 v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz); 1161 PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES)); 1162 1163 j = indx2 + indy1*nx + indz1*nx*ny; 1164 v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz); 1165 PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES)); 1166 1167 j = indx2 + indy1*nx + indz2*nx*ny; 1168 v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz); 1169 PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES)); 1170 1171 j = indx2 + indy2*nx + indz1*nx*ny; 1172 v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz); 1173 PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES)); 1174 1175 j = indx2 + indy2*nx + indz2*nx*ny; 1176 v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz); 1177 PetscCall(MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES)); 1178 } 1179 1180 PetscCall(MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY)); 1181 PetscCall(MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY)); 1182 /* Create MQ (composed of blocks of Q */ 1183 PetscCall(MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ)); 1184 PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult)); 1185 PetscCall(MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose)); 1186 1187 /* Add noise to the measurement data */ 1188 PetscCall(VecSet(user->ywork,1.0)); 1189 PetscCall(VecAYPX(user->ywork,user->noise,user->ytrue)); 1190 PetscCall(MatMult(user->MQ,user->ywork,user->d)); 1191 1192 /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */ 1193 PetscCall(PetscFree(x)); 1194 PetscCall(PetscFree(y)); 1195 PetscCall(PetscFree(z)); 1196 PetscCall(PetscLogStagePop()); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 PetscErrorCode EllipticDestroy(AppCtx *user) 1201 { 1202 PetscInt i; 1203 1204 PetscFunctionBegin; 1205 PetscCall(MatDestroy(&user->DSG)); 1206 PetscCall(KSPDestroy(&user->solver)); 1207 PetscCall(MatDestroy(&user->Q)); 1208 PetscCall(MatDestroy(&user->MQ)); 1209 if (!user->use_ptap) { 1210 PetscCall(MatDestroy(&user->Div)); 1211 PetscCall(MatDestroy(&user->Divwork)); 1212 } else { 1213 PetscCall(MatDestroy(&user->Diag)); 1214 } 1215 if (user->use_lrc) { 1216 PetscCall(MatDestroy(&user->Ones)); 1217 } 1218 1219 PetscCall(MatDestroy(&user->Grad)); 1220 PetscCall(MatDestroy(&user->Av)); 1221 PetscCall(MatDestroy(&user->Avwork)); 1222 PetscCall(MatDestroy(&user->L)); 1223 PetscCall(MatDestroy(&user->Js)); 1224 PetscCall(MatDestroy(&user->Jd)); 1225 PetscCall(MatDestroy(&user->JsBlock)); 1226 PetscCall(MatDestroy(&user->JsInv)); 1227 1228 PetscCall(VecDestroy(&user->x)); 1229 PetscCall(VecDestroy(&user->u)); 1230 PetscCall(VecDestroy(&user->uwork)); 1231 PetscCall(VecDestroy(&user->utrue)); 1232 PetscCall(VecDestroy(&user->y)); 1233 PetscCall(VecDestroy(&user->ywork)); 1234 PetscCall(VecDestroy(&user->ytrue)); 1235 PetscCall(VecDestroy(&user->c)); 1236 PetscCall(VecDestroy(&user->cwork)); 1237 PetscCall(VecDestroy(&user->ur)); 1238 PetscCall(VecDestroy(&user->q)); 1239 PetscCall(VecDestroy(&user->d)); 1240 PetscCall(VecDestroy(&user->dwork)); 1241 PetscCall(VecDestroy(&user->lwork)); 1242 PetscCall(VecDestroy(&user->S)); 1243 PetscCall(VecDestroy(&user->Swork)); 1244 PetscCall(VecDestroy(&user->Sdiag)); 1245 PetscCall(VecDestroy(&user->Ywork)); 1246 PetscCall(VecDestroy(&user->Twork)); 1247 PetscCall(VecDestroy(&user->Av_u)); 1248 PetscCall(VecDestroy(&user->js_diag)); 1249 PetscCall(ISDestroy(&user->s_is)); 1250 PetscCall(ISDestroy(&user->d_is)); 1251 PetscCall(VecDestroy(&user->suby)); 1252 PetscCall(VecDestroy(&user->subd)); 1253 PetscCall(VecDestroy(&user->subq)); 1254 PetscCall(VecScatterDestroy(&user->state_scatter)); 1255 PetscCall(VecScatterDestroy(&user->design_scatter)); 1256 for (i=0;i<user->ns;i++) { 1257 PetscCall(VecScatterDestroy(&user->yi_scatter[i])); 1258 PetscCall(VecScatterDestroy(&user->di_scatter[i])); 1259 } 1260 PetscCall(PetscFree(user->yi_scatter)); 1261 PetscCall(PetscFree(user->di_scatter)); 1262 if (user->use_lrc) { 1263 PetscCall(PetscFree(user->ones)); 1264 PetscCall(MatDestroy(&user->Ones)); 1265 } 1266 PetscFunctionReturn(0); 1267 } 1268 1269 PetscErrorCode EllipticMonitor(Tao tao, void *ptr) 1270 { 1271 Vec X; 1272 PetscReal unorm,ynorm; 1273 AppCtx *user = (AppCtx*)ptr; 1274 1275 PetscFunctionBegin; 1276 PetscCall(TaoGetSolution(tao,&X)); 1277 PetscCall(Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter)); 1278 PetscCall(VecAXPY(user->ywork,-1.0,user->ytrue)); 1279 PetscCall(VecAXPY(user->uwork,-1.0,user->utrue)); 1280 PetscCall(VecNorm(user->uwork,NORM_2,&unorm)); 1281 PetscCall(VecNorm(user->ywork,NORM_2,&ynorm)); 1282 PetscCall(PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm)); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 /*TEST 1287 1288 build: 1289 requires: !complex 1290 1291 test: 1292 args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11 1293 requires: !single 1294 1295 test: 1296 suffix: 2 1297 args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3 1298 requires: !single 1299 1300 TEST*/ 1301