1 #ifdef HAVE_PETSC
#include <stdint.h>   /* uint64_t (used by the get_time prototypes below) */
#include <stdio.h>
#include <stdlib.h>   /* malloc/free -- previously relied on an implicit declaration */

#include "petscsys.h"
#include "petscvec.h"
#include "petscmat.h"
#include "petscpc.h"
#include "petscksp.h"

#include "assert.h"
#include "common_c.h"

#include <FCMangle.h>
// Fortran<->C name mangling for the routines defined in, or called from,
// this file (element formation, statistics reporting, timing helpers).
#define SolGMRp FortranCInterface_GLOBAL_(solgmrp,SOLGMRP)
#define SolGMRpSclr FortranCInterface_GLOBAL_(solgmrpsclr,SOLGMRPSCLR)
#define ElmGMRPETSc FortranCInterface_GLOBAL_(elmgmrpetsc, ELMGMRPETSC)
#define ElmGMRPETScSclr FortranCInterface_GLOBAL_(elmgmrpetscsclr, ELMGMRPETSCSCLR)
#define rstatp FortranCInterface_GLOBAL_(rstatp, RSTATP)
#define rstatpSclr FortranCInterface_GLOBAL_(rstatpsclr, RSTATPSCLR)
// NOTE: classic function-like min/max -- both arguments are evaluated twice,
// so never pass expressions with side effects.
#define min(a,b) (((a) < (b)) ? (a) : (b))
#define max(a,b) (((a) > (b)) ? (a) : (b))
#define get_time FortranCInterface_GLOBAL_(get_time,GET_TIME)
#define get_max_time_diff FortranCInterface_GLOBAL_(get_max_time_diff,GET_MAX_TIME_DIFF)

// Global (compacted) node-number type; must be wide enough for nshgt.
typedef long long int gcorp_t;

// Timing helpers implemented on the Fortran side.
void get_time(uint64_t* rv, uint64_t* cycle);
void get_max_time_diff(uint64_t* first, uint64_t* last, uint64_t* c_first, uint64_t* c_last, char* lbl);

//#include "auxmpi.h"
// static PetscOffset poff;

// ---- PETSc state for the FLOW system (created on SolGMRp's first call) ----
static Mat lhsP;                 // assembled LHS, block AIJ with block size nflow
static PC pc;
static KSP ksp;
static Vec DyP, resP, DyPLocal;  // solution, residual, locally-gathered solution
static PetscErrorCode ierr;      // NOTE(review): stored but never checked (no CHKERRQ)
static PetscInt PetscOne, PetscRow, PetscCol, LocalRow, LocalCol;
static IS LocalIndexSet;
static ISLocalToGlobalMapping VectorMapping;
static VecScatter scatter7;
static int firstpetsccall = 1;   // 1 until SolGMRp completes its one-time setup
static PetscInt maxitsHist;
static PetscReal* resHist;       // residual-history buffer handed to the KSP

// ---- PETSc state for the SCALAR system (created on SolGMRpSclr's first call) ----
static Mat lhsPs;
static PC pcs;
static KSP ksps;
static Vec DyPs, resPs, DyPLocals;
static IS LocalIndexSets;
static ISLocalToGlobalMapping VectorMappings;
static VecScatter scatter7s;
static int firstpetsccalls = 1;  // 1 until SolGMRpSclr completes its one-time setup

// Rank whose per-node debug info gets dumped (-1 disables).
static int rankdump=-1; // 8121 was the problem rank with 3.5.3
PetscReal resNrm;                // 2-norm of the assembled residual, reported via rstatp
58
SolGMRp(double * y,double * ac,double * yold,double * x,int * iBC,double * BC,int * col,int * row,double * lhsk,double * res,double * BDiag,int * iper,int * ilwork,double * shp,double * shgl,double * shpb,double * shglb,double * Dy,double * rerr,long long int * fncorp)59 void SolGMRp(double* y, double* ac, double* yold,
60 double* x, int* iBC, double* BC,
61 int* col, int* row, double* lhsk,
62 double* res, double* BDiag,
63 int* iper,
64 int* ilwork, double* shp, double* shgl, double* shpb,
65 double* shglb, double* Dy, double* rerr, long long int* fncorp)
66 {
67 //
68 // ----------------------------------------------------------------------
69 //
70 // This is the preconditioned GMRES driver routine.
71 //
72 // input:
73 // y (nshg,ndof) : Y-variables at n+alpha_v
74 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m
75 // yold (nshg,ndof) : Y-variables at beginning of step
76 // acold (nshg,ndof) : Primvar. accel. variable at begng step
77 // x (numnp,nsd) : node coordinates
78 // iBC (nshg) : BC codes
79 // BC (nshg,ndofBC) : BC constraint parameters
80 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary)
81 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
82 //
83 // output:
84 // res (nshg,nflow) : preconditioned residual
85 // BDiag (nshg,nflow,nflow) : block-diagonal preconditioner
86 //
87 //
88 // Zdenek Johan, Winter 1991. (Fortran 90)
89 // ----------------------------------------------------------------------
90 //
91
92
93 // Get variables from common_c.h
94 int nshg, nflow, nsd, iownnodes;
95 nshg = conpar.nshg;
96 nflow = conpar.nflow;
97 nsd = NSD;
98 iownnodes = conpar.iownnodes;
99
100 gcorp_t nshgt;
101 gcorp_t mbeg;
102 gcorp_t mend;
103 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
104 mbeg = (gcorp_t) newdim.minowned;
105 mend = (gcorp_t) newdim.maxowned;
106
107
108 int node, element, var, eqn;
109 double valtoinsert;
110 int nenl, iel, lelCat, lcsyst, iorder, nshl;
111 int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
112 double DyFlat[nshg*nflow];
113 double DyFlatPhasta[nshg*nflow];
114 double rmes[nshg*nflow];
115 // DEBUG
116 int i,j,k,l,m;
117
118 // FIXME: PetscScalar
119 double real_rtol, real_abstol, real_dtol;
120 // /DEBUG
121 //double parray[1]; // Should be a PetscScalar
122 double *parray; // Should be a PetscScalar
123 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
124 PetscInt petsc_n;
125 PetscOne = 1;
126 uint64_t duration[4];
127
128 gcorp_t glbNZ;
129
130 if(firstpetsccall == 1) {
131 //Everthing in this conditional block should be moved to a function to estimate the size of PETSc's matrix which improves time on the first matrix fill
132 //
133 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
134 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
135 for(i=0;i<iownnodes;i++) {
136 idiagnz[i]=0;
137 iodiagnz[i]=0;
138 }
139 i=0;
140 for(k=0;k<nshg;k++) {
141 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
142 // this node is not owned by this rank so we skip
143 } else {
144 for(j=col[i]-1;j<col[i+1]-1;j++) {
145 // assert(row[j]<=nshg);
146 // assert(fncorp[row[j]-1]<=nshgt);
147 glbNZ=fncorp[row[j]-1];
148 if((glbNZ < mbeg) || (glbNZ > mend)) {
149 iodiagnz[i]++;
150 } else {
151 idiagnz[i]++;
152 }
153 }
154 i++;
155 }
156 }
157 gcorp_t mind=1000;
158 gcorp_t mino=1000;
159 gcorp_t maxd=0;
160 gcorp_t maxo=0;
161 for(i=0;i<iownnodes;i++) {
162 mind=min(mind,idiagnz[i]);
163 mino=min(mino,iodiagnz[i]);
164 maxd=max(maxd,idiagnz[i]);
165 maxo=max(maxo,iodiagnz[i]);
166 // iodiagnz[i]=max(iodiagnz[i],10);
167 // idiagnz[i]=max(idiagnz[i],10);
168 // iodiagnz[i]=2*iodiagnz[i]; // estimate a bit higher for off-part interactions
169 // idiagnz[i]=2*idiagnz[i]; // estimate a bit higher for off-part interactions
170 }
171 // the above was pretty good but below is faster and not too much more memory...of course once you do this
172 // could just use the constant fill parameters in create but keep it alive for potential later optimization
173
174 for(i=0;i<iownnodes;i++) {
175 iodiagnz[i]=1.3*maxd;
176 idiagnz[i]=1.3*maxd;
177 }
178
179
180
181 if(workfc.numpe < 200){
182 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
183 printf("myrank,mind,maxd,mino,maxo %d %d %d %d %d \n",workfc.myrank,mind,maxd,mino,maxo);
184 }
185 // Print debug info
186 if(nshgt < 200){
187 int irank;
188 for(irank=0;irank<workfc.numpe;irank++) {
189 if(irank == workfc.myrank){
190 printf("mbeg,mend,myrank,idiagnz, iodiagnz %d %d %d \n",mbeg,mend,workfc.myrank);
191 for(i=0;i<iownnodes;i++) {
192 printf("%d %ld %ld \n",workfc.myrank,idiagnz[i],iodiagnz[i]);
193 }
194 }
195 MPI_Barrier(MPI_COMM_WORLD);
196 }
197 }
198 petsc_bs = (PetscInt) nflow;
199 petsc_m = (PetscInt) nflow* (PetscInt) iownnodes;
200 petsc_M = (PetscInt) nshgt * (PetscInt) nflow;
201 petsc_PA = (PetscInt) 40;
202 ierr = MatCreateBAIJ(PETSC_COMM_WORLD, petsc_bs, petsc_m, petsc_m, petsc_M, petsc_M,
203 0, idiagnz, 0, iodiagnz, &lhsP);
204
205 ierr = MatSetOption(lhsP, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
206
207 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
208 #ifdef JEDBROWN
209 ierr = MatSetOption(lhsP, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
210 #endif
211 ierr = MatSetUp(lhsP);
212
213 PetscInt myMatStart, myMatEnd;
214 ierr = MatGetOwnershipRange(lhsP, &myMatStart, &myMatEnd);
215 //debug
216 if(workfc.myrank == rankdump) printf("Flow myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd);
217 }
218 if(genpar.lhs ==1) ierr = MatZeroEntries(lhsP);
219
220 //
221 // .... *******************>> Element Data Formation <<******************
222 //
223 //
224 // .... set the parameters for flux and surface tension calculations
225 //
226 //
227 genpar.idflx = 0 ;
228 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd;
229 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
230
231 get_time(duration, (duration+1));
232
233 ElmGMRPETSc(y, ac, x,
234 shp, shgl, iBC,
235 BC, shpb,
236 shglb, res,
237 rmes, iper,
238 ilwork, rerr , &lhsP);
239
240 get_time((duration+2), (duration+3));
241 get_max_time_diff((duration), (duration+2),
242 (duration+1), (duration+3),
243 "ElmGMRPETSc \0"); // char(0))
244 if(firstpetsccall == 1) {
245 // Setup IndexSet. For now, we mimic vector insertion procedure
246 // Since we always reference by global indexes this doesn't matter
247 // except for cache performance)
248 // TODO: Better arrangment?
249 PetscInt* indexsetary = malloc(sizeof(PetscInt)*nflow*nshg);
250 PetscInt nodetoinsert;
251 nodetoinsert = 0;
252 k=0;
253 //debug
254 if(workfc.myrank == rankdump) {
255 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
256 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend);
257 }
258 if(workfc.numpe > 1) {
259 for (i=0; i<nshg ; i++) {
260 nodetoinsert = fncorp[i]-1;
261 //debug
262 if(workfc.myrank == rankdump) {
263 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert);
264 }
265
266 // assert(fncorp[i]>=0);
267 for (j=1; j<=nflow; j++) {
268 indexsetary[k] = nodetoinsert*nflow+(j-1);
269 assert(indexsetary[k]>=0);
270 // assert(fncorp[i]>=0);
271 k = k+1;
272 }
273 }
274 }
275 else {
276 for (i=0; i<nshg ; i++) {
277 nodetoinsert = i;
278 for (j=1; j<=nflow; j++) {
279 indexsetary[k] = nodetoinsert*nflow+(j-1);
280 k = k+1;
281 }
282 }
283 }
284
285 // Create Vector Index Maps
286 petsc_n = (PetscInt) nshg * (PetscInt) nflow;
287 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetary,
288 PETSC_COPY_VALUES, &LocalIndexSet);
289 free(indexsetary);
290 }
291 if(genpar.lhs == 1) {
292 get_time((duration), (duration+1));
293 ierr = MatAssemblyBegin(lhsP, MAT_FINAL_ASSEMBLY);
294 ierr = MatAssemblyEnd(lhsP, MAT_FINAL_ASSEMBLY);
295 get_time((duration+2),(duration+3));
296 get_max_time_diff((duration), (duration+2),
297 (duration+1), (duration+3),
298 "MatAssembly \0"); // char(0))
299 get_time(duration, (duration+1));
300 }
301 if(firstpetsccall == 1) {
302 ierr = MatGetLocalSize(lhsP, &LocalRow, &LocalCol);
303 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resP);
304 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyP);
305 }
306 ierr = VecZeroEntries(resP);
307 if(firstpetsccall == 1) {
308 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocal);
309 }
310
311 PetscRow=0;
312 k = 0;
313 int index;
314 for (i=0; i<nshg; i++ ){
315 for (j = 1; j<=nflow; j++){
316 index = i + (j-1)*nshg;
317 valtoinsert = res[index];
318 if(workfc.numpe > 1) {
319 PetscRow = (fncorp[i]-1)*nflow+(j-1);
320 }
321 else {
322 PetscRow = i*nflow+(j-1);
323 }
324 assert(fncorp[i]<=nshgt);
325 assert(fncorp[i]>0);
326 assert(PetscRow>=0);
327 assert(PetscRow<=nshgt*nflow);
328 ierr = VecSetValue(resP, PetscRow, valtoinsert, ADD_VALUES);
329 }
330 }
331 ierr = VecAssemblyBegin(resP);
332 ierr = VecAssemblyEnd(resP);
333 ierr = VecNorm(resP,NORM_2,&resNrm);
334 get_time((duration+2), (duration+3));
335 get_max_time_diff((duration), (duration+2),
336 (duration+1), (duration+3),
337 "VectorWorkPre \0"); // char(0))
338
339 get_time((duration),(duration+1));
340
341 if(firstpetsccall == 1) {
342 ierr = KSPCreate(PETSC_COMM_WORLD, &ksp);
343 ierr = KSPSetOperators(ksp, lhsP, lhsP);
344 ierr = KSPGetPC(ksp, &pc);
345 ierr = PCSetType(pc, PCPBJACOBI);
346 PetscInt maxits;
347 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace;
348 ierr = KSPSetTolerances(ksp, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
349 ierr = KSPSetFromOptions(ksp);
350 maxitsHist=1000;
351 resHist= (PetscReal*) malloc(sizeof(PetscReal)*maxitsHist);
352 ierr = KSPSetResidualHistory(ksp,resHist,maxitsHist, PETSC_TRUE);
353 }
354 ierr = KSPSolve(ksp, resP, DyP);
355 ierr = KSPGetResidualHistory(ksp,&resHist,&maxitsHist);
356 PetscReal resNrmP=resHist[0];
357 get_time((duration+2),(duration+3));
358 get_max_time_diff((duration), (duration+2),
359 (duration+1), (duration+3),
360 "KSPSolve \0"); // char(0))
361 get_time((duration),(duration+1));
362 if(firstpetsccall == 1) {
363 ierr = VecScatterCreate(DyP, LocalIndexSet, DyPLocal, NULL, &scatter7);
364 }
365 ierr = VecScatterBegin(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
366 ierr = VecScatterEnd(scatter7, DyP, DyPLocal, INSERT_VALUES, SCATTER_FORWARD);
367 ierr = VecGetArray(DyPLocal, &parray);
368 PetscRow = 0;
369 for ( node = 0; node< nshg; node++) {
370 for (var = 1; var<= nflow; var++) {
371 index = node + (var-1)*nshg;
372 Dy[index] = parray[PetscRow];
373 PetscRow = PetscRow+1;
374 }
375 }
376 ierr = VecRestoreArray(DyPLocal, &parray);
377
378 firstpetsccall = 0;
379
380 // .... output the statistics
381 //
382 itrpar.iKs=0; // see rstat()
383 PetscInt its;
384 ierr = KSPGetIterationNumber(ksp, &its);
385 itrpar.iKs = (int) its;
386 /*
387 PetscReal scale=1.0/sqrt(1.0*nshgt);
388 if(workfc.myrank ==0) {
389 printf("node resNrmP rosqrtNshgt\n");
390 for ( node = 0; node<its; node++) {
391 printf(" %d %f %f \n",node,resHist[node],scale*resHist[node]);
392 }
393 }
394 */
395 get_time((duration+2),(duration+3));
396 get_max_time_diff((duration), (duration+2),
397 (duration+1), (duration+3),
398 "solWork \0"); // char(0))
399 itrpar.ntotGM += (int) its;
400 rstatp (&resNrm,&resNrmP);
401 //
402 // .... end
403 //
404 }
405
SolGMRpSclr(double * y,double * ac,double * x,double * elDw,int * iBC,double * BC,int * col,int * row,int * iper,int * ilwork,double * shp,double * shgl,double * shpb,double * shglb,double * res,double * Dy,long long int * fncorp)406 void SolGMRpSclr(double* y, double* ac,
407 double* x, double* elDw, int* iBC, double* BC,
408 int* col, int* row,
409 int* iper,
410 int* ilwork, double* shp, double* shgl, double* shpb,
411 double* shglb, double* res, double* Dy, long long int* fncorp)
412 {
413 //
414 // ----------------------------------------------------------------------
415 //
416 // This is the preconditioned GMRES driver routine.
417 //
418 // input:
419 // y (nshg,ndof) : Y-variables at n+alpha_v
420 // ac (nshg,ndof) : Primvar. accel. variable n+alpha_m
421 // yold (nshg,ndof) : Y-variables at beginning of step
422 // acold (nshg,ndof) : Primvar. accel. variable at begng step
423 // x (numnp,nsd) : node coordinates
424 // iBC (nshg) : BC codes
425 // BC (nshg,ndofBC) : BC constraint parameters
426 // shp(b) (nen,maxsh,melCat) : element shape functions (boundary)
427 // shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
428 //
429 // ----------------------------------------------------------------------
430 //
431
432
433 // Get variables from common_c.h
434 int nshg, nflow, nsd, iownnodes;
435 nshg = conpar.nshg;
436 nsd = NSD;
437 iownnodes = conpar.iownnodes;
438
439 gcorp_t nshgt;
440 gcorp_t mbeg;
441 gcorp_t mend;
442 nshgt = (gcorp_t) newdim.nshgt; //Fix nshgt in common_c.h
443 mbeg = (gcorp_t) newdim.minowned;
444 mend = (gcorp_t) newdim.maxowned;
445
446
447 int node, element, var, eqn;
448 double valtoinsert;
449 int nenl, iel, lelCat, lcsyst, iorder, nshl;
450 int mattyp, ndofl, nsymdl, npro, ngauss, nppro;
451 double DyFlats[nshg];
452 double DyFlatPhastas[nshg];
453 double rmes[nshg];
454 // DEBUG
455 int i,j,k,l,m;
456
457 double real_rtol, real_abstol, real_dtol;
458 double *parray;
459 PetscInt petsc_bs, petsc_m, petsc_M,petsc_PA;
460 PetscInt petsc_n;
461 PetscOne = 1;
462 uint64_t duration[4];
463
464
465 //
466 //
467 // .... *******************>> Element Data Formation <<******************
468 //
469 //
470 // .... set the parameters for flux and surface tension calculations
471 //
472 //
473 genpar.idflx = 0 ;
474 if(genpar.idiff >= 1) genpar.idflx= genpar.idflx + (nflow-1) * nsd;
475 if (genpar.isurf == 1) genpar.idflx=genpar.idflx + nsd;
476
477
478
479 gcorp_t glbNZ;
480
481 if(firstpetsccalls == 1) {
482 PetscInt* idiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
483 PetscInt* iodiagnz= (PetscInt*) malloc(sizeof(PetscInt)*iownnodes);
484 for(i=0;i<iownnodes;i++) {
485 idiagnz[i]=0;
486 iodiagnz[i]=0;
487 }
488 i=0;
489 for(k=0;k<nshg;k++) {
490 if((fncorp[k] < mbeg) || (fncorp[k] >mend)){
491 // this node is not owned by this rank so we skip
492 } else {
493 for(j=col[i]-1;j<col[i+1]-1;j++) {
494 glbNZ=fncorp[row[j]-1];
495 if((glbNZ < mbeg) || (glbNZ > mend)) {
496 iodiagnz[i]++;
497 } else {
498 idiagnz[i]++;
499 }
500 }
501 i++;
502 }
503 }
504 gcorp_t mind=1000;
505 gcorp_t mino=1000;
506 gcorp_t maxd=0;
507 gcorp_t maxo=0;
508 for(i=0;i<iownnodes;i++) {
509 mind=min(mind,idiagnz[i]);
510 mino=min(mino,iodiagnz[i]);
511 maxd=max(maxd,idiagnz[i]);
512 maxo=max(maxo,iodiagnz[i]);
513 }
514
515 for(i=0;i<iownnodes;i++) {
516 iodiagnz[i]=1.3*maxd;
517 idiagnz[i]=1.3*maxd;
518 }
519
520
521
522 // }
523 // if(firstpetsccalls == 1) {
524
525 petsc_m = (PetscInt) iownnodes;
526 petsc_M = (PetscInt) nshgt;
527 petsc_PA = (PetscInt) 40;
528
529 ierr = MatCreateAIJ(PETSC_COMM_WORLD, petsc_m, petsc_m, petsc_M, petsc_M,
530 0, idiagnz, 0, iodiagnz, &lhsPs);
531
532 ierr = MatSetOption(lhsPs, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
533
534 // Next is Jed Brown's improvement to imprint Assembly to make that stage scalable after the first call
535 #ifdef HIDEJEDBROWN
536 ierr = MatSetOption(lhsPs, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE);
537 #endif
538 ierr = MatSetUp(lhsPs);
539
540 PetscInt myMatStart, myMatEnd;
541 ierr = MatGetOwnershipRange(lhsPs, &myMatStart, &myMatEnd);
542 if(workfc.myrank ==rankdump) printf("Sclr myrank,myMatStart,myMatEnd %d,%d,%d, \n", workfc.myrank,myMatStart,myMatEnd);
543 }
544 // MPI_Barrier(MPI_COMM_WORLD);
545 // if(workfc.myrank ==0) printf("Before MatZeroEntries \n");
546 if(genpar.lhs == 1) ierr = MatZeroEntries(lhsPs);
547
548 get_time(duration, (duration+1));
549
550 // MPI_Barrier(MPI_COMM_WORLD);
551 // if(workfc.myrank ==0) printf("Before elmgmr \n");
552 // for (i=0; i<nshg ; i++) {
553 // assert(fncorp[i]>0);
554 // }
555
556 ElmGMRPETScSclr(y, ac, x, elDw,
557 shp, shgl, iBC,
558 BC, shpb,
559 shglb, res,
560 rmes, iper,
561 ilwork, &lhsPs);
562 if(firstpetsccalls == 1) {
563 // Setup IndexSet. For now, we mimic vector insertion procedure
564 // Since we always reference by global indexes this doesn't matter
565 // except for cache performance)
566 // TODO: Better arrangment?
567
568 PetscInt* indexsetarys = malloc(sizeof(PetscInt)*nshg);
569 PetscInt nodetoinsert;
570
571 nodetoinsert = 0;
572 k=0;
573
574 //debug
575 if(workfc.myrank == rankdump) {
576 printf("myrank,i,iownnodes,nshg %d %d %d %d \n",workfc.myrank,i,iownnodes,nshg);
577 printf("myrank,mbeg,mend %d %d %d \n",workfc.myrank,mbeg,mend);
578 }
579
580 if(workfc.numpe > 1) {
581 for (i=0; i<nshg ; i++) {
582 nodetoinsert = fncorp[i]-1;
583 //debug
584 if(workfc.myrank == rankdump) {
585 printf("myrank,i,nodetoinsert %d %d %d \n",workfc.myrank,i,nodetoinsert);
586 }
587
588 indexsetarys[k] = nodetoinsert;
589 k = k+1;
590 }
591 }
592 else {
593 for (i=0; i<nshg ; i++) {
594 nodetoinsert = i;
595 indexsetarys[k] = nodetoinsert;
596 k = k+1;
597 }
598 }
599
600 // Create Vector Index Maps
601 petsc_n = (PetscInt) nshg;
602 ierr = ISCreateGeneral(PETSC_COMM_SELF, petsc_n, indexsetarys,
603 PETSC_COPY_VALUES, &LocalIndexSets);
604 free(indexsetarys);
605 }
606 if(genpar.lhs ==1) {
607 ierr = MatAssemblyBegin(lhsPs, MAT_FINAL_ASSEMBLY);
608 ierr = MatAssemblyEnd(lhsPs, MAT_FINAL_ASSEMBLY);
609 }
610 if(firstpetsccalls == 1) {
611 ierr = MatGetLocalSize(lhsPs, &LocalRow, &LocalCol);
612 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &resPs);
613 ierr = VecCreateMPI(PETSC_COMM_WORLD, LocalRow, petsc_M, &DyPs);
614 }
615 ierr = VecZeroEntries(resPs);
616 if(firstpetsccalls == 1) {
617 ierr = VecCreateSeq(PETSC_COMM_SELF, petsc_n, &DyPLocals);
618 }
619
620 PetscRow=0;
621 k = 0;
622 int index;
623 for (i=0; i<nshg; i++ ){
624 valtoinsert = res[i];
625 if(workfc.numpe > 1) {
626 PetscRow = (fncorp[i]-1);
627 }
628 else {
629 PetscRow = i;
630 }
631 assert(fncorp[i]<=nshgt);
632 assert(fncorp[i]>0);
633 assert(PetscRow>=0);
634 assert(PetscRow<=nshgt);
635 ierr = VecSetValue(resPs, PetscRow, valtoinsert, ADD_VALUES);
636 }
637 ierr = VecAssemblyBegin(resPs);
638 ierr = VecAssemblyEnd(resPs);
639 ierr = VecNorm(resPs,NORM_2,&resNrm);
640
641 if(firstpetsccalls == 1) {
642 ierr = KSPCreate(PETSC_COMM_WORLD, &ksps);
643 ierr = KSPSetOperators(ksps, lhsPs, lhsPs);
644 ierr = KSPGetPC(ksps, &pcs);
645 ierr = PCSetType(pcs, PCPBJACOBI);
646 PetscInt maxits;
647 maxits = (PetscInt) solpar.nGMRES * (PetscInt) solpar.Kspace;
648 ierr = KSPSetTolerances(ksps, timdat.etol, PETSC_DEFAULT, PETSC_DEFAULT, maxits);
649 ierr = KSPSetFromOptions(ksps);
650 }
651 ierr = KSPSolve(ksps, resPs, DyPs);
652 if(firstpetsccalls == 1) {
653 ierr = VecScatterCreate(DyPs, LocalIndexSets, DyPLocals, NULL, &scatter7s);
654 }
655 ierr = VecScatterBegin(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
656 ierr = VecScatterEnd(scatter7s, DyPs, DyPLocals, INSERT_VALUES, SCATTER_FORWARD);
657 ierr = VecGetArray(DyPLocals, &parray);
658 PetscRow = 0;
659 for ( node = 0; node< nshg; node++) {
660 index = node;
661 Dy[index] = parray[PetscRow];
662 PetscRow = PetscRow+1;
663 }
664 ierr = VecRestoreArray(DyPLocals, &parray);
665
666 firstpetsccalls = 0;
667
668 // .... output the statistics
669 //
670 // itrpar.iKss=0; // see rstat()
671 PetscInt its;
672 ierr = KSPGetIterationNumber(ksps, &its);
673 itrpar.iKss = (int) its;
674 itrpar.ntotGMs += (int) its;
675 rstatpSclr (&resNrm);
676 //
677 // .... end
678 //
679 }
680 #endif
681