xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
1 /*
 GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/
7 
8 #if defined(PETSC_HAVE_CUDA)
9   #include <cuda_runtime.h>
10 #endif
11 
12 #if defined(PETSC_HAVE_HIP)
13   #include <hip/hip_runtime.h>
14 #endif
15 
16 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
17 PetscLogEvent petsc_gamg_setup_matmat_events[PETSC_MG_MAXLEVELS][3];
18 PetscLogEvent PC_GAMGGraph_AGG;
19 PetscLogEvent PC_GAMGGraph_GEO;
20 PetscLogEvent PC_GAMGCoarsen_AGG;
21 PetscLogEvent PC_GAMGCoarsen_GEO;
22 PetscLogEvent PC_GAMGProlongator_AGG;
23 PetscLogEvent PC_GAMGProlongator_GEO;
24 PetscLogEvent PC_GAMGOptProlongator_AGG;
25 
26 /* #define GAMG_STAGES */
27 #if defined(GAMG_STAGES)
28 static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
29 #endif
30 
31 static PetscFunctionList GAMGList = NULL;
32 static PetscBool PCGAMGPackageInitialized;
33 
34 /* ----------------------------------------------------------------------------- */
35 PetscErrorCode PCReset_GAMG(PC pc)
36 {
37   PetscErrorCode ierr, level;
38   PC_MG          *mg      = (PC_MG*)pc->data;
39   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
40 
41   PetscFunctionBegin;
42   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
43   pc_gamg->data_sz = 0;
44   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
45   for (level = 0; level < PETSC_MG_MAXLEVELS ; level++) {
46     mg->min_eigen_DinvA[level] = 0;
47     mg->max_eigen_DinvA[level] = 0;
48   }
49   pc_gamg->emin = 0;
50   pc_gamg->emax = 0;
51   PetscFunctionReturn(0);
52 }
53 
54 /* -------------------------------------------------------------------------- */
55 /*
56    PCGAMGCreateLevel_GAMG: create coarse op with RAP.  repartition and/or reduce number
57      of active processors.
58 
59    Input Parameter:
60    . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
61           'pc_gamg->data_sz' are changed via repartitioning/reduction.
62    . Amat_fine - matrix on this fine (k) level
63    . cr_bs - coarse block size
64    In/Output Parameter:
65    . a_P_inout - prolongation operator to the next level (k-->k-1)
66    . a_nactive_proc - number of active procs
67    Output Parameter:
68    . a_Amat_crs - coarse matrix that is created (k-1)
69 */
70 
static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
{
  PetscErrorCode  ierr;
  PC_MG           *mg         = (PC_MG*)pc->data;
  PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
  Mat             Cmat,Pold=*a_P_inout;           /* Pold: current prolongator, may be replaced on output */
  MPI_Comm        comm;
  PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
  PetscInt        ncrs_eq,ncrs,f_bs;              /* local #equations, local #nodes, fine-level block size */

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
  ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
  /* Galerkin coarse operator: Cmat = Pold^T * Amat_fine * Pold */
  ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
  ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);

  /* default: no column permutation; only set below when repartitioning actually happens */
  if (Pcolumnperm) *Pcolumnperm = NULL;

  /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
  ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
  if (pc_gamg->data_cell_rows>0) {
    /* node count derived from the auxiliary data layout (rows x cols items per node) */
    ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
  } else {
    /* no auxiliary data: fall back to equations / block size */
    PetscInt  bs;
    ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
    ncrs = ncrs_eq/bs;
  }
  /* get number of PEs to make active 'new_size', reduce, can be any integer 1-P */
  if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* 0 turns reducing to 1 process/device on; do for HIP, etc. */
#if defined(PETSC_HAVE_CUDA)
    PetscShmComm pshmcomm;
    PetscMPIInt  locrank;
    MPI_Comm     loccomm;
    PetscInt     s_nnodes,r_nnodes, new_new_size;
    cudaError_t  cerr;
    int          devCount;
    ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
    ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRMPI(ierr);
    /* count compute nodes: one contribution per shared-memory-communicator root */
    s_nnodes = !locrank;
    ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
    if (size%r_nnodes) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of nodes np=%D nnodes%D",size,r_nnodes);
    devCount = 0;
    cerr = cudaGetDeviceCount(&devCount);
    cudaGetLastError(); /* Reset the last error */
    if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
      /* target one active MPI rank per device */
      new_new_size = r_nnodes * devCount;
      new_size = new_new_size;
      ierr = PetscInfo5(pc,"Fine grid with Cuda. %D nodes. Change new active set size %d --> %d (devCount=%d #nodes=%D)\n",r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
    } else {
      ierr = PetscInfo(pc,"With Cuda but no device. Use heuristics.");CHKERRQ(ierr);
      goto HEURISTIC; /* jumps into the final 'else' branch below */
    }
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
#endif
  } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
    /* user-specified reduction factor for this level; must divide the active count evenly */
    if (nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of active process %D wrt reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
    new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
    ierr = PetscInfo3(pc,"Manually setting reduction to %d active processes (%d/%D)\n",new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
  } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
    /* coarsest grid is solved serially unless a parallel coarse solver was requested */
    new_size = 1;
    ierr = PetscInfo1(pc,"Force coarsest grid reduction to %d active processes\n",new_size);CHKERRQ(ierr);
  } else {
    PetscInt ncrs_eq_glob;
#if defined(PETSC_HAVE_CUDA)
    HEURISTIC:
#endif
    ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
    new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
    if (!new_size) new_size = 1; /* not likely, possible? */
    else if (new_size >= nactive) new_size = nactive; /* no change, rare */
    ierr = PetscInfo2(pc,"Coarse grid reduction from %d to %d active processes\n",nactive,new_size);CHKERRQ(ierr);
  }
  if (new_size==nactive) {
    *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
    if (new_size < size) {
      /* odd case where multiple coarse grids are on one processor or no coarsening ... */
      ierr = PetscInfo1(pc,"reduced grid using same number of processors (%d) as last grid (use larger coarse grid)\n",nactive);CHKERRQ(ierr);
      if (pc_gamg->cpu_pin_coarse_grids) {
        ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
        ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
      }
    }
    /* we know that the grid structure can be reused in MatPtAP */
  } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
    PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
    IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
    nloc_old = ncrs_eq/cr_bs;
    if (ncrs_eq % cr_bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
    /* get new_size and rfactor */
    if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
      /* find factor */
      if (new_size == 1) rfactor = size; /* don't modify */
      else {
        /* pick the divisor kk of 'size' whose active count size/kk is closest to new_size */
        PetscReal best_fact = 0.;
        jj = -1;
        for (kk = 1 ; kk <= size ; kk++) {
          if (!(size%kk)) { /* a candidate */
            PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
            if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
            if (fact > best_fact) {
              best_fact = fact; jj = kk;
            }
          }
        }
        if (jj != -1) rfactor = jj;
        else rfactor = 1; /* a prime */
        if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
        else expand_factor = rfactor;
      }
      new_size = size/rfactor; /* make new size one that is factor */
      if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
        *a_Amat_crs = Cmat;
        ierr = PetscInfo2(pc,"Finding factorable processor set stopped reduction: new_size=%d, neq(loc)=%D\n",new_size,ncrs_eq);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
    }
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
    /* make 'is_eq_newproc' */
    ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
    if (pc_gamg->repart) {
      /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
      Mat      adj;
      ierr = PetscInfo4(pc,"Repartition: size (active): %d --> %d, %D local equations, using %s process layout\n",*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
      /* get 'adj' */
      if (cr_bs == 1) {
        ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
      } else {
        /* make a scalar matrix to partition (no Stokes here) */
        Mat               tMat;
        PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
        const PetscScalar *vals;
        const PetscInt    *idx;
        PetscInt          *d_nnz, *o_nnz, M, N;
        static PetscInt   llev = 0; /* ugly but just used for debugging */
        MatType           mtype;

        /* preallocation estimate: one scalar row per block row; clamped below to valid ranges */
        ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
        ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
        ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
        for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
          ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
          d_nnz[jj] = ncols/cr_bs;
          o_nnz[jj] = ncols/cr_bs;
          ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
          if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
          if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
        }

        ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
        ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
        ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
        ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
        ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
        ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

        /* collapse each cr_bs x cr_bs block of Cmat to a single (summed) scalar entry */
        for (ii = Istart_crs; ii < Iend_crs; ii++) {
          PetscInt dest_row = ii/cr_bs;
          ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
          for (jj = 0; jj < ncols; jj++) {
            PetscInt    dest_col = idx[jj]/cr_bs;
            PetscScalar v        = 1.0;
            ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
          }
          ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
        }
        ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

        if (llev++ == -1) { /* dead debugging path: enable by changing the comparison */
          PetscViewer viewer; char fname[32];
          ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
          PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);
          ierr = MatView(tMat, viewer);CHKERRQ(ierr);
          ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
        }
        ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
        ierr = MatDestroy(&tMat);CHKERRQ(ierr);
      } /* create 'adj' */

      { /* partition: get newproc_idx */
        char            prefix[256];
        const char      *pcpre;
        const PetscInt  *is_idx;
        MatPartitioning mpart;
        IS              proc_is;

        ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
        ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
        ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
        ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
        ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
        ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
        ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
        ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
        ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);

        /* collect IS info */
        ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
        ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
        /* expand node partition to equation space; expand_factor spreads targets over ranks */
        for (kk = jj = 0 ; kk < nloc_old ; kk++) {
          for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
            newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
          }
        }
        ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
        ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
      }
      ierr = MatDestroy(&adj);CHKERRQ(ierr);

      ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
      ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
    } else { /* simple aggregation of parts -- 'is_eq_newproc' */
      PetscInt targetPE;
      if (new_size==nactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
      ierr = PetscInfo1(pc,"Number of equations (loc) %D with simple aggregation\n",ncrs_eq);CHKERRQ(ierr);
      /* all local equations go to one target rank determined by rank/rfactor */
      targetPE = (rank/rfactor)*expand_factor;
      ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
    } /* end simple 'is_eq_newproc' */

    /*
      Create an index set from the is_eq_newproc index set to indicate the mapping TO
    */
    ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
    is_eq_num_prim = is_eq_num; /* alias; compared against is_eq_num before destroy below */
    /*
      Determine how many equations/vertices are assigned to each processor
    */
    ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
    ncrs_eq_new = counts[rank];
    ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
    ncrs_new = ncrs_eq_new/cr_bs;

    ierr = PetscFree(counts);CHKERRQ(ierr);
    /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxiliary data into some complex abstracted thing */
    {
      Vec            src_crd, dest_crd;
      const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
      VecScatter     vecscat;
      PetscScalar    *array;
      IS isscat;
      /* move data (for primal equations only) */
      /* Create a vector to contain the newly ordered element information */
      ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
      ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
      ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
      /*
        There are 'ndata_rows*ndata_cols' data items per node, (one can think of the vectors of having
        a block size of ...).  Note, ISs are expanded into equation space by 'cr_bs'.
      */
      ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
      ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
      for (ii=0,jj=0; ii<ncrs; ii++) {
        PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
        for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
      }
      ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
      ierr = PetscFree(tidx);CHKERRQ(ierr);
      /*
        Create a vector to contain the original vertex information for each element
      */
      ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
      /* pack pc_gamg->data (column-major per data block) into node-interleaved order */
      for (jj=0; jj<ndata_cols; jj++) {
        const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
        for (ii=0; ii<ncrs; ii++) {
          for (kk=0; kk<ndata_rows; kk++) {
            PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
            PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
            ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
          }
        }
      }
      ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
      /*
        Scatter the element vertex information (still in the original vertex ordering)
        to the correct processor
      */
      ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&isscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
      /*
        Put the element vertex data into a new allocation of the gdata->ele
      */
      ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
      ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);

      pc_gamg->data_sz = node_data_sz*ncrs_new;
      strideNew        = ncrs_new*ndata_rows;

      /* unpack the scattered vector back into the column-major layout of pc_gamg->data */
      ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
      for (jj=0; jj<ndata_cols; jj++) {
        for (ii=0; ii<ncrs_new; ii++) {
          for (kk=0; kk<ndata_rows; kk++) {
            PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
            pc_gamg->data[ix] = PetscRealPart(array[jx]);
          }
        }
      }
      ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
      ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
    }
    /* move A and P (columns) with new layout */
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
    /*
      Invert for MatCreateSubMatrix
    */
    ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
    ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
    ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
    if (is_eq_num != is_eq_num_prim) {
      ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
    }
    if (Pcolumnperm) {
      /* hand the caller a reference to the column permutation of P */
      ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
      *Pcolumnperm = new_eq_indices;
    }
    ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
    ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
    /* 'a_Amat_crs' output */
    {
      Mat mat;
      ierr        = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
      *a_Amat_crs = mat;
    }
    ierr = MatDestroy(&Cmat);CHKERRQ(ierr);

    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
    /* prolongator */
    {
      IS       findices;
      PetscInt Istart,Iend;
      Mat      Pnew;

      /* keep P's rows (fine grid layout) and permute only its columns to the new coarse layout */
      ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
      ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
      ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
      ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
      ierr = ISDestroy(&findices);CHKERRQ(ierr);
      ierr = MatSetOption(Pnew,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);

      ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
      ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);

      /* output - repartitioned */
      *a_P_inout = Pnew;
    }
    ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);

    *a_nactive_proc = new_size; /* output */

    /* pinning on reduced grids, not a bad heuristic and optimization gets folded into process reduction optimization */
    if (pc_gamg->cpu_pin_coarse_grids) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      static PetscInt llev = 2;
      ierr = PetscInfo1(pc,"Pinning level %D to the CPU\n",llev++);CHKERRQ(ierr);
#endif
      ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
      if (1) { /* HACK: move this to MatBindCPU_MPIAIJXXX; lvec is created, need to pin it, this is done in MatSetUpMultiply_MPIAIJ. Hack */
        Mat         A = *a_Amat_crs, P = *a_P_inout;
        PetscMPIInt size;
        ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
        if (size > 1) {
          /* assumes A and P are MPIAIJ here -- TODO confirm for other matrix types */
          Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
          ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
          ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
        }
      }
    }
    ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
456 
457 PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
458 {
459   PetscErrorCode ierr;
460   const char     *prefix;
461   char           addp[32];
462   PC_MG          *mg      = (PC_MG*)a_pc->data;
463   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
464 
465   PetscFunctionBegin;
466   ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
467   ierr = PetscInfo1(a_pc,"Square Graph on level %D\n",pc_gamg->current_level+1);CHKERRQ(ierr);
468   ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
469   ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
470   ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",pc_gamg->current_level);CHKERRQ(ierr);
471   ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
472   if ((*Gmat2)->structurally_symmetric) {
473     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AB);CHKERRQ(ierr);
474   } else {
475     ierr = MatSetOption(Gmat1,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
476     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
477   }
478   ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
479   ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
480   ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
481   ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
482   ierr = MatProductClear(*Gmat2);CHKERRQ(ierr);
483   /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
484   (*Gmat2)->assembled = PETSC_TRUE;
485   PetscFunctionReturn(0);
486 }
487 
488 /* -------------------------------------------------------------------------- */
489 /*
490    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
491                     by setting data structures and options.
492 
493    Input Parameter:
494 .  pc - the preconditioner context
495 
496 */
497 PetscErrorCode PCSetUp_GAMG(PC pc)
498 {
499   PetscErrorCode ierr;
500   PC_MG          *mg      = (PC_MG*)pc->data;
501   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
502   Mat            Pmat     = pc->pmat;
503   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
504   MPI_Comm       comm;
505   PetscMPIInt    rank,size,nactivepe;
506   Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
507   IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
508   PetscLogDouble nnz0=0.,nnztot=0.;
509   MatInfo        info;
510   PetscBool      is_last = PETSC_FALSE;
511 
512   PetscFunctionBegin;
513   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
514   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
515   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
516 
517   if (pc->setupcalled) {
518     if (!pc_gamg->reuse_prol || pc->flag == DIFFERENT_NONZERO_PATTERN) {
519       /* reset everything */
520       ierr = PCReset_MG(pc);CHKERRQ(ierr);
521       pc->setupcalled = 0;
522     } else {
523       PC_MG_Levels **mglevels = mg->levels;
524       /* just do Galerkin grids */
525       Mat          B,dA,dB;
526 
527       if (pc_gamg->Nlevels > 1) {
528         PetscInt gl;
529         /* currently only handle case where mat and pmat are the same on coarser levels */
530         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
531         /* (re)set to get dirty flag */
532         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
533 
534         for (level=pc_gamg->Nlevels-2,gl=0; level>=0; level--,gl++) {
535           MatReuse reuse = MAT_INITIAL_MATRIX ;
536 
537           /* matrix structure can change from repartitioning or process reduction but don't know if we have process reduction here. Should fix */
538           ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
539           if (B->product) {
540             if (B->product->A == dB && B->product->B == mglevels[level+1]->interpolate) {
541               reuse = MAT_REUSE_MATRIX;
542             }
543           }
544           if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr); }
545           if (reuse == MAT_REUSE_MATRIX) {
546             ierr = PetscInfo1(pc,"RAP after first solve, reuse matrix level %D\n",level);CHKERRQ(ierr);
547           } else {
548             ierr = PetscInfo1(pc,"RAP after first solve, new matrix level %D\n",level);CHKERRQ(ierr);
549           }
550           ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
551           ierr = MatPtAP(dB,mglevels[level+1]->interpolate,reuse,PETSC_DEFAULT,&B);CHKERRQ(ierr);
552           ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
553           if (reuse == MAT_INITIAL_MATRIX) mglevels[level]->A = B;
554           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
555           dB   = B;
556         }
557       }
558 
559       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
560       PetscFunctionReturn(0);
561     }
562   }
563 
564   if (!pc_gamg->data) {
565     if (pc_gamg->orig_data) {
566       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
567       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
568 
569       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
570       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
571       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
572 
573       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
574       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
575     } else {
576       if (!pc_gamg->ops->createdefaultdata) SETERRQ(comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
577       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
578     }
579   }
580 
581   /* cache original data for reuse */
582   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
583     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
584     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
585     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
586     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
587   }
588 
589   /* get basic dims */
590   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
591   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
592 
593   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
594   nnz0   = info.nz_used;
595   nnztot = info.nz_used;
596   ierr = PetscInfo6(pc,"level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
597 
598   /* Get A_i and R_i */
599   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
600     pc_gamg->current_level = level;
601     if (level >= PETSC_MG_MAXLEVELS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
602     level1 = level + 1;
603     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
604 #if defined(GAMG_STAGES)
605     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
606 #endif
607     { /* construct prolongator */
608       Mat              Gmat;
609       PetscCoarsenData *agg_lists;
610       Mat              Prol11;
611 
612       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
613       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
614       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
615 
616       /* could have failed to create new level */
617       if (Prol11) {
618         const char *prefix;
619         char       addp[32];
620 
621         /* get new block size of coarse matrices */
622         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
623 
624         if (pc_gamg->ops->optprolongator) {
625           /* smooth */
626           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
627         }
628 
629         if (pc_gamg->use_aggs_in_asm) {
630           PetscInt bs;
631           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
632           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
633         }
634 
635         ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
636         ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
637         ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
638         ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
639         /* Always generate the transpose with CUDA
640            Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
641         ierr = MatSetOption(Prol11,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
642         ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
643         Parr[level1] = Prol11;
644       } else Parr[level1] = NULL; /* failed to coarsen */
645 
646       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
647       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
648     } /* construct prolongator scope */
649     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
650     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
651     if (!Parr[level1]) { /* failed to coarsen */
652       ierr = PetscInfo1(pc,"Stop gridding, level %D\n",level);CHKERRQ(ierr);
653 #if defined(GAMG_STAGES)
654       ierr = PetscLogStagePop();CHKERRQ(ierr);
655 #endif
656       break;
657     }
658     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
659     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is next M, a loop test variables */
660     if (is_last) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Is last ?");
661     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
662     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
663     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
664 
665     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
666     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is loop test variables */
667     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
668     nnztot += info.nz_used;
669     ierr = PetscInfo5(pc,"%D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
670 
671 #if defined(GAMG_STAGES)
672     ierr = PetscLogStagePop();CHKERRQ(ierr);
673 #endif
674     /* stop if one node or one proc -- could pull back for singular problems */
675     if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
676       ierr =  PetscInfo2(pc,"HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",level,M/bs);CHKERRQ(ierr);
677       level++;
678       break;
679     }
680   } /* levels */
681   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
682 
683   ierr = PetscInfo2(pc,"%D levels, grid complexity = %g\n",level+1,nnztot/nnz0);CHKERRQ(ierr);
684   pc_gamg->Nlevels = level + 1;
685   fine_level       = level;
686   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
687 
688   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
689     PetscErrorCode (*savesetfromoptions[PETSC_MG_MAXLEVELS])(PetscOptionItems*,KSP);
690 
691     /* set default smoothers & set operators */
692     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
693       KSP smoother;
694       PC  subpc;
695 
696       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
697       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
698 
699       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
700       /* set ops */
701       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
702       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
703 
704       /* set defaults */
705       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
706 
707       /* set blocks for ASM smoother that uses the 'aggregates' */
708       if (pc_gamg->use_aggs_in_asm) {
709         PetscInt sz;
710         IS       *iss;
711 
712         sz   = nASMBlocksArr[level];
713         iss  = ASMLocalIDsArr[level];
714         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
715         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
716         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
717         if (!sz) {
718           IS       is;
719           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
720           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
721           ierr = ISDestroy(&is);CHKERRQ(ierr);
722         } else {
723           PetscInt kk;
724           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
725           for (kk=0; kk<sz; kk++) {
726             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
727           }
728           ierr = PetscFree(iss);CHKERRQ(ierr);
729         }
730         ASMLocalIDsArr[level] = NULL;
731         nASMBlocksArr[level]  = 0;
732       } else {
733         ierr = PCSetType(subpc, PCSOR);CHKERRQ(ierr);
734       }
735     }
736     {
737       /* coarse grid */
738       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
739       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
740 
741       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
742       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
743       if (!pc_gamg->use_parallel_coarse_grid_solver) {
744         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
745         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
746         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
747         ierr = PCSetUp(subpc);CHKERRQ(ierr);
748         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
749         if (ii != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
750         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
751         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
752         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
753         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
754         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
755       }
756     }
757 
758     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
759     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
760     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
761     ierr = PetscOptionsEnd();CHKERRQ(ierr);
762     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
763 
764     /* setup cheby eigen estimates from SA */
765     if (pc_gamg->use_sa_esteig==1) {
766       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
767         KSP       smoother;
768         PetscBool ischeb;
769 
770         savesetfromoptions[level] = NULL;
771         ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
772         ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
773         if (ischeb) {
774           KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;
775 
776           ierr = KSPSetFromOptions(smoother);CHKERRQ(ierr); /* let command line emax override using SA's eigenvalues */
777           if (mg->max_eigen_DinvA[level] > 0 && cheb->emax == 0.) {
778             PC        subpc;
779             PetscBool isjac;
780             ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
781             ierr = PetscObjectTypeCompare((PetscObject)subpc,PCJACOBI,&isjac);CHKERRQ(ierr);
782             if (isjac && pc_gamg->use_sa_esteig==1) {
783               PetscReal emax,emin;
784 
785               emin = mg->min_eigen_DinvA[level];
786               emax = mg->max_eigen_DinvA[level];
787               ierr = PetscInfo4(pc,"PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
788               cheb->emin_computed = emin;
789               cheb->emax_computed = emax;
790               ierr = KSPChebyshevSetEigenvalues(smoother, cheb->tform[2]*emin + cheb->tform[3]*emax, cheb->tform[0]*emin + cheb->tform[1]*emax);CHKERRQ(ierr);
791 
792               /* We have set the eigenvalues and consumed the transformation values
793                  prevent from flagging the recomputation of the eigenvalues again in PCSetUp_MG
794                  below when setfromoptions will be called again */
795               savesetfromoptions[level] = smoother->ops->setfromoptions;
796               smoother->ops->setfromoptions = NULL;
797             }
798           }
799         }
800       }
801     }
802 
803     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
804 
805     /* restore Chebyshev smoother for next calls */
806     if (pc_gamg->use_sa_esteig==1) {
807       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
808         if (savesetfromoptions[level]) {
809           KSP smoother;
810           ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
811           smoother->ops->setfromoptions = savesetfromoptions[level];
812         }
813       }
814     }
815 
816     /* clean up */
817     for (level=1; level<pc_gamg->Nlevels; level++) {
818       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
819       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
820     }
821   } else {
822     KSP smoother;
823 
824     ierr = PetscInfo(pc,"One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
825     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
826     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
827     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
828     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
829   }
830   PetscFunctionReturn(0);
831 }
832 
833 /* ------------------------------------------------------------------------- */
834 /*
835  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
836    that was created with PCCreate_GAMG().
837 
838    Input Parameter:
839 .  pc - the preconditioner context
840 
841    Application Interface Routine: PCDestroy()
842 */
PetscErrorCode PCDestroy_GAMG(PC pc)
{
  PetscErrorCode ierr;
  PC_MG          *mg     = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  /* release per-setup data (coordinate/near-null data, cached eigen bounds) */
  ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
  /* type-specific teardown, if the active GAMG type registered one */
  if (pc_gamg->ops->destroy) {
    ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
  }
  ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
  ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
  /* destroy the underlying PCMG context last; it owned the innerctx just freed */
  ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
860 
861 /*@
862    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
863 
864    Logically Collective on PC
865 
866    Input Parameters:
867 +  pc - the preconditioner context
868 -  n - the number of equations
869 
870    Options Database Key:
871 .  -pc_gamg_process_eq_limit <limit>
872 
873    Notes:
874     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
875           that has degrees of freedom
876 
877    Level: intermediate
878 
879 .seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
880 @*/
PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
890 
891 static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
892 {
893   PC_MG   *mg      = (PC_MG*)pc->data;
894   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
895 
896   PetscFunctionBegin;
897   if (n>0) pc_gamg->min_eq_proc = n;
898   PetscFunctionReturn(0);
899 }
900 
901 /*@
902    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
903 
904  Collective on PC
905 
906    Input Parameters:
907 +  pc - the preconditioner context
908 -  n - maximum number of equations to aim for
909 
910    Options Database Key:
911 .  -pc_gamg_coarse_eq_limit <limit>
912 
913    Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
     has fewer than 1000 unknowns.
915 
916    Level: intermediate
917 
918 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
919 @*/
PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
929 
930 static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
931 {
932   PC_MG   *mg      = (PC_MG*)pc->data;
933   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
934 
935   PetscFunctionBegin;
936   if (n>0) pc_gamg->coarse_eq_limit = n;
937   PetscFunctionReturn(0);
938 }
939 
940 /*@
941    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
942 
943    Collective on PC
944 
945    Input Parameters:
946 +  pc - the preconditioner context
947 -  n - PETSC_TRUE or PETSC_FALSE
948 
949    Options Database Key:
950 .  -pc_gamg_repartition <true,false>
951 
952    Notes:
    this will generally improve the load balancing of the work on each level
954 
955    Level: intermediate
956 
957 .seealso: ()
958 @*/
PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
968 
969 static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
970 {
971   PC_MG   *mg      = (PC_MG*)pc->data;
972   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
973 
974   PetscFunctionBegin;
975   pc_gamg->repart = n;
976   PetscFunctionReturn(0);
977 }
978 
979 /*@
980    PCGAMGSetEstEigKSPMaxIt - Set number of KSP iterations in eigen estimator (for Cheby)
981 
982    Collective on PC
983 
984    Input Parameters:
985 +  pc - the preconditioner context
986 -  n - number of its
987 
988    Options Database Key:
989 .  -pc_gamg_esteig_ksp_max_it <its>
990 
991    Notes:
992 
993    Level: intermediate
994 
995 .seealso: ()
996 @*/
PetscErrorCode PCGAMGSetEstEigKSPMaxIt(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPMaxIt_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1006 
1007 static PetscErrorCode PCGAMGSetEstEigKSPMaxIt_GAMG(PC pc, PetscInt n)
1008 {
1009   PC_MG   *mg      = (PC_MG*)pc->data;
1010   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1011 
1012   PetscFunctionBegin;
1013   pc_gamg->esteig_max_it = n;
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 /*@
1018    PCGAMGSetUseSAEstEig - Use eigen estimate from smoothed aggregation for Cheby smoother
1019 
1020    Collective on PC
1021 
1022    Input Parameters:
1023 +  pc - the preconditioner context
1024 -  n - number of its
1025 
1026    Options Database Key:
1027 .  -pc_gamg_use_sa_esteig <true,false>
1028 
1029    Notes:
1030 
1031    Level: intermediate
1032 
1033 .seealso: ()
1034 @*/
PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1044 
1045 static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
1046 {
1047   PC_MG   *mg      = (PC_MG*)pc->data;
1048   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1049 
1050   PetscFunctionBegin;
1051   pc_gamg->use_sa_esteig = n ? 1 : 0;
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 /*@C
1056    PCGAMGSetEstEigKSPType - Set type of KSP in eigen estimator (for Cheby)
1057 
1058    Collective on PC
1059 
1060    Input Parameters:
1061 +  pc - the preconditioner context
1062 -  t - ksp type
1063 
1064    Options Database Key:
1065 .  -pc_gamg_esteig_ksp_type <type>
1066 
1067    Notes:
1068 
1069    Level: intermediate
1070 
1071 .seealso: ()
1072 @*/
PetscErrorCode PCGAMGSetEstEigKSPType(PC pc, char t[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetEstEigKSPType_C",(PC,char[]),(pc,t));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1082 
1083 static PetscErrorCode PCGAMGSetEstEigKSPType_GAMG(PC pc, char t[])
1084 {
1085   PetscErrorCode ierr;
1086   PC_MG   *mg      = (PC_MG*)pc->data;
1087   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1088 
1089   PetscFunctionBegin;
1090   ierr = PetscStrcpy(pc_gamg->esteig_type,t);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 /*@
1095    PCGAMGSetEigenvalues - Set eigenvalues
1096 
1097    Collective on PC
1098 
1099    Input Parameters:
1100 +  pc - the preconditioner context
1101 -  emax - max eigenvalue
1102 .  emin - min eigenvalue
1103 
1104    Options Database Key:
1105 .  -pc_gamg_eigenvalues
1106 
1107    Level: intermediate
1108 
1109 .seealso: PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPType()
1110 @*/
PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1120 
1121 static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
1122 {
1123   PC_MG          *mg      = (PC_MG*)pc->data;
1124   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1125 
1126   PetscFunctionBegin;
1127   if (emax <= emin) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
1128   if (emax*emin <= 0.0) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
1129   pc_gamg->emax = emax;
1130   pc_gamg->emin = emin;
1131 
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 /*@
1136    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
1137 
1138    Collective on PC
1139 
1140    Input Parameters:
1141 +  pc - the preconditioner context
1142 -  n - PETSC_TRUE or PETSC_FALSE
1143 
1144    Options Database Key:
1145 .  -pc_gamg_reuse_interpolation <true,false>
1146 
1147    Level: intermediate
1148 
1149    Notes:
1150     this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
1151           rebuilding the preconditioner quicker.
1152 
1153 .seealso: ()
1154 @*/
PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1164 
1165 static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
1166 {
1167   PC_MG   *mg      = (PC_MG*)pc->data;
1168   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1169 
1170   PetscFunctionBegin;
1171   pc_gamg->reuse_prol = n;
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 /*@
1176    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
1177 
1178    Collective on PC
1179 
1180    Input Parameters:
1181 +  pc - the preconditioner context
1182 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
1183 
1184    Options Database Key:
1185 .  -pc_gamg_asm_use_agg
1186 
1187    Level: intermediate
1188 
1189 .seealso: ()
1190 @*/
PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1200 
1201 static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
1202 {
1203   PC_MG   *mg      = (PC_MG*)pc->data;
1204   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1205 
1206   PetscFunctionBegin;
1207   pc_gamg->use_aggs_in_asm = flg;
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 /*@
1212    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
1213 
1214    Collective on PC
1215 
1216    Input Parameters:
1217 +  pc - the preconditioner context
1218 -  flg - PETSC_TRUE to not force coarse grid onto one processor
1219 
1220    Options Database Key:
1221 .  -pc_gamg_use_parallel_coarse_grid_solver
1222 
1223    Level: intermediate
1224 
1225 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
1226 @*/
PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1236 
1237 static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
1238 {
1239   PC_MG   *mg      = (PC_MG*)pc->data;
1240   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1241 
1242   PetscFunctionBegin;
1243   pc_gamg->use_parallel_coarse_grid_solver = flg;
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 /*@
1248    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
1249 
1250    Collective on PC
1251 
1252    Input Parameters:
1253 +  pc - the preconditioner context
1254 -  flg - PETSC_TRUE to pin coarse grids to CPU
1255 
1256    Options Database Key:
1257 .  -pc_gamg_cpu_pin_coarse_grids
1258 
1259    Level: intermediate
1260 
1261 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
1262 @*/
PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1272 
1273 static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
1274 {
1275   PC_MG   *mg      = (PC_MG*)pc->data;
1276   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1277 
1278   PetscFunctionBegin;
1279   pc_gamg->cpu_pin_coarse_grids = flg;
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 /*@
1284    PCGAMGSetCoarseGridLayoutType - place reduce grids on processors with natural order (compact type)
1285 
1286    Collective on PC
1287 
1288    Input Parameters:
1289 +  pc - the preconditioner context
1290 -  flg - Layout type
1291 
1292    Options Database Key:
1293 .  -pc_gamg_coarse_grid_layout_type
1294 
1295    Level: intermediate
1296 
1297 .seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
1298 @*/
PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1308 
1309 static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
1310 {
1311   PC_MG   *mg      = (PC_MG*)pc->data;
1312   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1313 
1314   PetscFunctionBegin;
1315   pc_gamg->layout_type = flg;
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 /*@
1320    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1321 
1322    Not collective on PC
1323 
1324    Input Parameters:
1325 +  pc - the preconditioner
1326 -  n - the maximum number of levels to use
1327 
1328    Options Database Key:
1329 .  -pc_mg_levels
1330 
1331    Level: intermediate
1332 
1333 .seealso: ()
1334 @*/
PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1344 
1345 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1346 {
1347   PC_MG   *mg      = (PC_MG*)pc->data;
1348   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1349 
1350   PetscFunctionBegin;
1351   pc_gamg->Nlevels = n;
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 /*@
1356    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1357 
1358    Not collective on PC
1359 
1360    Input Parameters:
1361 +  pc - the preconditioner context
1362 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1363 -  n - number of threshold values provided in array
1364 
1365    Options Database Key:
1366 .  -pc_gamg_threshold <threshold>
1367 
1368    Notes:
    Increasing the threshold decreases the rate of coarsening. Conversely reducing the threshold increases the rate of coarsening (aggressive coarsening) and thereby reduces the complexity of the coarse grids, and generally results in slower solver convergence rates. Reducing coarse grid complexity reduces the complexity of Galerkin coarse grid construction considerably.
1370     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, and thus reducing the coupling in the graph and a different (perhaps better) coarser set of points.
1371 
1372     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1373     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1374     If n is greater than the total number of levels, the excess entries in threshold will not be used.
1375 
1376    Level: intermediate
1377 
1378 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1379 @*/
PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* v is only dereferenced when entries are supplied, so validate it only then */
  if (n) PetscValidRealPointer(v,2);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1390 
1391 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1392 {
1393   PC_MG   *mg      = (PC_MG*)pc->data;
1394   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1395   PetscInt i;
1396   PetscFunctionBegin;
1397   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1398   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 /*@
   PCGAMGSetRankReductionFactors - Set a manual schedule for process reduction on coarse grids
1404 
1405    Collective on PC
1406 
1407    Input Parameters:
1408 +  pc - the preconditioner context
.  v - array of reduction factors. 0 for the first value forces a reduction to one process/device on the first level in CUDA
1410 -  n - number of values provided in array
1411 
1412    Options Database Key:
1413 .  -pc_gamg_rank_reduction_factors <factors>
1414 
1415    Level: intermediate
1416 
1417 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1418 @*/
PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* v is only dereferenced when entries are supplied, so validate it only then */
  if (n) PetscValidIntPointer(v,2);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1429 
1430 static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
1431 {
1432   PC_MG   *mg      = (PC_MG*)pc->data;
1433   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1434   PetscInt i;
1435   PetscFunctionBegin;
1436   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
1437   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* 0 stop putting one process/device on first level */
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 /*@
1442    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1443 
1444    Not collective on PC
1445 
1446    Input Parameters:
1447 +  pc - the preconditioner context
-  scale - the threshold value reduction, usually < 1.0
1449 
1450    Options Database Key:
1451 .  -pc_gamg_threshold_scale <v>
1452 
1453    Notes:
1454    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1455    This scaling is used for each subsequent coarsening, but must be called before PCGAMGSetThreshold().
1456 
1457    Level: advanced
1458 
1459 .seealso: PCGAMGSetThreshold()
1460 @*/
PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1470 
1471 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1472 {
1473   PC_MG   *mg      = (PC_MG*)pc->data;
1474   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1475   PetscFunctionBegin;
1476   pc_gamg->threshold_scale = v;
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 /*@C
1481    PCGAMGSetType - Set solution method
1482 
1483    Collective on PC
1484 
1485    Input Parameters:
1486 +  pc - the preconditioner context
1487 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1488 
1489    Options Database Key:
1490 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1491 
1492    Level: intermediate
1493 
1494 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1495 @*/
PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the composed implementation; PetscTryMethod is a no-op if pc is not a GAMG PC */
  ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1505 
1506 /*@C
1507    PCGAMGGetType - Get solution method
1508 
1509    Collective on PC
1510 
1511    Input Parameter:
1512 .  pc - the preconditioner context
1513 
1514    Output Parameter:
1515 .  type - the type of algorithm used
1516 
1517    Level: intermediate
1518 
1519 .seealso: PCGAMGSetType(), PCGAMGType
1520 @*/
PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod (unlike PetscTryMethod) raises an error when the method is
     not composed, i.e. when pc is not a GAMG PC */
  ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1530 
1531 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1532 {
1533   PC_MG          *mg      = (PC_MG*)pc->data;
1534   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1535 
1536   PetscFunctionBegin;
1537   *type = pc_gamg->type;
1538   PetscFunctionReturn(0);
1539 }
1540 
/* Implementation of PCGAMGSetType(): look up the registered constructor for the
   requested subtype, tear down any previously installed subtype, and run the
   new subtype's constructor. */
static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
{
  PetscErrorCode ierr,(*r)(PC);
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->type = type;
  /* find the constructor registered under this type name (see PCGAMGRegister()) */
  ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
  if (pc_gamg->ops->destroy) {
    /* a subtype was already installed: destroy it, then reset the ops table,
       keeping only the shared createlevel hook */
    ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
    ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
    pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
    /* cleaning up common data in pc_gamg - this should disappear someday */
    pc_gamg->data_cell_cols = 0;
    pc_gamg->data_cell_rows = 0;
    pc_gamg->orig_data_cell_cols = 0;
    pc_gamg->orig_data_cell_rows = 0;
    ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
    pc_gamg->data_sz = 0;
  }
  /* remember the type name for option processing and viewing */
  ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
  /* run the new subtype's constructor */
  ierr = (*r)(pc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1568 
1569 /* -------------------------------------------------------------------------- */
1570 /*
1571    PCMGGetGridComplexity - compute coarse grid complexity of MG hierarchy
1572 
1573    Input Parameter:
1574 .  pc - the preconditioner context
1575 
1576    Output Parameter:
1577 .  gc - grid complexity = sum_i(nnz_i) / nnz_0
1578 
1579    Level: advanced
1580 */
1581 static PetscErrorCode PCMGGetGridComplexity(PC pc, PetscReal *gc)
1582 {
1583   PetscErrorCode ierr;
1584   PC_MG          *mg      = (PC_MG*)pc->data;
1585   PC_MG_Levels   **mglevels = mg->levels;
1586   PetscInt       lev;
1587   PetscLogDouble nnz0 = 0, sgc = 0;
1588   MatInfo        info;
1589 
1590   PetscFunctionBegin;
1591   if (!pc->setupcalled) {
1592     *gc = 0;
1593     PetscFunctionReturn(0);
1594   }
1595   if (!mg->nlevels) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MG has no levels");
1596   for (lev=0; lev<mg->nlevels; lev++) {
1597     Mat dB;
1598     ierr = KSPGetOperators(mglevels[lev]->smoothd,NULL,&dB);CHKERRQ(ierr);
1599     ierr = MatGetInfo(dB,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
1600     sgc += info.nz_used;
1601     if (lev==mg->nlevels-1) nnz0 = info.nz_used;
1602   }
1603   if (nnz0 > 0) *gc = (PetscReal)(sgc/nnz0);
1604   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Number for grid points on finest level is not available");
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1609 {
1610   PetscErrorCode ierr,i;
1611   PC_MG          *mg      = (PC_MG*)pc->data;
1612   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1613   PetscReal       gc=0;
1614   PetscFunctionBegin;
1615   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1616   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1617   for (i=0;i<mg->nlevels; i++) {
1618     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1619   }
1620   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1621   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1622   if (pc_gamg->use_aggs_in_asm) {
1623     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1624   }
1625   if (pc_gamg->use_parallel_coarse_grid_solver) {
1626     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1627   }
1628 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1629   if (pc_gamg->cpu_pin_coarse_grids) {
1630     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1631   }
1632 #endif
1633   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1634   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1635   /* } else { */
1636   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1637   /* } */
1638   if (pc_gamg->ops->view) {
1639     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1640   }
1641   ierr = PCMGGetGridComplexity(pc,&gc);CHKERRQ(ierr);
1642   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g\n",gc);CHKERRQ(ierr);
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1647 {
1648   PetscErrorCode ierr;
1649   PC_MG          *mg      = (PC_MG*)pc->data;
1650   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1651   PetscBool      flag,f2;
1652   MPI_Comm       comm;
1653   char           prefix[256],tname[32];
1654   PetscInt       i,n;
1655   const char     *pcpre;
1656   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1657   PetscFunctionBegin;
1658   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1659   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1660   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1661   if (flag) {
1662     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1663   }
1664   ierr = PetscOptionsFList("-pc_gamg_esteig_ksp_type","Krylov method for eigen estimator","PCGAMGSetEstEigKSPType",KSPList,pc_gamg->esteig_type,tname,sizeof(tname),&flag);CHKERRQ(ierr);
1665   if (flag) {
1666     ierr = PCGAMGSetEstEigKSPType(pc,tname);CHKERRQ(ierr);
1667   }
1668   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1669   f2 = PETSC_TRUE;
1670   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use eigen estimate from Smoothed aggregation for smoother","PCGAMGSetUseSAEstEig",f2,&f2,&flag);CHKERRQ(ierr);
1671   if (flag) pc_gamg->use_sa_esteig = f2 ? 1 : 0;
1672   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1673   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1674   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1675   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1676   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1677   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1678   ierr = PetscOptionsInt("-pc_gamg_esteig_ksp_max_it","Number of iterations of eigen estimator","PCGAMGSetEstEigKSPMaxIt",pc_gamg->esteig_max_it,&pc_gamg->esteig_max_it,NULL);CHKERRQ(ierr);
1679   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1680   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1681   n = PETSC_MG_MAXLEVELS;
1682   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1683   if (!flag || n < PETSC_MG_MAXLEVELS) {
1684     if (!flag) n = 1;
1685     i = n;
1686     do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_MG_MAXLEVELS);
1687   }
1688   n = PETSC_MG_MAXLEVELS;
1689   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (0 for first reduction puts one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1690   if (!flag) i = 0;
1691   else i = n;
1692   do {pc_gamg->level_reduction_factors[i] = -1;} while (++i<PETSC_MG_MAXLEVELS);
1693   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1694   {
1695     PetscReal eminmax[2] = {0., 0.};
1696     n = 2;
1697     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1698     if (flag) {
1699       if (n != 2) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1700       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr);
1701     }
1702   }
1703   /* set options for subtype */
1704   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1705 
1706   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1707   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1708   ierr = PetscOptionsTail();CHKERRQ(ierr);
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 /* -------------------------------------------------------------------------- */
1713 /*MC
1714      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1715 
1716    Options Database Keys:
1717 +   -pc_gamg_type <type> - one of agg, geo, or classical
.   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
.   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
.   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1721 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1722                                         equations on each process that has degrees of freedom
1723 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1724 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1725 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1726 
1727    Options Database Keys for default Aggregation:
1728 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1729 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1730 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1731 
1732    Multigrid options:
1733 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1734 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
1735 .  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kascade
1736 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1737 
1738   Notes:
1739     In order to obtain good performance for PCGAMG for vector valued problems you must
1740        Call MatSetBlockSize() to indicate the number of degrees of freedom per grid point
1741        Call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator
1742        See the Users Manual Chapter 4 for more details
1743 
1744   Level: intermediate
1745 
1746 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1747            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig(), PCGAMGSetEstEigKSPMaxIt(), PCGAMGSetEstEigKSPType()
1748 M*/
1749 
1750 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1751 {
1752   PetscErrorCode ierr,i;
1753   PC_GAMG        *pc_gamg;
1754   PC_MG          *mg;
1755 
1756   PetscFunctionBegin;
1757    /* register AMG type */
1758   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1759 
1760   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1761   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1762   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1763 
1764   /* create a supporting struct and attach it to pc */
1765   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1766   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1767   mg           = (PC_MG*)pc->data;
1768   mg->innerctx = pc_gamg;
1769 
1770   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1771 
1772   /* these should be in subctx but repartitioning needs simple arrays */
1773   pc_gamg->data_sz = 0;
1774   pc_gamg->data    = NULL;
1775 
1776   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1777   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1778   pc->ops->setup          = PCSetUp_GAMG;
1779   pc->ops->reset          = PCReset_GAMG;
1780   pc->ops->destroy        = PCDestroy_GAMG;
1781   mg->view                = PCView_GAMG;
1782 
1783   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1784   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1785   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1786   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1787   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1788   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPType_C",PCGAMGSetEstEigKSPType_GAMG);CHKERRQ(ierr);
1789   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEstEigKSPMaxIt_C",PCGAMGSetEstEigKSPMaxIt_GAMG);CHKERRQ(ierr);
1790   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1791   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1792   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1793   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1794   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1795   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1796   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1797   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1798   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1799   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1801   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1802   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1803   pc_gamg->repart           = PETSC_FALSE;
1804   pc_gamg->reuse_prol       = PETSC_FALSE;
1805   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1806   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1807   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1808   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1809   pc_gamg->min_eq_proc      = 50;
1810   pc_gamg->coarse_eq_limit  = 50;
1811   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1812   pc_gamg->threshold_scale = 1.;
1813   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1814   pc_gamg->current_level    = 0; /* don't need to init really */
1815   ierr = PetscStrcpy(pc_gamg->esteig_type,NULL);CHKERRQ(ierr);
1816   pc_gamg->esteig_max_it    = 10;
1817   pc_gamg->use_sa_esteig    = -1;
1818   pc_gamg->emin             = 0;
1819   pc_gamg->emax             = 0;
1820 
1821   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1822 
1823   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1824   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 /*@C
1829  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1830     from PCInitializePackage().
1831 
1832  Level: developer
1833 
1834  .seealso: PetscInitialize()
1835 @*/
1836 PetscErrorCode PCGAMGInitializePackage(void)
1837 {
1838   PetscErrorCode ierr;
1839   PetscInt       l;
1840 
1841   PetscFunctionBegin;
1842   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1843   PCGAMGPackageInitialized = PETSC_TRUE;
1844   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1845   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1846   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1847   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1848 
1849   /* general events */
1850   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", 0, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1851   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1852   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1853   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1854   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1855   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1856   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1857 
1858   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1859   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1860   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1861   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1862   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1863   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1864   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1865   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1866   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1867   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1868   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1869   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1870   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1871   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1872   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1873   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1874   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1875   for (l=0;l<PETSC_MG_MAXLEVELS;l++) {
1876     char ename[32];
1877 
1878     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Squ l%02d",l);CHKERRQ(ierr);
1879     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][0]);CHKERRQ(ierr);
1880     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Gal l%02d",l);CHKERRQ(ierr);
1881     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][1]);CHKERRQ(ierr);
1882     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Opt l%02d",l);CHKERRQ(ierr);
1883     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][2]);CHKERRQ(ierr);
1884   }
1885   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1886   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1887   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1888   /* create timer stages */
1889 #if defined(GAMG_STAGES)
1890   {
1891     char     str[32];
1892     PetscInt lidx;
1893     sprintf(str,"MG Level %d (finest)",0);
1894     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1895     for (lidx=1; lidx<9; lidx++) {
1896       sprintf(str,"MG Level %d",(int)lidx);
1897       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1898     }
1899   }
1900 #endif
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 /*@C
1905  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1906     called from PetscFinalize() automatically.
1907 
1908  Level: developer
1909 
1910  .seealso: PetscFinalize()
1911 @*/
PetscErrorCode PCGAMGFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* clear the flag first so the package can be re-initialized after finalize */
  PCGAMGPackageInitialized = PETSC_FALSE;
  ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1921 
1922 /*@C
1923  PCGAMGRegister - Register a PCGAMG implementation.
1924 
1925  Input Parameters:
1926  + type - string that will be used as the name of the GAMG type.
1927  - create - function for creating the gamg context.
1928 
1929   Level: advanced
1930 
1931  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1932 @*/
PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ensure GAMGList exists (and built-in types are registered) before adding */
  ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
  ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1942 
1943