xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision b45e3bf4ff73d80a20c3202b6cd9f79d2f2d3efe)
1 /*
2  GAMG geometric-algebric multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/
7 
8 #if defined(PETSC_HAVE_CUDA)
9   #include <cuda_runtime.h>
10 #endif
11 
12 #if defined(PETSC_HAVE_HIP)
13   #include <hip/hip_runtime.h>
14 #endif
15 
16 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
17 PetscLogEvent petsc_gamg_setup_matmat_events[PETSC_MG_MAXLEVELS][3];
18 PetscLogEvent PC_GAMGGraph_AGG;
19 PetscLogEvent PC_GAMGGraph_GEO;
20 PetscLogEvent PC_GAMGCoarsen_AGG;
21 PetscLogEvent PC_GAMGCoarsen_GEO;
22 PetscLogEvent PC_GAMGProlongator_AGG;
23 PetscLogEvent PC_GAMGProlongator_GEO;
24 PetscLogEvent PC_GAMGOptProlongator_AGG;
25 
26 /* #define GAMG_STAGES */
27 #if defined(GAMG_STAGES)
28 static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
29 #endif
30 
31 static PetscFunctionList GAMGList = NULL;
32 static PetscBool PCGAMGPackageInitialized;
33 
34 /* ----------------------------------------------------------------------------- */
35 PetscErrorCode PCReset_GAMG(PC pc)
36 {
37   PetscErrorCode ierr, level;
38   PC_MG          *mg      = (PC_MG*)pc->data;
39   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
40 
41   PetscFunctionBegin;
42   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
43   pc_gamg->data_sz = 0;
44   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
45   for (level = 0; level < PETSC_MG_MAXLEVELS ; level++) {
46     mg->min_eigen_DinvA[level] = 0;
47     mg->max_eigen_DinvA[level] = 0;
48   }
49   pc_gamg->emin = 0;
50   pc_gamg->emax = 0;
51   PetscFunctionReturn(0);
52 }
53 
54 /* -------------------------------------------------------------------------- */
55 /*
56    PCGAMGCreateLevel_GAMG: create coarse op with RAP.  repartition and/or reduce number
57      of active processors.
58 
59    Input Parameter:
60    . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
61           'pc_gamg->data_sz' are changed via repartitioning/reduction.
62    . Amat_fine - matrix on this fine (k) level
63    . cr_bs - coarse block size
64    In/Output Parameter:
65    . a_P_inout - prolongation operator to the next level (k-->k-1)
66    . a_nactive_proc - number of active procs
67    Output Parameter:
68    . a_Amat_crs - coarse matrix that is created (k-1)
69 */
70 
71 static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
72 {
73   PetscErrorCode  ierr;
74   PC_MG           *mg         = (PC_MG*)pc->data;
75   PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
76   Mat             Cmat,Pold=*a_P_inout;
77   MPI_Comm        comm;
78   PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
79   PetscInt        ncrs_eq,ncrs,f_bs;
80 
81   PetscFunctionBegin;
82   ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
83   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
84   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
85   ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
86   ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
87   ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
88   ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
89 
90   if (Pcolumnperm) *Pcolumnperm = NULL;
91 
92   /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
93   ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
94   if (pc_gamg->data_cell_rows>0) {
95     ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
96   } else {
97     PetscInt  bs;
98     ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
99     ncrs = ncrs_eq/bs;
100   }
101   /* get number of PEs to make active 'new_size', reduce, can be any integer 1-P */
102   if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* 0 turns reducing to 1 process/device on; do for HIP, etc. */
103 #if defined(PETSC_HAVE_CUDA)
104     PetscShmComm pshmcomm;
105     PetscMPIInt  locrank;
106     MPI_Comm     loccomm;
107     PetscInt     s_nnodes,r_nnodes, new_new_size;
108     cudaError_t  cerr;
109     int          devCount;
110     ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
111     ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
112     ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRMPI(ierr);
113     s_nnodes = !locrank;
114     ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
115     PetscCheckFalse(size%r_nnodes,PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of nodes np=%D nnodes%D",size,r_nnodes);
116     devCount = 0;
117     cerr = cudaGetDeviceCount(&devCount);
118     cudaGetLastError(); /* Reset the last error */
119     if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
120       new_new_size = r_nnodes * devCount;
121       new_size = new_new_size;
122       ierr = PetscInfo(pc,"%s: Fine grid with Cuda. %D nodes. Change new active set size %d --> %d (devCount=%d #nodes=%D)\n",((PetscObject)pc)->prefix,r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
123     } else {
124       ierr = PetscInfo(pc,"%s: With Cuda but no device. Use heuristics.");CHKERRQ(ierr);
125       goto HEURISTIC;
126     }
127 #else
128     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
129 #endif
130   } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
131     PetscCheckFalse(nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level],PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of active process %D wrt reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
132     new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
133     ierr = PetscInfo(pc,"%s: Manually setting reduction to %d active processes (%d/%D)\n",((PetscObject)pc)->prefix,new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
134   } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
135     new_size = 1;
136     ierr = PetscInfo(pc,"%s: Force coarsest grid reduction to %d active processes\n",((PetscObject)pc)->prefix,new_size);CHKERRQ(ierr);
137   } else {
138     PetscInt ncrs_eq_glob;
139 #if defined(PETSC_HAVE_CUDA)
140     HEURISTIC:
141 #endif
142     ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
143     new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
144     if (!new_size) new_size = 1; /* not likely, posible? */
145     else if (new_size >= nactive) new_size = nactive; /* no change, rare */
146     ierr = PetscInfo(pc,"%s: Coarse grid reduction from %d to %d active processes\n",((PetscObject)pc)->prefix,nactive,new_size);CHKERRQ(ierr);
147   }
148   if (new_size==nactive) {
149     *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
150     if (new_size < size) {
151       /* odd case where multiple coarse grids are on one processor or no coarsening ... */
152       ierr = PetscInfo(pc,"%s: reduced grid using same number of processors (%d) as last grid (use larger coarse grid)\n",((PetscObject)pc)->prefix,nactive);CHKERRQ(ierr);
153       if (pc_gamg->cpu_pin_coarse_grids) {
154         ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
155         ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
156       }
157     }
158     /* we know that the grid structure can be reused in MatPtAP */
159   } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
160     PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
161     IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
162     nloc_old = ncrs_eq/cr_bs;
163     PetscCheckFalse(ncrs_eq % cr_bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
164     /* get new_size and rfactor */
165     if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
166       /* find factor */
167       if (new_size == 1) rfactor = size; /* don't modify */
168       else {
169         PetscReal best_fact = 0.;
170         jj = -1;
171         for (kk = 1 ; kk <= size ; kk++) {
172           if (!(size%kk)) { /* a candidate */
173             PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
174             if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
175             if (fact > best_fact) {
176               best_fact = fact; jj = kk;
177             }
178           }
179         }
180         if (jj != -1) rfactor = jj;
181         else rfactor = 1; /* a prime */
182         if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
183         else expand_factor = rfactor;
184       }
185       new_size = size/rfactor; /* make new size one that is factor */
186       if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
187         *a_Amat_crs = Cmat;
188         ierr = PetscInfo(pc,"%s: Finding factorable processor set stopped reduction: new_size=%d, neq(loc)=%D\n",((PetscObject)pc)->prefix,new_size,ncrs_eq);CHKERRQ(ierr);
189         PetscFunctionReturn(0);
190       }
191     }
192     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
193     /* make 'is_eq_newproc' */
194     ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
195     if (pc_gamg->repart) {
196       /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
197       Mat      adj;
198       ierr = PetscInfo(pc,"%s: Repartition: size (active): %d --> %d, %D local equations, using %s process layout\n",((PetscObject)pc)->prefix,*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
199       /* get 'adj' */
200       if (cr_bs == 1) {
201         ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
202       } else {
203         /* make a scalar matrix to partition (no Stokes here) */
204         Mat               tMat;
205         PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
206         const PetscScalar *vals;
207         const PetscInt    *idx;
208         PetscInt          *d_nnz, *o_nnz, M, N;
209         static PetscInt   llev = 0; /* ugly but just used for debugging */
210         MatType           mtype;
211 
212         ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
213         ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
214         ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
215         for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
216           ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
217           d_nnz[jj] = ncols/cr_bs;
218           o_nnz[jj] = ncols/cr_bs;
219           ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
220           if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
221           if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
222         }
223 
224         ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
225         ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
226         ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
227         ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
228         ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
229         ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
230         ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
231 
232         for (ii = Istart_crs; ii < Iend_crs; ii++) {
233           PetscInt dest_row = ii/cr_bs;
234           ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
235           for (jj = 0; jj < ncols; jj++) {
236             PetscInt    dest_col = idx[jj]/cr_bs;
237             PetscScalar v        = 1.0;
238             ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
239           }
240           ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
241         }
242         ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
243         ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
244 
245         if (llev++ == -1) {
246           PetscViewer viewer; char fname[32];
247           ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
248           PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);
249           ierr = MatView(tMat, viewer);CHKERRQ(ierr);
250           ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
251         }
252         ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
253         ierr = MatDestroy(&tMat);CHKERRQ(ierr);
254       } /* create 'adj' */
255 
256       { /* partition: get newproc_idx */
257         char            prefix[256];
258         const char      *pcpre;
259         const PetscInt  *is_idx;
260         MatPartitioning mpart;
261         IS              proc_is;
262 
263         ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
264         ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
265         ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
266         ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
267         ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
268         ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
269         ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
270         ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
271         ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
272 
273         /* collect IS info */
274         ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
275         ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
276         for (kk = jj = 0 ; kk < nloc_old ; kk++) {
277           for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
278             newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
279           }
280         }
281         ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
282         ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
283       }
284       ierr = MatDestroy(&adj);CHKERRQ(ierr);
285 
286       ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
287       ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
288     } else { /* simple aggregation of parts -- 'is_eq_newproc' */
289       PetscInt targetPE;
290       PetscCheckFalse(new_size==nactive,PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
291       ierr = PetscInfo(pc,"%s: Number of equations (loc) %D with simple aggregation\n",((PetscObject)pc)->prefix,ncrs_eq);CHKERRQ(ierr);
292       targetPE = (rank/rfactor)*expand_factor;
293       ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
294     } /* end simple 'is_eq_newproc' */
295 
296     /*
297       Create an index set from the is_eq_newproc index set to indicate the mapping TO
298     */
299     ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
300     is_eq_num_prim = is_eq_num;
301     /*
302       Determine how many equations/vertices are assigned to each processor
303     */
304     ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
305     ncrs_eq_new = counts[rank];
306     ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
307     ncrs_new = ncrs_eq_new/cr_bs;
308 
309     ierr = PetscFree(counts);CHKERRQ(ierr);
310     /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxilary data into some complex abstracted thing */
311     {
312       Vec            src_crd, dest_crd;
313       const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
314       VecScatter     vecscat;
315       PetscScalar    *array;
316       IS isscat;
317       /* move data (for primal equations only) */
318       /* Create a vector to contain the newly ordered element information */
319       ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
320       ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
321       ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
322       /*
323         There are 'ndata_rows*ndata_cols' data items per node, (one can think of the vectors of having
324         a block size of ...).  Note, ISs are expanded into equation space by 'cr_bs'.
325       */
326       ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
327       ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
328       for (ii=0,jj=0; ii<ncrs; ii++) {
329         PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
330         for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
331       }
332       ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
333       ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
334       ierr = PetscFree(tidx);CHKERRQ(ierr);
335       /*
336         Create a vector to contain the original vertex information for each element
337       */
338       ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
339       for (jj=0; jj<ndata_cols; jj++) {
340         const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
341         for (ii=0; ii<ncrs; ii++) {
342           for (kk=0; kk<ndata_rows; kk++) {
343             PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
344             PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
345             ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
346           }
347         }
348       }
349       ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
350       ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
351       /*
352         Scatter the element vertex information (still in the original vertex ordering)
353         to the correct processor
354       */
355       ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
356       ierr = ISDestroy(&isscat);CHKERRQ(ierr);
357       ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
358       ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
359       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
360       ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
361       /*
362         Put the element vertex data into a new allocation of the gdata->ele
363       */
364       ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
365       ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);
366 
367       pc_gamg->data_sz = node_data_sz*ncrs_new;
368       strideNew        = ncrs_new*ndata_rows;
369 
370       ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
371       for (jj=0; jj<ndata_cols; jj++) {
372         for (ii=0; ii<ncrs_new; ii++) {
373           for (kk=0; kk<ndata_rows; kk++) {
374             PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
375             pc_gamg->data[ix] = PetscRealPart(array[jx]);
376           }
377         }
378       }
379       ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
380       ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
381     }
382     /* move A and P (columns) with new layout */
383     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
384     /*
385       Invert for MatCreateSubMatrix
386     */
387     ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
388     ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
389     ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
390     if (is_eq_num != is_eq_num_prim) {
391       ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
392     }
393     if (Pcolumnperm) {
394       ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
395       *Pcolumnperm = new_eq_indices;
396     }
397     ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
398     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
399     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
400     /* 'a_Amat_crs' output */
401     {
402       Mat       mat;
403       PetscBool flg;
404       ierr = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
405       ierr = MatGetOption(Cmat, MAT_SPD, &flg);CHKERRQ(ierr);
406       if (flg) {
407         ierr = MatSetOption(mat, MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
408       } else {
409         ierr = MatGetOption(Cmat, MAT_HERMITIAN, &flg);CHKERRQ(ierr);
410         if (flg) {
411           ierr = MatSetOption(mat, MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
412         } else {
413 #if !defined(PETSC_USE_COMPLEX)
414           ierr = MatGetOption(Cmat, MAT_SYMMETRIC, &flg);CHKERRQ(ierr);
415           if (flg) {
416             ierr = MatSetOption(mat, MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
417           }
418 #endif
419         }
420       }
421       *a_Amat_crs = mat;
422     }
423     ierr = MatDestroy(&Cmat);CHKERRQ(ierr);
424 
425     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
426     /* prolongator */
427     {
428       IS       findices;
429       PetscInt Istart,Iend;
430       Mat      Pnew;
431 
432       ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
433       ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
434       ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
435       ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
436       ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
437       ierr = ISDestroy(&findices);CHKERRQ(ierr);
438       ierr = MatSetOption(Pnew,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
439 
440       ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
441       ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);
442 
443       /* output - repartitioned */
444       *a_P_inout = Pnew;
445     }
446     ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);
447 
448     *a_nactive_proc = new_size; /* output */
449 
450     /* pinning on reduced grids, not a bad heuristic and optimization gets folded into process reduction optimization */
451     if (pc_gamg->cpu_pin_coarse_grids) {
452 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
453       static PetscInt llev = 2;
454       ierr = PetscInfo(pc,"%s: Pinning level %D to the CPU\n",((PetscObject)pc)->prefix,llev++);CHKERRQ(ierr);
455 #endif
456       ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
457       ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
458       if (1) { /* HACK: move this to MatBindCPU_MPIAIJXXX; lvec is created, need to pin it, this is done in MatSetUpMultiply_MPIAIJ. Hack */
459         Mat         A = *a_Amat_crs, P = *a_P_inout;
460         PetscMPIInt size;
461         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
462         if (size > 1) {
463           Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
464           ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
465           ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
466         }
467       }
468     }
469     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
470   }
471   PetscFunctionReturn(0);
472 }
473 
474 PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
475 {
476   PetscErrorCode ierr;
477   const char     *prefix;
478   char           addp[32];
479   PC_MG          *mg      = (PC_MG*)a_pc->data;
480   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
481 
482   PetscFunctionBegin;
483   ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
484   ierr = PetscInfo(a_pc,"%s: Square Graph on level %D\n",((PetscObject)a_pc)->prefix,pc_gamg->current_level+1);CHKERRQ(ierr);
485   ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
486   ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
487   ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",pc_gamg->current_level);CHKERRQ(ierr);
488   ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
489   if ((*Gmat2)->structurally_symmetric) {
490     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AB);CHKERRQ(ierr);
491   } else {
492     ierr = MatSetOption(Gmat1,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
493     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
494   }
495   ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
496   ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
497   ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
498   ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
499   ierr = MatProductClear(*Gmat2);CHKERRQ(ierr);
500   /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
501   (*Gmat2)->assembled = PETSC_TRUE;
502   PetscFunctionReturn(0);
503 }
504 
505 /* -------------------------------------------------------------------------- */
506 /*
507    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
508                     by setting data structures and options.
509 
510    Input Parameter:
511 .  pc - the preconditioner context
512 
513 */
514 PetscErrorCode PCSetUp_GAMG(PC pc)
515 {
516   PetscErrorCode ierr;
517   PC_MG          *mg      = (PC_MG*)pc->data;
518   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
519   Mat            Pmat     = pc->pmat;
520   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
521   MPI_Comm       comm;
522   PetscMPIInt    rank,size,nactivepe;
523   Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
524   IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
525   PetscLogDouble nnz0=0.,nnztot=0.;
526   MatInfo        info;
527   PetscBool      is_last = PETSC_FALSE;
528 
529   PetscFunctionBegin;
530   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
531   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
532   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
533 
534   if (pc->setupcalled) {
535     if (!pc_gamg->reuse_prol || pc->flag == DIFFERENT_NONZERO_PATTERN) {
536       /* reset everything */
537       ierr = PCReset_MG(pc);CHKERRQ(ierr);
538       pc->setupcalled = 0;
539     } else {
540       PC_MG_Levels **mglevels = mg->levels;
541       /* just do Galerkin grids */
542       Mat          B,dA,dB;
543 
544       if (pc_gamg->Nlevels > 1) {
545         PetscInt gl;
546         /* currently only handle case where mat and pmat are the same on coarser levels */
547         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
548         /* (re)set to get dirty flag */
549         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
550 
551         for (level=pc_gamg->Nlevels-2,gl=0; level>=0; level--,gl++) {
552           MatReuse reuse = MAT_INITIAL_MATRIX ;
553 
554           /* matrix structure can change from repartitioning or process reduction but don't know if we have process reduction here. Should fix */
555           ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
556           if (B->product) {
557             if (B->product->A == dB && B->product->B == mglevels[level+1]->interpolate) {
558               reuse = MAT_REUSE_MATRIX;
559             }
560           }
561           if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr); }
562           if (reuse == MAT_REUSE_MATRIX) {
563             ierr = PetscInfo(pc,"%s: RAP after first solve, reuse matrix level %D\n",((PetscObject)pc)->prefix,level);CHKERRQ(ierr);
564           } else {
565             ierr = PetscInfo(pc,"%s: RAP after first solve, new matrix level %D\n",((PetscObject)pc)->prefix,level);CHKERRQ(ierr);
566           }
567           ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
568           ierr = MatPtAP(dB,mglevels[level+1]->interpolate,reuse,PETSC_DEFAULT,&B);CHKERRQ(ierr);
569           ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
570           if (reuse == MAT_INITIAL_MATRIX) mglevels[level]->A = B;
571           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
572           dB   = B;
573         }
574       }
575 
576       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
577       PetscFunctionReturn(0);
578     }
579   }
580 
581   if (!pc_gamg->data) {
582     if (pc_gamg->orig_data) {
583       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
584       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
585 
586       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
587       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
588       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
589 
590       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
591       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
592     } else {
593       PetscCheckFalse(!pc_gamg->ops->createdefaultdata,comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
594       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
595     }
596   }
597 
598   /* cache original data for reuse */
599   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
600     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
601     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
602     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
603     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
604   }
605 
606   /* get basic dims */
607   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
608   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
609 
610   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
611   nnz0   = info.nz_used;
612   nnztot = info.nz_used;
613   ierr = PetscInfo(pc,"%s: level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",((PetscObject)pc)->prefix,0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
614 
615   /* Get A_i and R_i */
616   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
617     pc_gamg->current_level = level;
618     PetscCheckFalse(level >= PETSC_MG_MAXLEVELS,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
619     level1 = level + 1;
620     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
621 #if defined(GAMG_STAGES)
622     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
623 #endif
624     { /* construct prolongator */
625       Mat              Gmat;
626       PetscCoarsenData *agg_lists;
627       Mat              Prol11;
628 
629       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
630       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
631       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
632 
633       /* could have failed to create new level */
634       if (Prol11) {
635         const char *prefix;
636         char       addp[32];
637 
638         /* get new block size of coarse matrices */
639         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
640 
641         if (pc_gamg->ops->optprolongator) {
642           /* smooth */
643           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
644         }
645 
646         if (pc_gamg->use_aggs_in_asm) {
647           PetscInt bs;
648           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
649           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
650         }
651 
652         ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
653         ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
654         ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
655         ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
656         /* Always generate the transpose with CUDA
657            Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
658         ierr = MatSetOption(Prol11,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
659         ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
660         Parr[level1] = Prol11;
661       } else Parr[level1] = NULL; /* failed to coarsen */
662 
663       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
664       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
665     } /* construct prolongator scope */
666     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
667     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
668     if (!Parr[level1]) { /* failed to coarsen */
669       ierr = PetscInfo(pc,"%s: Stop gridding, level %D\n",((PetscObject)pc)->prefix,level);CHKERRQ(ierr);
670 #if defined(GAMG_STAGES)
671       ierr = PetscLogStagePop();CHKERRQ(ierr);
672 #endif
673       break;
674     }
675     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
676     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is next M, a loop test variables */
677     PetscCheckFalse(is_last,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Is last ?");
678     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
679     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
680     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
681 
682     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
683     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is loop test variables */
684     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
685     nnztot += info.nz_used;
686     ierr = PetscInfo(pc,"%s: %D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",((PetscObject)pc)->prefix,level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
687 
688 #if defined(GAMG_STAGES)
689     ierr = PetscLogStagePop();CHKERRQ(ierr);
690 #endif
691     /* stop if one node or one proc -- could pull back for singular problems */
692     if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
693       ierr =  PetscInfo(pc,"%s: HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",((PetscObject)pc)->prefix,level,M/bs);CHKERRQ(ierr);
694       level++;
695       break;
696     }
697   } /* levels */
698   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
699 
700   ierr = PetscInfo(pc,"%s: %D levels, grid complexity = %g\n",((PetscObject)pc)->prefix,level+1,nnztot/nnz0);CHKERRQ(ierr);
701   pc_gamg->Nlevels = level + 1;
702   fine_level       = level;
703   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
704 
705   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
706 
707     /* set default smoothers & set operators */
708     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
709       KSP smoother;
710       PC  subpc;
711 
712       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
713       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
714 
715       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
716       /* set ops */
717       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
718       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
719 
720       /* set defaults */
721       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
722 
723       /* set blocks for ASM smoother that uses the 'aggregates' */
724       if (pc_gamg->use_aggs_in_asm) {
725         PetscInt sz;
726         IS       *iss;
727 
728         sz   = nASMBlocksArr[level];
729         iss  = ASMLocalIDsArr[level];
730         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
731         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
732         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
733         if (!sz) {
734           IS       is;
735           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
736           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
737           ierr = ISDestroy(&is);CHKERRQ(ierr);
738         } else {
739           PetscInt kk;
740           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
741           for (kk=0; kk<sz; kk++) {
742             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
743           }
744           ierr = PetscFree(iss);CHKERRQ(ierr);
745         }
746         ASMLocalIDsArr[level] = NULL;
747         nASMBlocksArr[level]  = 0;
748       } else {
749         ierr = PCSetType(subpc, PCJACOBI);CHKERRQ(ierr);
750       }
751     }
752     {
753       /* coarse grid */
754       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
755       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
756 
757       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
758       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
759       if (!pc_gamg->use_parallel_coarse_grid_solver) {
760         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
761         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
762         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
763         ierr = PCSetUp(subpc);CHKERRQ(ierr);
764         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
765         PetscCheckFalse(ii != 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
766         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
767         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
768         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
769         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
770         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
771       }
772     }
773 
774     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
775     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
776     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
777     ierr = PetscOptionsEnd();CHKERRQ(ierr);
778     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
779 
780     /* setup cheby eigen estimates from SA */
781     if (pc_gamg->use_sa_esteig) {
782       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
783         KSP       smoother;
784         PetscBool ischeb;
785 
786         ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
787         ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
788         if (ischeb) {
789           KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;
790 
791           // The command line will override these settings because KSPSetFromOptions is called in PCSetUp_MG
792           if (mg->max_eigen_DinvA[level] > 0) {
793             // SA uses Jacobi for P; we use SA estimates if the smoother is also Jacobi or if the user explicitly requested it.
794             // TODO: This should test whether it's the same Jacobi variant (DIAG, ROWSUM, etc.)
795             PetscReal emax,emin;
796 
797             emin = mg->min_eigen_DinvA[level];
798             emax = mg->max_eigen_DinvA[level];
799             ierr = PetscInfo(pc,"%s: PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",((PetscObject)pc)->prefix,level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
800             cheb->emin_provided = emin;
801             cheb->emax_provided = emax;
802           }
803         }
804       }
805     }
806 
807     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
808 
809     /* clean up */
810     for (level=1; level<pc_gamg->Nlevels; level++) {
811       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
812       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
813     }
814   } else {
815     KSP smoother;
816 
817     ierr = PetscInfo(pc,"%s: One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
818     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
819     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
820     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
821     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 /* ------------------------------------------------------------------------- */
827 /*
828  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
829    that was created with PCCreate_GAMG().
830 
831    Input Parameter:
832 .  pc - the preconditioner context
833 
834    Application Interface Routine: PCDestroy()
835 */
PetscErrorCode PCDestroy_GAMG(PC pc)
{
  PetscErrorCode ierr;
  PC_MG          *mg     = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  /* free cached level data (pc_gamg->data/orig_data) and zero cached eigen estimates first */
  ierr = PCReset_GAMG(pc);CHKERRQ(ierr);
  /* let the active GAMG sub-type (agg/geo/classical) free its private data, if it registered a destroy */
  if (pc_gamg->ops->destroy) {
    ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
  }
  ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
  ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
  /* pc_gamg hangs off mg->innerctx, so it must be freed before PCDestroy_MG releases pc->data */
  ierr = PetscFree(pc_gamg);CHKERRQ(ierr);
  ierr = PCDestroy_MG(pc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
853 
854 /*@
855    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
856 
857    Logically Collective on PC
858 
859    Input Parameters:
860 +  pc - the preconditioner context
861 -  n - the number of equations
862 
863    Options Database Key:
864 .  -pc_gamg_process_eq_limit <limit>
865 
866    Notes:
867     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
868           that has degrees of freedom
869 
870    Level: intermediate
871 
872 .seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
873 @*/
874 PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
875 {
876   PetscErrorCode ierr;
877 
878   PetscFunctionBegin;
879   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
880   ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
881   PetscFunctionReturn(0);
882 }
883 
884 static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
885 {
886   PC_MG   *mg      = (PC_MG*)pc->data;
887   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
888 
889   PetscFunctionBegin;
890   if (n>0) pc_gamg->min_eq_proc = n;
891   PetscFunctionReturn(0);
892 }
893 
894 /*@
895    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
896 
897  Collective on PC
898 
899    Input Parameters:
900 +  pc - the preconditioner context
901 -  n - maximum number of equations to aim for
902 
903    Options Database Key:
904 .  -pc_gamg_coarse_eq_limit <limit>
905 
906    Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
907      has less than 1000 unknowns.
908 
909    Level: intermediate
910 
911 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
912 @*/
913 PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
914 {
915   PetscErrorCode ierr;
916 
917   PetscFunctionBegin;
918   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
919   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
920   PetscFunctionReturn(0);
921 }
922 
923 static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
924 {
925   PC_MG   *mg      = (PC_MG*)pc->data;
926   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
927 
928   PetscFunctionBegin;
929   if (n>0) pc_gamg->coarse_eq_limit = n;
930   PetscFunctionReturn(0);
931 }
932 
933 /*@
934    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
935 
936    Collective on PC
937 
938    Input Parameters:
939 +  pc - the preconditioner context
940 -  n - PETSC_TRUE or PETSC_FALSE
941 
942    Options Database Key:
943 .  -pc_gamg_repartition <true,false>
944 
945    Notes:
    this will generally improve the load balancing of the work on each level
947 
948    Level: intermediate
949 
950 .seealso: ()
951 @*/
952 PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
953 {
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
958   ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
963 {
964   PC_MG   *mg      = (PC_MG*)pc->data;
965   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
966 
967   PetscFunctionBegin;
968   pc_gamg->repart = n;
969   PetscFunctionReturn(0);
970 }
971 
972 /*@
973    PCGAMGSetUseSAEstEig - Use eigen estimate from smoothed aggregation for Chebyshev smoother
974 
975    Collective on PC
976 
977    Input Parameters:
978 +  pc - the preconditioner context
979 -  n - number of its
980 
981    Options Database Key:
982 .  -pc_gamg_use_sa_esteig <true,false>
983 
984    Notes:
985    Smoothed aggregation constructs the smoothed prolongator $P = (I - \omega D^{-1} A) T$ where $T$ is the tentative prolongator and $D$ is the diagonal of $A$.
986    Eigenvalue estimates (based on a few CG or GMRES iterations) are computed to choose $\omega$ so that this is a stable smoothing operation.
987    If Chebyshev with Jacobi (diagonal) preconditioning is used for smoothing, then the eigenvalue estimates can be reused.
988    This option is only used when the smoother uses Jacobi, and should be turned off if a different PCJacobiType is used.
989    It became default in PETSc 3.17.
990 
991    Level: advanced
992 
993 .seealso: KSPChebyshevSetEigenvalues(), KSPChebyshevEstEigSet()
994 @*/
995 PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
996 {
997   PetscErrorCode ierr;
998 
999   PetscFunctionBegin;
1000   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1001   ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
1006 {
1007   PC_MG   *mg      = (PC_MG*)pc->data;
1008   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1009 
1010   PetscFunctionBegin;
1011   pc_gamg->use_sa_esteig = n;
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 /*@
1016    PCGAMGSetEigenvalues - Set eigenvalues
1017 
1018    Collective on PC
1019 
1020    Input Parameters:
1021 +  pc - the preconditioner context
.  emax - max eigenvalue
-  emin - min eigenvalue
1024 
1025    Options Database Key:
1026 .  -pc_gamg_eigenvalues
1027 
1028    Level: intermediate
1029 
1030 .seealso: PCGAMGSetUseSAEstEig()
1031 @*/
1032 PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
1033 {
1034   PetscErrorCode ierr;
1035 
1036   PetscFunctionBegin;
1037   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1038   ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
1043 {
1044   PC_MG          *mg      = (PC_MG*)pc->data;
1045   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1046 
1047   PetscFunctionBegin;
1048   PetscCheckFalse(emax <= emin,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
1049   PetscCheckFalse(emax*emin <= 0.0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
1050   pc_gamg->emax = emax;
1051   pc_gamg->emin = emin;
1052 
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 /*@
1057    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
1058 
1059    Collective on PC
1060 
1061    Input Parameters:
1062 +  pc - the preconditioner context
1063 -  n - PETSC_TRUE or PETSC_FALSE
1064 
1065    Options Database Key:
1066 .  -pc_gamg_reuse_interpolation <true,false>
1067 
1068    Level: intermediate
1069 
1070    Notes:
1071     this may negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
1072           rebuilding the preconditioner quicker.
1073 
1074 .seealso: ()
1075 @*/
1076 PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
1077 {
1078   PetscErrorCode ierr;
1079 
1080   PetscFunctionBegin;
1081   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1082   ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
1087 {
1088   PC_MG   *mg      = (PC_MG*)pc->data;
1089   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1090 
1091   PetscFunctionBegin;
1092   pc_gamg->reuse_prol = n;
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 /*@
1097    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
1098 
1099    Collective on PC
1100 
1101    Input Parameters:
1102 +  pc - the preconditioner context
1103 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
1104 
1105    Options Database Key:
1106 .  -pc_gamg_asm_use_agg
1107 
1108    Level: intermediate
1109 
1110 .seealso: ()
1111 @*/
1112 PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
1113 {
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1118   ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
1123 {
1124   PC_MG   *mg      = (PC_MG*)pc->data;
1125   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1126 
1127   PetscFunctionBegin;
1128   pc_gamg->use_aggs_in_asm = flg;
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 /*@
1133    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
1134 
1135    Collective on PC
1136 
1137    Input Parameters:
1138 +  pc - the preconditioner context
1139 -  flg - PETSC_TRUE to not force coarse grid onto one processor
1140 
1141    Options Database Key:
1142 .  -pc_gamg_use_parallel_coarse_grid_solver
1143 
1144    Level: intermediate
1145 
1146 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
1147 @*/
1148 PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
1149 {
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1154   ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
1159 {
1160   PC_MG   *mg      = (PC_MG*)pc->data;
1161   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1162 
1163   PetscFunctionBegin;
1164   pc_gamg->use_parallel_coarse_grid_solver = flg;
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 /*@
1169    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
1170 
1171    Collective on PC
1172 
1173    Input Parameters:
1174 +  pc - the preconditioner context
1175 -  flg - PETSC_TRUE to pin coarse grids to CPU
1176 
1177    Options Database Key:
1178 .  -pc_gamg_cpu_pin_coarse_grids
1179 
1180    Level: intermediate
1181 
1182 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
1183 @*/
1184 PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
1185 {
1186   PetscErrorCode ierr;
1187 
1188   PetscFunctionBegin;
1189   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1190   ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
1195 {
1196   PC_MG   *mg      = (PC_MG*)pc->data;
1197   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1198 
1199   PetscFunctionBegin;
1200   pc_gamg->cpu_pin_coarse_grids = flg;
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 /*@
1205    PCGAMGSetCoarseGridLayoutType - place reduce grids on processors with natural order (compact type)
1206 
1207    Collective on PC
1208 
1209    Input Parameters:
1210 +  pc - the preconditioner context
1211 -  flg - Layout type
1212 
1213    Options Database Key:
1214 .  -pc_gamg_coarse_grid_layout_type
1215 
1216    Level: intermediate
1217 
1218 .seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
1219 @*/
1220 PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
1221 {
1222   PetscErrorCode ierr;
1223 
1224   PetscFunctionBegin;
1225   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1226   ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
1231 {
1232   PC_MG   *mg      = (PC_MG*)pc->data;
1233   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1234 
1235   PetscFunctionBegin;
1236   pc_gamg->layout_type = flg;
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*@
1241    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1242 
1243    Not collective on PC
1244 
1245    Input Parameters:
1246 +  pc - the preconditioner
1247 -  n - the maximum number of levels to use
1248 
1249    Options Database Key:
1250 .  -pc_mg_levels
1251 
1252    Level: intermediate
1253 
1254 .seealso: ()
1255 @*/
1256 PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
1257 {
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1262   ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
1267 {
1268   PC_MG   *mg      = (PC_MG*)pc->data;
1269   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1270 
1271   PetscFunctionBegin;
1272   pc_gamg->Nlevels = n;
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 /*@
1277    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1278 
1279    Not collective on PC
1280 
1281    Input Parameters:
1282 +  pc - the preconditioner context
1283 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1284 -  n - number of threshold values provided in array
1285 
1286    Options Database Key:
1287 .  -pc_gamg_threshold <threshold>
1288 
1289    Notes:
1290     Increasing the threshold decreases the rate of coarsening. Conversely reducing the threshold increases the rate of coarsening (aggressive coarsening) and thereby reduces the complexity of the coarse grids, and generally results in slower solver converge rates. Reducing coarse grid complexity reduced the complexity of Galerkin coarse grid construction considerably.
1291     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, and thus reducing the coupling in the graph and a different (perhaps better) coarser set of points.
1292 
1293     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1294     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1295     If n is greater than the total number of levels, the excess entries in threshold will not be used.
1296 
1297    Level: intermediate
1298 
1299 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1300 @*/
1301 PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
1302 {
1303   PetscErrorCode ierr;
1304 
1305   PetscFunctionBegin;
1306   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1307   if (n) PetscValidRealPointer(v,2);
1308   ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1313 {
1314   PC_MG   *mg      = (PC_MG*)pc->data;
1315   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1316   PetscInt i;
1317   PetscFunctionBegin;
1318   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1319   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 /*@
1324    PCGAMGSetRankReductionFactors - Set manual schedual for process reduction on coarse grids
1325 
1326    Collective on PC
1327 
1328    Input Parameters:
1329 +  pc - the preconditioner context
1330 .  v - array of reduction factors. 0 for fist value forces a reduction to one process/device on first level in Cuda
1331 -  n - number of values provided in array
1332 
1333    Options Database Key:
1334 .  -pc_gamg_rank_reduction_factors <factors>
1335 
1336    Level: intermediate
1337 
1338 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1339 @*/
1340 PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
1341 {
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1346   if (n) PetscValidIntPointer(v,2);
1347   ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
1352 {
1353   PC_MG   *mg      = (PC_MG*)pc->data;
1354   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1355   PetscInt i;
1356   PetscFunctionBegin;
1357   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
1358   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* 0 stop putting one process/device on first level */
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 /*@
1363    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1364 
1365    Not collective on PC
1366 
1367    Input Parameters:
1368 +  pc - the preconditioner context
-  scale - the threshold value reduction, usually < 1.0
1370 
1371    Options Database Key:
1372 .  -pc_gamg_threshold_scale <v>
1373 
1374    Notes:
1375    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1376    This scaling is used for each subsequent coarsening, but must be called before PCGAMGSetThreshold().
1377 
1378    Level: advanced
1379 
1380 .seealso: PCGAMGSetThreshold()
1381 @*/
1382 PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
1383 {
1384   PetscErrorCode ierr;
1385 
1386   PetscFunctionBegin;
1387   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1388   ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
1393 {
1394   PC_MG   *mg      = (PC_MG*)pc->data;
1395   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1396   PetscFunctionBegin;
1397   pc_gamg->threshold_scale = v;
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 /*@C
1402    PCGAMGSetType - Set solution method
1403 
1404    Collective on PC
1405 
1406    Input Parameters:
1407 +  pc - the preconditioner context
1408 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1409 
1410    Options Database Key:
1411 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1412 
1413    Level: intermediate
1414 
1415 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1416 @*/
1417 PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
1418 {
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1423   ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 /*@C
1428    PCGAMGGetType - Get solution method
1429 
1430    Collective on PC
1431 
1432    Input Parameter:
1433 .  pc - the preconditioner context
1434 
1435    Output Parameter:
1436 .  type - the type of algorithm used
1437 
1438    Level: intermediate
1439 
1440 .seealso: PCGAMGSetType(), PCGAMGType
1441 @*/
1442 PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
1443 {
1444   PetscErrorCode ierr;
1445 
1446   PetscFunctionBegin;
1447   PetscValidHeaderSpecific(pc,PC_CLASSID,1);
1448   ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
1453 {
1454   PC_MG          *mg      = (PC_MG*)pc->data;
1455   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1456 
1457   PetscFunctionBegin;
1458   *type = pc_gamg->type;
1459   PetscFunctionReturn(0);
1460 }
1461 
/* Switch the GAMG sub-type: tear down the old implementation (if any) and run the
   constructor registered for the new type name in GAMGList. */
static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
{
  PetscErrorCode ierr,(*r)(PC);  /* r: constructor for the requested type */
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->type = type;
  /* look up the registered constructor before destroying anything, so an unknown
     type name leaves the current setup intact */
  ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
  PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
  if (pc_gamg->ops->destroy) {
    /* a sub-type is currently active: destroy it and reset the ops table,
       keeping only the common createlevel implementation */
    ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
    ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
    pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
    /* cleaning up common data in pc_gamg - this should disappear someday */
    pc_gamg->data_cell_cols = 0;
    pc_gamg->data_cell_rows = 0;
    pc_gamg->orig_data_cell_cols = 0;
    pc_gamg->orig_data_cell_rows = 0;
    ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
    pc_gamg->data_sz = 0;
  }
  /* record the new type name, then run its constructor */
  ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = (*r)(pc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1489 
1490 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1491 {
1492   PetscErrorCode ierr,i;
1493   PC_MG          *mg      = (PC_MG*)pc->data;
1494   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1495   PetscReal       gc=0, oc=0;
1496 
1497   PetscFunctionBegin;
1498   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1499   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1500   for (i=0;i<mg->nlevels; i++) {
1501     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1502   }
1503   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1504   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1505   if (pc_gamg->use_aggs_in_asm) {
1506     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1507   }
1508   if (pc_gamg->use_parallel_coarse_grid_solver) {
1509     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1510   }
1511 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1512   if (pc_gamg->cpu_pin_coarse_grids) {
1513     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1514   }
1515 #endif
1516   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1517   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1518   /* } else { */
1519   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1520   /* } */
1521   if (pc_gamg->ops->view) {
1522     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1523   }
1524   ierr = PCMGGetGridComplexity(pc,&gc,&oc);CHKERRQ(ierr);
1525   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g    operator = %g\n",gc,oc);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1530 {
1531   PetscErrorCode ierr;
1532   PC_MG          *mg      = (PC_MG*)pc->data;
1533   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1534   PetscBool      flag;
1535   MPI_Comm       comm;
1536   char           prefix[256],tname[32];
1537   PetscInt       i,n;
1538   const char     *pcpre;
1539   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1540   PetscFunctionBegin;
1541   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1542   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1543   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1544   if (flag) {
1545     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1546   }
1547   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1548   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use eigen estimate from Smoothed aggregation for smoother","PCGAMGSetUseSAEstEig",pc_gamg->use_sa_esteig,&pc_gamg->use_sa_esteig,NULL);CHKERRQ(ierr);
1549   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1550   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1551   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1552   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1553   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1554   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1555   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1556   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1557   n = PETSC_MG_MAXLEVELS;
1558   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1559   if (!flag || n < PETSC_MG_MAXLEVELS) {
1560     if (!flag) n = 1;
1561     i = n;
1562     do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_MG_MAXLEVELS);
1563   }
1564   n = PETSC_MG_MAXLEVELS;
1565   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (0 for first reduction puts one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1566   if (!flag) i = 0;
1567   else i = n;
1568   do {pc_gamg->level_reduction_factors[i] = -1;} while (++i<PETSC_MG_MAXLEVELS);
1569   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1570   {
1571     PetscReal eminmax[2] = {0., 0.};
1572     n = 2;
1573     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1574     if (flag) {
1575       PetscCheckFalse(n != 2,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1576       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr);
1577     }
1578   }
1579   /* set options for subtype */
1580   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1581 
1582   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1583   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1584   ierr = PetscOptionsTail();CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 /* -------------------------------------------------------------------------- */
1589 /*MC
1590      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1591 
1592    Options Database Keys:
1593 +   -pc_gamg_type <type> - one of agg, geo, or classical
.   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
1595 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
.   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1597 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1598                                         equations on each process that has degrees of freedom
1599 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1600 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1601 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1602 
1603    Options Database Keys for default Aggregation:
1604 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1605 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1606 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1607 
1608    Multigrid options:
1609 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1610 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
.   -pc_mg_type <multiplicative> - (one of) additive multiplicative full kaskade
1612 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1613 
1614   Notes:
1615     In order to obtain good performance for PCGAMG for vector valued problems you must
1616        Call MatSetBlockSize() to indicate the number of degrees of freedom per grid point
1617        Call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator
1618        See the Users Manual Chapter 4 for more details
1619 
1620   Level: intermediate
1621 
1622 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1623            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig()
1624 M*/
1625 
1626 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1627 {
1628   PetscErrorCode ierr,i;
1629   PC_GAMG        *pc_gamg;
1630   PC_MG          *mg;
1631 
1632   PetscFunctionBegin;
1633    /* register AMG type */
1634   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1635 
1636   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1637   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1638   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1639 
1640   /* create a supporting struct and attach it to pc */
1641   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1642   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1643   mg           = (PC_MG*)pc->data;
1644   mg->innerctx = pc_gamg;
1645 
1646   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1647 
1648   /* these should be in subctx but repartitioning needs simple arrays */
1649   pc_gamg->data_sz = 0;
1650   pc_gamg->data    = NULL;
1651 
1652   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1653   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1654   pc->ops->setup          = PCSetUp_GAMG;
1655   pc->ops->reset          = PCReset_GAMG;
1656   pc->ops->destroy        = PCDestroy_GAMG;
1657   mg->view                = PCView_GAMG;
1658 
1659   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1660   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1661   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1662   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1663   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1664   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1665   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1666   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1667   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1668   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1669   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1670   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1671   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1672   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1673   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1674   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1675   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1676   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1677   pc_gamg->repart           = PETSC_FALSE;
1678   pc_gamg->reuse_prol       = PETSC_FALSE;
1679   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1680   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1681   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1682   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1683   pc_gamg->min_eq_proc      = 50;
1684   pc_gamg->coarse_eq_limit  = 50;
1685   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1686   pc_gamg->threshold_scale = 1.;
1687   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1688   pc_gamg->current_level    = 0; /* don't need to init really */
1689   pc_gamg->use_sa_esteig    = PETSC_TRUE;
1690   pc_gamg->emin             = 0;
1691   pc_gamg->emax             = 0;
1692 
1693   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1694 
1695   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1696   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 /*@C
1701  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1702     from PCInitializePackage().
1703 
1704  Level: developer
1705 
1706  .seealso: PetscInitialize()
1707 @*/
1708 PetscErrorCode PCGAMGInitializePackage(void)
1709 {
1710   PetscErrorCode ierr;
1711   PetscInt       l;
1712 
1713   PetscFunctionBegin;
1714   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1715   PCGAMGPackageInitialized = PETSC_TRUE;
1716   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1717   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1718   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1719   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1720 
1721   /* general events */
1722   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", 0, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1723   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1724   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1725   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1726   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1727   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1728   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1729 
1730   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1731   ierr = PetscLogEventRegister("  Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1732   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1733   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1734   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1735   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1736   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1737   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1738   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1739   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1740   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1741   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1742   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1743   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1744   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1745   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1746   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1747   for (l=0;l<PETSC_MG_MAXLEVELS;l++) {
1748     char ename[32];
1749 
1750     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Squ l%02d",l);CHKERRQ(ierr);
1751     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][0]);CHKERRQ(ierr);
1752     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Gal l%02d",l);CHKERRQ(ierr);
1753     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][1]);CHKERRQ(ierr);
1754     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Opt l%02d",l);CHKERRQ(ierr);
1755     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][2]);CHKERRQ(ierr);
1756   }
1757   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1758   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1759   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1760   /* create timer stages */
1761 #if defined(GAMG_STAGES)
1762   {
1763     char     str[32];
1764     PetscInt lidx;
1765     sprintf(str,"MG Level %d (finest)",0);
1766     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1767     for (lidx=1; lidx<9; lidx++) {
1768       sprintf(str,"MG Level %d",(int)lidx);
1769       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1770     }
1771   }
1772 #endif
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 /*@C
1777  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1778     called from PetscFinalize() automatically.
1779 
1780  Level: developer
1781 
1782  .seealso: PetscFinalize()
1783 @*/
1784 PetscErrorCode PCGAMGFinalizePackage(void)
1785 {
1786   PetscErrorCode ierr;
1787 
1788   PetscFunctionBegin;
1789   PCGAMGPackageInitialized = PETSC_FALSE;
1790   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
/*@C
 PCGAMGRegister - Register a PCGAMG implementation.

 Input Parameters:
 + type - string that will be used as the name of the GAMG type.
 - create - function for creating the gamg context.

  Level: advanced

 .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
@*/
PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ensure the registry (GAMGList) exists before adding to it; safe to call repeatedly */
  ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
  ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1814 
1815