xref: /petsc/src/ksp/pc/impls/gamg/gamg.c (revision f155c23239e6d3f7c7ec79ff00b4f28519d0ce99)
1 /*
2  GAMG geometric-algebraic multigrid PC - Mark Adams 2011
3  */
4 #include <petsc/private/matimpl.h>
5 #include <../src/ksp/pc/impls/gamg/gamg.h>           /*I "petscpc.h" I*/
6 #include <../src/ksp/ksp/impls/cheby/chebyshevimpl.h>    /*I "petscksp.h" I*/
7 
8 #if defined(PETSC_HAVE_CUDA)
9   #include <cuda_runtime.h>
10 #endif
11 
12 #if defined(PETSC_HAVE_HIP)
13   #include <hip/hip_runtime.h>
14 #endif
15 
16 PetscLogEvent petsc_gamg_setup_events[NUM_SET];
17 PetscLogEvent petsc_gamg_setup_matmat_events[PETSC_MG_MAXLEVELS][3];
18 PetscLogEvent PC_GAMGGraph_AGG;
19 PetscLogEvent PC_GAMGGraph_GEO;
20 PetscLogEvent PC_GAMGCoarsen_AGG;
21 PetscLogEvent PC_GAMGCoarsen_GEO;
22 PetscLogEvent PC_GAMGProlongator_AGG;
23 PetscLogEvent PC_GAMGProlongator_GEO;
24 PetscLogEvent PC_GAMGOptProlongator_AGG;
25 
26 /* #define GAMG_STAGES */
27 #if defined(GAMG_STAGES)
28 static PetscLogStage gamg_stages[PETSC_MG_MAXLEVELS];
29 #endif
30 
31 static PetscFunctionList GAMGList = NULL;
32 static PetscBool PCGAMGPackageInitialized;
33 
34 /* ----------------------------------------------------------------------------- */
35 PetscErrorCode PCReset_GAMG(PC pc)
36 {
37   PetscErrorCode ierr, level;
38   PC_MG          *mg      = (PC_MG*)pc->data;
39   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
40 
41   PetscFunctionBegin;
42   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
43   pc_gamg->data_sz = 0;
44   ierr = PetscFree(pc_gamg->orig_data);CHKERRQ(ierr);
45   for (level = 0; level < PETSC_MG_MAXLEVELS ; level++) {
46     mg->min_eigen_DinvA[level] = 0;
47     mg->max_eigen_DinvA[level] = 0;
48   }
49   pc_gamg->emin = 0;
50   pc_gamg->emax = 0;
51   PetscFunctionReturn(0);
52 }
53 
54 /* -------------------------------------------------------------------------- */
55 /*
56    PCGAMGCreateLevel_GAMG: create coarse op with RAP.  repartition and/or reduce number
57      of active processors.
58 
59    Input Parameter:
60    . pc - parameters + side effect: coarse data in 'pc_gamg->data' and
61           'pc_gamg->data_sz' are changed via repartitioning/reduction.
62    . Amat_fine - matrix on this fine (k) level
63    . cr_bs - coarse block size
64    In/Output Parameter:
65    . a_P_inout - prolongation operator to the next level (k-->k-1)
66    . a_nactive_proc - number of active procs
67    Output Parameter:
68    . a_Amat_crs - coarse matrix that is created (k-1)
69 */
70 
71 static PetscErrorCode PCGAMGCreateLevel_GAMG(PC pc,Mat Amat_fine,PetscInt cr_bs,Mat *a_P_inout,Mat *a_Amat_crs,PetscMPIInt *a_nactive_proc,IS * Pcolumnperm, PetscBool is_last)
72 {
73   PetscErrorCode  ierr;
74   PC_MG           *mg         = (PC_MG*)pc->data;
75   PC_GAMG         *pc_gamg    = (PC_GAMG*)mg->innerctx;
76   Mat             Cmat,Pold=*a_P_inout;
77   MPI_Comm        comm;
78   PetscMPIInt     rank,size,new_size,nactive=*a_nactive_proc;
79   PetscInt        ncrs_eq,ncrs,f_bs;
80 
81   PetscFunctionBegin;
82   ierr = PetscObjectGetComm((PetscObject)Amat_fine,&comm);CHKERRQ(ierr);
83   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
84   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
85   ierr = MatGetBlockSize(Amat_fine, &f_bs);CHKERRQ(ierr);
86   ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
87   ierr = MatPtAP(Amat_fine, Pold, MAT_INITIAL_MATRIX, 2.0, &Cmat);CHKERRQ(ierr);
88   ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][1],0,0,0,0);CHKERRQ(ierr);
89 
90   if (Pcolumnperm) *Pcolumnperm = NULL;
91 
92   /* set 'ncrs' (nodes), 'ncrs_eq' (equations)*/
93   ierr = MatGetLocalSize(Cmat, &ncrs_eq, NULL);CHKERRQ(ierr);
94   if (pc_gamg->data_cell_rows>0) {
95     ncrs = pc_gamg->data_sz/pc_gamg->data_cell_cols/pc_gamg->data_cell_rows;
96   } else {
97     PetscInt  bs;
98     ierr = MatGetBlockSize(Cmat, &bs);CHKERRQ(ierr);
99     ncrs = ncrs_eq/bs;
100   }
101   /* get number of PEs to make active 'new_size', reduce, can be any integer 1-P */
102   if (pc_gamg->level_reduction_factors[pc_gamg->current_level] == 0 && PetscDefined(HAVE_CUDA) && pc_gamg->current_level==0) { /* 0 turns reducing to 1 process/device on; do for HIP, etc. */
103 #if defined(PETSC_HAVE_CUDA)
104     PetscShmComm pshmcomm;
105     PetscMPIInt  locrank;
106     MPI_Comm     loccomm;
107     PetscInt     s_nnodes,r_nnodes, new_new_size;
108     cudaError_t  cerr;
109     int          devCount;
110     ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);
111     ierr = PetscShmCommGetMpiShmComm(pshmcomm,&loccomm);CHKERRQ(ierr);
112     ierr = MPI_Comm_rank(loccomm, &locrank);CHKERRMPI(ierr);
113     s_nnodes = !locrank;
114     ierr = MPI_Allreduce(&s_nnodes,&r_nnodes,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
115     PetscCheckFalse(size%r_nnodes,PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of nodes np=%D nnodes%D",size,r_nnodes);
116     devCount = 0;
117     cerr = cudaGetDeviceCount(&devCount);
118     cudaGetLastError(); /* Reset the last error */
119     if (cerr == cudaSuccess && devCount >= 1) { /* There are devices, else go to heuristic */
120       new_new_size = r_nnodes * devCount;
121       new_size = new_new_size;
122       ierr = PetscInfo(pc,"%s: Fine grid with Cuda. %D nodes. Change new active set size %d --> %d (devCount=%d #nodes=%D)\n",((PetscObject)pc)->prefix,r_nnodes,nactive,new_size,devCount,r_nnodes);CHKERRQ(ierr);
123     } else {
124       ierr = PetscInfo(pc,"%s: With Cuda but no device. Use heuristics.");CHKERRQ(ierr);
125       goto HEURISTIC;
126     }
127 #else
128     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not be here");
129 #endif
130   } else if (pc_gamg->level_reduction_factors[pc_gamg->current_level] > 0) {
131     PetscCheckFalse(nactive%pc_gamg->level_reduction_factors[pc_gamg->current_level],PETSC_COMM_SELF,PETSC_ERR_PLIB,"odd number of active process %D wrt reduction factor %D",nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);
132     new_size = nactive/pc_gamg->level_reduction_factors[pc_gamg->current_level];
133     ierr = PetscInfo(pc,"%s: Manually setting reduction to %d active processes (%d/%D)\n",((PetscObject)pc)->prefix,new_size,nactive,pc_gamg->level_reduction_factors[pc_gamg->current_level]);CHKERRQ(ierr);
134   } else if (is_last && !pc_gamg->use_parallel_coarse_grid_solver) {
135     new_size = 1;
136     ierr = PetscInfo(pc,"%s: Force coarsest grid reduction to %d active processes\n",((PetscObject)pc)->prefix,new_size);CHKERRQ(ierr);
137   } else {
138     PetscInt ncrs_eq_glob;
139 #if defined(PETSC_HAVE_CUDA)
140     HEURISTIC:
141 #endif
142     ierr     = MatGetSize(Cmat, &ncrs_eq_glob, NULL);CHKERRQ(ierr);
143     new_size = (PetscMPIInt)((float)ncrs_eq_glob/(float)pc_gamg->min_eq_proc + 0.5); /* hardwire min. number of eq/proc */
144     if (!new_size) new_size = 1; /* not likely, posible? */
145     else if (new_size >= nactive) new_size = nactive; /* no change, rare */
146     ierr = PetscInfo(pc,"%s: Coarse grid reduction from %d to %d active processes\n",((PetscObject)pc)->prefix,nactive,new_size);CHKERRQ(ierr);
147   }
148   if (new_size==nactive) {
149     *a_Amat_crs = Cmat; /* output - no repartitioning or reduction - could bail here */
150     if (new_size < size) {
151       /* odd case where multiple coarse grids are on one processor or no coarsening ... */
152       ierr = PetscInfo(pc,"%s: reduced grid using same number of processors (%d) as last grid (use larger coarse grid)\n",((PetscObject)pc)->prefix,nactive);CHKERRQ(ierr);
153       if (pc_gamg->cpu_pin_coarse_grids) {
154         ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
155         ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
156       }
157     }
158     /* we know that the grid structure can be reused in MatPtAP */
159   } else { /* reduce active processors - we know that the grid structure can NOT be reused in MatPtAP */
160     PetscInt       *counts,*newproc_idx,ii,jj,kk,strideNew,*tidx,ncrs_new,ncrs_eq_new,nloc_old,expand_factor=1,rfactor=1;
161     IS             is_eq_newproc,is_eq_num,is_eq_num_prim,new_eq_indices;
162     nloc_old = ncrs_eq/cr_bs;
163     PetscCheckFalse(ncrs_eq % cr_bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ncrs_eq %D not divisible by cr_bs %D",ncrs_eq,cr_bs);
164     /* get new_size and rfactor */
165     if (pc_gamg->layout_type==PCGAMG_LAYOUT_SPREAD || !pc_gamg->repart) {
166       /* find factor */
167       if (new_size == 1) rfactor = size; /* don't modify */
168       else {
169         PetscReal best_fact = 0.;
170         jj = -1;
171         for (kk = 1 ; kk <= size ; kk++) {
172           if (!(size%kk)) { /* a candidate */
173             PetscReal nactpe = (PetscReal)size/(PetscReal)kk, fact = nactpe/(PetscReal)new_size;
174             if (fact > 1.0) fact = 1./fact; /* keep fact < 1 */
175             if (fact > best_fact) {
176               best_fact = fact; jj = kk;
177             }
178           }
179         }
180         if (jj != -1) rfactor = jj;
181         else rfactor = 1; /* a prime */
182         if (pc_gamg->layout_type == PCGAMG_LAYOUT_COMPACT) expand_factor = 1;
183         else expand_factor = rfactor;
184       }
185       new_size = size/rfactor; /* make new size one that is factor */
186       if (new_size==nactive) { /* no repartitioning or reduction, bail out because nested here (rare) */
187         *a_Amat_crs = Cmat;
188         ierr = PetscInfo(pc,"%s: Finding factorable processor set stopped reduction: new_size=%d, neq(loc)=%D\n",((PetscObject)pc)->prefix,new_size,ncrs_eq);CHKERRQ(ierr);
189         PetscFunctionReturn(0);
190       }
191     }
192     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
193     /* make 'is_eq_newproc' */
194     ierr = PetscMalloc1(size, &counts);CHKERRQ(ierr);
195     if (pc_gamg->repart) {
196       /* Repartition Cmat_{k} and move columns of P^{k}_{k-1} and coordinates of primal part accordingly */
197       Mat      adj;
198       ierr = PetscInfo(pc,"%s: Repartition: size (active): %d --> %d, %D local equations, using %s process layout\n",((PetscObject)pc)->prefix,*a_nactive_proc, new_size, ncrs_eq, (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) ? "compact" : "spread");CHKERRQ(ierr);
199       /* get 'adj' */
200       if (cr_bs == 1) {
201         ierr = MatConvert(Cmat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
202       } else {
203         /* make a scalar matrix to partition (no Stokes here) */
204         Mat               tMat;
205         PetscInt          Istart_crs,Iend_crs,ncols,jj,Ii;
206         const PetscScalar *vals;
207         const PetscInt    *idx;
208         PetscInt          *d_nnz, *o_nnz, M, N;
209         static PetscInt   llev = 0; /* ugly but just used for debugging */
210         MatType           mtype;
211 
212         ierr = PetscMalloc2(ncrs, &d_nnz,ncrs, &o_nnz);CHKERRQ(ierr);
213         ierr = MatGetOwnershipRange(Cmat, &Istart_crs, &Iend_crs);CHKERRQ(ierr);
214         ierr = MatGetSize(Cmat, &M, &N);CHKERRQ(ierr);
215         for (Ii = Istart_crs, jj = 0; Ii < Iend_crs; Ii += cr_bs, jj++) {
216           ierr      = MatGetRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
217           d_nnz[jj] = ncols/cr_bs;
218           o_nnz[jj] = ncols/cr_bs;
219           ierr      = MatRestoreRow(Cmat,Ii,&ncols,NULL,NULL);CHKERRQ(ierr);
220           if (d_nnz[jj] > ncrs) d_nnz[jj] = ncrs;
221           if (o_nnz[jj] > (M/cr_bs-ncrs)) o_nnz[jj] = M/cr_bs-ncrs;
222         }
223 
224         ierr = MatGetType(Amat_fine,&mtype);CHKERRQ(ierr);
225         ierr = MatCreate(comm, &tMat);CHKERRQ(ierr);
226         ierr = MatSetSizes(tMat, ncrs, ncrs,PETSC_DETERMINE, PETSC_DETERMINE);CHKERRQ(ierr);
227         ierr = MatSetType(tMat,mtype);CHKERRQ(ierr);
228         ierr = MatSeqAIJSetPreallocation(tMat,0,d_nnz);CHKERRQ(ierr);
229         ierr = MatMPIAIJSetPreallocation(tMat,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
230         ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
231 
232         for (ii = Istart_crs; ii < Iend_crs; ii++) {
233           PetscInt dest_row = ii/cr_bs;
234           ierr = MatGetRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
235           for (jj = 0; jj < ncols; jj++) {
236             PetscInt    dest_col = idx[jj]/cr_bs;
237             PetscScalar v        = 1.0;
238             ierr = MatSetValues(tMat,1,&dest_row,1,&dest_col,&v,ADD_VALUES);CHKERRQ(ierr);
239           }
240           ierr = MatRestoreRow(Cmat,ii,&ncols,&idx,&vals);CHKERRQ(ierr);
241         }
242         ierr = MatAssemblyBegin(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
243         ierr = MatAssemblyEnd(tMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
244 
245         if (llev++ == -1) {
246           PetscViewer viewer; char fname[32];
247           ierr = PetscSNPrintf(fname,sizeof(fname),"part_mat_%D.mat",llev);CHKERRQ(ierr);
248           PetscViewerBinaryOpen(comm,fname,FILE_MODE_WRITE,&viewer);
249           ierr = MatView(tMat, viewer);CHKERRQ(ierr);
250           ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
251         }
252         ierr = MatConvert(tMat, MATMPIADJ, MAT_INITIAL_MATRIX, &adj);CHKERRQ(ierr);
253         ierr = MatDestroy(&tMat);CHKERRQ(ierr);
254       } /* create 'adj' */
255 
256       { /* partition: get newproc_idx */
257         char            prefix[256];
258         const char      *pcpre;
259         const PetscInt  *is_idx;
260         MatPartitioning mpart;
261         IS              proc_is;
262 
263         ierr = MatPartitioningCreate(comm, &mpart);CHKERRQ(ierr);
264         ierr = MatPartitioningSetAdjacency(mpart, adj);CHKERRQ(ierr);
265         ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
266         ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
267         ierr = PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);CHKERRQ(ierr);
268         ierr = MatPartitioningSetFromOptions(mpart);CHKERRQ(ierr);
269         ierr = MatPartitioningSetNParts(mpart, new_size);CHKERRQ(ierr);
270         ierr = MatPartitioningApply(mpart, &proc_is);CHKERRQ(ierr);
271         ierr = MatPartitioningDestroy(&mpart);CHKERRQ(ierr);
272 
273         /* collect IS info */
274         ierr     = PetscMalloc1(ncrs_eq, &newproc_idx);CHKERRQ(ierr);
275         ierr     = ISGetIndices(proc_is, &is_idx);CHKERRQ(ierr);
276         for (kk = jj = 0 ; kk < nloc_old ; kk++) {
277           for (ii = 0 ; ii < cr_bs ; ii++, jj++) {
278             newproc_idx[jj] = is_idx[kk] * expand_factor; /* distribution */
279           }
280         }
281         ierr = ISRestoreIndices(proc_is, &is_idx);CHKERRQ(ierr);
282         ierr = ISDestroy(&proc_is);CHKERRQ(ierr);
283       }
284       ierr = MatDestroy(&adj);CHKERRQ(ierr);
285 
286       ierr = ISCreateGeneral(comm, ncrs_eq, newproc_idx, PETSC_COPY_VALUES, &is_eq_newproc);CHKERRQ(ierr);
287       ierr = PetscFree(newproc_idx);CHKERRQ(ierr);
288     } else { /* simple aggregation of parts -- 'is_eq_newproc' */
289       PetscInt targetPE;
290       PetscCheckFalse(new_size==nactive,PETSC_COMM_SELF,PETSC_ERR_PLIB,"new_size==nactive. Should not happen");
291       ierr = PetscInfo(pc,"%s: Number of equations (loc) %D with simple aggregation\n",((PetscObject)pc)->prefix,ncrs_eq);CHKERRQ(ierr);
292       targetPE = (rank/rfactor)*expand_factor;
293       ierr     = ISCreateStride(comm, ncrs_eq, targetPE, 0, &is_eq_newproc);CHKERRQ(ierr);
294     } /* end simple 'is_eq_newproc' */
295 
296     /*
297       Create an index set from the is_eq_newproc index set to indicate the mapping TO
298     */
299     ierr = ISPartitioningToNumbering(is_eq_newproc, &is_eq_num);CHKERRQ(ierr);
300     is_eq_num_prim = is_eq_num;
301     /*
302       Determine how many equations/vertices are assigned to each processor
303     */
304     ierr        = ISPartitioningCount(is_eq_newproc, size, counts);CHKERRQ(ierr);
305     ncrs_eq_new = counts[rank];
306     ierr        = ISDestroy(&is_eq_newproc);CHKERRQ(ierr);
307     ncrs_new = ncrs_eq_new/cr_bs;
308 
309     ierr = PetscFree(counts);CHKERRQ(ierr);
310     /* data movement scope -- this could be moved to subclasses so that we don't try to cram all auxilary data into some complex abstracted thing */
311     {
312       Vec            src_crd, dest_crd;
313       const PetscInt *idx,ndata_rows=pc_gamg->data_cell_rows,ndata_cols=pc_gamg->data_cell_cols,node_data_sz=ndata_rows*ndata_cols;
314       VecScatter     vecscat;
315       PetscScalar    *array;
316       IS isscat;
317       /* move data (for primal equations only) */
318       /* Create a vector to contain the newly ordered element information */
319       ierr = VecCreate(comm, &dest_crd);CHKERRQ(ierr);
320       ierr = VecSetSizes(dest_crd, node_data_sz*ncrs_new, PETSC_DECIDE);CHKERRQ(ierr);
321       ierr = VecSetType(dest_crd,VECSTANDARD);CHKERRQ(ierr); /* this is needed! */
322       /*
323         There are 'ndata_rows*ndata_cols' data items per node, (one can think of the vectors of having
324         a block size of ...).  Note, ISs are expanded into equation space by 'cr_bs'.
325       */
326       ierr = PetscMalloc1(ncrs*node_data_sz, &tidx);CHKERRQ(ierr);
327       ierr = ISGetIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
328       for (ii=0,jj=0; ii<ncrs; ii++) {
329         PetscInt id = idx[ii*cr_bs]/cr_bs; /* get node back */
330         for (kk=0; kk<node_data_sz; kk++, jj++) tidx[jj] = id*node_data_sz + kk;
331       }
332       ierr = ISRestoreIndices(is_eq_num_prim, &idx);CHKERRQ(ierr);
333       ierr = ISCreateGeneral(comm, node_data_sz*ncrs, tidx, PETSC_COPY_VALUES, &isscat);CHKERRQ(ierr);
334       ierr = PetscFree(tidx);CHKERRQ(ierr);
335       /*
336         Create a vector to contain the original vertex information for each element
337       */
338       ierr = VecCreateSeq(PETSC_COMM_SELF, node_data_sz*ncrs, &src_crd);CHKERRQ(ierr);
339       for (jj=0; jj<ndata_cols; jj++) {
340         const PetscInt stride0=ncrs*pc_gamg->data_cell_rows;
341         for (ii=0; ii<ncrs; ii++) {
342           for (kk=0; kk<ndata_rows; kk++) {
343             PetscInt    ix = ii*ndata_rows + kk + jj*stride0, jx = ii*node_data_sz + kk*ndata_cols + jj;
344             PetscScalar tt = (PetscScalar)pc_gamg->data[ix];
345             ierr = VecSetValues(src_crd, 1, &jx, &tt, INSERT_VALUES);CHKERRQ(ierr);
346           }
347         }
348       }
349       ierr = VecAssemblyBegin(src_crd);CHKERRQ(ierr);
350       ierr = VecAssemblyEnd(src_crd);CHKERRQ(ierr);
351       /*
352         Scatter the element vertex information (still in the original vertex ordering)
353         to the correct processor
354       */
355       ierr = VecScatterCreate(src_crd, NULL, dest_crd, isscat, &vecscat);CHKERRQ(ierr);
356       ierr = ISDestroy(&isscat);CHKERRQ(ierr);
357       ierr = VecScatterBegin(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
358       ierr = VecScatterEnd(vecscat,src_crd,dest_crd,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
359       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
360       ierr = VecDestroy(&src_crd);CHKERRQ(ierr);
361       /*
362         Put the element vertex data into a new allocation of the gdata->ele
363       */
364       ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
365       ierr = PetscMalloc1(node_data_sz*ncrs_new, &pc_gamg->data);CHKERRQ(ierr);
366 
367       pc_gamg->data_sz = node_data_sz*ncrs_new;
368       strideNew        = ncrs_new*ndata_rows;
369 
370       ierr = VecGetArray(dest_crd, &array);CHKERRQ(ierr);
371       for (jj=0; jj<ndata_cols; jj++) {
372         for (ii=0; ii<ncrs_new; ii++) {
373           for (kk=0; kk<ndata_rows; kk++) {
374             PetscInt ix = ii*ndata_rows + kk + jj*strideNew, jx = ii*node_data_sz + kk*ndata_cols + jj;
375             pc_gamg->data[ix] = PetscRealPart(array[jx]);
376           }
377         }
378       }
379       ierr = VecRestoreArray(dest_crd, &array);CHKERRQ(ierr);
380       ierr = VecDestroy(&dest_crd);CHKERRQ(ierr);
381     }
382     /* move A and P (columns) with new layout */
383     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
384     /*
385       Invert for MatCreateSubMatrix
386     */
387     ierr = ISInvertPermutation(is_eq_num, ncrs_eq_new, &new_eq_indices);CHKERRQ(ierr);
388     ierr = ISSort(new_eq_indices);CHKERRQ(ierr); /* is this needed? */
389     ierr = ISSetBlockSize(new_eq_indices, cr_bs);CHKERRQ(ierr);
390     if (is_eq_num != is_eq_num_prim) {
391       ierr = ISDestroy(&is_eq_num_prim);CHKERRQ(ierr); /* could be same as 'is_eq_num' */
392     }
393     if (Pcolumnperm) {
394       ierr = PetscObjectReference((PetscObject)new_eq_indices);CHKERRQ(ierr);
395       *Pcolumnperm = new_eq_indices;
396     }
397     ierr = ISDestroy(&is_eq_num);CHKERRQ(ierr);
398     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET13],0,0,0,0);CHKERRQ(ierr);
399     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
400     /* 'a_Amat_crs' output */
401     {
402       Mat       mat;
403       PetscBool flg;
404       ierr = MatCreateSubMatrix(Cmat, new_eq_indices, new_eq_indices, MAT_INITIAL_MATRIX, &mat);CHKERRQ(ierr);
405       ierr = MatGetOption(Cmat, MAT_SPD, &flg);CHKERRQ(ierr);
406       if (flg) {
407         ierr = MatSetOption(mat, MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
408       } else {
409         ierr = MatGetOption(Cmat, MAT_HERMITIAN, &flg);CHKERRQ(ierr);
410         if (flg) {
411           ierr = MatSetOption(mat, MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
412         } else {
413 #if !defined(PETSC_USE_COMPLEX)
414           ierr = MatGetOption(Cmat, MAT_SYMMETRIC, &flg);CHKERRQ(ierr);
415           if (flg) {
416             ierr = MatSetOption(mat, MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
417           }
418 #endif
419         }
420       }
421       *a_Amat_crs = mat;
422     }
423     ierr = MatDestroy(&Cmat);CHKERRQ(ierr);
424 
425     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET14],0,0,0,0);CHKERRQ(ierr);
426     /* prolongator */
427     {
428       IS       findices;
429       PetscInt Istart,Iend;
430       Mat      Pnew;
431 
432       ierr = MatGetOwnershipRange(Pold, &Istart, &Iend);CHKERRQ(ierr);
433       ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
434       ierr = ISCreateStride(comm,Iend-Istart,Istart,1,&findices);CHKERRQ(ierr);
435       ierr = ISSetBlockSize(findices,f_bs);CHKERRQ(ierr);
436       ierr = MatCreateSubMatrix(Pold, findices, new_eq_indices, MAT_INITIAL_MATRIX, &Pnew);CHKERRQ(ierr);
437       ierr = ISDestroy(&findices);CHKERRQ(ierr);
438       ierr = MatSetOption(Pnew,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
439 
440       ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET15],0,0,0,0);CHKERRQ(ierr);
441       ierr = MatDestroy(a_P_inout);CHKERRQ(ierr);
442 
443       /* output - repartitioned */
444       *a_P_inout = Pnew;
445     }
446     ierr = ISDestroy(&new_eq_indices);CHKERRQ(ierr);
447 
448     *a_nactive_proc = new_size; /* output */
449 
450     /* pinning on reduced grids, not a bad heuristic and optimization gets folded into process reduction optimization */
451     if (pc_gamg->cpu_pin_coarse_grids) {
452 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
453       static PetscInt llev = 2;
454       ierr = PetscInfo(pc,"%s: Pinning level %D to the CPU\n",((PetscObject)pc)->prefix,llev++);CHKERRQ(ierr);
455 #endif
456       ierr = MatBindToCPU(*a_Amat_crs,PETSC_TRUE);CHKERRQ(ierr);
457       ierr = MatBindToCPU(*a_P_inout,PETSC_TRUE);CHKERRQ(ierr);
458       if (1) { /* HACK: move this to MatBindCPU_MPIAIJXXX; lvec is created, need to pin it, this is done in MatSetUpMultiply_MPIAIJ. Hack */
459         Mat         A = *a_Amat_crs, P = *a_P_inout;
460         PetscMPIInt size;
461         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
462         if (size > 1) {
463           Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data, *p = (Mat_MPIAIJ*)P->data;
464           ierr = VecBindToCPU(a->lvec,PETSC_TRUE);CHKERRQ(ierr);
465           ierr = VecBindToCPU(p->lvec,PETSC_TRUE);CHKERRQ(ierr);
466         }
467       }
468     }
469     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET12],0,0,0,0);CHKERRQ(ierr);
470   }
471   PetscFunctionReturn(0);
472 }
473 
474 PetscErrorCode PCGAMGSquareGraph_GAMG(PC a_pc, Mat Gmat1, Mat* Gmat2)
475 {
476   PetscErrorCode ierr;
477   const char     *prefix;
478   char           addp[32];
479   PC_MG          *mg      = (PC_MG*)a_pc->data;
480   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
481 
482   PetscFunctionBegin;
483   ierr = PCGetOptionsPrefix(a_pc,&prefix);CHKERRQ(ierr);
484   ierr = PetscInfo(a_pc,"%s: Square Graph on level %D\n",((PetscObject)a_pc)->prefix,pc_gamg->current_level+1);CHKERRQ(ierr);
485   ierr = MatProductCreate(Gmat1,Gmat1,NULL,Gmat2);CHKERRQ(ierr);
486   ierr = MatSetOptionsPrefix(*Gmat2,prefix);CHKERRQ(ierr);
487   ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_square_%d_",pc_gamg->current_level);CHKERRQ(ierr);
488   ierr = MatAppendOptionsPrefix(*Gmat2,addp);CHKERRQ(ierr);
489   if ((*Gmat2)->structurally_symmetric) {
490     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AB);CHKERRQ(ierr);
491   } else {
492     ierr = MatSetOption(Gmat1,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
493     ierr = MatProductSetType(*Gmat2,MATPRODUCT_AtB);CHKERRQ(ierr);
494   }
495   ierr = MatProductSetFromOptions(*Gmat2);CHKERRQ(ierr);
496   ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
497   ierr = MatProductSymbolic(*Gmat2);CHKERRQ(ierr);
498   ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][0],0,0,0,0);CHKERRQ(ierr);
499   ierr = MatProductClear(*Gmat2);CHKERRQ(ierr);
500   /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
501   (*Gmat2)->assembled = PETSC_TRUE;
502   PetscFunctionReturn(0);
503 }
504 
505 /* -------------------------------------------------------------------------- */
506 /*
507    PCSetUp_GAMG - Prepares for the use of the GAMG preconditioner
508                     by setting data structures and options.
509 
510    Input Parameter:
511 .  pc - the preconditioner context
512 
513 */
514 PetscErrorCode PCSetUp_GAMG(PC pc)
515 {
516   PetscErrorCode ierr;
517   PC_MG          *mg      = (PC_MG*)pc->data;
518   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
519   Mat            Pmat     = pc->pmat;
520   PetscInt       fine_level,level,level1,bs,M,N,qq,lidx,nASMBlocksArr[PETSC_MG_MAXLEVELS];
521   MPI_Comm       comm;
522   PetscMPIInt    rank,size,nactivepe;
523   Mat            Aarr[PETSC_MG_MAXLEVELS],Parr[PETSC_MG_MAXLEVELS];
524   IS             *ASMLocalIDsArr[PETSC_MG_MAXLEVELS];
525   PetscLogDouble nnz0=0.,nnztot=0.;
526   MatInfo        info;
527   PetscBool      is_last = PETSC_FALSE;
528 
529   PetscFunctionBegin;
530   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
531   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
532   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
533 
534   if (pc->setupcalled) {
535     if (!pc_gamg->reuse_prol || pc->flag == DIFFERENT_NONZERO_PATTERN) {
536       /* reset everything */
537       ierr = PCReset_MG(pc);CHKERRQ(ierr);
538       pc->setupcalled = 0;
539     } else {
540       PC_MG_Levels **mglevels = mg->levels;
541       /* just do Galerkin grids */
542       Mat          B,dA,dB;
543 
544       if (pc_gamg->Nlevels > 1) {
545         PetscInt gl;
546         /* currently only handle case where mat and pmat are the same on coarser levels */
547         ierr = KSPGetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,&dA,&dB);CHKERRQ(ierr);
548         /* (re)set to get dirty flag */
549         ierr = KSPSetOperators(mglevels[pc_gamg->Nlevels-1]->smoothd,dA,dB);CHKERRQ(ierr);
550 
551         for (level=pc_gamg->Nlevels-2,gl=0; level>=0; level--,gl++) {
552           MatReuse reuse = MAT_INITIAL_MATRIX ;
553 
554           /* matrix structure can change from repartitioning or process reduction but don't know if we have process reduction here. Should fix */
555           ierr = KSPGetOperators(mglevels[level]->smoothd,NULL,&B);CHKERRQ(ierr);
556           if (B->product) {
557             if (B->product->A == dB && B->product->B == mglevels[level+1]->interpolate) {
558               reuse = MAT_REUSE_MATRIX;
559             }
560           }
561           if (reuse == MAT_INITIAL_MATRIX) { ierr = MatDestroy(&mglevels[level]->A);CHKERRQ(ierr); }
562           if (reuse == MAT_REUSE_MATRIX) {
563             ierr = PetscInfo(pc,"%s: RAP after first solve, reuse matrix level %D\n",((PetscObject)pc)->prefix,level);CHKERRQ(ierr);
564           } else {
565             ierr = PetscInfo(pc,"%s: RAP after first solve, new matrix level %D\n",((PetscObject)pc)->prefix,level);CHKERRQ(ierr);
566           }
567           ierr = PetscLogEventBegin(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
568           ierr = MatPtAP(dB,mglevels[level+1]->interpolate,reuse,PETSC_DEFAULT,&B);CHKERRQ(ierr);
569           ierr = PetscLogEventEnd(petsc_gamg_setup_matmat_events[gl][1],0,0,0,0);CHKERRQ(ierr);
570           if (reuse == MAT_INITIAL_MATRIX) mglevels[level]->A = B;
571           ierr = KSPSetOperators(mglevels[level]->smoothd,B,B);CHKERRQ(ierr);
572           dB   = B;
573         }
574       }
575 
576       ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
577       PetscFunctionReturn(0);
578     }
579   }
580 
581   if (!pc_gamg->data) {
582     if (pc_gamg->orig_data) {
583       ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
584       ierr = MatGetLocalSize(Pmat, &qq, NULL);CHKERRQ(ierr);
585 
586       pc_gamg->data_sz        = (qq/bs)*pc_gamg->orig_data_cell_rows*pc_gamg->orig_data_cell_cols;
587       pc_gamg->data_cell_rows = pc_gamg->orig_data_cell_rows;
588       pc_gamg->data_cell_cols = pc_gamg->orig_data_cell_cols;
589 
590       ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->data);CHKERRQ(ierr);
591       for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->data[qq] = pc_gamg->orig_data[qq];
592     } else {
593       PetscCheckFalse(!pc_gamg->ops->createdefaultdata,comm,PETSC_ERR_PLIB,"'createdefaultdata' not set(?) need to support NULL data");
594       ierr = pc_gamg->ops->createdefaultdata(pc,Pmat);CHKERRQ(ierr);
595     }
596   }
597 
598   /* cache original data for reuse */
599   if (!pc_gamg->orig_data && (PetscBool)(!pc_gamg->reuse_prol)) {
600     ierr = PetscMalloc1(pc_gamg->data_sz, &pc_gamg->orig_data);CHKERRQ(ierr);
601     for (qq=0; qq<pc_gamg->data_sz; qq++) pc_gamg->orig_data[qq] = pc_gamg->data[qq];
602     pc_gamg->orig_data_cell_rows = pc_gamg->data_cell_rows;
603     pc_gamg->orig_data_cell_cols = pc_gamg->data_cell_cols;
604   }
605 
606   /* get basic dims */
607   ierr = MatGetBlockSize(Pmat, &bs);CHKERRQ(ierr);
608   ierr = MatGetSize(Pmat, &M, &N);CHKERRQ(ierr);
609 
610   ierr = MatGetInfo(Pmat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* global reduction */
611   nnz0   = info.nz_used;
612   nnztot = info.nz_used;
613   ierr = PetscInfo(pc,"%s: level %D) N=%D, n data rows=%D, n data cols=%D, nnz/row (ave)=%d, np=%D\n",((PetscObject)pc)->prefix,0,M,pc_gamg->data_cell_rows,pc_gamg->data_cell_cols,(int)(nnz0/(PetscReal)M+0.5),size);CHKERRQ(ierr);
614 
615   /* Get A_i and R_i */
616   for (level=0, Aarr[0]=Pmat, nactivepe = size; level < (pc_gamg->Nlevels-1) && (!level || M>pc_gamg->coarse_eq_limit); level++) {
617     pc_gamg->current_level = level;
618     PetscCheckFalse(level >= PETSC_MG_MAXLEVELS,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Too many levels %D",level);
619     level1 = level + 1;
620     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
621 #if defined(GAMG_STAGES)
622     ierr = PetscLogStagePush(gamg_stages[level]);CHKERRQ(ierr);
623 #endif
624     { /* construct prolongator */
625       Mat              Gmat;
626       PetscCoarsenData *agg_lists;
627       Mat              Prol11;
628 
629       ierr = pc_gamg->ops->graph(pc,Aarr[level], &Gmat);CHKERRQ(ierr);
630       ierr = pc_gamg->ops->coarsen(pc, &Gmat, &agg_lists);CHKERRQ(ierr);
631       ierr = pc_gamg->ops->prolongator(pc,Aarr[level],Gmat,agg_lists,&Prol11);CHKERRQ(ierr);
632 
633       /* could have failed to create new level */
634       if (Prol11) {
635         const char *prefix;
636         char       addp[32];
637 
638         /* get new block size of coarse matrices */
639         ierr = MatGetBlockSizes(Prol11, NULL, &bs);CHKERRQ(ierr);
640 
641         if (pc_gamg->ops->optprolongator) {
642           /* smooth */
643           ierr = pc_gamg->ops->optprolongator(pc, Aarr[level], &Prol11);CHKERRQ(ierr);
644         }
645 
646         if (pc_gamg->use_aggs_in_asm) {
647           PetscInt bs;
648           ierr = MatGetBlockSizes(Prol11, &bs, NULL);CHKERRQ(ierr);
649           ierr = PetscCDGetASMBlocks(agg_lists, bs, Gmat, &nASMBlocksArr[level], &ASMLocalIDsArr[level]);CHKERRQ(ierr);
650         }
651 
652         ierr = PCGetOptionsPrefix(pc,&prefix);CHKERRQ(ierr);
653         ierr = MatSetOptionsPrefix(Prol11,prefix);CHKERRQ(ierr);
654         ierr = PetscSNPrintf(addp,sizeof(addp),"pc_gamg_prolongator_%d_",(int)level);CHKERRQ(ierr);
655         ierr = MatAppendOptionsPrefix(Prol11,addp);CHKERRQ(ierr);
656         /* Always generate the transpose with CUDA
657            Such behaviour can be adapted with -pc_gamg_prolongator_ prefixed options */
658         ierr = MatSetOption(Prol11,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);CHKERRQ(ierr);
659         ierr = MatSetFromOptions(Prol11);CHKERRQ(ierr);
660         Parr[level1] = Prol11;
661       } else Parr[level1] = NULL; /* failed to coarsen */
662 
663       ierr = MatDestroy(&Gmat);CHKERRQ(ierr);
664       ierr = PetscCDDestroy(agg_lists);CHKERRQ(ierr);
665     } /* construct prolongator scope */
666     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET1],0,0,0,0);CHKERRQ(ierr);
667     if (!level) Aarr[0] = Pmat; /* use Pmat for finest level setup */
668     if (!Parr[level1]) { /* failed to coarsen */
669       ierr = PetscInfo(pc,"%s: Stop gridding, level %D\n",((PetscObject)pc)->prefix,level);CHKERRQ(ierr);
670 #if defined(GAMG_STAGES)
671       ierr = PetscLogStagePop();CHKERRQ(ierr);
672 #endif
673       break;
674     }
675     ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
676     ierr = MatGetSize(Parr[level1], &M, &N);CHKERRQ(ierr); /* N is next M, a loop test variables */
677     PetscCheckFalse(is_last,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Is last ?");
678     if (N <= pc_gamg->coarse_eq_limit) is_last = PETSC_TRUE;
679     if (level1 == pc_gamg->Nlevels-1) is_last = PETSC_TRUE;
680     ierr = pc_gamg->ops->createlevel(pc, Aarr[level], bs, &Parr[level1], &Aarr[level1], &nactivepe, NULL, is_last);CHKERRQ(ierr);
681 
682     ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET2],0,0,0,0);CHKERRQ(ierr);
683     ierr = MatGetSize(Aarr[level1], &M, &N);CHKERRQ(ierr); /* M is loop test variables */
684     ierr = MatGetInfo(Aarr[level1], MAT_GLOBAL_SUM, &info);CHKERRQ(ierr);
685     nnztot += info.nz_used;
686     ierr = PetscInfo(pc,"%s: %D) N=%D, n data cols=%D, nnz/row (ave)=%d, %D active pes\n",((PetscObject)pc)->prefix,level1,M,pc_gamg->data_cell_cols,(int)(info.nz_used/(PetscReal)M),nactivepe);CHKERRQ(ierr);
687 
688 #if defined(GAMG_STAGES)
689     ierr = PetscLogStagePop();CHKERRQ(ierr);
690 #endif
691     /* stop if one node or one proc -- could pull back for singular problems */
692     if ((pc_gamg->data_cell_cols && M/pc_gamg->data_cell_cols < 2) || (!pc_gamg->data_cell_cols && M/bs < 2)) {
693       ierr =  PetscInfo(pc,"%s: HARD stop of coarsening on level %D.  Grid too small: %D block nodes\n",((PetscObject)pc)->prefix,level,M/bs);CHKERRQ(ierr);
694       level++;
695       break;
696     }
697   } /* levels */
698   ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
699 
700   ierr = PetscInfo(pc,"%s: %D levels, grid complexity = %g\n",((PetscObject)pc)->prefix,level+1,nnztot/nnz0);CHKERRQ(ierr);
701   pc_gamg->Nlevels = level + 1;
702   fine_level       = level;
703   ierr             = PCMGSetLevels(pc,pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
704 
705   if (pc_gamg->Nlevels > 1) { /* don't setup MG if one level */
706 
707     /* set default smoothers & set operators */
708     for (lidx = 1, level = pc_gamg->Nlevels-2; lidx <= fine_level; lidx++, level--) {
709       KSP smoother;
710       PC  subpc;
711 
712       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
713       ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
714 
715       ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
716       /* set ops */
717       ierr = KSPSetOperators(smoother, Aarr[level], Aarr[level]);CHKERRQ(ierr);
718       ierr = PCMGSetInterpolation(pc, lidx, Parr[level+1]);CHKERRQ(ierr);
719 
720       /* set defaults */
721       ierr = KSPSetType(smoother, KSPCHEBYSHEV);CHKERRQ(ierr);
722 
723       /* set blocks for ASM smoother that uses the 'aggregates' */
724       if (pc_gamg->use_aggs_in_asm) {
725         PetscInt sz;
726         IS       *iss;
727 
728         sz   = nASMBlocksArr[level];
729         iss  = ASMLocalIDsArr[level];
730         ierr = PCSetType(subpc, PCASM);CHKERRQ(ierr);
731         ierr = PCASMSetOverlap(subpc, 0);CHKERRQ(ierr);
732         ierr = PCASMSetType(subpc,PC_ASM_BASIC);CHKERRQ(ierr);
733         if (!sz) {
734           IS       is;
735           ierr = ISCreateGeneral(PETSC_COMM_SELF, 0, NULL, PETSC_COPY_VALUES, &is);CHKERRQ(ierr);
736           ierr = PCASMSetLocalSubdomains(subpc, 1, NULL, &is);CHKERRQ(ierr);
737           ierr = ISDestroy(&is);CHKERRQ(ierr);
738         } else {
739           PetscInt kk;
740           ierr = PCASMSetLocalSubdomains(subpc, sz, NULL, iss);CHKERRQ(ierr);
741           for (kk=0; kk<sz; kk++) {
742             ierr = ISDestroy(&iss[kk]);CHKERRQ(ierr);
743           }
744           ierr = PetscFree(iss);CHKERRQ(ierr);
745         }
746         ASMLocalIDsArr[level] = NULL;
747         nASMBlocksArr[level]  = 0;
748       } else {
749         ierr = PCSetType(subpc, PCJACOBI);CHKERRQ(ierr);
750       }
751     }
752     {
753       /* coarse grid */
754       KSP smoother,*k2; PC subpc,pc2; PetscInt ii,first;
755       Mat Lmat = Aarr[(level=pc_gamg->Nlevels-1)]; lidx = 0;
756 
757       ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
758       ierr = KSPSetOperators(smoother, Lmat, Lmat);CHKERRQ(ierr);
759       if (!pc_gamg->use_parallel_coarse_grid_solver) {
760         ierr = KSPSetNormType(smoother, KSP_NORM_NONE);CHKERRQ(ierr);
761         ierr = KSPGetPC(smoother, &subpc);CHKERRQ(ierr);
762         ierr = PCSetType(subpc, PCBJACOBI);CHKERRQ(ierr);
763         ierr = PCSetUp(subpc);CHKERRQ(ierr);
764         ierr = PCBJacobiGetSubKSP(subpc,&ii,&first,&k2);CHKERRQ(ierr);
765         PetscCheckFalse(ii != 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"ii %D is not one",ii);
766         ierr = KSPGetPC(k2[0],&pc2);CHKERRQ(ierr);
767         ierr = PCSetType(pc2, PCLU);CHKERRQ(ierr);
768         ierr = PCFactorSetShiftType(pc2,MAT_SHIFT_INBLOCKS);CHKERRQ(ierr);
769         ierr = KSPSetTolerances(k2[0],PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
770         ierr = KSPSetType(k2[0], KSPPREONLY);CHKERRQ(ierr);
771       }
772     }
773 
774     /* should be called in PCSetFromOptions_GAMG(), but cannot be called prior to PCMGSetLevels() */
775     ierr = PetscObjectOptionsBegin((PetscObject)pc);CHKERRQ(ierr);
776     ierr = PCSetFromOptions_MG(PetscOptionsObject,pc);CHKERRQ(ierr);
777     ierr = PetscOptionsEnd();CHKERRQ(ierr);
778     ierr = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
779 
780     /* setup cheby eigen estimates from SA */
781     if (pc_gamg->use_sa_esteig) {
782       for (lidx = 1, level = pc_gamg->Nlevels-2; level >= 0 ; lidx++, level--) {
783         KSP       smoother;
784         PetscBool ischeb;
785 
786         ierr = PCMGGetSmoother(pc, lidx, &smoother);CHKERRQ(ierr);
787         ierr = PetscObjectTypeCompare((PetscObject)smoother,KSPCHEBYSHEV,&ischeb);CHKERRQ(ierr);
788         if (ischeb) {
789           KSP_Chebyshev *cheb = (KSP_Chebyshev*)smoother->data;
790 
791           // The command line will override these settings because KSPSetFromOptions is called in PCSetUp_MG
792           if (mg->max_eigen_DinvA[level] > 0) {
793             // SA uses Jacobi for P; we use SA estimates if the smoother is also Jacobi or if the user explicitly requested it.
794             // TODO: This should test whether it's the same Jacobi variant (DIAG, ROWSUM, etc.)
795             PetscReal emax,emin;
796 
797             emin = mg->min_eigen_DinvA[level];
798             emax = mg->max_eigen_DinvA[level];
799             ierr = PetscInfo(pc,"%s: PCSetUp_GAMG: call KSPChebyshevSetEigenvalues on level %D (N=%D) with emax = %g emin = %g\n",((PetscObject)pc)->prefix,level,Aarr[level]->rmap->N,(double)emax,(double)emin);CHKERRQ(ierr);
800             cheb->emin_provided = emin;
801             cheb->emax_provided = emax;
802           }
803         }
804       }
805     }
806 
807     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
808 
809     /* clean up */
810     for (level=1; level<pc_gamg->Nlevels; level++) {
811       ierr = MatDestroy(&Parr[level]);CHKERRQ(ierr);
812       ierr = MatDestroy(&Aarr[level]);CHKERRQ(ierr);
813     }
814   } else {
815     KSP smoother;
816 
817     ierr = PetscInfo(pc,"%s: One level solver used (system is seen as DD). Using default solver.\n");CHKERRQ(ierr);
818     ierr = PCMGGetSmoother(pc, 0, &smoother);CHKERRQ(ierr);
819     ierr = KSPSetOperators(smoother, Aarr[0], Aarr[0]);CHKERRQ(ierr);
820     ierr = KSPSetType(smoother, KSPPREONLY);CHKERRQ(ierr);
821     ierr = PCSetUp_MG(pc);CHKERRQ(ierr);
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 /* ------------------------------------------------------------------------- */
827 /*
828  PCDestroy_GAMG - Destroys the private context for the GAMG preconditioner
829    that was created with PCCreate_GAMG().
830 
831    Input Parameter:
832 .  pc - the preconditioner context
833 
834    Application Interface Routine: PCDestroy()
835 */
PetscErrorCode PCDestroy_GAMG(PC pc)
{
  PetscErrorCode ierr;
  PC_MG          *mg     = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg= (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  ierr = PCReset_GAMG(pc);CHKERRQ(ierr); /* release per-setup data (data arrays, cached eigenvalues) first */
  if (pc_gamg->ops->destroy) {
    /* let the concrete GAMG type (agg/geo/classical) tear down its own context */
    ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
  }
  ierr = PetscFree(pc_gamg->ops);CHKERRQ(ierr);
  ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = PetscFree(pc_gamg);CHKERRQ(ierr);  /* frees mg->innerctx; must come after everything reachable from it */
  ierr = PCDestroy_MG(pc);CHKERRQ(ierr);    /* finally destroy the underlying PCMG (pc->data) */
  PetscFunctionReturn(0);
}
853 
854 /*@
855    PCGAMGSetProcEqLim - Set number of equations to aim for per process on the coarse grids via processor reduction.
856 
857    Logically Collective on PC
858 
859    Input Parameters:
860 +  pc - the preconditioner context
861 -  n - the number of equations
862 
863    Options Database Key:
864 .  -pc_gamg_process_eq_limit <limit> - set the limit
865 
866    Notes:
867     GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit> equations on each process
868     that has degrees of freedom
869 
870    Level: intermediate
871 
872 .seealso: PCGAMGSetCoarseEqLim(), PCGAMGSetRankReductionFactors()
873 @*/
PetscErrorCode  PCGAMGSetProcEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetProcEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
883 
/* Implementation of PCGAMGSetProcEqLim() for the GAMG type */
static PetscErrorCode PCGAMGSetProcEqLim_GAMG(PC pc, PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  if (n>0) pc_gamg->min_eq_proc = n; /* non-positive values are silently ignored, keeping the previous limit */
  PetscFunctionReturn(0);
}
893 
894 /*@
895    PCGAMGSetCoarseEqLim - Set maximum number of equations on coarsest grid.
896 
897  Collective on PC
898 
899    Input Parameters:
900 +  pc - the preconditioner context
901 -  n - maximum number of equations to aim for
902 
903    Options Database Key:
904 .  -pc_gamg_coarse_eq_limit <limit> - set the limit
905 
906    Notes: For example -pc_gamg_coarse_eq_limit 1000 will stop coarsening once the coarse grid
907      has less than 1000 unknowns.
908 
909    Level: intermediate
910 
911 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetRankReductionFactors()
912 @*/
PetscErrorCode PCGAMGSetCoarseEqLim(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseEqLim_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
922 
/* Implementation of PCGAMGSetCoarseEqLim() for the GAMG type */
static PetscErrorCode PCGAMGSetCoarseEqLim_GAMG(PC pc, PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  if (n>0) pc_gamg->coarse_eq_limit = n; /* non-positive values are silently ignored */
  PetscFunctionReturn(0);
}
932 
933 /*@
934    PCGAMGSetRepartition - Repartition the degrees of freedom across the processors on the coarser grids
935 
936    Collective on PC
937 
938    Input Parameters:
939 +  pc - the preconditioner context
940 -  n - PETSC_TRUE or PETSC_FALSE
941 
942    Options Database Key:
943 .  -pc_gamg_repartition <true,false> - turn on the repartitioning
944 
945    Notes:
    this will generally improve the load balancing of the work on each level
947 
948    Level: intermediate
949 
950 @*/
PetscErrorCode PCGAMGSetRepartition(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetRepartition_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
960 
/* Implementation of PCGAMGSetRepartition() for the GAMG type */
static PetscErrorCode PCGAMGSetRepartition_GAMG(PC pc, PetscBool n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->repart = n;
  PetscFunctionReturn(0);
}
970 
971 /*@
972    PCGAMGSetUseSAEstEig - Use eigen estimate from smoothed aggregation for Chebyshev smoother
973 
974    Collective on PC
975 
976    Input Parameters:
977 +  pc - the preconditioner context
-  n - PETSC_TRUE to use the eigen estimate from smoothed aggregation, PETSC_FALSE to not
979 
980    Options Database Key:
981 .  -pc_gamg_use_sa_esteig <true,false> - use the eigen estimate
982 
983    Notes:
984    Smoothed aggregation constructs the smoothed prolongator $P = (I - \omega D^{-1} A) T$ where $T$ is the tentative prolongator and $D$ is the diagonal of $A$.
985    Eigenvalue estimates (based on a few CG or GMRES iterations) are computed to choose $\omega$ so that this is a stable smoothing operation.
986    If Chebyshev with Jacobi (diagonal) preconditioning is used for smoothing, then the eigenvalue estimates can be reused.
987    This option is only used when the smoother uses Jacobi, and should be turned off if a different PCJacobiType is used.
988    It became default in PETSc 3.17.
989 
990    Level: advanced
991 
992 .seealso: KSPChebyshevSetEigenvalues(), KSPChebyshevEstEigSet()
993 @*/
PetscErrorCode PCGAMGSetUseSAEstEig(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetUseSAEstEig_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1003 
/* Implementation of PCGAMGSetUseSAEstEig() for the GAMG type */
static PetscErrorCode PCGAMGSetUseSAEstEig_GAMG(PC pc, PetscBool n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->use_sa_esteig = n;
  PetscFunctionReturn(0);
}
1013 
1014 /*@
1015    PCGAMGSetEigenvalues - Set eigenvalues
1016 
1017    Collective on PC
1018 
1019    Input Parameters:
1020 +  pc - the preconditioner context
.  emax - max eigenvalue
-  emin - min eigenvalue
1023 
1024    Options Database Key:
1025 .  -pc_gamg_eigenvalues <emin,emax> - estimates of the eigenvalues
1026 
1027    Level: intermediate
1028 
1029 .seealso: PCGAMGSetUseSAEstEig()
1030 @*/
PetscErrorCode PCGAMGSetEigenvalues(PC pc, PetscReal emax,PetscReal emin)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetEigenvalues_C",(PC,PetscReal,PetscReal),(pc,emax,emin));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1040 
/* Implementation of PCGAMGSetEigenvalues() for the GAMG type */
static PetscErrorCode PCGAMGSetEigenvalues_GAMG(PC pc,PetscReal emax,PetscReal emin)
{
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  /* validate: emax must strictly dominate emin and both must lie on the same side of zero */
  PetscCheckFalse(emax <= emin,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Maximum eigenvalue must be larger than minimum: max %g min %g",(double)emax,(double)emin);
  PetscCheckFalse(emax*emin <= 0.0,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Both eigenvalues must be of the same sign: max %g min %g",(double)emax,(double)emin);
  pc_gamg->emax = emax;
  pc_gamg->emin = emin;

  PetscFunctionReturn(0);
}
1054 
1055 /*@
1056    PCGAMGSetReuseInterpolation - Reuse prolongation when rebuilding algebraic multigrid preconditioner
1057 
1058    Collective on PC
1059 
1060    Input Parameters:
1061 +  pc - the preconditioner context
1062 -  n - PETSC_TRUE or PETSC_FALSE
1063 
1064    Options Database Key:
1065 .  -pc_gamg_reuse_interpolation <true,false> - reuse the previous interpolation
1066 
1067    Level: intermediate
1068 
1069    Notes:
1070     May negatively affect the convergence rate of the method on new matrices if the matrix entries change a great deal, but allows
1071     rebuilding the preconditioner quicker.
1072 
1073 @*/
PetscErrorCode PCGAMGSetReuseInterpolation(PC pc, PetscBool n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetReuseInterpolation_C",(PC,PetscBool),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1083 
/* Implementation of PCGAMGSetReuseInterpolation() for the GAMG type */
static PetscErrorCode PCGAMGSetReuseInterpolation_GAMG(PC pc, PetscBool n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->reuse_prol = n;
  PetscFunctionReturn(0);
}
1093 
1094 /*@
1095    PCGAMGASMSetUseAggs - Have the PCGAMG smoother on each level use the aggregates defined by the coarsening process as the subdomains for the additive Schwarz preconditioner.
1096 
1097    Collective on PC
1098 
1099    Input Parameters:
1100 +  pc - the preconditioner context
1101 -  flg - PETSC_TRUE to use aggregates, PETSC_FALSE to not
1102 
1103    Options Database Key:
1104 .  -pc_gamg_asm_use_agg <true,false> - use aggregates to define the additive Schwarz subdomains
1105 
1106    Level: intermediate
1107 
1108 @*/
PetscErrorCode PCGAMGASMSetUseAggs(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGASMSetUseAggs_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1118 
/* Implementation of PCGAMGASMSetUseAggs() for the GAMG type */
static PetscErrorCode PCGAMGASMSetUseAggs_GAMG(PC pc, PetscBool flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->use_aggs_in_asm = flg;
  PetscFunctionReturn(0);
}
1128 
1129 /*@
1130    PCGAMGSetUseParallelCoarseGridSolve - allow a parallel coarse grid solver
1131 
1132    Collective on PC
1133 
1134    Input Parameters:
1135 +  pc - the preconditioner context
1136 -  flg - PETSC_TRUE to not force coarse grid onto one processor
1137 
1138    Options Database Key:
1139 .  -pc_gamg_use_parallel_coarse_grid_solver - use a parallel coarse grid direct solver
1140 
1141    Level: intermediate
1142 
1143 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetCpuPinCoarseGrids()
1144 @*/
PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetUseParallelCoarseGridSolve_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1154 
/* Implementation of PCGAMGSetUseParallelCoarseGridSolve() for the GAMG type */
static PetscErrorCode PCGAMGSetUseParallelCoarseGridSolve_GAMG(PC pc, PetscBool flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->use_parallel_coarse_grid_solver = flg;
  PetscFunctionReturn(0);
}
1164 
1165 /*@
1166    PCGAMGSetCpuPinCoarseGrids - pin reduced grids to CPU
1167 
1168    Collective on PC
1169 
1170    Input Parameters:
1171 +  pc - the preconditioner context
1172 -  flg - PETSC_TRUE to pin coarse grids to CPU
1173 
1174    Options Database Key:
1175 .  -pc_gamg_cpu_pin_coarse_grids - pin the coarse grids to the CPU
1176 
1177    Level: intermediate
1178 
1179 .seealso: PCGAMGSetCoarseGridLayoutType(), PCGAMGSetUseParallelCoarseGridSolve()
1180 @*/
PetscErrorCode PCGAMGSetCpuPinCoarseGrids(PC pc, PetscBool flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetCpuPinCoarseGrids_C",(PC,PetscBool),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1190 
/* Implementation of PCGAMGSetCpuPinCoarseGrids() for the GAMG type */
static PetscErrorCode PCGAMGSetCpuPinCoarseGrids_GAMG(PC pc, PetscBool flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->cpu_pin_coarse_grids = flg;
  PetscFunctionReturn(0);
}
1200 
1201 /*@
1202    PCGAMGSetCoarseGridLayoutType - place coarse grids on processors with natural order (compact type)
1203 
1204    Collective on PC
1205 
1206    Input Parameters:
1207 +  pc - the preconditioner context
1208 -  flg - Layout type
1209 
1210    Options Database Key:
1211 .  -pc_gamg_coarse_grid_layout_type - place the coarse grids with natural ordering
1212 
1213    Level: intermediate
1214 
1215 .seealso: PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetCpuPinCoarseGrids()
1216 @*/
PetscErrorCode PCGAMGSetCoarseGridLayoutType(PC pc, PCGAMGLayoutType flg)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetCoarseGridLayoutType_C",(PC,PCGAMGLayoutType),(pc,flg));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1226 
/* Implementation of PCGAMGSetCoarseGridLayoutType() for the GAMG type */
static PetscErrorCode PCGAMGSetCoarseGridLayoutType_GAMG(PC pc, PCGAMGLayoutType flg)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->layout_type = flg;
  PetscFunctionReturn(0);
}
1236 
1237 /*@
1238    PCGAMGSetNlevels -  Sets the maximum number of levels PCGAMG will use
1239 
1240    Not collective on PC
1241 
1242    Input Parameters:
1243 +  pc - the preconditioner
1244 -  n - the maximum number of levels to use
1245 
1246    Options Database Key:
1247 .  -pc_mg_levels <n> - set the maximum number of levels to allow
1248 
1249    Level: intermediate
1250 
1251 @*/
PetscErrorCode PCGAMGSetNlevels(PC pc, PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetNlevels_C",(PC,PetscInt),(pc,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1261 
/* Implementation of PCGAMGSetNlevels() for the GAMG type */
static PetscErrorCode PCGAMGSetNlevels_GAMG(PC pc, PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->Nlevels = n; /* NOTE(review): not range-checked here; coarsening loop checks against PETSC_MG_MAXLEVELS */
  PetscFunctionReturn(0);
}
1271 
1272 /*@
1273    PCGAMGSetThreshold - Relative threshold to use for dropping edges in aggregation graph
1274 
1275    Not collective on PC
1276 
1277    Input Parameters:
1278 +  pc - the preconditioner context
1279 .  v - array of threshold values for finest n levels; 0.0 means keep all nonzero entries in the graph; negative means keep even zero entries in the graph
1280 -  n - number of threshold values provided in array
1281 
1282    Options Database Key:
1283 .  -pc_gamg_threshold <threshold> - the threshold to drop edges
1284 
1285    Notes:
1286     Increasing the threshold decreases the rate of coarsening. Conversely reducing the threshold increases the rate of coarsening (aggressive coarsening) and thereby reduces the complexity of the coarse grids, and generally results in slower solver converge rates. Reducing coarse grid complexity reduced the complexity of Galerkin coarse grid construction considerably.
1287     Before coarsening or aggregating the graph, GAMG removes small values from the graph with this threshold, and thus reducing the coupling in the graph and a different (perhaps better) coarser set of points.
1288 
1289     If n is less than the total number of coarsenings (see PCGAMGSetNlevels()), then threshold scaling (see PCGAMGSetThresholdScale()) is used for each successive coarsening.
1290     In this case, PCGAMGSetThresholdScale() must be called before PCGAMGSetThreshold().
1291     If n is greater than the total number of levels, the excess entries in threshold will not be used.
1292 
1293    Level: intermediate
1294 
1295 .seealso: PCGAMGFilterGraph(), PCGAMGSetSquareGraph()
1296 @*/
PetscErrorCode PCGAMGSetThreshold(PC pc, PetscReal v[], PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  if (n) PetscValidRealPointer(v,2); /* v may be NULL when no values are passed (n == 0) */
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetThreshold_C",(PC,PetscReal[],PetscInt),(pc,v,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1307 
1308 static PetscErrorCode PCGAMGSetThreshold_GAMG(PC pc, PetscReal v[], PetscInt n)
1309 {
1310   PC_MG   *mg      = (PC_MG*)pc->data;
1311   PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
1312   PetscInt i;
1313   PetscFunctionBegin;
1314   for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->threshold[i] = v[i];
1315   for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 /*@
1320    PCGAMGSetRankReductionFactors - Set manual schedule for process reduction on coarse grids
1321 
1322    Collective on PC
1323 
1324    Input Parameters:
1325 +  pc - the preconditioner context
.  v - array of reduction factors. 0 for first value forces a reduction to one process/device on first level in CUDA
1327 -  n - number of values provided in array
1328 
1329    Options Database Key:
1330 .  -pc_gamg_rank_reduction_factors <factors> - provide the schedule
1331 
1332    Level: intermediate
1333 
1334 .seealso: PCGAMGSetProcEqLim(), PCGAMGSetCoarseEqLim()
1335 @*/
PetscErrorCode PCGAMGSetRankReductionFactors(PC pc, PetscInt v[], PetscInt n)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  if (n) PetscValidIntPointer(v,2); /* v may be NULL when no values are passed (n == 0) */
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetRankReductionFactors_C",(PC,PetscInt[],PetscInt),(pc,v,n));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1346 
/* Implementation of PCGAMGSetRankReductionFactors() for the GAMG type */
static PetscErrorCode PCGAMGSetRankReductionFactors_GAMG(PC pc, PetscInt v[], PetscInt n)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
  PetscInt i;
  PetscFunctionBegin;
  /* copy the user-supplied per-level reduction factors */
  for (i=0; i<PetscMin(n,PETSC_MG_MAXLEVELS); i++) pc_gamg->level_reduction_factors[i] = v[i];
  for (; i<PETSC_MG_MAXLEVELS; i++) pc_gamg->level_reduction_factors[i] = -1; /* -1 marks "not set"; a user-provided 0 (see manual page) forces one process/device on the first level */
  PetscFunctionReturn(0);
}
1357 
1358 /*@
1359    PCGAMGSetThresholdScale - Relative threshold reduction at each level
1360 
1361    Not collective on PC
1362 
1363    Input Parameters:
1364 +  pc - the preconditioner context
1365 -  scale - the threshold value reduction, usually < 1.0
1366 
1367    Options Database Key:
1368 .  -pc_gamg_threshold_scale <v> - set the relative threshold reduction on each level
1369 
1370    Notes:
1371    The initial threshold (for an arbitrary number of levels starting from the finest) can be set with PCGAMGSetThreshold().
1372    This scaling is used for each subsequent coarsening, but must be called before PCGAMGSetThreshold().
1373 
1374    Level: advanced
1375 
1376 .seealso: PCGAMGSetThreshold()
1377 @*/
PetscErrorCode PCGAMGSetThresholdScale(PC pc, PetscReal v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetThresholdScale_C",(PC,PetscReal),(pc,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1387 
/* Implementation of PCGAMGSetThresholdScale(); must be set before PCGAMGSetThreshold()
   is called for the scale to apply to the unfilled levels (see PCGAMGSetThreshold_GAMG) */
static PetscErrorCode PCGAMGSetThresholdScale_GAMG(PC pc, PetscReal v)
{
  PC_MG   *mg      = (PC_MG*)pc->data;
  PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx;
  PetscFunctionBegin;
  pc_gamg->threshold_scale = v;
  PetscFunctionReturn(0);
}
1396 
1397 /*@C
1398    PCGAMGSetType - Set solution method
1399 
1400    Collective on PC
1401 
1402    Input Parameters:
1403 +  pc - the preconditioner context
1404 -  type - PCGAMGAGG, PCGAMGGEO, or PCGAMGCLASSICAL
1405 
1406    Options Database Key:
1407 .  -pc_gamg_type <agg,geo,classical> - type of algebraic multigrid to apply
1408 
1409    Level: intermediate
1410 
1411 .seealso: PCGAMGGetType(), PCGAMG, PCGAMGType
1412 @*/
PetscErrorCode PCGAMGSetType(PC pc, PCGAMGType type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* dispatch to the GAMG implementation; PetscTryMethod is a no-op if pc is not a GAMG */
  ierr = PetscTryMethod(pc,"PCGAMGSetType_C",(PC,PCGAMGType),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1422 
1423 /*@C
1424    PCGAMGGetType - Get solution method
1425 
1426    Collective on PC
1427 
1428    Input Parameter:
1429 .  pc - the preconditioner context
1430 
1431    Output Parameter:
1432 .  type - the type of algorithm used
1433 
1434    Level: intermediate
1435 
1436 .seealso: PCGAMGSetType(), PCGAMGType
1437 @*/
PetscErrorCode PCGAMGGetType(PC pc, PCGAMGType *type)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc,PC_CLASSID,1);
  /* PetscUseMethod (not Try): it is an error to query a PC that is not a GAMG */
  ierr = PetscUseMethod(pc,"PCGAMGGetType_C",(PC,PCGAMGType*),(pc,type));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1447 
/* Implementation of PCGAMGGetType() for the GAMG type */
static PetscErrorCode PCGAMGGetType_GAMG(PC pc, PCGAMGType *type)
{
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  *type = pc_gamg->type;
  PetscFunctionReturn(0);
}
1457 
/* Implementation of PCGAMGSetType(): looks up the type's constructor in GAMGList,
   destroys any previously installed type's state, then runs the new constructor */
static PetscErrorCode PCGAMGSetType_GAMG(PC pc, PCGAMGType type)
{
  PetscErrorCode ierr,(*r)(PC);
  PC_MG          *mg      = (PC_MG*)pc->data;
  PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;

  PetscFunctionBegin;
  pc_gamg->type = type;
  ierr = PetscFunctionListFind(GAMGList,type,&r);CHKERRQ(ierr);
  PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown GAMG type %s given",type);
  if (pc_gamg->ops->destroy) {
    /* a previous type was installed: tear it down and reset the ops table to a clean slate */
    ierr = (*pc_gamg->ops->destroy)(pc);CHKERRQ(ierr);
    ierr = PetscMemzero(pc_gamg->ops,sizeof(struct _PCGAMGOps));CHKERRQ(ierr);
    pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG; /* default createlevel survives the reset */
    /* cleaning up common data in pc_gamg - this should disappear someday */
    pc_gamg->data_cell_cols = 0;
    pc_gamg->data_cell_rows = 0;
    pc_gamg->orig_data_cell_cols = 0;
    pc_gamg->orig_data_cell_rows = 0;
    ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr);
    pc_gamg->data_sz = 0;
  }
  ierr = PetscFree(pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = PetscStrallocpy(type,&pc_gamg->gamg_type_name);CHKERRQ(ierr);
  ierr = (*r)(pc);CHKERRQ(ierr); /* run the new type's constructor, which repopulates pc_gamg->ops */
  PetscFunctionReturn(0);
}
1485 
1486 static PetscErrorCode PCView_GAMG(PC pc,PetscViewer viewer)
1487 {
1488   PetscErrorCode ierr,i;
1489   PC_MG          *mg      = (PC_MG*)pc->data;
1490   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1491   PetscReal       gc=0, oc=0;
1492 
1493   PetscFunctionBegin;
1494   ierr = PetscViewerASCIIPrintf(viewer,"    GAMG specific options\n");CHKERRQ(ierr);
1495   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold for dropping small values in graph on each level =");CHKERRQ(ierr);
1496   for (i=0;i<mg->nlevels; i++) {
1497     ierr = PetscViewerASCIIPrintf(viewer," %g",(double)pc_gamg->threshold[i]);CHKERRQ(ierr);
1498   }
1499   ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1500   ierr = PetscViewerASCIIPrintf(viewer,"      Threshold scaling factor for each level not specified = %g\n",(double)pc_gamg->threshold_scale);CHKERRQ(ierr);
1501   if (pc_gamg->use_aggs_in_asm) {
1502     ierr = PetscViewerASCIIPrintf(viewer,"      Using aggregates from coarsening process to define subdomains for PCASM\n");CHKERRQ(ierr);
1503   }
1504   if (pc_gamg->use_parallel_coarse_grid_solver) {
1505     ierr = PetscViewerASCIIPrintf(viewer,"      Using parallel coarse grid solver (all coarse grid equations not put on one process)\n");CHKERRQ(ierr);
1506   }
1507 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1508   if (pc_gamg->cpu_pin_coarse_grids) {
1509     /* ierr = PetscViewerASCIIPrintf(viewer,"      Pinning coarse grids to the CPU)\n");CHKERRQ(ierr); */
1510   }
1511 #endif
1512   /* if (pc_gamg->layout_type==PCGAMG_LAYOUT_COMPACT) { */
1513   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on processes in natural order (ie, 0,1,2...)\n");CHKERRQ(ierr); */
1514   /* } else { */
1515   /*   ierr = PetscViewerASCIIPrintf(viewer,"      Put reduced grids on whole machine (ie, 0,1*f,2*f...,np-f)\n");CHKERRQ(ierr); */
1516   /* } */
1517   if (pc_gamg->ops->view) {
1518     ierr = (*pc_gamg->ops->view)(pc,viewer);CHKERRQ(ierr);
1519   }
1520   ierr = PCMGGetGridComplexity(pc,&gc,&oc);CHKERRQ(ierr);
1521   ierr = PetscViewerASCIIPrintf(viewer,"      Complexity:    grid = %g    operator = %g\n",gc,oc);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 PetscErrorCode PCSetFromOptions_GAMG(PetscOptionItems *PetscOptionsObject,PC pc)
1526 {
1527   PetscErrorCode ierr;
1528   PC_MG          *mg      = (PC_MG*)pc->data;
1529   PC_GAMG        *pc_gamg = (PC_GAMG*)mg->innerctx;
1530   PetscBool      flag;
1531   MPI_Comm       comm;
1532   char           prefix[256],tname[32];
1533   PetscInt       i,n;
1534   const char     *pcpre;
1535   static const char *LayoutTypes[] = {"compact","spread","PCGAMGLayoutType","PC_GAMG_LAYOUT",NULL};
1536   PetscFunctionBegin;
1537   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1538   ierr = PetscOptionsHead(PetscOptionsObject,"GAMG options");CHKERRQ(ierr);
1539   ierr = PetscOptionsFList("-pc_gamg_type","Type of AMG method","PCGAMGSetType",GAMGList, pc_gamg->gamg_type_name, tname, sizeof(tname), &flag);CHKERRQ(ierr);
1540   if (flag) {
1541     ierr = PCGAMGSetType(pc,tname);CHKERRQ(ierr);
1542   }
1543   ierr = PetscOptionsBool("-pc_gamg_repartition","Repartion coarse grids","PCGAMGSetRepartition",pc_gamg->repart,&pc_gamg->repart,NULL);CHKERRQ(ierr);
1544   ierr = PetscOptionsBool("-pc_gamg_use_sa_esteig","Use eigen estimate from Smoothed aggregation for smoother","PCGAMGSetUseSAEstEig",pc_gamg->use_sa_esteig,&pc_gamg->use_sa_esteig,NULL);CHKERRQ(ierr);
1545   ierr = PetscOptionsBool("-pc_gamg_reuse_interpolation","Reuse prolongation operator","PCGAMGReuseInterpolation",pc_gamg->reuse_prol,&pc_gamg->reuse_prol,NULL);CHKERRQ(ierr);
1546   ierr = PetscOptionsBool("-pc_gamg_asm_use_agg","Use aggregation aggregates for ASM smoother","PCGAMGASMSetUseAggs",pc_gamg->use_aggs_in_asm,&pc_gamg->use_aggs_in_asm,NULL);CHKERRQ(ierr);
1547   ierr = PetscOptionsBool("-pc_gamg_use_parallel_coarse_grid_solver","Use parallel coarse grid solver (otherwise put last grid on one process)","PCGAMGSetUseParallelCoarseGridSolve",pc_gamg->use_parallel_coarse_grid_solver,&pc_gamg->use_parallel_coarse_grid_solver,NULL);CHKERRQ(ierr);
1548   ierr = PetscOptionsBool("-pc_gamg_cpu_pin_coarse_grids","Pin coarse grids to the CPU","PCGAMGSetCpuPinCoarseGrids",pc_gamg->cpu_pin_coarse_grids,&pc_gamg->cpu_pin_coarse_grids,NULL);CHKERRQ(ierr);
1549   ierr = PetscOptionsEnum("-pc_gamg_coarse_grid_layout_type","compact: place reduced grids on processes in natural order; spread: distribute to whole machine for more memory bandwidth","PCGAMGSetCoarseGridLayoutType",LayoutTypes,(PetscEnum)pc_gamg->layout_type,(PetscEnum*)&pc_gamg->layout_type,NULL);CHKERRQ(ierr);
1550   ierr = PetscOptionsInt("-pc_gamg_process_eq_limit","Limit (goal) on number of equations per process on coarse grids","PCGAMGSetProcEqLim",pc_gamg->min_eq_proc,&pc_gamg->min_eq_proc,NULL);CHKERRQ(ierr);
1551   ierr = PetscOptionsInt("-pc_gamg_coarse_eq_limit","Limit on number of equations for the coarse grid","PCGAMGSetCoarseEqLim",pc_gamg->coarse_eq_limit,&pc_gamg->coarse_eq_limit,NULL);CHKERRQ(ierr);
1552   ierr = PetscOptionsReal("-pc_gamg_threshold_scale","Scaling of threshold for each level not specified","PCGAMGSetThresholdScale",pc_gamg->threshold_scale,&pc_gamg->threshold_scale,NULL);CHKERRQ(ierr);
1553   n = PETSC_MG_MAXLEVELS;
1554   ierr = PetscOptionsRealArray("-pc_gamg_threshold","Relative threshold to use for dropping edges in aggregation graph","PCGAMGSetThreshold",pc_gamg->threshold,&n,&flag);CHKERRQ(ierr);
1555   if (!flag || n < PETSC_MG_MAXLEVELS) {
1556     if (!flag) n = 1;
1557     i = n;
1558     do {pc_gamg->threshold[i] = pc_gamg->threshold[i-1]*pc_gamg->threshold_scale;} while (++i<PETSC_MG_MAXLEVELS);
1559   }
1560   n = PETSC_MG_MAXLEVELS;
1561   ierr = PetscOptionsIntArray("-pc_gamg_rank_reduction_factors","Manual schedule of coarse grid reduction factors that overrides internal heuristics (0 for first reduction puts one process/device)","PCGAMGSetRankReductionFactors",pc_gamg->level_reduction_factors,&n,&flag);CHKERRQ(ierr);
1562   if (!flag) i = 0;
1563   else i = n;
1564   do {pc_gamg->level_reduction_factors[i] = -1;} while (++i<PETSC_MG_MAXLEVELS);
1565   ierr = PetscOptionsInt("-pc_mg_levels","Set number of MG levels","PCGAMGSetNlevels",pc_gamg->Nlevels,&pc_gamg->Nlevels,NULL);CHKERRQ(ierr);
1566   {
1567     PetscReal eminmax[2] = {0., 0.};
1568     n = 2;
1569     ierr = PetscOptionsRealArray("-pc_gamg_eigenvalues","extreme eigenvalues for smoothed aggregation","PCGAMGSetEigenvalues",eminmax,&n,&flag);CHKERRQ(ierr);
1570     if (flag) {
1571       PetscCheckFalse(n != 2,PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"-pc_gamg_eigenvalues: must specify 2 parameters, min and max eigenvalues");
1572       ierr = PCGAMGSetEigenvalues(pc, eminmax[1], eminmax[0]);CHKERRQ(ierr);
1573     }
1574   }
1575   /* set options for subtype */
1576   if (pc_gamg->ops->setfromoptions) {ierr = (*pc_gamg->ops->setfromoptions)(PetscOptionsObject,pc);CHKERRQ(ierr);}
1577 
1578   ierr = PCGetOptionsPrefix(pc, &pcpre);CHKERRQ(ierr);
1579   ierr = PetscSNPrintf(prefix,sizeof(prefix),"%spc_gamg_",pcpre ? pcpre : "");CHKERRQ(ierr);
1580   ierr = PetscOptionsTail();CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 /* -------------------------------------------------------------------------- */
1585 /*MC
1586      PCGAMG - Geometric algebraic multigrid (AMG) preconditioner
1587 
1588    Options Database Keys:
1589 +   -pc_gamg_type <type> - one of agg, geo, or classical
.   -pc_gamg_repartition  <true,default=false> - repartition the degrees of freedom across the coarse grids as they are determined
1591 .   -pc_gamg_reuse_interpolation <true,default=false> - when rebuilding the algebraic multigrid preconditioner reuse the previously computed interpolations
.   -pc_gamg_asm_use_agg <true,default=false> - use the aggregates from the coarsening process to define the subdomains on each level for the PCASM smoother
1593 .   -pc_gamg_process_eq_limit <limit, default=50> - GAMG will reduce the number of MPI processes used directly on the coarse grids so that there are around <limit>
1594                                         equations on each process that has degrees of freedom
1595 .   -pc_gamg_coarse_eq_limit <limit, default=50> - Set maximum number of equations on coarsest grid to aim for.
1596 .   -pc_gamg_threshold[] <thresh,default=0> - Before aggregating the graph GAMG will remove small values from the graph on each level
1597 -   -pc_gamg_threshold_scale <scale,default=1> - Scaling of threshold on each coarser grid if not specified
1598 
1599    Options Database Keys for default Aggregation:
1600 +  -pc_gamg_agg_nsmooths <nsmooth, default=1> - number of smoothing steps to use with smooth aggregation
1601 .  -pc_gamg_sym_graph <true,default=false> - symmetrize the graph before computing the aggregation
1602 -  -pc_gamg_square_graph <n,default=1> - number of levels to square the graph before aggregating it
1603 
1604    Multigrid options:
1605 +  -pc_mg_cycles <v> - v or w, see PCMGSetCycleType()
1606 .  -pc_mg_distinct_smoothup - configure the up and down (pre and post) smoothers separately, see PCMGSetDistinctSmoothUp()
.  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kaskade
1608 -  -pc_mg_levels <levels> - Number of levels of multigrid to use.
1609 
1610   Notes:
1611     In order to obtain good performance for PCGAMG for vector valued problems you must
1612        Call MatSetBlockSize() to indicate the number of degrees of freedom per grid point
1613        Call MatSetNearNullSpace() (or PCSetCoordinates() if solving the equations of elasticity) to indicate the near null space of the operator
1614        See the Users Manual Chapter 4 for more details
1615 
1616   Level: intermediate
1617 
1618 .seealso:  PCCreate(), PCSetType(), MatSetBlockSize(), PCMGType, PCSetCoordinates(), MatSetNearNullSpace(), PCGAMGSetType(), PCGAMGAGG, PCGAMGGEO, PCGAMGCLASSICAL, PCGAMGSetProcEqLim(),
1619            PCGAMGSetCoarseEqLim(), PCGAMGSetRepartition(), PCGAMGRegister(), PCGAMGSetReuseInterpolation(), PCGAMGASMSetUseAggs(), PCGAMGSetUseParallelCoarseGridSolve(), PCGAMGSetNlevels(), PCGAMGSetThreshold(), PCGAMGGetType(), PCGAMGSetReuseInterpolation(), PCGAMGSetUseSAEstEig()
1620 M*/
1621 
1622 PETSC_EXTERN PetscErrorCode PCCreate_GAMG(PC pc)
1623 {
1624   PetscErrorCode ierr,i;
1625   PC_GAMG        *pc_gamg;
1626   PC_MG          *mg;
1627 
1628   PetscFunctionBegin;
1629    /* register AMG type */
1630   ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
1631 
1632   /* PCGAMG is an inherited class of PCMG. Initialize pc as PCMG */
1633   ierr = PCSetType(pc, PCMG);CHKERRQ(ierr);
1634   ierr = PetscObjectChangeTypeName((PetscObject)pc, PCGAMG);CHKERRQ(ierr);
1635 
1636   /* create a supporting struct and attach it to pc */
1637   ierr         = PetscNewLog(pc,&pc_gamg);CHKERRQ(ierr);
1638   ierr         = PCMGSetGalerkin(pc,PC_MG_GALERKIN_EXTERNAL);CHKERRQ(ierr);
1639   mg           = (PC_MG*)pc->data;
1640   mg->innerctx = pc_gamg;
1641 
1642   ierr = PetscNewLog(pc,&pc_gamg->ops);CHKERRQ(ierr);
1643 
1644   /* these should be in subctx but repartitioning needs simple arrays */
1645   pc_gamg->data_sz = 0;
1646   pc_gamg->data    = NULL;
1647 
1648   /* overwrite the pointers of PCMG by the functions of base class PCGAMG */
1649   pc->ops->setfromoptions = PCSetFromOptions_GAMG;
1650   pc->ops->setup          = PCSetUp_GAMG;
1651   pc->ops->reset          = PCReset_GAMG;
1652   pc->ops->destroy        = PCDestroy_GAMG;
1653   mg->view                = PCView_GAMG;
1654 
1655   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGGetLevels_C",PCMGGetLevels_MG);CHKERRQ(ierr);
1656   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCMGSetLevels_C",PCMGSetLevels_MG);CHKERRQ(ierr);
1657   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetProcEqLim_C",PCGAMGSetProcEqLim_GAMG);CHKERRQ(ierr);
1658   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseEqLim_C",PCGAMGSetCoarseEqLim_GAMG);CHKERRQ(ierr);
1659   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRepartition_C",PCGAMGSetRepartition_GAMG);CHKERRQ(ierr);
1660   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetEigenvalues_C",PCGAMGSetEigenvalues_GAMG);CHKERRQ(ierr);
1661   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseSAEstEig_C",PCGAMGSetUseSAEstEig_GAMG);CHKERRQ(ierr);
1662   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetReuseInterpolation_C",PCGAMGSetReuseInterpolation_GAMG);CHKERRQ(ierr);
1663   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGASMSetUseAggs_C",PCGAMGASMSetUseAggs_GAMG);CHKERRQ(ierr);
1664   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetUseParallelCoarseGridSolve_C",PCGAMGSetUseParallelCoarseGridSolve_GAMG);CHKERRQ(ierr);
1665   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCpuPinCoarseGrids_C",PCGAMGSetCpuPinCoarseGrids_GAMG);CHKERRQ(ierr);
1666   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetCoarseGridLayoutType_C",PCGAMGSetCoarseGridLayoutType_GAMG);CHKERRQ(ierr);
1667   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThreshold_C",PCGAMGSetThreshold_GAMG);CHKERRQ(ierr);
1668   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetRankReductionFactors_C",PCGAMGSetRankReductionFactors_GAMG);CHKERRQ(ierr);
1669   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetThresholdScale_C",PCGAMGSetThresholdScale_GAMG);CHKERRQ(ierr);
1670   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetType_C",PCGAMGSetType_GAMG);CHKERRQ(ierr);
1671   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGGetType_C",PCGAMGGetType_GAMG);CHKERRQ(ierr);
1672   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCGAMGSetNlevels_C",PCGAMGSetNlevels_GAMG);CHKERRQ(ierr);
1673   pc_gamg->repart           = PETSC_FALSE;
1674   pc_gamg->reuse_prol       = PETSC_FALSE;
1675   pc_gamg->use_aggs_in_asm  = PETSC_FALSE;
1676   pc_gamg->use_parallel_coarse_grid_solver = PETSC_FALSE;
1677   pc_gamg->cpu_pin_coarse_grids = PETSC_FALSE;
1678   pc_gamg->layout_type      = PCGAMG_LAYOUT_SPREAD;
1679   pc_gamg->min_eq_proc      = 50;
1680   pc_gamg->coarse_eq_limit  = 50;
1681   for (i=0;i<PETSC_MG_MAXLEVELS;i++) pc_gamg->threshold[i] = 0.;
1682   pc_gamg->threshold_scale = 1.;
1683   pc_gamg->Nlevels          = PETSC_MG_MAXLEVELS;
1684   pc_gamg->current_level    = 0; /* don't need to init really */
1685   pc_gamg->use_sa_esteig    = PETSC_TRUE;
1686   pc_gamg->emin             = 0;
1687   pc_gamg->emax             = 0;
1688 
1689   pc_gamg->ops->createlevel = PCGAMGCreateLevel_GAMG;
1690 
1691   /* PCSetUp_GAMG assumes that the type has been set, so set it to the default now */
1692   ierr = PCGAMGSetType(pc,PCGAMGAGG);CHKERRQ(ierr);
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 /*@C
1697  PCGAMGInitializePackage - This function initializes everything in the PCGAMG package. It is called
1698     from PCInitializePackage().
1699 
1700  Level: developer
1701 
1702  .seealso: PetscInitialize()
1703 @*/
1704 PetscErrorCode PCGAMGInitializePackage(void)
1705 {
1706   PetscErrorCode ierr;
1707   PetscInt       l;
1708 
1709   PetscFunctionBegin;
1710   if (PCGAMGPackageInitialized) PetscFunctionReturn(0);
1711   PCGAMGPackageInitialized = PETSC_TRUE;
1712   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGGEO,PCCreateGAMG_GEO);CHKERRQ(ierr);
1713   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGAGG,PCCreateGAMG_AGG);CHKERRQ(ierr);
1714   ierr = PetscFunctionListAdd(&GAMGList,PCGAMGCLASSICAL,PCCreateGAMG_Classical);CHKERRQ(ierr);
1715   ierr = PetscRegisterFinalize(PCGAMGFinalizePackage);CHKERRQ(ierr);
1716 
1717   /* general events */
1718   ierr = PetscLogEventRegister("PCGAMGGraph_AGG", 0, &PC_GAMGGraph_AGG);CHKERRQ(ierr);
1719   ierr = PetscLogEventRegister("PCGAMGGraph_GEO", PC_CLASSID, &PC_GAMGGraph_GEO);CHKERRQ(ierr);
1720   ierr = PetscLogEventRegister("PCGAMGCoarse_AGG", PC_CLASSID, &PC_GAMGCoarsen_AGG);CHKERRQ(ierr);
1721   ierr = PetscLogEventRegister("PCGAMGCoarse_GEO", PC_CLASSID, &PC_GAMGCoarsen_GEO);CHKERRQ(ierr);
1722   ierr = PetscLogEventRegister("PCGAMGProl_AGG", PC_CLASSID, &PC_GAMGProlongator_AGG);CHKERRQ(ierr);
1723   ierr = PetscLogEventRegister("PCGAMGProl_GEO", PC_CLASSID, &PC_GAMGProlongator_GEO);CHKERRQ(ierr);
1724   ierr = PetscLogEventRegister("PCGAMGPOpt_AGG", PC_CLASSID, &PC_GAMGOptProlongator_AGG);CHKERRQ(ierr);
1725 
1726   ierr = PetscLogEventRegister("GAMG: createProl", PC_CLASSID, &petsc_gamg_setup_events[SET1]);CHKERRQ(ierr);
1727   ierr = PetscLogEventRegister("  Create Graph", PC_CLASSID, &petsc_gamg_setup_events[GRAPH]);CHKERRQ(ierr);
1728   ierr = PetscLogEventRegister("  Filter Graph", PC_CLASSID, &petsc_gamg_setup_events[SET16]);CHKERRQ(ierr);
1729   /* PetscLogEventRegister("    G.Mat", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_MAT]); */
1730   /* PetscLogEventRegister("    G.Filter", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_FILTER]); */
1731   /* PetscLogEventRegister("    G.Square", PC_CLASSID, &petsc_gamg_setup_events[GRAPH_SQR]); */
1732   ierr = PetscLogEventRegister("  MIS/Agg", PC_CLASSID, &petsc_gamg_setup_events[SET4]);CHKERRQ(ierr);
1733   ierr = PetscLogEventRegister("  geo: growSupp", PC_CLASSID, &petsc_gamg_setup_events[SET5]);CHKERRQ(ierr);
1734   ierr = PetscLogEventRegister("  geo: triangle", PC_CLASSID, &petsc_gamg_setup_events[SET6]);CHKERRQ(ierr);
1735   ierr = PetscLogEventRegister("    search-set", PC_CLASSID, &petsc_gamg_setup_events[FIND_V]);CHKERRQ(ierr);
1736   ierr = PetscLogEventRegister("  SA: col data", PC_CLASSID, &petsc_gamg_setup_events[SET7]);CHKERRQ(ierr);
1737   ierr = PetscLogEventRegister("  SA: frmProl0", PC_CLASSID, &petsc_gamg_setup_events[SET8]);CHKERRQ(ierr);
1738   ierr = PetscLogEventRegister("  SA: smooth", PC_CLASSID, &petsc_gamg_setup_events[SET9]);CHKERRQ(ierr);
1739   ierr = PetscLogEventRegister("GAMG: partLevel", PC_CLASSID, &petsc_gamg_setup_events[SET2]);CHKERRQ(ierr);
1740   ierr = PetscLogEventRegister("  repartition", PC_CLASSID, &petsc_gamg_setup_events[SET12]);CHKERRQ(ierr);
1741   ierr = PetscLogEventRegister("  Invert-Sort", PC_CLASSID, &petsc_gamg_setup_events[SET13]);CHKERRQ(ierr);
1742   ierr = PetscLogEventRegister("  Move A", PC_CLASSID, &petsc_gamg_setup_events[SET14]);CHKERRQ(ierr);
1743   ierr = PetscLogEventRegister("  Move P", PC_CLASSID, &petsc_gamg_setup_events[SET15]);CHKERRQ(ierr);
1744   for (l=0;l<PETSC_MG_MAXLEVELS;l++) {
1745     char ename[32];
1746 
1747     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Squ l%02d",l);CHKERRQ(ierr);
1748     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][0]);CHKERRQ(ierr);
1749     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Gal l%02d",l);CHKERRQ(ierr);
1750     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][1]);CHKERRQ(ierr);
1751     ierr = PetscSNPrintf(ename,sizeof(ename),"PCGAMG Opt l%02d",l);CHKERRQ(ierr);
1752     ierr = PetscLogEventRegister(ename, PC_CLASSID, &petsc_gamg_setup_matmat_events[l][2]);CHKERRQ(ierr);
1753   }
1754   /* PetscLogEventRegister(" PL move data", PC_CLASSID, &petsc_gamg_setup_events[SET13]); */
1755   /* PetscLogEventRegister("GAMG: fix", PC_CLASSID, &petsc_gamg_setup_events[SET10]); */
1756   /* PetscLogEventRegister("GAMG: set levels", PC_CLASSID, &petsc_gamg_setup_events[SET11]); */
1757   /* create timer stages */
1758 #if defined(GAMG_STAGES)
1759   {
1760     char     str[32];
1761     PetscInt lidx;
1762     sprintf(str,"MG Level %d (finest)",0);
1763     ierr = PetscLogStageRegister(str, &gamg_stages[0]);CHKERRQ(ierr);
1764     for (lidx=1; lidx<9; lidx++) {
1765       sprintf(str,"MG Level %d",(int)lidx);
1766       ierr = PetscLogStageRegister(str, &gamg_stages[lidx]);CHKERRQ(ierr);
1767     }
1768   }
1769 #endif
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 /*@C
1774  PCGAMGFinalizePackage - This function frees everything from the PCGAMG package. It is
1775     called from PetscFinalize() automatically.
1776 
1777  Level: developer
1778 
1779  .seealso: PetscFinalize()
1780 @*/
1781 PetscErrorCode PCGAMGFinalizePackage(void)
1782 {
1783   PetscErrorCode ierr;
1784 
1785   PetscFunctionBegin;
1786   PCGAMGPackageInitialized = PETSC_FALSE;
1787   ierr = PetscFunctionListDestroy(&GAMGList);CHKERRQ(ierr);
1788   PetscFunctionReturn(0);
1789 }
1790 
1791 /*@C
1792  PCGAMGRegister - Register a PCGAMG implementation.
1793 
1794  Input Parameters:
1795  + type - string that will be used as the name of the GAMG type.
1796  - create - function for creating the gamg context.
1797 
1798   Level: advanced
1799 
1800  .seealso: PCGAMGType, PCGAMG, PCGAMGSetType()
1801 @*/
PetscErrorCode PCGAMGRegister(PCGAMGType type, PetscErrorCode (*create)(PC))
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* make sure the package (and hence GAMGList) exists before adding to it */
  ierr = PCGAMGInitializePackage();CHKERRQ(ierr);
  ierr = PetscFunctionListAdd(&GAMGList,type,create);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1811 
1812