/* xref: /petsc/src/ksp/pc/impls/ml/ml.c (revision a4e35b1925eceef64945ea472b84f2bf06a67b5e) */

/*
   Provides an interface to the ML smoothed aggregation preconditioner.
   Note: Something non-obvious breaks -pc_mg_type ADDITIVE for parallel runs
                                    Jed Brown, see [PETSC #18321, #18449].
*/
#include <petsc/private/pcimpl.h>   /*I "petscpc.h" I*/
#include <petsc/private/pcmgimpl.h> /*I "petscksp.h" I*/
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscdm.h> /* for DMDestroy(&pc->mg) hack */

EXTERN_C_BEGIN
/* The HAVE_CONFIG_H flag is required by the ML include files */
#ifndef HAVE_CONFIG_H
  #define HAVE_CONFIG_H
#endif
#include <ml_include.h>
#include <ml_viz_stats.h>
EXTERN_C_END

typedef enum {
  PCML_NULLSPACE_AUTO,
  PCML_NULLSPACE_USER,
  PCML_NULLSPACE_BLOCK,
  PCML_NULLSPACE_SCALAR
} PCMLNullSpaceType;
static const char *const PCMLNullSpaceTypes[] = {"AUTO", "USER", "BLOCK", "SCALAR", "PCMLNullSpaceType", "PCML_NULLSPACE_", 0};
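/* The trailing entries above follow the PetscOptionsEnum() string-array convention:
   the value names, then the enum type name, the common option prefix, and a null sentinel */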

/* The context (data structure) at each grid level */
typedef struct {
  Vec x, b, r; /* global vectors */
  Mat A, P, R;
  KSP ksp;
  Vec coords; /* projected by ML, if PCSetCoordinates() is called; values packed by node */
} GridCtx;

/* The context used to input a PETSc matrix into ML at the fine grid */
typedef struct {
  Mat          A;    /* PETSc matrix in AIJ format */
  Mat          Aloc; /* local portion of A to be used by ML */
  Vec          x, y;
  ML_Operator *mlmat;
  PetscScalar *pwork; /* tmp array used by PetscML_comm() */
} FineGridCtx;

/* The context that associates an ML matrix with a PETSc shell matrix */
typedef struct {
  Mat          A;     /* PETSc shell matrix associated with mlmat */
  ML_Operator *mlmat; /* ML matrix associated with A */
} Mat_MLShell;

/* Private context for the ML preconditioner */
typedef struct {
  ML               *ml_object;
  ML_Aggregate     *agg_object;
  GridCtx          *gridctx;
  FineGridCtx      *PetscMLdata;
  PetscInt          Nlevels, MaxNlevels, MaxCoarseSize, CoarsenScheme, EnergyMinimization, MinPerProc, PutOnSingleProc, RepartitionType, ZoltanScheme;
  PetscReal         Threshold, DampingFactor, EnergyMinimizationDropTol, MaxMinRatio, AuxThreshold;
  PetscBool         SpectralNormScheme_Anorm, BlockScaling, EnergyMinimizationCheap, Symmetrize, OldHierarchy, KeepAggInfo, Reusable, Repartition, Aux;
  PetscBool         reuse_interpolation;
  PCMLNullSpaceType nulltype;
  PetscMPIInt       size; /* size of communicator for pc->pmat */
  PetscInt          dim;  /* data from PCSetCoordinates(_ML) */
  PetscInt          nloc;
  PetscReal        *coords; /* ML has a grid object for each level: the finest grid will point into coords */
} PC_ML;

static int PetscML_getrow(ML_Operator *ML_data, int N_requested_rows, int requested_rows[], int allocated_space, int columns[], double values[], int row_lengths[])
{
  PetscInt     m, i, j, k = 0, row, *aj;
  PetscScalar *aa;
  FineGridCtx *ml = (FineGridCtx *)ML_Get_MyGetrowData(ML_data);
  Mat_SeqAIJ  *a  = (Mat_SeqAIJ *)ml->Aloc->data;

  if (MatGetSize(ml->Aloc, &m, NULL)) return (0);
  for (i = 0; i < N_requested_rows; i++) {
    row            = requested_rows[i];
    row_lengths[i] = a->ilen[row];
    if (allocated_space < k + row_lengths[i]) return (0);
    if (row >= 0 && row <= m - 1) {
      aj = a->j + a->i[row];
      aa = a->a + a->i[row];
      for (j = 0; j < row_lengths[i]; j++) {
        columns[k]  = aj[j];
        values[k++] = aa[j];
      }
    }
  }
  return (1);
}
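/* A note on the getrow contract assumed above: ML treats a return of 0 as "buffer too
   small" and retries with a larger allocated_space, while a return of 1 means columns[]
   and values[] hold the requested rows back to back with their lengths in row_lengths[] */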

static PetscErrorCode PetscML_comm(double p[], void *ML_data)
{
  FineGridCtx       *ml = (FineGridCtx *)ML_data;
  Mat                A  = ml->A;
  Mat_MPIAIJ        *a  = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        size;
  PetscInt           i, in_length = A->rmap->n, out_length = ml->Aloc->cmap->n;
  const PetscScalar *array;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(VecPlaceArray(ml->y, p));
  PetscCall(VecScatterBegin(a->Mvctx, ml->y, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->Mvctx, ml->y, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecResetArray(ml->y));
  PetscCall(VecGetArrayRead(a->lvec, &array));
  for (i = in_length; i < out_length; i++) p[i] = array[i - in_length];
  PetscCall(VecRestoreArrayRead(a->lvec, &array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Needed since ML expects an int (*)(double *, void *) but PetscErrorCode may be an
  enum. Instead of modifying PetscML_comm() it is easier to just wrap it.
*/
static int ML_PetscML_comm(double p[], void *ML_data)
{
  return (int)PetscML_comm(p, ML_data);
}

static int PetscML_matvec(ML_Operator *ML_data, int in_length, double p[], int out_length, double ap[])
{
  FineGridCtx *ml = (FineGridCtx *)ML_Get_MyMatvecData(ML_data);
  Mat          A = ml->A, Aloc = ml->Aloc;
  PetscMPIInt  size;
  PetscScalar *pwork = ml->pwork;
  PetscInt     i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(VecPlaceArray(ml->x, p));
  } else {
    for (i = 0; i < in_length; i++) pwork[i] = p[i];
    PetscCall(PetscML_comm(pwork, ml));
    PetscCall(VecPlaceArray(ml->x, pwork));
  }
  PetscCall(VecPlaceArray(ml->y, ap));
  PetscCall(MatMult(Aloc, ml->x, ml->y));
  PetscCall(VecResetArray(ml->x));
  PetscCall(VecResetArray(ml->y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_ML(Mat A, Vec x, Vec y)
{
  Mat_MLShell       *shell;
  PetscScalar       *yarray;
  const PetscScalar *xarray;
  PetscInt           x_length, y_length;

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &shell));
  PetscCall(VecGetArrayRead(x, &xarray));
  PetscCall(VecGetArray(y, &yarray));
  x_length = shell->mlmat->invec_leng;
  y_length = shell->mlmat->outvec_leng;
  PetscStackCallExternalVoid("ML_Operator_Apply", ML_Operator_Apply(shell->mlmat, x_length, (PetscScalar *)xarray, y_length, yarray));
  PetscCall(VecRestoreArrayRead(x, &xarray));
  PetscCall(VecRestoreArray(y, &yarray));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* newtype is ignored since this routine only handles one conversion */
static PetscErrorCode MatConvert_MPIAIJ_ML(Mat A, MatType newtype, MatReuse scall, Mat *Aloc)
{
  Mat_MPIAIJ  *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *mat, *a = (Mat_SeqAIJ *)(mpimat->A)->data, *b = (Mat_SeqAIJ *)(mpimat->B)->data;
  PetscInt    *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscScalar *aa, *ba, *ca;
  PetscInt     am = A->rmap->n, an = A->cmap->n, i, j, k;
  PetscInt    *ci, *cj, ncols;

  PetscFunctionBegin;
  PetscCheck(am == an, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "A must have a square diagonal portion, am: %" PetscInt_FMT " != an: %" PetscInt_FMT, am, an);
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, (const PetscScalar **)&aa));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, (const PetscScalar **)&ba));
  if (scall == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));

    k = 0;
    for (i = 0; i < am; i++) {
      /* diagonal portion of A */
      ncols = ai[i + 1] - ai[i];
      for (j = 0; j < ncols; j++) {
        cj[k]   = *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      ncols = bi[i + 1] - bi[i];
      for (j = 0; j < ncols; j++) {
        cj[k] = an + (*bj);
        bj++;
        ca[k++] = *ba++;
      }
    }
    PetscCheck(k == ci[am], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "k: %" PetscInt_FMT " != ci[am]: %" PetscInt_FMT, k, ci[am]);

    /* put together the new matrix */
    an = mpimat->A->cmap->n + mpimat->B->cmap->n;
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, an, ci, cj, ca, Aloc));

    /* MatCreateSeqAIJWithArrays flags the matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*Aloc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;

    mat->nonew = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ *)(*Aloc)->data;
    ci  = mat->i;
    cj  = mat->j;
    ca  = mat->a;
    for (i = 0; i < am; i++) {
      /* diagonal portion of A */
      ncols = ai[i + 1] - ai[i];
      for (j = 0; j < ncols; j++) *ca++ = *aa++;
      /* off-diagonal portion of A */
      ncols = bi[i + 1] - bi[i];
      for (j = 0; j < ncols; j++) *ca++ = *ba++;
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, (const PetscScalar **)&aa));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, (const PetscScalar **)&ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
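/* A sketch of the layout produced above (indices illustrative, not from a test): if a
   local row has diagonal-block columns {0, 2} and one off-diagonal column whose local
   index in B is 5, the merged row is stored as cj = {0, 2, an + 5}; ghost columns are
   simply appended after the an owned columns, matching what PetscML_comm() gathers */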

static PetscErrorCode MatDestroy_ML(Mat A)
{
  Mat_MLShell *shell;

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &shell));
  PetscCall(PetscFree(shell));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatWrapML_SeqAIJ(ML_Operator *mlmat, MatReuse reuse, Mat *newmat)
{
  struct ML_CSR_MSRdata *matdata = (struct ML_CSR_MSRdata *)mlmat->data;
  PetscInt               m = mlmat->outvec_leng, n = mlmat->invec_leng, *nnz = NULL, nz_max;
  PetscInt              *ml_cols = matdata->columns, *ml_rowptr = matdata->rowptr, *aj, i;
  PetscScalar           *ml_vals = matdata->values, *aa;

  PetscFunctionBegin;
  PetscCheck(mlmat->getrow, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "mlmat->getrow = NULL");
  if (m != n) { /* ML Pmat and Rmat are in CSR format. Pass array pointers into the SeqAIJ matrix */
    if (reuse) {
      Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*newmat)->data;
      aij->i          = ml_rowptr;
      aij->j          = ml_cols;
      aij->a          = ml_vals;
    } else {
      /* sort ml_cols and ml_vals */
      PetscCall(PetscMalloc1(m + 1, &nnz));
      for (i = 0; i < m; i++) nnz[i] = ml_rowptr[i + 1] - ml_rowptr[i];
      aj = ml_cols;
      aa = ml_vals;
      for (i = 0; i < m; i++) {
        PetscCall(PetscSortIntWithScalarArray(nnz[i], aj, aa));
        aj += nnz[i];
        aa += nnz[i];
      }
      PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, ml_rowptr, ml_cols, ml_vals, newmat));
      PetscCall(PetscFree(nnz));
    }
    PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  nz_max = PetscMax(1, mlmat->max_nz_per_row);
  PetscCall(PetscMalloc2(nz_max, &aa, nz_max, &aj));
  if (!reuse) {
    PetscCall(MatCreate(PETSC_COMM_SELF, newmat));
    PetscCall(MatSetSizes(*newmat, m, n, PETSC_DECIDE, PETSC_DECIDE));
    PetscCall(MatSetType(*newmat, MATSEQAIJ));
    /* keep track of block size for A matrices */
    PetscCall(MatSetBlockSize(*newmat, mlmat->num_PDEs));

    PetscCall(PetscMalloc1(m, &nnz));
    for (i = 0; i < m; i++) PetscStackCallExternalVoid("ML_Operator_Getrow", ML_Operator_Getrow(mlmat, 1, &i, nz_max, aj, aa, &nnz[i]));
    PetscCall(MatSeqAIJSetPreallocation(*newmat, 0, nnz));
  }
  for (i = 0; i < m; i++) {
    PetscInt ncols;

    PetscStackCallExternalVoid("ML_Operator_Getrow", ML_Operator_Getrow(mlmat, 1, &i, nz_max, aj, aa, &ncols));
    PetscCall(MatSetValues(*newmat, 1, &i, ncols, aj, aa, INSERT_VALUES));
  }
  PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree2(aa, aj));
  PetscCall(PetscFree(nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatWrapML_SHELL(ML_Operator *mlmat, MatReuse reuse, Mat *newmat)
{
  PetscInt     m, n;
  ML_Comm     *MLcomm;
  Mat_MLShell *shellctx;

  PetscFunctionBegin;
  m = mlmat->outvec_leng;
  n = mlmat->invec_leng;

  if (reuse) {
    PetscCall(MatShellGetContext(*newmat, &shellctx));
    shellctx->mlmat = mlmat;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  MLcomm = mlmat->comm;

  PetscCall(PetscNew(&shellctx));
  PetscCall(MatCreateShell(MLcomm->USR_comm, m, n, PETSC_DETERMINE, PETSC_DETERMINE, shellctx, newmat));
  PetscCall(MatShellSetOperation(*newmat, MATOP_MULT, (void (*)(void))MatMult_ML));
  PetscCall(MatShellSetOperation(*newmat, MATOP_DESTROY, (void (*)(void))MatDestroy_ML));

  shellctx->A     = *newmat;
  shellctx->mlmat = mlmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatWrapML_MPIAIJ(ML_Operator *mlmat, MatReuse reuse, Mat *newmat)
{
  PetscInt    *aj;
  PetscScalar *aa;
  PetscInt     i, j, *gordering;
  PetscInt     m = mlmat->outvec_leng, n, nz_max, row;
  Mat          A;

  PetscFunctionBegin;
  PetscCheck(mlmat->getrow, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "mlmat->getrow = NULL");
  n = mlmat->invec_leng;
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "m %" PetscInt_FMT " must equal n %" PetscInt_FMT, m, n);

  /* create global row numbering for an ML_Operator */
  PetscStackCallExternalVoid("ML_build_global_numbering", ML_build_global_numbering(mlmat, &gordering, "rows"));

  nz_max = PetscMax(1, mlmat->max_nz_per_row) + 1;
  PetscCall(PetscMalloc2(nz_max, &aa, nz_max, &aj));
  if (reuse) {
    A = *newmat;
  } else {
    PetscInt *nnzA, *nnzB, *nnz;
    PetscInt  rstart;
    PetscCall(MatCreate(mlmat->comm->USR_comm, &A));
    PetscCall(MatSetSizes(A, m, n, PETSC_DECIDE, PETSC_DECIDE));
    PetscCall(MatSetType(A, MATMPIAIJ));
    /* keep track of block size for A matrices */
    PetscCall(MatSetBlockSize(A, mlmat->num_PDEs));
    PetscCall(PetscMalloc3(m, &nnzA, m, &nnzB, m, &nnz));
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, mlmat->comm->USR_comm));
    rstart -= m;

    for (i = 0; i < m; i++) {
      row = gordering[i] - rstart;
      PetscStackCallExternalVoid("ML_Operator_Getrow", ML_Operator_Getrow(mlmat, 1, &i, nz_max, aj, aa, &nnz[i]));
      nnzA[row] = 0;
      for (j = 0; j < nnz[i]; j++) {
        if (aj[j] < m) nnzA[row]++;
      }
      nnzB[row] = nnz[i] - nnzA[row];
    }
    PetscCall(MatMPIAIJSetPreallocation(A, 0, nnzA, 0, nnzB));
    PetscCall(PetscFree3(nnzA, nnzB, nnz));
  }
  for (i = 0; i < m; i++) {
    PetscInt ncols;
    row = gordering[i];

    PetscStackCallExternalVoid("ML_Operator_Getrow", ML_Operator_Getrow(mlmat, 1, &i, nz_max, aj, aa, &ncols));
    for (j = 0; j < ncols; j++) aj[j] = gordering[aj[j]];
    PetscCall(MatSetValues(A, 1, &row, ncols, aj, aa, INSERT_VALUES));
  }
  PetscStackCallExternalVoid("ML_free", ML_free(gordering));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  *newmat = A;

  PetscCall(PetscFree2(aa, aj));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCSetCoordinates_ML

   Input Parameter:
   .  pc - the preconditioner context
*/
static PetscErrorCode PCSetCoordinates_ML(PC pc, PetscInt ndm, PetscInt a_nloc, PetscReal *coords)
{
  PC_MG   *mg    = (PC_MG *)pc->data;
  PC_ML   *pc_ml = (PC_ML *)mg->innerctx;
  PetscInt arrsz, oldarrsz, bs, my0, kk, ii, nloc, Iend, aloc;
  Mat      Amat = pc->pmat;

  /* this function copied and modified from PCSetCoordinates_GEO -TGI */
  PetscFunctionBegin;
  PetscValidHeaderSpecific(Amat, MAT_CLASSID, 1);
  PetscCall(MatGetBlockSize(Amat, &bs));

  PetscCall(MatGetOwnershipRange(Amat, &my0, &Iend));
  aloc = (Iend - my0);
  nloc = (Iend - my0) / bs;

  PetscCheck((nloc == a_nloc) || (aloc == a_nloc), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Number of local blocks %" PetscInt_FMT " must be %" PetscInt_FMT " or %" PetscInt_FMT ".", a_nloc, nloc, aloc);

  oldarrsz    = pc_ml->dim * pc_ml->nloc;
  pc_ml->dim  = ndm;
  pc_ml->nloc = nloc;
  arrsz       = ndm * nloc;

  /* create data - syntactic sugar that should be refactored at some point */
  if (pc_ml->coords == 0 || (oldarrsz != arrsz)) {
    PetscCall(PetscFree(pc_ml->coords));
    PetscCall(PetscMalloc1(arrsz, &pc_ml->coords));
  }
  for (kk = 0; kk < arrsz; kk++) pc_ml->coords[kk] = -999.;
  /* copy data in - column oriented */
  if (nloc == a_nloc) {
    for (kk = 0; kk < nloc; kk++) {
      for (ii = 0; ii < ndm; ii++) pc_ml->coords[ii * nloc + kk] = coords[kk * ndm + ii];
    }
  } else { /* assumes the coordinates are blocked */
    for (kk = 0; kk < nloc; kk++) {
      for (ii = 0; ii < ndm; ii++) pc_ml->coords[ii * nloc + kk] = coords[bs * kk * ndm + ii];
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
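/*
   A usage sketch (names are illustrative): an application with 3D nodal coordinates
   interleaved by node, x0 y0 z0 x1 y1 z1 ..., hands them to the preconditioner with

     PetscReal *coords;   // length 3*nloc, one (x,y,z) triple per node
     PetscCall(PCSetCoordinates(pc, 3, nloc, coords));

   PCSetCoordinates() dispatches to PCSetCoordinates_ML() above, which repacks the
   values into the column-oriented layout (all x, then all y, then all z) that ML expects.
*/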

extern PetscErrorCode PCReset_MG(PC);
PetscErrorCode        PCReset_ML(PC pc)
{
  PC_MG   *mg    = (PC_MG *)pc->data;
  PC_ML   *pc_ml = (PC_ML *)mg->innerctx;
  PetscInt level, fine_level = pc_ml->Nlevels - 1, dim = pc_ml->dim;

  PetscFunctionBegin;
  if (dim) {
    for (level = 0; level <= fine_level; level++) PetscCall(VecDestroy(&pc_ml->gridctx[level].coords));
    if (pc_ml->ml_object && pc_ml->ml_object->Grid) {
      ML_Aggregate_Viz_Stats *grid_info = (ML_Aggregate_Viz_Stats *)pc_ml->ml_object->Grid[0].Grid;
      grid_info->x                      = 0; /* do this so ML doesn't try to free coordinates */
      grid_info->y                      = 0;
      grid_info->z                      = 0;
      PetscStackCallExternalVoid("ML_Aggregate_VizAndStats_Clean", ML_Aggregate_VizAndStats_Clean(pc_ml->ml_object));
    }
  }
  PetscStackCallExternalVoid("ML_Aggregate_Destroy", ML_Aggregate_Destroy(&pc_ml->agg_object));
  PetscStackCallExternalVoid("ML_Destroy", ML_Destroy(&pc_ml->ml_object));

  if (pc_ml->PetscMLdata) {
    PetscCall(PetscFree(pc_ml->PetscMLdata->pwork));
    PetscCall(MatDestroy(&pc_ml->PetscMLdata->Aloc));
    PetscCall(VecDestroy(&pc_ml->PetscMLdata->x));
    PetscCall(VecDestroy(&pc_ml->PetscMLdata->y));
  }
  PetscCall(PetscFree(pc_ml->PetscMLdata));

  if (pc_ml->gridctx) {
    for (level = 0; level < fine_level; level++) {
      if (pc_ml->gridctx[level].A) PetscCall(MatDestroy(&pc_ml->gridctx[level].A));
      if (pc_ml->gridctx[level].P) PetscCall(MatDestroy(&pc_ml->gridctx[level].P));
      if (pc_ml->gridctx[level].R) PetscCall(MatDestroy(&pc_ml->gridctx[level].R));
      if (pc_ml->gridctx[level].x) PetscCall(VecDestroy(&pc_ml->gridctx[level].x));
      if (pc_ml->gridctx[level].b) PetscCall(VecDestroy(&pc_ml->gridctx[level].b));
      if (pc_ml->gridctx[level + 1].r) PetscCall(VecDestroy(&pc_ml->gridctx[level + 1].r));
    }
  }
  PetscCall(PetscFree(pc_ml->gridctx));
  PetscCall(PetscFree(pc_ml->coords));

  pc_ml->dim  = 0;
  pc_ml->nloc = 0;
  PetscCall(PCReset_MG(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCSetUp_ML - Prepares for the use of the ML preconditioner
                    by setting data structures and options.

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCSetUp()

   Note:
   The interface routine PCSetUp() is not usually called directly by
   the user, but instead is called by PCApply() if necessary.
*/
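/*
   A minimal sketch of how this routine is reached (not code from this file): PCML is
   selected like any other PC, and PCSetUp() lands here during the solve.

     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCML));   // or -pc_type ml on the command line
     PetscCall(KSPSolve(ksp, b, x));
*/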
extern PetscErrorCode PCSetFromOptions_MG(PC, PetscOptionItems *PetscOptionsObject);

PetscErrorCode PCSetUp_ML(PC pc)
{
  PetscMPIInt      size;
  FineGridCtx     *PetscMLdata;
  ML              *ml_object;
  ML_Aggregate    *agg_object;
  ML_Operator     *mlmat;
  PetscInt         nlocal_allcols, Nlevels, mllevel, level, level1, m, fine_level, bs;
  Mat              A, Aloc;
  GridCtx         *gridctx;
  PC_MG           *mg    = (PC_MG *)pc->data;
  PC_ML           *pc_ml = (PC_ML *)mg->innerctx;
  PetscBool        isSeq, isMPI;
  KSP              smoother;
  PC               subpc;
  PetscInt         mesh_level, old_mesh_level;
  MatInfo          info;
  static PetscBool cite = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@TechReport{ml_users_guide,\n  author = {M. Sala and J.J. Hu and R.S. Tuminaro},\n  title = {{ML} 3.1 {S}moothed {A}ggregation {U}ser's {G}uide},\n  institution =  {Sandia National Laboratories},\n  number = "
                                   "{SAND2004-4821},\n  year = 2004\n}\n",
                                   &cite));
  A = pc->pmat;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));

  if (pc->setupcalled) {
    if (pc->flag == SAME_NONZERO_PATTERN && pc_ml->reuse_interpolation) {
      /*
       Reuse interpolation instead of recomputing aggregates and updating the whole hierarchy. This is less expensive for
       multiple solves in which the matrix is not changing too quickly.
       */
      ml_object             = pc_ml->ml_object;
      gridctx               = pc_ml->gridctx;
      Nlevels               = pc_ml->Nlevels;
      fine_level            = Nlevels - 1;
      gridctx[fine_level].A = A;

      PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeq));
      PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isMPI));
      PetscCheck(isMPI || isSeq, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Matrix type '%s' cannot be used with ML. ML can only handle AIJ matrices.", ((PetscObject)A)->type_name);
      if (isMPI) {
        PetscCall(MatConvert_MPIAIJ_ML(A, NULL, MAT_INITIAL_MATRIX, &Aloc));
      } else {
        Aloc = A;
        PetscCall(PetscObjectReference((PetscObject)Aloc));
      }

      PetscCall(MatGetSize(Aloc, &m, &nlocal_allcols));
      PetscMLdata = pc_ml->PetscMLdata;
      PetscCall(MatDestroy(&PetscMLdata->Aloc));
      PetscMLdata->A    = A;
      PetscMLdata->Aloc = Aloc;
      PetscStackCallExternalVoid("ML_Init_Amatrix", ML_Init_Amatrix(ml_object, 0, m, m, PetscMLdata));
      PetscStackCallExternalVoid("ML_Set_Amatrix_Matvec", ML_Set_Amatrix_Matvec(ml_object, 0, PetscML_matvec));

      mesh_level = ml_object->ML_finest_level;
      while (ml_object->SingleLevel[mesh_level].Rmat->to) {
        old_mesh_level = mesh_level;
        mesh_level     = ml_object->SingleLevel[mesh_level].Rmat->to->levelnum;

        /* clean and regenerate A */
        mlmat = &(ml_object->Amat[mesh_level]);
        PetscStackCallExternalVoid("ML_Operator_Clean", ML_Operator_Clean(mlmat));
        PetscStackCallExternalVoid("ML_Operator_Init", ML_Operator_Init(mlmat, ml_object->comm));
        PetscStackCallExternalVoid("ML_Gen_AmatrixRAP", ML_Gen_AmatrixRAP(ml_object, old_mesh_level, mesh_level));
      }

      level = fine_level - 1;
      if (size == 1) { /* convert ML P, R and A into seqaij format */
        for (mllevel = 1; mllevel < Nlevels; mllevel++) {
          mlmat = &(ml_object->Amat[mllevel]);
          PetscCall(MatWrapML_SeqAIJ(mlmat, MAT_REUSE_MATRIX, &gridctx[level].A));
          level--;
        }
      } else { /* convert ML P and R into shell format, ML A into mpiaij format */
        for (mllevel = 1; mllevel < Nlevels; mllevel++) {
          mlmat = &(ml_object->Amat[mllevel]);
          PetscCall(MatWrapML_MPIAIJ(mlmat, MAT_REUSE_MATRIX, &gridctx[level].A));
          level--;
        }
      }

      for (level = 0; level < fine_level; level++) {
        if (level > 0) PetscCall(PCMGSetResidual(pc, level, PCMGResidualDefault, gridctx[level].A));
        PetscCall(KSPSetOperators(gridctx[level].ksp, gridctx[level].A, gridctx[level].A));
      }
      PetscCall(PCMGSetResidual(pc, fine_level, PCMGResidualDefault, gridctx[fine_level].A));
      PetscCall(KSPSetOperators(gridctx[fine_level].ksp, gridctx[fine_level].A, gridctx[fine_level].A));

      PetscCall(PCSetUp_MG(pc));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else {
      /* since ML can change the size of vectors/matrices at any level we must destroy everything */
      PetscCall(PCReset_ML(pc));
    }
  }

  /* setup special features of PCML */
  /* convert A to Aloc to be used by ML at the fine grid */
  pc_ml->size = size;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeq));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isMPI));
  PetscCheck(isMPI || isSeq, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Matrix type '%s' cannot be used with ML. ML can only handle AIJ matrices.", ((PetscObject)A)->type_name);
  if (isMPI) {
    PetscCall(MatConvert_MPIAIJ_ML(A, NULL, MAT_INITIAL_MATRIX, &Aloc));
  } else {
    Aloc = A;
    PetscCall(PetscObjectReference((PetscObject)Aloc));
  }

  /* create and initialize struct 'PetscMLdata' */
  PetscCall(PetscNew(&PetscMLdata));
  pc_ml->PetscMLdata = PetscMLdata;
  PetscCall(PetscMalloc1(Aloc->cmap->n + 1, &PetscMLdata->pwork));

  PetscCall(MatCreateVecs(Aloc, &PetscMLdata->x, &PetscMLdata->y));

  PetscMLdata->A    = A;
  PetscMLdata->Aloc = Aloc;
  if (pc_ml->dim) { /* create vecs around the coordinate data given */
    PetscInt   i, j, dim = pc_ml->dim;
    PetscInt   nloc = pc_ml->nloc, nlocghost;
    PetscReal *ghostedcoords;

    PetscCall(MatGetBlockSize(A, &bs));
    nlocghost = Aloc->cmap->n / bs;
    PetscCall(PetscMalloc1(dim * nlocghost, &ghostedcoords));
    for (i = 0; i < dim; i++) {
      /* copy coordinate values into first component of pwork */
      for (j = 0; j < nloc; j++) PetscMLdata->pwork[bs * j] = pc_ml->coords[nloc * i + j];
      /* get the ghost values */
      PetscCall(PetscML_comm(PetscMLdata->pwork, PetscMLdata));
      /* write into the vector */
      for (j = 0; j < nlocghost; j++) ghostedcoords[i * nlocghost + j] = PetscMLdata->pwork[bs * j];
    }
    /* replace the original coords with the ghosted coords, because these are
     * what ML needs */
    PetscCall(PetscFree(pc_ml->coords));
    pc_ml->coords = ghostedcoords;
  }

  /* create the ML discretization matrix at the fine grid */
  /* ML requires the fine-grid matrix as input; it determines the number of levels itself */
  PetscCall(MatGetSize(Aloc, &m, &nlocal_allcols));
  PetscCall(MatGetBlockSize(A, &bs));
  PetscStackCallExternalVoid("ML_Create", ML_Create(&ml_object, pc_ml->MaxNlevels));
  PetscStackCallExternalVoid("ML_Comm_Set_UsrComm", ML_Comm_Set_UsrComm(ml_object->comm, PetscObjectComm((PetscObject)A)));
  pc_ml->ml_object = ml_object;
  PetscStackCallExternalVoid("ML_Init_Amatrix", ML_Init_Amatrix(ml_object, 0, m, m, PetscMLdata));
  PetscStackCallExternalVoid("ML_Set_Amatrix_Getrow", ML_Set_Amatrix_Getrow(ml_object, 0, PetscML_getrow, ML_PetscML_comm, nlocal_allcols));
  PetscStackCallExternalVoid("ML_Set_Amatrix_Matvec", ML_Set_Amatrix_Matvec(ml_object, 0, PetscML_matvec));

  PetscStackCallExternalVoid("ML_Set_Symmetrize", ML_Set_Symmetrize(ml_object, pc_ml->Symmetrize ? ML_YES : ML_NO));

  /* aggregation */
  PetscStackCallExternalVoid("ML_Aggregate_Create", ML_Aggregate_Create(&agg_object));
  pc_ml->agg_object = agg_object;

  {
    MatNullSpace mnull;
    PetscCall(MatGetNearNullSpace(A, &mnull));
    if (pc_ml->nulltype == PCML_NULLSPACE_AUTO) {
      if (mnull) pc_ml->nulltype = PCML_NULLSPACE_USER;
      else if (bs > 1) pc_ml->nulltype = PCML_NULLSPACE_BLOCK;
      else pc_ml->nulltype = PCML_NULLSPACE_SCALAR;
    }
    switch (pc_ml->nulltype) {
    case PCML_NULLSPACE_USER: {
      PetscScalar       *nullvec;
      const PetscScalar *v;
      PetscBool          has_const;
      PetscInt           i, j, mlocal, nvec, M;
      const Vec         *vecs;

      PetscCheck(mnull, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Must provide explicit null space using MatSetNearNullSpace() to use user-specified null space");
      PetscCall(MatGetSize(A, &M, NULL));
      PetscCall(MatGetLocalSize(Aloc, &mlocal, NULL));
      PetscCall(MatNullSpaceGetVecs(mnull, &has_const, &nvec, &vecs));
      PetscCall(PetscMalloc1((nvec + !!has_const) * mlocal, &nullvec));
      if (has_const)
        for (i = 0; i < mlocal; i++) nullvec[i] = 1.0 / M;
      for (i = 0; i < nvec; i++) {
        PetscCall(VecGetArrayRead(vecs[i], &v));
        for (j = 0; j < mlocal; j++) nullvec[(i + !!has_const) * mlocal + j] = v[j];
        PetscCall(VecRestoreArrayRead(vecs[i], &v));
      }
      PetscStackCallExternalVoid("ML_Aggregate_Set_NullSpace", PetscCall(ML_Aggregate_Set_NullSpace(agg_object, bs, nvec + !!has_const, nullvec, mlocal)));
      PetscCall(PetscFree(nullvec));
    } break;
    case PCML_NULLSPACE_BLOCK:
      PetscStackCallExternalVoid("ML_Aggregate_Set_NullSpace", PetscCall(ML_Aggregate_Set_NullSpace(agg_object, bs, bs, 0, 0)));
      break;
    case PCML_NULLSPACE_SCALAR:
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown null space type");
    }
  }
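  /* A sketch of supplying the user null space consumed above (application-side code,
     names illustrative): given nv rigid-body-mode vectors in rbm[],

       MatNullSpace nsp;
       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, nv, rbm, &nsp));
       PetscCall(MatSetNearNullSpace(A, nsp));
       PetscCall(MatNullSpaceDestroy(&nsp));

     together with -pc_ml_nullspace user (or PCML_NULLSPACE_AUTO, which detects it) */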
  PetscStackCallExternalVoid("ML_Aggregate_Set_MaxCoarseSize", ML_Aggregate_Set_MaxCoarseSize(agg_object, pc_ml->MaxCoarseSize));
  /* set options */
  switch (pc_ml->CoarsenScheme) {
  case 1:
    PetscStackCallExternalVoid("ML_Aggregate_Set_CoarsenScheme_Coupled", ML_Aggregate_Set_CoarsenScheme_Coupled(agg_object));
    break;
  case 2:
    PetscStackCallExternalVoid("ML_Aggregate_Set_CoarsenScheme_MIS", ML_Aggregate_Set_CoarsenScheme_MIS(agg_object));
    break;
  case 3:
    PetscStackCallExternalVoid("ML_Aggregate_Set_CoarsenScheme_METIS", ML_Aggregate_Set_CoarsenScheme_METIS(agg_object));
    break;
  }
  PetscStackCallExternalVoid("ML_Aggregate_Set_Threshold", ML_Aggregate_Set_Threshold(agg_object, pc_ml->Threshold));
  PetscStackCallExternalVoid("ML_Aggregate_Set_DampingFactor", ML_Aggregate_Set_DampingFactor(agg_object, pc_ml->DampingFactor));
  if (pc_ml->SpectralNormScheme_Anorm) PetscStackCallExternalVoid("ML_Set_SpectralNormScheme_Anorm", ML_Set_SpectralNormScheme_Anorm(ml_object));
  agg_object->keep_agg_information      = (int)pc_ml->KeepAggInfo;
  agg_object->keep_P_tentative          = (int)pc_ml->Reusable;
  agg_object->block_scaled_SA           = (int)pc_ml->BlockScaling;
  agg_object->minimizing_energy         = (int)pc_ml->EnergyMinimization;
  agg_object->minimizing_energy_droptol = (double)pc_ml->EnergyMinimizationDropTol;
  agg_object->cheap_minimizing_energy   = (int)pc_ml->EnergyMinimizationCheap;

  if (pc_ml->Aux) {
    PetscCheck(pc_ml->dim, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Auxiliary matrix requires coordinates");
    ml_object->Amat[0].aux_data->threshold = pc_ml->AuxThreshold;
    ml_object->Amat[0].aux_data->enable    = 1;
    ml_object->Amat[0].aux_data->max_level = 10;
    ml_object->Amat[0].num_PDEs            = bs;
  }

  PetscCall(MatGetInfo(A, MAT_LOCAL, &info));
  ml_object->Amat[0].N_nonzeros = (int)info.nz_used;

  if (pc_ml->dim) {
    PetscInt                i, dim = pc_ml->dim;
    ML_Aggregate_Viz_Stats *grid_info;
    PetscInt                nlocghost;

    PetscCall(MatGetBlockSize(A, &bs));
    nlocghost = Aloc->cmap->n / bs;

    PetscStackCallExternalVoid("ML_Aggregate_VizAndStats_Setup", ML_Aggregate_VizAndStats_Setup(ml_object)); /* create ml info for coords */
    grid_info = (ML_Aggregate_Viz_Stats *)ml_object->Grid[0].Grid;
    for (i = 0; i < dim; i++) {
      /* set the finest level coordinates to point to the column-order array
       * in pc_ml */
      /* NOTE: must point away before VizAndStats_Clean so ML doesn't free */
      switch (i) {
      case 0:
        grid_info->x = pc_ml->coords + nlocghost * i;
        break;
      case 1:
        grid_info->y = pc_ml->coords + nlocghost * i;
        break;
      case 2:
        grid_info->z = pc_ml->coords + nlocghost * i;
        break;
      default:
        SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_SIZ, "PCML coordinate dimension must be <= 3");
      }
    }
    grid_info->Ndim = dim;
  }

  /* repartitioning */
  if (pc_ml->Repartition) {
    PetscStackCallExternalVoid("ML_Repartition_Activate", ML_Repartition_Activate(ml_object));
    PetscStackCallExternalVoid("ML_Repartition_Set_LargestMinMaxRatio", ML_Repartition_Set_LargestMinMaxRatio(ml_object, pc_ml->MaxMinRatio));
    PetscStackCallExternalVoid("ML_Repartition_Set_MinPerProc", ML_Repartition_Set_MinPerProc(ml_object, pc_ml->MinPerProc));
    PetscStackCallExternalVoid("ML_Repartition_Set_PutOnSingleProc", ML_Repartition_Set_PutOnSingleProc(ml_object, pc_ml->PutOnSingleProc));
#if 0 /* Function not yet defined in ml-6.2 */
    /* I'm not sure what compatibility issues might crop up if we partitioned
     * on the finest level, so to be safe repartition starts on the next
     * finest level (reflecting default behavior in
     * ml_MultiLevelPreconditioner) */
    PetscStackCallExternalVoid("ML_Repartition_Set_StartLevel",ML_Repartition_Set_StartLevel(ml_object,1));
#endif

    if (!pc_ml->RepartitionType) {
      PetscInt i;

      PetscCheck(pc_ml->dim, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "ML Zoltan repartitioning requires coordinates");
      PetscStackCallExternalVoid("ML_Repartition_Set_Partitioner", ML_Repartition_Set_Partitioner(ml_object, ML_USEZOLTAN));
      PetscStackCallExternalVoid("ML_Aggregate_Set_Dimensions", ML_Aggregate_Set_Dimensions(agg_object, pc_ml->dim));

      for (i = 0; i < ml_object->ML_num_levels; i++) {
        ML_Aggregate_Viz_Stats *grid_info = (ML_Aggregate_Viz_Stats *)ml_object->Grid[i].Grid;
        grid_info->zoltan_type            = pc_ml->ZoltanScheme + 1; /* ml numbers options 1, 2, 3 */
        /* defaults from ml_agg_info.c */
        grid_info->zoltan_estimated_its = 40; /* only relevant to hypergraph / fast hypergraph */
        grid_info->zoltan_timers        = 0;
        grid_info->smoothing_steps      = 4; /* only relevant to hypergraph / fast hypergraph */
      }
    } else {
      PetscStackCallExternalVoid("ML_Repartition_Set_Partitioner", ML_Repartition_Set_Partitioner(ml_object, ML_USEPARMETIS));
    }
  }

  if (pc_ml->OldHierarchy) {
    PetscStackCallExternalVoid("ML_Gen_MGHierarchy_UsingAggregation", Nlevels = ML_Gen_MGHierarchy_UsingAggregation(ml_object, 0, ML_INCREASING, agg_object));
  } else {
    PetscStackCallExternalVoid("ML_Gen_MultiLevelHierarchy_UsingAggregation", Nlevels = ML_Gen_MultiLevelHierarchy_UsingAggregation(ml_object, 0, ML_INCREASING, agg_object));
  }
  PetscCheck(Nlevels > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Nlevels %" PetscInt_FMT " must be > 0", Nlevels);
  pc_ml->Nlevels = Nlevels;
  fine_level     = Nlevels - 1;

  PetscCall(PCMGSetLevels(pc, Nlevels, NULL));
  /* set default smoothers */
  for (level = 1; level <= fine_level; level++) {
    PetscCall(PCMGGetSmoother(pc, level, &smoother));
    PetscCall(KSPSetType(smoother, KSPRICHARDSON));
    PetscCall(KSPGetPC(smoother, &subpc));
    PetscCall(PCSetType(subpc, PCSOR));
  }
  PetscObjectOptionsBegin((PetscObject)pc);
  PetscCall(PCSetFromOptions_MG(pc, PetscOptionsObject)); /* should be called in PCSetFromOptions_ML(), but cannot be called prior to PCMGSetLevels() */
  PetscOptionsEnd();

  PetscCall(PetscMalloc1(Nlevels, &gridctx));

  pc_ml->gridctx = gridctx;

  /* wrap ML matrices by PETSc shell matrices at coarsened grids.
     Level 0 is the finest grid for ML, but coarsest for PETSc! */
  gridctx[fine_level].A = A;

  level = fine_level - 1;
  /* TODO: support for GPUs */
  if (size == 1) { /* convert ML P, R and A into seqaij format */
    for (mllevel = 1; mllevel < Nlevels; mllevel++) {
      mlmat = &(ml_object->Pmat[mllevel]);
      PetscCall(MatWrapML_SeqAIJ(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].P));
      mlmat = &(ml_object->Rmat[mllevel - 1]);
      PetscCall(MatWrapML_SeqAIJ(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].R));

      mlmat = &(ml_object->Amat[mllevel]);
      PetscCall(MatWrapML_SeqAIJ(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].A));
      level--;
    }
  } else { /* convert ML P and R into shell format, ML A into mpiaij format */
    for (mllevel = 1; mllevel < Nlevels; mllevel++) {
      mlmat = &(ml_object->Pmat[mllevel]);
      PetscCall(MatWrapML_SHELL(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].P));
      mlmat = &(ml_object->Rmat[mllevel - 1]);
      PetscCall(MatWrapML_SHELL(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].R));

      mlmat = &(ml_object->Amat[mllevel]);
      PetscCall(MatWrapML_MPIAIJ(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].A));
      level--;
    }
  }

  /* create vectors and ksp at all levels */
  for (level = 0; level < fine_level; level++) {
    level1 = level + 1;

    PetscCall(MatCreateVecs(gridctx[level].A, &gridctx[level].x, &gridctx[level].b));
    PetscCall(MatCreateVecs(gridctx[level1].A, NULL, &gridctx[level1].r));
    PetscCall(PCMGSetX(pc, level, gridctx[level].x));
    PetscCall(PCMGSetRhs(pc, level, gridctx[level].b));
    PetscCall(PCMGSetR(pc, level1, gridctx[level1].r));

    if (level == 0) {
      PetscCall(PCMGGetCoarseSolve(pc, &gridctx[level].ksp));
    } else {
      PetscCall(PCMGGetSmoother(pc, level, &gridctx[level].ksp));
    }
  }
  PetscCall(PCMGGetSmoother(pc, fine_level, &gridctx[fine_level].ksp));

  /* create coarse level and the interpolation between the levels */
  for (level = 0; level < fine_level; level++) {
    level1 = level + 1;

    PetscCall(PCMGSetInterpolation(pc, level1, gridctx[level].P));
    PetscCall(PCMGSetRestriction(pc, level1, gridctx[level].R));
    if (level > 0) PetscCall(PCMGSetResidual(pc, level, PCMGResidualDefault, gridctx[level].A));
    PetscCall(KSPSetOperators(gridctx[level].ksp, gridctx[level].A, gridctx[level].A));
  }
  PetscCall(PCMGSetResidual(pc, fine_level, PCMGResidualDefault, gridctx[fine_level].A));
  PetscCall(KSPSetOperators(gridctx[fine_level].ksp, gridctx[fine_level].A, gridctx[fine_level].A));

  /* put coordinate info in levels */
  if (pc_ml->dim) {
    PetscInt   i, j, dim = pc_ml->dim;
    PetscInt   bs, nloc;
    PC         subpc;
    PetscReal *array;

    level = fine_level;
    for (mllevel = 0; mllevel < Nlevels; mllevel++) {
      ML_Aggregate_Viz_Stats *grid_info = (ML_Aggregate_Viz_Stats *)ml_object->Amat[mllevel].to->Grid->Grid;
      MPI_Comm                comm      = ((PetscObject)gridctx[level].A)->comm;

      PetscCall(MatGetBlockSize(gridctx[level].A, &bs));
      PetscCall(MatGetLocalSize(gridctx[level].A, NULL, &nloc));
      nloc /= bs; /* number of local nodes */

      PetscCall(VecCreate(comm, &gridctx[level].coords));
      PetscCall(VecSetSizes(gridctx[level].coords, dim * nloc, PETSC_DECIDE));
      PetscCall(VecSetType(gridctx[level].coords, VECMPI));
      PetscCall(VecGetArray(gridctx[level].coords, &array));
      for (j = 0; j < nloc; j++) {
        for (i = 0; i < dim; i++) {
          switch (i) {
          case 0:
            array[dim * j + i] = grid_info->x[j];
            break;
          case 1:
            array[dim * j + i] = grid_info->y[j];
            break;
          case 2:
            array[dim * j + i] = grid_info->z[j];
            break;
          default:
            SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_SIZ, "PCML coordinate dimension must be <= 3");
          }
        }
      }

      /* passing coordinates to smoothers/coarse solver, should they need them */
      PetscCall(KSPGetPC(gridctx[level].ksp, &subpc));
      PetscCall(PCSetCoordinates(subpc, dim, nloc, array));
      PetscCall(VecRestoreArray(gridctx[level].coords, &array));
      level--;
    }
  }

  /* setupcalled is set to 0 so that MG is set up from scratch */
  pc->setupcalled = 0;
  PetscCall(PCSetUp_MG(pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCDestroy_ML - Destroys the private context for the ML preconditioner
   that was created with PCCreate_ML().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCDestroy()
*/
PetscErrorCode PCDestroy_ML(PC pc)
{
  PC_MG *mg    = (PC_MG *)pc->data;
  PC_ML *pc_ml = (PC_ML *)mg->innerctx;

  PetscFunctionBegin;
  PetscCall(PCReset_ML(pc));
  PetscCall(PetscFree(pc_ml));
  PetscCall(PCDestroy_MG(pc));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PCSetFromOptions_ML(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PetscInt    indx, PrintLevel, partindx;
  const char *scheme[] = {"Uncoupled", "Coupled", "MIS", "METIS"};
  const char *part[]   = {"Zoltan", "ParMETIS"};
#if defined(HAVE_ML_ZOLTAN)
  const char *zscheme[] = {"RCB", "hypergraph", "fast_hypergraph"};
#endif
  PC_MG      *mg    = (PC_MG *)pc->data;
  PC_ML      *pc_ml = (PC_ML *)mg->innerctx;
  PetscMPIInt size;
  MPI_Comm    comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscOptionsHeadBegin(PetscOptionsObject, "ML options");

  PrintLevel = 0;
  indx       = 0;
  partindx   = 0;

  PetscCall(PetscOptionsInt("-pc_ml_PrintLevel", "Print level", "ML_Set_PrintLevel", PrintLevel, &PrintLevel, NULL));
  PetscStackCallExternalVoid("ML_Set_PrintLevel", ML_Set_PrintLevel(PrintLevel));
  PetscCall(PetscOptionsInt("-pc_ml_maxNlevels", "Maximum number of levels", "None", pc_ml->MaxNlevels, &pc_ml->MaxNlevels, NULL));
  PetscCall(PetscOptionsInt("-pc_ml_maxCoarseSize", "Maximum coarsest mesh size", "ML_Aggregate_Set_MaxCoarseSize", pc_ml->MaxCoarseSize, &pc_ml->MaxCoarseSize, NULL));
  PetscCall(PetscOptionsEList("-pc_ml_CoarsenScheme", "Aggregate Coarsen Scheme", "ML_Aggregate_Set_CoarsenScheme_*", scheme, 4, scheme[0], &indx, NULL));

  pc_ml->CoarsenScheme = indx;

  PetscCall(PetscOptionsReal("-pc_ml_DampingFactor", "P damping factor", "ML_Aggregate_Set_DampingFactor", pc_ml->DampingFactor, &pc_ml->DampingFactor, NULL));
  PetscCall(PetscOptionsReal("-pc_ml_Threshold", "Smoother drop tol", "ML_Aggregate_Set_Threshold", pc_ml->Threshold, &pc_ml->Threshold, NULL));
  PetscCall(PetscOptionsBool("-pc_ml_SpectralNormScheme_Anorm", "Method used for estimating spectral radius", "ML_Set_SpectralNormScheme_Anorm", pc_ml->SpectralNormScheme_Anorm, &pc_ml->SpectralNormScheme_Anorm, NULL));
  PetscCall(PetscOptionsBool("-pc_ml_Symmetrize", "Symmetrize aggregation", "ML_Set_Symmetrize", pc_ml->Symmetrize, &pc_ml->Symmetrize, NULL));
  PetscCall(PetscOptionsBool("-pc_ml_BlockScaling", "Scale all dofs at each node together", "None", pc_ml->BlockScaling, &pc_ml->BlockScaling, NULL));
  PetscCall(PetscOptionsEnum("-pc_ml_nullspace", "Which type of null space information to use", "None", PCMLNullSpaceTypes, (PetscEnum)pc_ml->nulltype, (PetscEnum *)&pc_ml->nulltype, NULL));
  PetscCall(PetscOptionsInt("-pc_ml_EnergyMinimization", "Energy minimization norm type (0=no minimization; see ML manual for 1,2,3; -1 and 4 undocumented)", "None", pc_ml->EnergyMinimization, &pc_ml->EnergyMinimization, NULL));
  PetscCall(PetscOptionsBool("-pc_ml_reuse_interpolation", "Reuse the interpolation operators when possible (cheaper, weaker when matrix entries change a lot)", "None", pc_ml->reuse_interpolation, &pc_ml->reuse_interpolation, NULL));
  /*
    The following checks a number of conditions.  If we let this stuff slip by, then ML's error handling will take over.
    That is suboptimal because it amounts to calling exit(1), so we check for the most common conditions ourselves.

    We also try to set some sane defaults when energy minimization is activated, otherwise it's hard to find a working
    combination of options and ML's exit(1) explanations don't help matters.
  */
  PetscCheck(pc_ml->EnergyMinimization >= -1 && pc_ml->EnergyMinimization <= 4, comm, PETSC_ERR_ARG_OUTOFRANGE, "EnergyMinimization must be in range -1..4");
  PetscCheck(pc_ml->EnergyMinimization != 4 || size == 1, comm, PETSC_ERR_SUP, "Energy minimization type 4 does not work in parallel");
  if (pc_ml->EnergyMinimization == 4) PetscCall(PetscInfo(pc, "Mandel's energy minimization scheme is experimental and broken in ML-6.2\n"));
  if (pc_ml->EnergyMinimization) PetscCall(PetscOptionsReal("-pc_ml_EnergyMinimizationDropTol", "Energy minimization drop tolerance", "None", pc_ml->EnergyMinimizationDropTol, &pc_ml->EnergyMinimizationDropTol, NULL));
  if (pc_ml->EnergyMinimization == 2) {
    /* According to ml_MultiLevelPreconditioner.cpp, this option is only meaningful for norm type (2) */
    PetscCall(PetscOptionsBool("-pc_ml_EnergyMinimizationCheap", "Use cheaper variant of norm type 2", "None", pc_ml->EnergyMinimizationCheap, &pc_ml->EnergyMinimizationCheap, NULL));
  }
  /* energy minimization sometimes breaks if this is turned off; the more classical schemes should be okay without it */
  if (pc_ml->EnergyMinimization) pc_ml->KeepAggInfo = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-pc_ml_KeepAggInfo", "Allows the preconditioner to be reused, or auxiliary matrices to be generated", "None", pc_ml->KeepAggInfo, &pc_ml->KeepAggInfo, NULL));
  /* Option (-1) doesn't work at all (calls exit(1)) if the tentative restriction operator isn't stored. */
  if (pc_ml->EnergyMinimization == -1) pc_ml->Reusable = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-pc_ml_Reusable", "Store intermediate data structures so that the multilevel hierarchy is reusable", "None", pc_ml->Reusable, &pc_ml->Reusable, NULL));
  /*
    ML's C API is severely underdocumented and lacks significant functionality.  The C++ API calls
    ML_Gen_MultiLevelHierarchy_UsingAggregation() which is a modified copy (!?) of the documented function
    ML_Gen_MGHierarchy_UsingAggregation().  This modification, however, does not provide a strict superset of the
    functionality in the old function, so some users may still want to use it.  Note that many options are ignored in
    this context, but ML doesn't provide a way to find out which ones.
   */
  PetscCall(PetscOptionsBool("-pc_ml_OldHierarchy", "Use old routine to generate hierarchy", "None", pc_ml->OldHierarchy, &pc_ml->OldHierarchy, NULL));
  PetscCall(PetscOptionsBool("-pc_ml_repartition", "Allow ML to repartition levels of the hierarchy", "ML_Repartition_Activate", pc_ml->Repartition, &pc_ml->Repartition, NULL));
  if (pc_ml->Repartition) {
    PetscCall(PetscOptionsReal("-pc_ml_repartitionMaxMinRatio", "Acceptable ratio of repartitioned sizes", "ML_Repartition_Set_LargestMinMaxRatio", pc_ml->MaxMinRatio, &pc_ml->MaxMinRatio, NULL));
    PetscCall(PetscOptionsInt("-pc_ml_repartitionMinPerProc", "Smallest repartitioned size", "ML_Repartition_Set_MinPerProc", pc_ml->MinPerProc, &pc_ml->MinPerProc, NULL));
    PetscCall(PetscOptionsInt("-pc_ml_repartitionPutOnSingleProc", "Problem size automatically repartitioned to one processor", "ML_Repartition_Set_PutOnSingleProc", pc_ml->PutOnSingleProc, &pc_ml->PutOnSingleProc, NULL));
#if defined(HAVE_ML_ZOLTAN)
    partindx = 0;
    PetscCall(PetscOptionsEList("-pc_ml_repartitionType", "Repartitioning library to use", "ML_Repartition_Set_Partitioner", part, 2, part[0], &partindx, NULL));

    pc_ml->RepartitionType = partindx;
    if (!partindx) {
      PetscInt zindx = 0;

      PetscCall(PetscOptionsEList("-pc_ml_repartitionZoltanScheme", "Repartitioning scheme to use", "None", zscheme, 3, zscheme[0], &zindx, NULL));

      pc_ml->ZoltanScheme = zindx;
    }
#else
    partindx = 1;
    PetscCall(PetscOptionsEList("-pc_ml_repartitionType", "Repartitioning library to use", "ML_Repartition_Set_Partitioner", part, 2, part[1], &partindx, NULL));
    pc_ml->RepartitionType = partindx;
    PetscCheck(partindx, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP_SYS, "ML not compiled with Zoltan");
#endif
    PetscCall(PetscOptionsBool("-pc_ml_Aux", "Aggregate using auxiliary coordinate-based Laplacian", "None", pc_ml->Aux, &pc_ml->Aux, NULL));
    PetscCall(PetscOptionsReal("-pc_ml_AuxThreshold", "Auxiliary smoother drop tol", "None", pc_ml->AuxThreshold, &pc_ml->AuxThreshold, NULL));
  }
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   PCCreate_ML - Creates an ML preconditioner context, PC_ML,
   and sets this as the private data within the generic preconditioning
   context, PC, that was created within PCCreate().

   Input Parameter:
.  pc - the preconditioner context

   Application Interface Routine: PCCreate()
*/

/*MC
     PCML - Use the Sandia National Laboratories (SNL) ML algebraic multigrid preconditioner.

   Options Database Keys:
   Multigrid options (inherited):
+  -pc_mg_cycle_type <v> - v for V cycle, w for W-cycle (`PCMGSetCycleType()`)
.  -pc_mg_distinct_smoothup - Should one configure the up and down smoothers separately (`PCMGSetDistinctSmoothUp()`)
-  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kaskade

   ML Options Database Keys:
+  -pc_ml_PrintLevel <0> - Print level (`ML_Set_PrintLevel()`)
.  -pc_ml_maxNlevels <10> - Maximum number of levels (None)
.  -pc_ml_maxCoarseSize <1> - Maximum coarsest mesh size (`ML_Aggregate_Set_MaxCoarseSize()`)
.  -pc_ml_CoarsenScheme <Uncoupled> - (one of) Uncoupled Coupled MIS METIS
.  -pc_ml_DampingFactor <1.33333> - P damping factor (`ML_Aggregate_Set_DampingFactor()`)
.  -pc_ml_Threshold <0> - Smoother drop tol (`ML_Aggregate_Set_Threshold()`)
.  -pc_ml_SpectralNormScheme_Anorm <false> - Method used for estimating spectral radius (`ML_Set_SpectralNormScheme_Anorm()`)
.  -pc_ml_repartition <false> - Allow ML to repartition levels of the hierarchy (`ML_Repartition_Activate()`)
.  -pc_ml_repartitionMaxMinRatio <1.3> - Acceptable ratio of repartitioned sizes (`ML_Repartition_Set_LargestMinMaxRatio()`)
.  -pc_ml_repartitionMinPerProc <512> - Smallest repartitioned size (`ML_Repartition_Set_MinPerProc()`)
.  -pc_ml_repartitionPutOnSingleProc <5000> - Problem size automatically repartitioned to one processor (`ML_Repartition_Set_PutOnSingleProc()`)
.  -pc_ml_repartitionType <Zoltan> - Repartitioning library to use (`ML_Repartition_Set_Partitioner()`)
.  -pc_ml_repartitionZoltanScheme <RCB> - Repartitioning scheme to use (None)
.  -pc_ml_Aux <false> - Aggregate using auxiliary coordinate-based Laplacian (None)
-  -pc_ml_AuxThreshold <0.0> - Auxiliary smoother drop tol (None)

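   Example Usage (a sketch; the executable name and option values are illustrative):
.vb
   ./app -ksp_type cg -pc_type ml -pc_ml_maxNlevels 3 -pc_ml_CoarsenScheme MIS
.ve
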
   Level: intermediate

   Developer Note:
   The coarser grid matrices and restriction/interpolation
   operators are computed by ML, with the matrices converted to PETSc matrices in `MATAIJ` format
   and the restriction/interpolation operators wrapped as PETSc shell matrices.

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCMGType`, `PCMG`, `PCHYPRE`, `PCGAMG`,
          `PCMGSetLevels()`, `PCMGGetLevels()`, `PCMGSetType()`, `PCMGSetCycleType()`, `PCMGSetDistinctSmoothUp()`,
          `PCMGGetCoarseSolve()`, `PCMGSetResidual()`, `PCMGSetInterpolation()`,
          `PCMGSetRestriction()`, `PCMGGetSmoother()`, `PCMGGetSmootherUp()`, `PCMGGetSmootherDown()`,
          `PCMGSetCycleTypeOnLevel()`, `PCMGSetRhs()`, `PCMGSetX()`, `PCMGSetR()`
M*/

PETSC_EXTERN PetscErrorCode PCCreate_ML(PC pc)
{
  PC_ML *pc_ml;
  PC_MG *mg;

  PetscFunctionBegin;
  /* PCML inherits from PCMG; initialize pc as PCMG */
  PetscCall(PCSetType(pc, PCMG)); /* calls PCCreate_MG() and MGCreate_Private() */
  PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCML));
  /* since PCMG tries to use the DM associated with the PC, it must be deleted here */
  PetscCall(DMDestroy(&pc->dm));
  PetscCall(PCMGSetGalerkin(pc, PC_MG_GALERKIN_EXTERNAL));
  mg = (PC_MG *)pc->data;

  /* create a supporting struct and attach it to pc */
  PetscCall(PetscNew(&pc_ml));
  mg->innerctx = pc_ml;

  pc_ml->ml_object                = 0;
  pc_ml->agg_object               = 0;
  pc_ml->gridctx                  = 0;
  pc_ml->PetscMLdata              = 0;
  pc_ml->Nlevels                  = -1;
  pc_ml->MaxNlevels               = 10;
  pc_ml->MaxCoarseSize            = 1;
  pc_ml->CoarsenScheme            = 1;
  pc_ml->Threshold                = 0.0;
  pc_ml->DampingFactor            = 4.0 / 3.0;
  pc_ml->SpectralNormScheme_Anorm = PETSC_FALSE;
  pc_ml->size                     = 0;
  pc_ml->dim                      = 0;
  pc_ml->nloc                     = 0;
  pc_ml->coords                   = 0;
  pc_ml->Repartition              = PETSC_FALSE;
  pc_ml->MaxMinRatio              = 1.3;
  pc_ml->MinPerProc               = 512;
  pc_ml->PutOnSingleProc          = 5000;
  pc_ml->RepartitionType          = 0;
  pc_ml->ZoltanScheme             = 0;
  pc_ml->Aux                      = PETSC_FALSE;
  pc_ml->AuxThreshold             = 0.0;

  /* allow for coordinates to be passed */
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_ML));

  /* overwrite the pointers of PCMG by the functions of PCML */
  pc->ops->setfromoptions = PCSetFromOptions_ML;
  pc->ops->setup          = PCSetUp_ML;
  pc->ops->reset          = PCReset_ML;
  pc->ops->destroy        = PCDestroy_ML;
  PetscFunctionReturn(PETSC_SUCCESS);
}