xref: /petsc/src/ksp/pc/impls/ml/ml.c (revision 2ff79c18c26c94ed8cb599682f680f231dca6444)
1 /*
2    Provides an interface to the ML smoothed Aggregation
3    Note: Something non-obvious breaks -pc_mg_type ADDITIVE for parallel runs
4                                     Jed Brown, see [PETSC #18321, #18449].
5 */
6 #include <petsc/private/pcimpl.h>   /*I "petscpc.h" I*/
7 #include <petsc/private/pcmgimpl.h> /*I "petscksp.h" I*/
8 #include <../src/mat/impls/aij/seq/aij.h>
9 #include <../src/mat/impls/aij/mpi/mpiaij.h>
10 #include <petscdm.h> /* for DMDestroy(&pc->mg) hack */
11 
12 EXTERN_C_BEGIN
13 /* HAVE_CONFIG_H flag is required by ML include files */
14 #ifndef HAVE_CONFIG_H
15   #define HAVE_CONFIG_H
16 #endif
17 #include <ml_include.h>
18 #include <ml_viz_stats.h>
19 EXTERN_C_END
20 
/* How the near-null space passed to ML's aggregation is obtained; AUTO is
   resolved to one of the concrete choices during PCSetUp_ML() */
typedef enum {
  PCML_NULLSPACE_AUTO,   /* USER if a MatNearNullSpace is attached, else BLOCK if bs > 1, else SCALAR */
  PCML_NULLSPACE_USER,   /* vectors from MatSetNearNullSpace() are copied into ML */
  PCML_NULLSPACE_BLOCK,  /* ML's default block null space, bs vectors for block size bs */
  PCML_NULLSPACE_SCALAR  /* nothing set explicitly; ML's default scalar null space */
} PCMLNullSpaceType;
/* string table for -pc_ml_nullspace option parsing/printing (PetscEnum convention) */
static const char *const PCMLNullSpaceTypes[] = {"AUTO", "USER", "BLOCK", "SCALAR", "PCMLNullSpaceType", "PCML_NULLSPACE_", 0};
28 
/* The context (data structure) at each grid level */
typedef struct {
  Vec x, b, r; /* global work vectors: solution, right-hand side, residual */
  Mat A, P, R; /* level operator, prolongation, restriction */
  KSP ksp;     /* smoother (or coarse solver) for this level */
  Vec coords;  /* projected by ML, if PCSetCoordinates is called; values packed by node */
} GridCtx;
36 
/* The context used to input PETSc matrix into ML at fine grid */
typedef struct {
  Mat          A;    /* PETSc matrix in aij format */
  Mat          Aloc; /* local portion of A to be used by ML (owned + off-diagonal columns) */
  Vec          x, y; /* work vectors sized to Aloc, used to wrap raw ML arrays */
  ML_Operator *mlmat; /* ML's handle for this matrix */
  PetscScalar *pwork; /* tmp array used by PetscML_comm(); length Aloc->cmap->n + 1 */
} FineGridCtx;
45 
/* The context associates a ML matrix with a PETSc shell matrix */
typedef struct {
  Mat          A;     /* PETSc shell matrix associated with mlmat */
  ML_Operator *mlmat; /* ML matrix associated with A */
} Mat_MLShell;
51 
/* Private context for the ML preconditioner */
typedef struct {
  ML               *ml_object;   /* ML multigrid hierarchy */
  ML_Aggregate     *agg_object;  /* ML smoothed-aggregation object */
  GridCtx          *gridctx;     /* per-level PETSc objects; index Nlevels-1 is the fine grid */
  FineGridCtx      *PetscMLdata; /* fine-grid matrix wrapper handed to ML's callbacks */
  /* integer options, mostly forwarded to the corresponding ML settings */
  PetscInt          Nlevels, MaxNlevels, MaxCoarseSize, CoarsenScheme, EnergyMinimization, MinPerProc, PutOnSingleProc, RepartitionType, ZoltanScheme;
  /* real-valued options forwarded to ML */
  PetscReal         Threshold, DampingFactor, EnergyMinimizationDropTol, MaxMinRatio, AuxThreshold;
  PetscBool         SpectralNormScheme_Anorm, BlockScaling, EnergyMinimizationCheap, Symmetrize, OldHierarchy, KeepAggInfo, Reusable, Repartition, Aux;
  PetscBool         reuse_interpolation; /* if true, keep aggregates/interpolation when the nonzero pattern is unchanged */
  PCMLNullSpaceType nulltype;            /* how the near-null space is supplied to aggregation */
  PetscMPIInt       size; /* size of communicator for pc->pmat */
  PetscInt          dim;  /* data from PCSetCoordinates(_ML) */
  PetscInt          nloc; /* number of local nodes (block rows) with coordinates */
  PetscReal        *coords; /* ML has a grid object for each level: the finest grid will point into coords */
} PC_ML;
68 
69 static int PetscML_getrow(ML_Operator *ML_data, int N_requested_rows, int requested_rows[], int allocated_space, int columns[], double values[], int row_lengths[])
70 {
71   PetscInt     m, i, j, k = 0, row, *aj;
72   PetscScalar *aa;
73   FineGridCtx *ml = (FineGridCtx *)ML_Get_MyGetrowData(ML_data);
74   Mat_SeqAIJ  *a  = (Mat_SeqAIJ *)ml->Aloc->data;
75 
76   if (MatGetSize(ml->Aloc, &m, NULL)) return 0;
77   for (i = 0; i < N_requested_rows; i++) {
78     row            = requested_rows[i];
79     row_lengths[i] = a->ilen[row];
80     if (allocated_space < k + row_lengths[i]) return 0;
81     if ((row >= 0) || (row <= (m - 1))) {
82       aj = a->j + a->i[row];
83       aa = a->a + a->i[row];
84       for (j = 0; j < row_lengths[i]; j++) {
85         columns[k]  = aj[j];
86         values[k++] = aa[j];
87       }
88     }
89   }
90   return 1;
91 }
92 
/*
  PetscML_comm - communication callback for ML: p holds the locally owned
  entries (length A->rmap->n) followed by space for ghost entries (total length
  ml->Aloc->cmap->n); this fills the ghost tail by scattering through the
  MPIAIJ matrix's existing Mvctx scatter. No-op on a single process.
*/
static PetscErrorCode PetscML_comm(double p[], void *ML_data)
{
  FineGridCtx       *ml = (FineGridCtx *)ML_data;
  Mat                A  = ml->A;
  Mat_MPIAIJ        *a  = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        size;
  PetscInt           i, in_length = A->rmap->n, out_length = ml->Aloc->cmap->n;
  const PetscScalar *array;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* no ghost entries in serial */

  /* temporarily view the owned part of p as a Vec so the scatter can read it */
  PetscCall(VecPlaceArray(ml->y, p));
  PetscCall(VecScatterBegin(a->Mvctx, ml->y, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->Mvctx, ml->y, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecResetArray(ml->y));
  /* append the received ghost values after the owned entries of p */
  PetscCall(VecGetArrayRead(a->lvec, &array));
  for (i = in_length; i < out_length; i++) p[i] = array[i - in_length];
  PetscCall(VecRestoreArrayRead(a->lvec, &array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
115 
116 /*
117   Needed since ML expects an int (*)(double *, void *) but PetscErrorCode may be an
118   enum. Instead of modifying PetscML_comm() it is easier to just wrap it
119 */
120 static int ML_PetscML_comm(double p[], void *ML_data)
121 {
122   return (int)PetscML_comm(p, ML_data);
123 }
124 
/*
  PetscML_matvec - matvec callback registered with ML: computes ap = Aloc * p.
  In parallel, p (owned entries only) is first copied into pwork and extended
  with ghost values via PetscML_comm() so the local product is complete.
  Raw ML arrays are wrapped in work vectors with VecPlaceArray (no copy).
*/
static int PetscML_matvec(ML_Operator *ML_data, int in_length, double p[], int out_length, double ap[])
{
  FineGridCtx *ml = (FineGridCtx *)ML_Get_MyMatvecData(ML_data);
  Mat          A = ml->A, Aloc = ml->Aloc;
  PetscMPIInt  size;
  PetscScalar *pwork = ml->pwork;
  PetscInt     i;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* serial: Aloc == A, p already has every needed entry */
    PetscCall(VecPlaceArray(ml->x, p));
  } else {
    /* parallel: gather ghost values into pwork before the local multiply */
    for (i = 0; i < in_length; i++) pwork[i] = p[i];
    PetscCall(PetscML_comm(pwork, ml));
    PetscCall(VecPlaceArray(ml->x, pwork));
  }
  PetscCall(VecPlaceArray(ml->y, ap));
  PetscCall(MatMult(Aloc, ml->x, ml->y));
  PetscCall(VecResetArray(ml->x));
  PetscCall(VecResetArray(ml->y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
148 
149 static PetscErrorCode MatMult_ML(Mat A, Vec x, Vec y)
150 {
151   Mat_MLShell       *shell;
152   PetscScalar       *yarray;
153   const PetscScalar *xarray;
154   PetscInt           x_length, y_length;
155 
156   PetscFunctionBegin;
157   PetscCall(MatShellGetContext(A, &shell));
158   PetscCall(VecGetArrayRead(x, &xarray));
159   PetscCall(VecGetArray(y, &yarray));
160   x_length = shell->mlmat->invec_leng;
161   y_length = shell->mlmat->outvec_leng;
162   PetscStackCallExternalVoid("ML_Operator_Apply", ML_Operator_Apply(shell->mlmat, x_length, (PetscScalar *)xarray, y_length, yarray));
163   PetscCall(VecRestoreArrayRead(x, &xarray));
164   PetscCall(VecRestoreArray(y, &yarray));
165   PetscFunctionReturn(PETSC_SUCCESS);
166 }
167 
168 /* newtype is ignored since only handles one case */
169 static PetscErrorCode MatConvert_MPIAIJ_ML(Mat A, MatType newtype, MatReuse scall, Mat *Aloc)
170 {
171   Mat_MPIAIJ  *mpimat = (Mat_MPIAIJ *)A->data;
172   Mat_SeqAIJ  *mat, *a = (Mat_SeqAIJ *)mpimat->A->data, *b = (Mat_SeqAIJ *)mpimat->B->data;
173   PetscInt    *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
174   PetscScalar *aa, *ba, *ca;
175   PetscInt     am = A->rmap->n, an = A->cmap->n, i, j, k;
176   PetscInt    *ci, *cj, ncols;
177 
178   PetscFunctionBegin;
179   PetscCheck(am == an, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "A must have a square diagonal portion, am: %d != an: %d", am, an);
180   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, (const PetscScalar **)&aa));
181   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, (const PetscScalar **)&ba));
182   if (scall == MAT_INITIAL_MATRIX) {
183     PetscCall(PetscMalloc1(1 + am, &ci));
184     ci[0] = 0;
185     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
186     PetscCall(PetscMalloc1(1 + ci[am], &cj));
187     PetscCall(PetscMalloc1(1 + ci[am], &ca));
188 
189     k = 0;
190     for (i = 0; i < am; i++) {
191       /* diagonal portion of A */
192       ncols = ai[i + 1] - ai[i];
193       for (j = 0; j < ncols; j++) {
194         cj[k]   = *aj++;
195         ca[k++] = *aa++;
196       }
197       /* off-diagonal portion of A */
198       ncols = bi[i + 1] - bi[i];
199       for (j = 0; j < ncols; j++) {
200         cj[k] = an + (*bj);
201         bj++;
202         ca[k++] = *ba++;
203       }
204     }
205     PetscCheck(k == ci[am], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "k: %d != ci[am]: %d", k, ci[am]);
206 
207     /* put together the new matrix */
208     an = mpimat->A->cmap->n + mpimat->B->cmap->n;
209     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, an, ci, cj, ca, Aloc));
210 
211     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
212     /* Since these are PETSc arrays, change flags to free them as necessary. */
213     mat          = (Mat_SeqAIJ *)(*Aloc)->data;
214     mat->free_a  = PETSC_TRUE;
215     mat->free_ij = PETSC_TRUE;
216 
217     mat->nonew = 0;
218   } else if (scall == MAT_REUSE_MATRIX) {
219     mat = (Mat_SeqAIJ *)(*Aloc)->data;
220     ci  = mat->i;
221     cj  = mat->j;
222     ca  = mat->a;
223     for (i = 0; i < am; i++) {
224       /* diagonal portion of A */
225       ncols = ai[i + 1] - ai[i];
226       for (j = 0; j < ncols; j++) *ca++ = *aa++;
227       /* off-diagonal portion of A */
228       ncols = bi[i + 1] - bi[i];
229       for (j = 0; j < ncols; j++) *ca++ = *ba++;
230     }
231   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
232   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, (const PetscScalar **)&aa));
233   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, (const PetscScalar **)&ba));
234   PetscFunctionReturn(PETSC_SUCCESS);
235 }
236 
237 static PetscErrorCode MatDestroy_ML(Mat A)
238 {
239   Mat_MLShell *shell;
240 
241   PetscFunctionBegin;
242   PetscCall(MatShellGetContext(A, &shell));
243   PetscCall(PetscFree(shell));
244   PetscFunctionReturn(PETSC_SUCCESS);
245 }
246 
/*
  MatWrapML_SeqAIJ - turn an ML_Operator into a SeqAIJ matrix.

  Rectangular operators (P and R, m != n) are stored by ML in CSR, so their
  arrays are handed directly to the SeqAIJ matrix (sorting columns in place
  first); ML retains ownership of those arrays. Square operators (level A
  matrices) are copied row by row through ML_Operator_Getrow().
*/
static PetscErrorCode MatWrapML_SeqAIJ(ML_Operator *mlmat, MatReuse reuse, Mat *newmat)
{
  struct ML_CSR_MSRdata *matdata = (struct ML_CSR_MSRdata *)mlmat->data;
  PetscInt               m = mlmat->outvec_leng, n = mlmat->invec_leng, *nnz = NULL, nz_max;
  PetscInt              *ml_cols = matdata->columns, *ml_rowptr = matdata->rowptr, *aj, i;
  PetscScalar           *ml_vals = matdata->values, *aa;

  PetscFunctionBegin;
  PetscCheck(mlmat->getrow, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "mlmat->getrow = NULL");
  if (m != n) { /* ML Pmat and Rmat are in CSR format. Pass array pointers into SeqAIJ matrix */
    if (reuse) {
      /* re-point the existing matrix at ML's (possibly reallocated) arrays */
      Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*newmat)->data;
      aij->i          = ml_rowptr;
      aij->j          = ml_cols;
      aij->a          = ml_vals;
    } else {
      /* sort ml_cols and ml_vals */
      PetscCall(PetscMalloc1(m + 1, &nnz));
      for (i = 0; i < m; i++) nnz[i] = ml_rowptr[i + 1] - ml_rowptr[i];
      aj = ml_cols;
      aa = ml_vals;
      for (i = 0; i < m; i++) {
        /* SeqAIJ requires sorted column indices within each row */
        PetscCall(PetscSortIntWithScalarArray(nnz[i], aj, aa));
        aj += nnz[i];
        aa += nnz[i];
      }
      PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, ml_rowptr, ml_cols, ml_vals, newmat));
      PetscCall(PetscFree(nnz));
    }
    PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* square operator: copy values out through ML's getrow interface */
  nz_max = PetscMax(1, mlmat->max_nz_per_row);
  PetscCall(PetscMalloc2(nz_max, &aa, nz_max, &aj));
  if (!reuse) {
    PetscCall(MatCreate(PETSC_COMM_SELF, newmat));
    PetscCall(MatSetSizes(*newmat, m, n, PETSC_DECIDE, PETSC_DECIDE));
    PetscCall(MatSetType(*newmat, MATSEQAIJ));
    /* keep track of block size for A matrices */
    PetscCall(MatSetBlockSize(*newmat, mlmat->num_PDEs));

    /* first pass: row lengths for exact preallocation */
    PetscCall(PetscMalloc1(m, &nnz));
    for (i = 0; i < m; i++) PetscStackCallExternalVoid("ML_Operator_Getrow", ML_Operator_Getrow(mlmat, 1, &i, nz_max, aj, aa, &nnz[i]));
    PetscCall(MatSeqAIJSetPreallocation(*newmat, 0, nnz));
  }
  /* second pass: insert the values (nnz stays NULL on the reuse path) */
  for (i = 0; i < m; i++) {
    PetscInt ncols;

    PetscStackCallExternalVoid("ML_Operator_Getrow", ML_Operator_Getrow(mlmat, 1, &i, nz_max, aj, aa, &ncols));
    PetscCall(MatSetValues(*newmat, 1, &i, ncols, aj, aa, INSERT_VALUES));
  }
  PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree2(aa, aj));
  PetscCall(PetscFree(nnz)); /* no-op when reuse (nnz == NULL) */
  PetscFunctionReturn(PETSC_SUCCESS);
}
307 
308 static PetscErrorCode MatWrapML_SHELL(ML_Operator *mlmat, MatReuse reuse, Mat *newmat)
309 {
310   PetscInt     m, n;
311   ML_Comm     *MLcomm;
312   Mat_MLShell *shellctx;
313 
314   PetscFunctionBegin;
315   m = mlmat->outvec_leng;
316   n = mlmat->invec_leng;
317 
318   if (reuse) {
319     PetscCall(MatShellGetContext(*newmat, &shellctx));
320     shellctx->mlmat = mlmat;
321     PetscFunctionReturn(PETSC_SUCCESS);
322   }
323 
324   MLcomm = mlmat->comm;
325 
326   PetscCall(PetscNew(&shellctx));
327   PetscCall(MatCreateShell(MLcomm->USR_comm, m, n, PETSC_DETERMINE, PETSC_DETERMINE, shellctx, newmat));
328   PetscCall(MatShellSetOperation(*newmat, MATOP_MULT, (void (*)(void))MatMult_ML));
329   PetscCall(MatShellSetOperation(*newmat, MATOP_DESTROY, (void (*)(void))MatDestroy_ML));
330 
331   shellctx->A     = *newmat;
332   shellctx->mlmat = mlmat;
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 static PetscErrorCode MatWrapML_MPIAIJ(ML_Operator *mlmat, MatReuse reuse, Mat *newmat)
337 {
338   PetscInt    *aj;
339   PetscScalar *aa;
340   PetscInt     i, j, *gordering;
341   PetscInt     m = mlmat->outvec_leng, n, nz_max, row;
342   Mat          A;
343 
344   PetscFunctionBegin;
345   PetscCheck(mlmat->getrow, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "mlmat->getrow = NULL");
346   n = mlmat->invec_leng;
347   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "m %d must equal to n %d", m, n);
348 
349   /* create global row numbering for a ML_Operator */
350   PetscStackCallExternalVoid("ML_build_global_numbering", ML_build_global_numbering(mlmat, &gordering, "rows"));
351 
352   nz_max = PetscMax(1, mlmat->max_nz_per_row) + 1;
353   PetscCall(PetscMalloc2(nz_max, &aa, nz_max, &aj));
354   if (reuse) {
355     A = *newmat;
356   } else {
357     PetscInt *nnzA, *nnzB, *nnz;
358     PetscInt  rstart;
359     PetscCall(MatCreate(mlmat->comm->USR_comm, &A));
360     PetscCall(MatSetSizes(A, m, n, PETSC_DECIDE, PETSC_DECIDE));
361     PetscCall(MatSetType(A, MATMPIAIJ));
362     /* keep track of block size for A matrices */
363     PetscCall(MatSetBlockSize(A, mlmat->num_PDEs));
364     PetscCall(PetscMalloc3(m, &nnzA, m, &nnzB, m, &nnz));
365     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, mlmat->comm->USR_comm));
366     rstart -= m;
367 
368     for (i = 0; i < m; i++) {
369       row = gordering[i] - rstart;
370       PetscStackCallExternalVoid("ML_Operator_Getrow", ML_Operator_Getrow(mlmat, 1, &i, nz_max, aj, aa, &nnz[i]));
371       nnzA[row] = 0;
372       for (j = 0; j < nnz[i]; j++) {
373         if (aj[j] < m) nnzA[row]++;
374       }
375       nnzB[row] = nnz[i] - nnzA[row];
376     }
377     PetscCall(MatMPIAIJSetPreallocation(A, 0, nnzA, 0, nnzB));
378     PetscCall(PetscFree3(nnzA, nnzB, nnz));
379   }
380   for (i = 0; i < m; i++) {
381     PetscInt ncols;
382     row = gordering[i];
383 
384     PetscStackCallExternalVoid(",ML_Operator_Getrow", ML_Operator_Getrow(mlmat, 1, &i, nz_max, aj, aa, &ncols));
385     for (j = 0; j < ncols; j++) aj[j] = gordering[aj[j]];
386     PetscCall(MatSetValues(A, 1, &row, ncols, aj, aa, INSERT_VALUES));
387   }
388   PetscStackCallExternalVoid("ML_free", ML_free(gordering));
389   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
390   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
391   *newmat = A;
392 
393   PetscCall(PetscFree2(aa, aj));
394   PetscFunctionReturn(PETSC_SUCCESS);
395 }
396 
/*
   PCSetCoordinates_ML - store node coordinates in the PC_ML context in
   column-major order (all x's, then all y's, ...) for later handoff to ML.

   Input Parameters:
   .  pc     - the preconditioner context
   .  ndm    - spatial dimension of the coordinates
   .  a_nloc - number of local nodes (block rows) or local rows supplied
   .  coords - interleaved coordinates, ndm values per node
*/
static PetscErrorCode PCSetCoordinates_ML(PC pc, PetscInt ndm, PetscInt a_nloc, PetscReal *coords)
{
  PC_MG   *mg    = (PC_MG *)pc->data;
  PC_ML   *pc_ml = (PC_ML *)mg->innerctx;
  PetscInt arrsz, oldarrsz, bs, my0, kk, ii, nloc, Iend, aloc;
  Mat      Amat = pc->pmat;

  /* this function copied and modified from PCSetCoordinates_GEO -TGI */
  PetscFunctionBegin;
  PetscValidHeaderSpecific(Amat, MAT_CLASSID, 1);
  PetscCall(MatGetBlockSize(Amat, &bs));

  PetscCall(MatGetOwnershipRange(Amat, &my0, &Iend));
  aloc = (Iend - my0);      /* local rows */
  nloc = (Iend - my0) / bs; /* local nodes (block rows) */

  /* caller may pass the count per node (nloc) or per row (aloc) */
  PetscCheck((nloc == a_nloc) || (aloc == a_nloc), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Number of local blocks %" PetscInt_FMT " must be %" PetscInt_FMT " or %" PetscInt_FMT ".", a_nloc, nloc, aloc);

  oldarrsz    = pc_ml->dim * pc_ml->nloc;
  pc_ml->dim  = ndm;
  pc_ml->nloc = nloc;
  arrsz       = ndm * nloc;

  /* create data - syntactic sugar that should be refactored at some point */
  if (pc_ml->coords == 0 || (oldarrsz != arrsz)) {
    PetscCall(PetscFree(pc_ml->coords));
    PetscCall(PetscMalloc1(arrsz, &pc_ml->coords));
  }
  /* sentinel fill so unset entries are recognizable */
  for (kk = 0; kk < arrsz; kk++) pc_ml->coords[kk] = -999.;
  /* copy data in - column-oriented */
  if (nloc == a_nloc) {
    for (kk = 0; kk < nloc; kk++) {
      for (ii = 0; ii < ndm; ii++) pc_ml->coords[ii * nloc + kk] = coords[kk * ndm + ii];
    }
  } else { /* assumes the coordinates are blocked */
    /* NOTE(review): this reads coords[bs*kk*ndm + ii], i.e. only the first
       row of each block contributes — presumably all rows of a block share
       one node's coordinates; verify against callers */
    for (kk = 0; kk < nloc; kk++) {
      for (ii = 0; ii < ndm; ii++) pc_ml->coords[ii * nloc + kk] = coords[bs * kk * ndm + ii];
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
444 
445 static PetscErrorCode PCReset_ML(PC pc)
446 {
447   PC_MG   *mg    = (PC_MG *)pc->data;
448   PC_ML   *pc_ml = (PC_ML *)mg->innerctx;
449   PetscInt level, fine_level = pc_ml->Nlevels - 1, dim = pc_ml->dim;
450 
451   PetscFunctionBegin;
452   if (dim) {
453     for (level = 0; level <= fine_level; level++) PetscCall(VecDestroy(&pc_ml->gridctx[level].coords));
454     if (pc_ml->ml_object && pc_ml->ml_object->Grid) {
455       ML_Aggregate_Viz_Stats *grid_info = (ML_Aggregate_Viz_Stats *)pc_ml->ml_object->Grid[0].Grid;
456       grid_info->x                      = 0; /* do this so ML doesn't try to free coordinates */
457       grid_info->y                      = 0;
458       grid_info->z                      = 0;
459       PetscStackCallExternalVoid("ML_Operator_Getrow", ML_Aggregate_VizAndStats_Clean(pc_ml->ml_object));
460     }
461   }
462   PetscStackCallExternalVoid("ML_Aggregate_Destroy", ML_Aggregate_Destroy(&pc_ml->agg_object));
463   PetscStackCallExternalVoid("ML_Aggregate_Destroy", ML_Destroy(&pc_ml->ml_object));
464 
465   if (pc_ml->PetscMLdata) {
466     PetscCall(PetscFree(pc_ml->PetscMLdata->pwork));
467     PetscCall(MatDestroy(&pc_ml->PetscMLdata->Aloc));
468     PetscCall(VecDestroy(&pc_ml->PetscMLdata->x));
469     PetscCall(VecDestroy(&pc_ml->PetscMLdata->y));
470   }
471   PetscCall(PetscFree(pc_ml->PetscMLdata));
472 
473   if (pc_ml->gridctx) {
474     for (level = 0; level < fine_level; level++) {
475       if (pc_ml->gridctx[level].A) PetscCall(MatDestroy(&pc_ml->gridctx[level].A));
476       if (pc_ml->gridctx[level].P) PetscCall(MatDestroy(&pc_ml->gridctx[level].P));
477       if (pc_ml->gridctx[level].R) PetscCall(MatDestroy(&pc_ml->gridctx[level].R));
478       if (pc_ml->gridctx[level].x) PetscCall(VecDestroy(&pc_ml->gridctx[level].x));
479       if (pc_ml->gridctx[level].b) PetscCall(VecDestroy(&pc_ml->gridctx[level].b));
480       if (pc_ml->gridctx[level + 1].r) PetscCall(VecDestroy(&pc_ml->gridctx[level + 1].r));
481     }
482   }
483   PetscCall(PetscFree(pc_ml->gridctx));
484   PetscCall(PetscFree(pc_ml->coords));
485 
486   pc_ml->dim  = 0;
487   pc_ml->nloc = 0;
488   PetscCall(PCReset_MG(pc));
489   PetscFunctionReturn(PETSC_SUCCESS);
490 }
491 
492 /*
493    PCSetUp_ML - Prepares for the use of the ML preconditioner
494                     by setting data structures and options.
495 
496    Input Parameter:
497 .  pc - the preconditioner context
498 
499    Application Interface Routine: PCSetUp()
500 
501    Note:
502    The interface routine PCSetUp() is not usually called directly by
503    the user, but instead is called by PCApply() if necessary.
504 */
505 static PetscErrorCode PCSetUp_ML(PC pc)
506 {
507   PetscMPIInt      size;
508   FineGridCtx     *PetscMLdata;
509   ML              *ml_object;
510   ML_Aggregate    *agg_object;
511   ML_Operator     *mlmat;
512   PetscInt         nlocal_allcols, Nlevels, mllevel, level, level1, m, fine_level, bs;
513   Mat              A, Aloc;
514   GridCtx         *gridctx;
515   PC_MG           *mg    = (PC_MG *)pc->data;
516   PC_ML           *pc_ml = (PC_ML *)mg->innerctx;
517   PetscBool        isSeq, isMPI;
518   KSP              smoother;
519   PC               subpc;
520   PetscInt         mesh_level, old_mesh_level;
521   MatInfo          info;
522   static PetscBool cite = PETSC_FALSE;
523 
524   PetscFunctionBegin;
525   PetscCall(PetscCitationsRegister("@TechReport{ml_users_guide,\n  author = {M. Sala and J.J. Hu and R.S. Tuminaro},\n  title = {{ML}3.1 {S}moothed {A}ggregation {U}ser's {G}uide},\n  institution =  {Sandia National Laboratories},\n  number = "
526                                    "{SAND2004-4821},\n  year = 2004\n}\n",
527                                    &cite));
528   A = pc->pmat;
529   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
530 
531   if (pc->setupcalled) {
532     if (pc->flag == SAME_NONZERO_PATTERN && pc_ml->reuse_interpolation) {
533       /*
534        Reuse interpolaton instead of recomputing aggregates and updating the whole hierarchy. This is less expensive for
535        multiple solves in which the matrix is not changing too quickly.
536        */
537       ml_object             = pc_ml->ml_object;
538       gridctx               = pc_ml->gridctx;
539       Nlevels               = pc_ml->Nlevels;
540       fine_level            = Nlevels - 1;
541       gridctx[fine_level].A = A;
542 
543       PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeq));
544       PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isMPI));
545       PetscCheck(isMPI || isSeq, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Matrix type '%s' cannot be used with ML. ML can only handle AIJ matrices.", ((PetscObject)A)->type_name);
546       if (isMPI) {
547         PetscCall(MatConvert_MPIAIJ_ML(A, NULL, MAT_INITIAL_MATRIX, &Aloc));
548       } else {
549         Aloc = A;
550         PetscCall(PetscObjectReference((PetscObject)Aloc));
551       }
552 
553       PetscCall(MatGetSize(Aloc, &m, &nlocal_allcols));
554       PetscMLdata = pc_ml->PetscMLdata;
555       PetscCall(MatDestroy(&PetscMLdata->Aloc));
556       PetscMLdata->A    = A;
557       PetscMLdata->Aloc = Aloc;
558       PetscStackCallExternalVoid("ML_Aggregate_Destroy", ML_Init_Amatrix(ml_object, 0, m, m, PetscMLdata));
559       PetscStackCallExternalVoid("ML_Set_Amatrix_Matvec", ML_Set_Amatrix_Matvec(ml_object, 0, PetscML_matvec));
560 
561       mesh_level = ml_object->ML_finest_level;
562       while (ml_object->SingleLevel[mesh_level].Rmat->to) {
563         old_mesh_level = mesh_level;
564         mesh_level     = ml_object->SingleLevel[mesh_level].Rmat->to->levelnum;
565 
566         /* clean and regenerate A */
567         mlmat = &ml_object->Amat[mesh_level];
568         PetscStackCallExternalVoid("ML_Operator_Clean", ML_Operator_Clean(mlmat));
569         PetscStackCallExternalVoid("ML_Operator_Init", ML_Operator_Init(mlmat, ml_object->comm));
570         PetscStackCallExternalVoid("ML_Gen_AmatrixRAP", ML_Gen_AmatrixRAP(ml_object, old_mesh_level, mesh_level));
571       }
572 
573       level = fine_level - 1;
574       if (size == 1) { /* convert ML P, R and A into seqaij format */
575         for (mllevel = 1; mllevel < Nlevels; mllevel++) {
576           mlmat = &ml_object->Amat[mllevel];
577           PetscCall(MatWrapML_SeqAIJ(mlmat, MAT_REUSE_MATRIX, &gridctx[level].A));
578           level--;
579         }
580       } else { /* convert ML P and R into shell format, ML A into mpiaij format */
581         for (mllevel = 1; mllevel < Nlevels; mllevel++) {
582           mlmat = &ml_object->Amat[mllevel];
583           PetscCall(MatWrapML_MPIAIJ(mlmat, MAT_REUSE_MATRIX, &gridctx[level].A));
584           level--;
585         }
586       }
587 
588       for (level = 0; level < fine_level; level++) {
589         if (level > 0) PetscCall(PCMGSetResidual(pc, level, PCMGResidualDefault, gridctx[level].A));
590         PetscCall(KSPSetOperators(gridctx[level].ksp, gridctx[level].A, gridctx[level].A));
591       }
592       PetscCall(PCMGSetResidual(pc, fine_level, PCMGResidualDefault, gridctx[fine_level].A));
593       PetscCall(KSPSetOperators(gridctx[fine_level].ksp, gridctx[level].A, gridctx[fine_level].A));
594 
595       PetscCall(PCSetUp_MG(pc));
596       PetscFunctionReturn(PETSC_SUCCESS);
597     } else {
598       /* since ML can change the size of vectors/matrices at any level we must destroy everything */
599       PetscCall(PCReset_ML(pc));
600     }
601   }
602 
603   /* setup special features of PCML */
604   /* convert A to Aloc to be used by ML at fine grid */
605   pc_ml->size = size;
606   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeq));
607   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isMPI));
608   PetscCheck(isMPI || isSeq, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Matrix type '%s' cannot be used with ML. ML can only handle AIJ matrices.", ((PetscObject)A)->type_name);
609   if (isMPI) {
610     PetscCall(MatConvert_MPIAIJ_ML(A, NULL, MAT_INITIAL_MATRIX, &Aloc));
611   } else {
612     Aloc = A;
613     PetscCall(PetscObjectReference((PetscObject)Aloc));
614   }
615 
616   /* create and initialize struct 'PetscMLdata' */
617   PetscCall(PetscNew(&PetscMLdata));
618   pc_ml->PetscMLdata = PetscMLdata;
619   PetscCall(PetscMalloc1(Aloc->cmap->n + 1, &PetscMLdata->pwork));
620 
621   PetscCall(MatCreateVecs(Aloc, &PetscMLdata->x, &PetscMLdata->y));
622 
623   PetscMLdata->A    = A;
624   PetscMLdata->Aloc = Aloc;
625   if (pc_ml->dim) { /* create vecs around the coordinate data given */
626     PetscInt   i, j, dim = pc_ml->dim;
627     PetscInt   nloc = pc_ml->nloc, nlocghost;
628     PetscReal *ghostedcoords;
629 
630     PetscCall(MatGetBlockSize(A, &bs));
631     nlocghost = Aloc->cmap->n / bs;
632     PetscCall(PetscMalloc1(dim * nlocghost, &ghostedcoords));
633     for (i = 0; i < dim; i++) {
634       /* copy coordinate values into first component of pwork */
635       for (j = 0; j < nloc; j++) PetscMLdata->pwork[bs * j] = pc_ml->coords[nloc * i + j];
636       /* get the ghost values */
637       PetscCall(PetscML_comm(PetscMLdata->pwork, PetscMLdata));
638       /* write into the vector */
639       for (j = 0; j < nlocghost; j++) ghostedcoords[i * nlocghost + j] = PetscMLdata->pwork[bs * j];
640     }
641     /* replace the original coords with the ghosted coords, because these are
642      * what ML needs */
643     PetscCall(PetscFree(pc_ml->coords));
644     pc_ml->coords = ghostedcoords;
645   }
646 
647   /* create ML discretization matrix at fine grid */
648   /* ML requires input of fine-grid matrix. It determines nlevels. */
649   PetscCall(MatGetSize(Aloc, &m, &nlocal_allcols));
650   PetscCall(MatGetBlockSize(A, &bs));
651   PetscStackCallExternalVoid("ML_Create", ML_Create(&ml_object, pc_ml->MaxNlevels));
652   PetscStackCallExternalVoid("ML_Comm_Set_UsrComm", ML_Comm_Set_UsrComm(ml_object->comm, PetscObjectComm((PetscObject)A)));
653   pc_ml->ml_object = ml_object;
654   PetscStackCallExternalVoid("ML_Init_Amatrix", ML_Init_Amatrix(ml_object, 0, m, m, PetscMLdata));
655   PetscStackCallExternalVoid("ML_Set_Amatrix_Getrow", ML_Set_Amatrix_Getrow(ml_object, 0, PetscML_getrow, ML_PetscML_comm, nlocal_allcols));
656   PetscStackCallExternalVoid("ML_Set_Amatrix_Matvec", ML_Set_Amatrix_Matvec(ml_object, 0, PetscML_matvec));
657 
658   PetscStackCallExternalVoid("ML_Set_Symmetrize", ML_Set_Symmetrize(ml_object, pc_ml->Symmetrize ? ML_YES : ML_NO));
659 
660   /* aggregation */
661   PetscStackCallExternalVoid("ML_Aggregate_Create", ML_Aggregate_Create(&agg_object));
662   pc_ml->agg_object = agg_object;
663 
664   {
665     MatNullSpace mnull;
666     PetscCall(MatGetNearNullSpace(A, &mnull));
667     if (pc_ml->nulltype == PCML_NULLSPACE_AUTO) {
668       if (mnull) pc_ml->nulltype = PCML_NULLSPACE_USER;
669       else if (bs > 1) pc_ml->nulltype = PCML_NULLSPACE_BLOCK;
670       else pc_ml->nulltype = PCML_NULLSPACE_SCALAR;
671     }
672     switch (pc_ml->nulltype) {
673     case PCML_NULLSPACE_USER: {
674       PetscScalar       *nullvec;
675       const PetscScalar *v;
676       PetscBool          has_const;
677       PetscInt           i, j, mlocal, nvec, M;
678       const Vec         *vecs;
679 
680       PetscCheck(mnull, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Must provide explicit null space using MatSetNearNullSpace() to use user-specified null space");
681       PetscCall(MatGetSize(A, &M, NULL));
682       PetscCall(MatGetLocalSize(Aloc, &mlocal, NULL));
683       PetscCall(MatNullSpaceGetVecs(mnull, &has_const, &nvec, &vecs));
684       PetscCall(PetscMalloc1((nvec + !!has_const) * mlocal, &nullvec));
685       if (has_const)
686         for (i = 0; i < mlocal; i++) nullvec[i] = 1.0 / M;
687       for (i = 0; i < nvec; i++) {
688         PetscCall(VecGetArrayRead(vecs[i], &v));
689         for (j = 0; j < mlocal; j++) nullvec[(i + !!has_const) * mlocal + j] = v[j];
690         PetscCall(VecRestoreArrayRead(vecs[i], &v));
691       }
692       PetscStackCallExternalVoid("ML_Aggregate_Set_NullSpace", ML_Aggregate_Set_NullSpace(agg_object, bs, nvec + !!has_const, nullvec, mlocal));
693       PetscCall(PetscFree(nullvec));
694     } break;
695     case PCML_NULLSPACE_BLOCK:
696       PetscStackCallExternalVoid("ML_Aggregate_Set_NullSpace", ML_Aggregate_Set_NullSpace(agg_object, bs, bs, 0, 0));
697       break;
698     case PCML_NULLSPACE_SCALAR:
699       break;
700     default:
701       SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown null space type");
702     }
703   }
704   PetscStackCallExternalVoid("ML_Aggregate_Set_MaxCoarseSize", ML_Aggregate_Set_MaxCoarseSize(agg_object, pc_ml->MaxCoarseSize));
705   /* set options */
706   switch (pc_ml->CoarsenScheme) {
707   case 1:
708     PetscStackCallExternalVoid("ML_Aggregate_Set_CoarsenScheme_Coupled", ML_Aggregate_Set_CoarsenScheme_Coupled(agg_object));
709     break;
710   case 2:
711     PetscStackCallExternalVoid("ML_Aggregate_Set_CoarsenScheme_MIS", ML_Aggregate_Set_CoarsenScheme_MIS(agg_object));
712     break;
713   case 3:
714     PetscStackCallExternalVoid("ML_Aggregate_Set_CoarsenScheme_METIS", ML_Aggregate_Set_CoarsenScheme_METIS(agg_object));
715     break;
716   }
717   PetscStackCallExternalVoid("ML_Aggregate_Set_Threshold", ML_Aggregate_Set_Threshold(agg_object, pc_ml->Threshold));
718   PetscStackCallExternalVoid("ML_Aggregate_Set_DampingFactor", ML_Aggregate_Set_DampingFactor(agg_object, pc_ml->DampingFactor));
719   if (pc_ml->SpectralNormScheme_Anorm) PetscStackCallExternalVoid("ML_Set_SpectralNormScheme_Anorm", ML_Set_SpectralNormScheme_Anorm(ml_object));
720   agg_object->keep_agg_information      = (int)pc_ml->KeepAggInfo;
721   agg_object->keep_P_tentative          = (int)pc_ml->Reusable;
722   agg_object->block_scaled_SA           = (int)pc_ml->BlockScaling;
723   agg_object->minimizing_energy         = (int)pc_ml->EnergyMinimization;
724   agg_object->minimizing_energy_droptol = (double)pc_ml->EnergyMinimizationDropTol;
725   agg_object->cheap_minimizing_energy   = (int)pc_ml->EnergyMinimizationCheap;
726 
727   if (pc_ml->Aux) {
728     PetscCheck(pc_ml->dim, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Auxiliary matrix requires coordinates");
729     ml_object->Amat[0].aux_data->threshold = pc_ml->AuxThreshold;
730     ml_object->Amat[0].aux_data->enable    = 1;
731     ml_object->Amat[0].aux_data->max_level = 10;
732     ml_object->Amat[0].num_PDEs            = bs;
733   }
734 
735   PetscCall(MatGetInfo(A, MAT_LOCAL, &info));
736   ml_object->Amat[0].N_nonzeros = (int)info.nz_used;
737 
738   if (pc_ml->dim) {
739     PetscInt                i, dim = pc_ml->dim;
740     ML_Aggregate_Viz_Stats *grid_info;
741     PetscInt                nlocghost;
742 
743     PetscCall(MatGetBlockSize(A, &bs));
744     nlocghost = Aloc->cmap->n / bs;
745 
746     PetscStackCallExternalVoid("ML_Aggregate_VizAndStats_Setup(", ML_Aggregate_VizAndStats_Setup(ml_object)); /* create ml info for coords */
747     grid_info = (ML_Aggregate_Viz_Stats *)ml_object->Grid[0].Grid;
748     for (i = 0; i < dim; i++) {
749       /* set the finest level coordinates to point to the column-order array
750        * in pc_ml */
751       /* NOTE: must point away before VizAndStats_Clean so ML doesn't free */
752       switch (i) {
753       case 0:
754         grid_info->x = pc_ml->coords + nlocghost * i;
755         break;
756       case 1:
757         grid_info->y = pc_ml->coords + nlocghost * i;
758         break;
759       case 2:
760         grid_info->z = pc_ml->coords + nlocghost * i;
761         break;
762       default:
763         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_SIZ, "PCML coordinate dimension must be <= 3");
764       }
765     }
766     grid_info->Ndim = dim;
767   }
768 
769   /* repartitioning */
770   if (pc_ml->Repartition) {
771     PetscStackCallExternalVoid("ML_Repartition_Activate", ML_Repartition_Activate(ml_object));
772     PetscStackCallExternalVoid("ML_Repartition_Set_LargestMinMaxRatio", ML_Repartition_Set_LargestMinMaxRatio(ml_object, pc_ml->MaxMinRatio));
773     PetscStackCallExternalVoid("ML_Repartition_Set_MinPerProc", ML_Repartition_Set_MinPerProc(ml_object, pc_ml->MinPerProc));
774     PetscStackCallExternalVoid("ML_Repartition_Set_PutOnSingleProc", ML_Repartition_Set_PutOnSingleProc(ml_object, pc_ml->PutOnSingleProc));
775 #if 0 /* Function not yet defined in ml-6.2 */
776     /* I'm not sure what compatibility issues might crop up if we partitioned
777      * on the finest level, so to be safe repartition starts on the next
778      * finest level (reflection default behavior in
779      * ml_MultiLevelPreconditioner) */
780     PetscStackCallExternalVoid("ML_Repartition_Set_StartLevel",ML_Repartition_Set_StartLevel(ml_object,1));
781 #endif
782 
783     if (!pc_ml->RepartitionType) {
784       PetscInt i;
785 
786       PetscCheck(pc_ml->dim, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "ML Zoltan repartitioning requires coordinates");
787       PetscStackCallExternalVoid("ML_Repartition_Set_Partitioner", ML_Repartition_Set_Partitioner(ml_object, ML_USEZOLTAN));
788       PetscStackCallExternalVoid("ML_Aggregate_Set_Dimensions", ML_Aggregate_Set_Dimensions(agg_object, pc_ml->dim));
789 
790       for (i = 0; i < ml_object->ML_num_levels; i++) {
791         ML_Aggregate_Viz_Stats *grid_info = (ML_Aggregate_Viz_Stats *)ml_object->Grid[i].Grid;
792         grid_info->zoltan_type            = pc_ml->ZoltanScheme + 1; /* ml numbers options 1, 2, 3 */
793         /* defaults from ml_agg_info.c */
794         grid_info->zoltan_estimated_its = 40; /* only relevant to hypergraph / fast hypergraph */
795         grid_info->zoltan_timers        = 0;
796         grid_info->smoothing_steps      = 4; /* only relevant to hypergraph / fast hypergraph */
797       }
798     } else {
799       PetscStackCallExternalVoid("ML_Repartition_Set_Partitioner", ML_Repartition_Set_Partitioner(ml_object, ML_USEPARMETIS));
800     }
801   }
802 
803   if (pc_ml->OldHierarchy) {
804     PetscStackCallExternalVoid("ML_Gen_MGHierarchy_UsingAggregation", Nlevels = ML_Gen_MGHierarchy_UsingAggregation(ml_object, 0, ML_INCREASING, agg_object));
805   } else {
806     PetscStackCallExternalVoid("ML_Gen_MultiLevelHierarchy_UsingAggregation", Nlevels = ML_Gen_MultiLevelHierarchy_UsingAggregation(ml_object, 0, ML_INCREASING, agg_object));
807   }
808   PetscCheck(Nlevels > 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Nlevels %d must > 0", Nlevels);
809   pc_ml->Nlevels = Nlevels;
810   fine_level     = Nlevels - 1;
811 
812   PetscCall(PCMGSetLevels(pc, Nlevels, NULL));
813   /* set default smoothers */
814   for (level = 1; level <= fine_level; level++) {
815     PetscCall(PCMGGetSmoother(pc, level, &smoother));
816     PetscCall(KSPSetType(smoother, KSPRICHARDSON));
817     PetscCall(KSPGetPC(smoother, &subpc));
818     PetscCall(PCSetType(subpc, PCSOR));
819   }
820   PetscObjectOptionsBegin((PetscObject)pc);
821   PetscCall(PCSetFromOptions_MG(pc, PetscOptionsObject)); /* should be called in PCSetFromOptions_ML(), but cannot be called prior to PCMGSetLevels() */
822   PetscOptionsEnd();
823 
824   PetscCall(PetscMalloc1(Nlevels, &gridctx));
825 
826   pc_ml->gridctx = gridctx;
827 
828   /* wrap ML matrices by PETSc shell matrices at coarsened grids.
829      Level 0 is the finest grid for ML, but coarsest for PETSc! */
830   gridctx[fine_level].A = A;
831 
832   level = fine_level - 1;
833   /* TODO: support for GPUs */
834   if (size == 1) { /* convert ML P, R and A into seqaij format */
835     for (mllevel = 1; mllevel < Nlevels; mllevel++) {
836       mlmat = &ml_object->Pmat[mllevel];
837       PetscCall(MatWrapML_SeqAIJ(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].P));
838       mlmat = &ml_object->Rmat[mllevel - 1];
839       PetscCall(MatWrapML_SeqAIJ(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].R));
840 
841       mlmat = &ml_object->Amat[mllevel];
842       PetscCall(MatWrapML_SeqAIJ(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].A));
843       level--;
844     }
845   } else { /* convert ML P and R into shell format, ML A into mpiaij format */
846     for (mllevel = 1; mllevel < Nlevels; mllevel++) {
847       mlmat = &ml_object->Pmat[mllevel];
848       PetscCall(MatWrapML_SHELL(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].P));
849       mlmat = &ml_object->Rmat[mllevel - 1];
850       PetscCall(MatWrapML_SHELL(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].R));
851 
852       mlmat = &ml_object->Amat[mllevel];
853       PetscCall(MatWrapML_MPIAIJ(mlmat, MAT_INITIAL_MATRIX, &gridctx[level].A));
854       level--;
855     }
856   }
857 
858   /* create vectors and ksp at all levels */
859   for (level = 0; level < fine_level; level++) {
860     level1 = level + 1;
861 
862     PetscCall(MatCreateVecs(gridctx[level].A, &gridctx[level].x, &gridctx[level].b));
863     PetscCall(MatCreateVecs(gridctx[level1].A, NULL, &gridctx[level1].r));
864     PetscCall(PCMGSetX(pc, level, gridctx[level].x));
865     PetscCall(PCMGSetRhs(pc, level, gridctx[level].b));
866     PetscCall(PCMGSetR(pc, level1, gridctx[level1].r));
867 
868     if (level == 0) {
869       PetscCall(PCMGGetCoarseSolve(pc, &gridctx[level].ksp));
870     } else {
871       PetscCall(PCMGGetSmoother(pc, level, &gridctx[level].ksp));
872     }
873   }
874   PetscCall(PCMGGetSmoother(pc, fine_level, &gridctx[fine_level].ksp));
875 
876   /* create coarse level and the interpolation between the levels */
877   for (level = 0; level < fine_level; level++) {
878     level1 = level + 1;
879 
880     PetscCall(PCMGSetInterpolation(pc, level1, gridctx[level].P));
881     PetscCall(PCMGSetRestriction(pc, level1, gridctx[level].R));
882     if (level > 0) PetscCall(PCMGSetResidual(pc, level, PCMGResidualDefault, gridctx[level].A));
883     PetscCall(KSPSetOperators(gridctx[level].ksp, gridctx[level].A, gridctx[level].A));
884   }
885   PetscCall(PCMGSetResidual(pc, fine_level, PCMGResidualDefault, gridctx[fine_level].A));
886   PetscCall(KSPSetOperators(gridctx[fine_level].ksp, gridctx[level].A, gridctx[fine_level].A));
887 
888   /* put coordinate info in levels */
889   if (pc_ml->dim) {
890     PetscInt   i, j, dim = pc_ml->dim;
891     PetscInt   bs, nloc;
892     PC         subpc;
893     PetscReal *array;
894 
895     level = fine_level;
896     for (mllevel = 0; mllevel < Nlevels; mllevel++) {
897       ML_Aggregate_Viz_Stats *grid_info = (ML_Aggregate_Viz_Stats *)ml_object->Amat[mllevel].to->Grid->Grid;
898       MPI_Comm                comm      = ((PetscObject)gridctx[level].A)->comm;
899 
900       PetscCall(MatGetBlockSize(gridctx[level].A, &bs));
901       PetscCall(MatGetLocalSize(gridctx[level].A, NULL, &nloc));
902       nloc /= bs; /* number of local nodes */
903 
904       PetscCall(VecCreate(comm, &gridctx[level].coords));
905       PetscCall(VecSetSizes(gridctx[level].coords, dim * nloc, PETSC_DECIDE));
906       PetscCall(VecSetType(gridctx[level].coords, VECMPI));
907       PetscCall(VecGetArray(gridctx[level].coords, &array));
908       for (j = 0; j < nloc; j++) {
909         for (i = 0; i < dim; i++) {
910           switch (i) {
911           case 0:
912             array[dim * j + i] = grid_info->x[j];
913             break;
914           case 1:
915             array[dim * j + i] = grid_info->y[j];
916             break;
917           case 2:
918             array[dim * j + i] = grid_info->z[j];
919             break;
920           default:
921             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_SIZ, "PCML coordinate dimension must be <= 3");
922           }
923         }
924       }
925 
926       /* passing coordinates to smoothers/coarse solver, should they need them */
927       PetscCall(KSPGetPC(gridctx[level].ksp, &subpc));
928       PetscCall(PCSetCoordinates(subpc, dim, nloc, array));
929       PetscCall(VecRestoreArray(gridctx[level].coords, &array));
930       level--;
931     }
932   }
933 
934   /* setupcalled is set to 0 so that MG is setup from scratch */
935   pc->setupcalled = PETSC_FALSE;
936   PetscCall(PCSetUp_MG(pc));
937   PetscFunctionReturn(PETSC_SUCCESS);
938 }
939 
940 /*
941    PCDestroy_ML - Destroys the private context for the ML preconditioner
942    that was created with PCCreate_ML().
943 
944    Input Parameter:
945 .  pc - the preconditioner context
946 
947    Application Interface Routine: PCDestroy()
948 */
949 static PetscErrorCode PCDestroy_ML(PC pc)
950 {
951   PC_MG *mg    = (PC_MG *)pc->data;
952   PC_ML *pc_ml = (PC_ML *)mg->innerctx;
953 
954   PetscFunctionBegin;
955   PetscCall(PCReset_ML(pc));
956   PetscCall(PetscFree(pc_ml));
957   PetscCall(PCDestroy_MG(pc));
958   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
959   PetscFunctionReturn(PETSC_SUCCESS);
960 }
961 
962 static PetscErrorCode PCSetFromOptions_ML(PC pc, PetscOptionItems PetscOptionsObject)
963 {
964   PetscInt    indx, PrintLevel, partindx;
965   const char *scheme[] = {"Uncoupled", "Coupled", "MIS", "METIS"};
966   const char *part[]   = {"Zoltan", "ParMETIS"};
967 #if defined(HAVE_ML_ZOLTAN)
968   const char *zscheme[] = {"RCB", "hypergraph", "fast_hypergraph"};
969 #endif
970   PC_MG      *mg    = (PC_MG *)pc->data;
971   PC_ML      *pc_ml = (PC_ML *)mg->innerctx;
972   PetscMPIInt size;
973   MPI_Comm    comm;
974 
975   PetscFunctionBegin;
976   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
977   PetscCallMPI(MPI_Comm_size(comm, &size));
978   PetscOptionsHeadBegin(PetscOptionsObject, "ML options");
979 
980   PrintLevel = 0;
981   indx       = 0;
982   partindx   = 0;
983 
984   PetscCall(PetscOptionsInt("-pc_ml_PrintLevel", "Print level", "ML_Set_PrintLevel", PrintLevel, &PrintLevel, NULL));
985   PetscStackCallExternalVoid("ML_Set_PrintLevel", ML_Set_PrintLevel(PrintLevel));
986   PetscCall(PetscOptionsInt("-pc_ml_maxNlevels", "Maximum number of levels", "None", pc_ml->MaxNlevels, &pc_ml->MaxNlevels, NULL));
987   PetscCall(PetscOptionsInt("-pc_ml_maxCoarseSize", "Maximum coarsest mesh size", "ML_Aggregate_Set_MaxCoarseSize", pc_ml->MaxCoarseSize, &pc_ml->MaxCoarseSize, NULL));
988   PetscCall(PetscOptionsEList("-pc_ml_CoarsenScheme", "Aggregate Coarsen Scheme", "ML_Aggregate_Set_CoarsenScheme_*", scheme, 4, scheme[0], &indx, NULL));
989 
990   pc_ml->CoarsenScheme = indx;
991 
992   PetscCall(PetscOptionsReal("-pc_ml_DampingFactor", "P damping factor", "ML_Aggregate_Set_DampingFactor", pc_ml->DampingFactor, &pc_ml->DampingFactor, NULL));
993   PetscCall(PetscOptionsReal("-pc_ml_Threshold", "Smoother drop tol", "ML_Aggregate_Set_Threshold", pc_ml->Threshold, &pc_ml->Threshold, NULL));
994   PetscCall(PetscOptionsBool("-pc_ml_SpectralNormScheme_Anorm", "Method used for estimating spectral radius", "ML_Set_SpectralNormScheme_Anorm", pc_ml->SpectralNormScheme_Anorm, &pc_ml->SpectralNormScheme_Anorm, NULL));
995   PetscCall(PetscOptionsBool("-pc_ml_Symmetrize", "Symmetrize aggregation", "ML_Set_Symmetrize", pc_ml->Symmetrize, &pc_ml->Symmetrize, NULL));
996   PetscCall(PetscOptionsBool("-pc_ml_BlockScaling", "Scale all dofs at each node together", "None", pc_ml->BlockScaling, &pc_ml->BlockScaling, NULL));
997   PetscCall(PetscOptionsEnum("-pc_ml_nullspace", "Which type of null space information to use", "None", PCMLNullSpaceTypes, (PetscEnum)pc_ml->nulltype, (PetscEnum *)&pc_ml->nulltype, NULL));
998   PetscCall(PetscOptionsInt("-pc_ml_EnergyMinimization", "Energy minimization norm type (0=no minimization; see ML manual for 1,2,3; -1 and 4 undocumented)", "None", pc_ml->EnergyMinimization, &pc_ml->EnergyMinimization, NULL));
999   PetscCall(PetscOptionsBool("-pc_ml_reuse_interpolation", "Reuse the interpolation operators when possible (cheaper, weaker when matrix entries change a lot)", "None", pc_ml->reuse_interpolation, &pc_ml->reuse_interpolation, NULL));
1000   /*
1001     The following checks a number of conditions.  If we let this stuff slip by, then ML's error handling will take over.
1002     This is suboptimal because it amounts to calling exit(1) so we check for the most common conditions.
1003 
1004     We also try to set some sane defaults when energy minimization is activated, otherwise it's hard to find a working
1005     combination of options and ML's exit(1) explanations don't help matters.
1006   */
1007   PetscCheck(pc_ml->EnergyMinimization >= -1 && pc_ml->EnergyMinimization <= 4, comm, PETSC_ERR_ARG_OUTOFRANGE, "EnergyMinimization must be in range -1..4");
1008   PetscCheck(pc_ml->EnergyMinimization != 4 || size == 1, comm, PETSC_ERR_SUP, "Energy minimization type 4 does not work in parallel");
1009   if (pc_ml->EnergyMinimization == 4) PetscCall(PetscInfo(pc, "Mandel's energy minimization scheme is experimental and broken in ML-6.2\n"));
1010   if (pc_ml->EnergyMinimization) PetscCall(PetscOptionsReal("-pc_ml_EnergyMinimizationDropTol", "Energy minimization drop tolerance", "None", pc_ml->EnergyMinimizationDropTol, &pc_ml->EnergyMinimizationDropTol, NULL));
1011   if (pc_ml->EnergyMinimization == 2) {
1012     /* According to ml_MultiLevelPreconditioner.cpp, this option is only meaningful for norm type (2) */
1013     PetscCall(PetscOptionsBool("-pc_ml_EnergyMinimizationCheap", "Use cheaper variant of norm type 2", "None", pc_ml->EnergyMinimizationCheap, &pc_ml->EnergyMinimizationCheap, NULL));
1014   }
1015   /* energy minimization sometimes breaks if this is turned off, the more classical stuff should be okay without it */
1016   if (pc_ml->EnergyMinimization) pc_ml->KeepAggInfo = PETSC_TRUE;
1017   PetscCall(PetscOptionsBool("-pc_ml_KeepAggInfo", "Allows the preconditioner to be reused, or auxiliary matrices to be generated", "None", pc_ml->KeepAggInfo, &pc_ml->KeepAggInfo, NULL));
1018   /* Option (-1) doesn't work at all (calls exit(1)) if the tentative restriction operator isn't stored. */
1019   if (pc_ml->EnergyMinimization == -1) pc_ml->Reusable = PETSC_TRUE;
1020   PetscCall(PetscOptionsBool("-pc_ml_Reusable", "Store intermedaiate data structures so that the multilevel hierarchy is reusable", "None", pc_ml->Reusable, &pc_ml->Reusable, NULL));
1021   /*
1022     ML's C API is severely underdocumented and lacks significant functionality.  The C++ API calls
1023     ML_Gen_MultiLevelHierarchy_UsingAggregation() which is a modified copy (!?) of the documented function
1024     ML_Gen_MGHierarchy_UsingAggregation().  This modification, however, does not provide a strict superset of the
1025     functionality in the old function, so some users may still want to use it.  Note that many options are ignored in
1026     this context, but ML doesn't provide a way to find out which ones.
1027    */
1028   PetscCall(PetscOptionsBool("-pc_ml_OldHierarchy", "Use old routine to generate hierarchy", "None", pc_ml->OldHierarchy, &pc_ml->OldHierarchy, NULL));
1029   PetscCall(PetscOptionsBool("-pc_ml_repartition", "Allow ML to repartition levels of the hierarchy", "ML_Repartition_Activate", pc_ml->Repartition, &pc_ml->Repartition, NULL));
1030   if (pc_ml->Repartition) {
1031     PetscCall(PetscOptionsReal("-pc_ml_repartitionMaxMinRatio", "Acceptable ratio of repartitioned sizes", "ML_Repartition_Set_LargestMinMaxRatio", pc_ml->MaxMinRatio, &pc_ml->MaxMinRatio, NULL));
1032     PetscCall(PetscOptionsInt("-pc_ml_repartitionMinPerProc", "Smallest repartitioned size", "ML_Repartition_Set_MinPerProc", pc_ml->MinPerProc, &pc_ml->MinPerProc, NULL));
1033     PetscCall(PetscOptionsInt("-pc_ml_repartitionPutOnSingleProc", "Problem size automatically repartitioned to one processor", "ML_Repartition_Set_PutOnSingleProc", pc_ml->PutOnSingleProc, &pc_ml->PutOnSingleProc, NULL));
1034 #if defined(HAVE_ML_ZOLTAN)
1035     partindx = 0;
1036     PetscCall(PetscOptionsEList("-pc_ml_repartitionType", "Repartitioning library to use", "ML_Repartition_Set_Partitioner", part, 2, part[0], &partindx, NULL));
1037 
1038     pc_ml->RepartitionType = partindx;
1039     if (!partindx) {
1040       PetscInt zindx = 0;
1041 
1042       PetscCall(PetscOptionsEList("-pc_ml_repartitionZoltanScheme", "Repartitioning scheme to use", "None", zscheme, 3, zscheme[0], &zindx, NULL));
1043 
1044       pc_ml->ZoltanScheme = zindx;
1045     }
1046 #else
1047     partindx = 1;
1048     PetscCall(PetscOptionsEList("-pc_ml_repartitionType", "Repartitioning library to use", "ML_Repartition_Set_Partitioner", part, 2, part[1], &partindx, NULL));
1049     pc_ml->RepartitionType = partindx;
1050     PetscCheck(partindx, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP_SYS, "ML not compiled with Zoltan");
1051 #endif
1052     PetscCall(PetscOptionsBool("-pc_ml_Aux", "Aggregate using auxiliary coordinate-based laplacian", "None", pc_ml->Aux, &pc_ml->Aux, NULL));
1053     PetscCall(PetscOptionsReal("-pc_ml_AuxThreshold", "Auxiliary smoother drop tol", "None", pc_ml->AuxThreshold, &pc_ml->AuxThreshold, NULL));
1054   }
1055   PetscOptionsHeadEnd();
1056   PetscFunctionReturn(PETSC_SUCCESS);
1057 }
1058 
1059 /*
1060    PCCreate_ML - Creates a ML preconditioner context, PC_ML,
1061    and sets this as the private data within the generic preconditioning
1062    context, PC, that was created within PCCreate().
1063 
1064    Input Parameter:
1065 .  pc - the preconditioner context
1066 
1067    Application Interface Routine: PCCreate()
1068 */
1069 
1070 /*MC
1071      PCML - Use the SNL ML algebraic multigrid preconditioner.
1072 
1073    Options Database Keys:
1074    Multigrid options(inherited):
1075 +  -pc_mg_cycle_type <v> - v for V cycle, w for W-cycle (`PCMGSetCycleType()`)
1076 .  -pc_mg_distinct_smoothup - Should one configure the up and down smoothers separately (`PCMGSetDistinctSmoothUp()`)
1077 -  -pc_mg_type <multiplicative> - (one of) additive multiplicative full kascade
1078 
   ML Options Database Keys:
1080 +  -pc_ml_PrintLevel <0>                    - Print level (`ML_Set_PrintLevel()`)
1081 .  -pc_ml_maxNlevels <10>                   - Maximum number of levels (None)
1082 .  -pc_ml_maxCoarseSize <1>                 - Maximum coarsest mesh size (`ML_Aggregate_Set_MaxCoarseSize()`)
1083 .  -pc_ml_CoarsenScheme <Uncoupled>         - (one of) Uncoupled Coupled MIS METIS
1084 .  -pc_ml_DampingFactor <1.33333>           - P damping factor (`ML_Aggregate_Set_DampingFactor()`)
1085 .  -pc_ml_Threshold <0>                     - Smoother drop tol (`ML_Aggregate_Set_Threshold()`)
1086 .  -pc_ml_SpectralNormScheme_Anorm <false>  - Method used for estimating spectral radius (`ML_Set_SpectralNormScheme_Anorm()`)
1087 .  -pc_ml_repartition <false>               - Allow ML to repartition levels of the hierarchy (`ML_Repartition_Activate()`)
1088 .  -pc_ml_repartitionMaxMinRatio <1.3>      - Acceptable ratio of repartitioned sizes (`ML_Repartition_Set_LargestMinMaxRatio()`)
1089 .  -pc_ml_repartitionMinPerProc <512>       - Smallest repartitioned size (`ML_Repartition_Set_MinPerProc()`)
1090 .  -pc_ml_repartitionPutOnSingleProc <5000> - Problem size automatically repartitioned to one processor (`ML_Repartition_Set_PutOnSingleProc()`)
1091 .  -pc_ml_repartitionType <Zoltan>          - Repartitioning library to use (`ML_Repartition_Set_Partitioner()`)
1092 .  -pc_ml_repartitionZoltanScheme <RCB>     - Repartitioning scheme to use (None)
1093 .  -pc_ml_Aux <false>                       - Aggregate using auxiliary coordinate-based Laplacian (None)
1094 -  -pc_ml_AuxThreshold <0.0>                - Auxiliary smoother drop tol (None)
1095 
1096    Level: intermediate
1097 
1098    Developer Note:
1099    The coarser grid matrices and restriction/interpolation
1100    operators are computed by ML, with the matrices converted to PETSc matrices in `MATAIJ` format
1101    and the restriction/interpolation operators wrapped as PETSc shell matrices.
1102 
1103 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCMGType`, `PCMG`, `PCHYPRE`, `PCGAMG`,
          `PCMGSetLevels()`, `PCMGGetLevels()`, `PCMGSetType()`, `PCMGSetCycleType()`, `PCMGSetDistinctSmoothUp()`,
1105           `PCMGGetCoarseSolve()`, `PCMGSetResidual()`, `PCMGSetInterpolation()`,
1106           `PCMGSetRestriction()`, `PCMGGetSmoother()`, `PCMGGetSmootherUp()`, `PCMGGetSmootherDown()`,
1107           `PCMGSetCycleTypeOnLevel()`, `PCMGSetRhs()`, `PCMGSetX()`, `PCMGSetR()`
1108 M*/
1109 
1110 PETSC_EXTERN PetscErrorCode PCCreate_ML(PC pc)
1111 {
1112   PC_ML *pc_ml;
1113   PC_MG *mg;
1114 
1115   PetscFunctionBegin;
1116   /* PCML is an inherited class of PCMG. Initialize pc as PCMG */
1117   PetscCall(PCSetType(pc, PCMG)); /* calls PCCreate_MG() and MGCreate_Private() */
1118   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, PCML));
1119   /* Since PCMG tries to use DM associated with PC must delete it */
1120   PetscCall(DMDestroy(&pc->dm));
1121   PetscCall(PCMGSetGalerkin(pc, PC_MG_GALERKIN_EXTERNAL));
1122   mg = (PC_MG *)pc->data;
1123 
1124   /* create a supporting struct and attach it to pc */
1125   PetscCall(PetscNew(&pc_ml));
1126   mg->innerctx = pc_ml;
1127 
1128   pc_ml->ml_object                = 0;
1129   pc_ml->agg_object               = 0;
1130   pc_ml->gridctx                  = 0;
1131   pc_ml->PetscMLdata              = 0;
1132   pc_ml->Nlevels                  = -1;
1133   pc_ml->MaxNlevels               = 10;
1134   pc_ml->MaxCoarseSize            = 1;
1135   pc_ml->CoarsenScheme            = 1;
1136   pc_ml->Threshold                = 0.0;
1137   pc_ml->DampingFactor            = 4.0 / 3.0;
1138   pc_ml->SpectralNormScheme_Anorm = PETSC_FALSE;
1139   pc_ml->size                     = 0;
1140   pc_ml->dim                      = 0;
1141   pc_ml->nloc                     = 0;
1142   pc_ml->coords                   = 0;
1143   pc_ml->Repartition              = PETSC_FALSE;
1144   pc_ml->MaxMinRatio              = 1.3;
1145   pc_ml->MinPerProc               = 512;
1146   pc_ml->PutOnSingleProc          = 5000;
1147   pc_ml->RepartitionType          = 0;
1148   pc_ml->ZoltanScheme             = 0;
1149   pc_ml->Aux                      = PETSC_FALSE;
1150   pc_ml->AuxThreshold             = 0.0;
1151 
1152   /* allow for coordinates to be passed */
1153   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_ML));
1154 
1155   /* overwrite the pointers of PCMG by the functions of PCML */
1156   pc->ops->setfromoptions = PCSetFromOptions_ML;
1157   pc->ops->setup          = PCSetUp_ML;
1158   pc->ops->reset          = PCReset_ML;
1159   pc->ops->destroy        = PCDestroy_ML;
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162