xref: /petsc/src/ts/utils/dmplexlandau/plexland.c (revision d1574ab4cd8a57f7bf337fbedaca39dfd6f00f15)
1 #include <petsc/private/dmpleximpl.h>   /*I "petscdmplex.h" I*/
2 #include <petsclandau.h>                /*I "petsclandau.h"   I*/
3 #include <petscts.h>
4 #include <petscdmforest.h>
5 
6 /* Landau collision operator */
7 #define PETSC_THREAD_SYNC
8 #define PETSC_DEVICE_FUNC_DECL static
9 #include "land_tensors.h"
10 
11 /* vector padding not supported */
12 #define LANDAU_VL  1
13 
14 int LandauGetIPDataSize(const LandauIPData *const d) {
15   return d->nip_*(1 + d->dim_ + d->ns_); /* assumes Nq == Nd */
16 }
17 
18 static PetscErrorCode LandauPointDataCreate(LandauIPData *IPData, PetscInt dim, PetscInt nip, PetscInt Ns)
19 {
20   PetscErrorCode  ierr;
21   PetscInt        sz, nip_pad = nip ; /* LANDAU_VL*(nip/LANDAU_VL + !!(nip%LANDAU_VL)); */
22   LandauIPReal    *pdata;
23   PetscFunctionBegin;
24   IPData->dim_ = dim;
25   IPData->nip_ = nip_pad;
26   IPData->ns_  = Ns;
27   sz = LandauGetIPDataSize(IPData);
28   ierr = PetscMalloc(sizeof(LandauIPReal)*sz,&pdata);CHKERRQ(ierr);
29   /* pack data */
30   IPData->w    = pdata + 0; /* w */
31   IPData->x    = pdata + 1*nip_pad;
32   IPData->y    = pdata + 2*nip_pad;
33   IPData->z    = pdata + 3*nip_pad;
34   IPData->coefs= pdata + (dim+1)*nip_pad;
35   PetscFunctionReturn(0);
36 }
37 static PetscErrorCode LandauGPUDataDestroy(void *ptr)
38 {
39    P4estVertexMaps *maps = (P4estVertexMaps *)ptr;
40    PetscErrorCode  ierr;
41    PetscFunctionBegin;
42    if (maps->deviceType != LANDAU_CPU) {
43 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
44      if (maps->deviceType == LANDAU_KOKKOS) {
45        ierr = LandauKokkosDestroyMatMaps(maps);CHKERRQ(ierr); // imples Kokkos does
46      } // else could be CUDA
47 #elif defined(PETSC_HAVE_CUDA)
48      if (maps->deviceType == LANDAU_CUDA){
49        ierr = LandauCUDADestroyMatMaps(maps);CHKERRQ(ierr);
50      } else SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_PLIB, "maps->deviceType %D ?????",maps->deviceType);
51 #endif
52    }
53    ierr = PetscFree(maps->c_maps);CHKERRQ(ierr);
54    ierr = PetscFree(maps->gIdx);CHKERRQ(ierr);
55    ierr = PetscFree(maps);CHKERRQ(ierr);
56    PetscFunctionReturn(0);
57 }
58 static PetscErrorCode LandauPointDataDestroy(LandauIPData *IPData)
59 {
60   PetscErrorCode   ierr;
61   PetscFunctionBegin;
62   ierr = PetscFree(IPData->w);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 /* ------------------------------------------------------------------- */
66 /*
67   LandauFormJacobian_Internal - Evaluates Jacobian matrix.
68 
69   Input Parameters:
70   .  globX - input vector
71   .  actx - optional user-defined context
72   .  dim - dimension
73 
74   Output Parameters:
75   .  J0acP - Jacobian matrix filled, not created
76 */
77 PetscErrorCode LandauFormJacobian_Internal(Vec a_X, Mat JacP, const PetscInt dim, void *a_ctx)
78 {
79   LandauCtx         *ctx = (LandauCtx*)a_ctx;
80   PetscErrorCode    ierr;
81   PetscInt          cStart, cEnd, elemMatSize;
82   DM                plex = NULL;
83   PetscDS           prob;
84   PetscSection      section,globsection;
85   PetscInt          numCells,totDim,ej,Nq,*Nbf,*Ncf,Nb,Ncx,Nf,d,f,fieldA,qj;
86   PetscQuadrature   quad;
87   PetscTabulation   *Tf;
88   PetscReal         nu_alpha[LANDAU_MAX_SPECIES], nu_beta[LANDAU_MAX_SPECIES];
89   const PetscReal   *quadWeights;
90   PetscReal         *invJ,*invJ_a;
91   PetscReal         invMass[LANDAU_MAX_SPECIES],Eq_m[LANDAU_MAX_SPECIES],m_0=ctx->m_0; /* normalize mass -- not needed! */
92   PetscLogDouble    flops;
93   Vec               locX;
94   LandauIPData      IPData;
95   PetscContainer    container;
96   P4estVertexMaps   *maps=NULL;
97 
98   PetscFunctionBegin;
99   PetscValidHeaderSpecific(a_X,VEC_CLASSID,1);
100   PetscValidHeaderSpecific(JacP,MAT_CLASSID,2);
101   PetscValidPointer(ctx,4);
102   ierr = PetscLogEventBegin(ctx->events[1],0,0,0,0);CHKERRQ(ierr);
103 
104   /* check for matrix container for GPU assembly */
105   ierr = PetscObjectQuery((PetscObject) JacP, "assembly_maps", (PetscObject *) &container);CHKERRQ(ierr);
106   if (container /* && ctx->deviceType != LANDAU_CPU */) {
107     if (!ctx->gpu_assembly) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"GPU matrix container but no GPU assembly");
108     ierr = PetscContainerGetPointer(container, (void **) &maps);CHKERRQ(ierr);
109     if (!maps) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"empty GPU matrix container");
110   }
111   ierr = DMConvert(ctx->dmv, DMPLEX, &plex);CHKERRQ(ierr);
112   ierr = DMCreateLocalVector(plex, &locX);CHKERRQ(ierr);
113   ierr = VecZeroEntries(locX);CHKERRQ(ierr); /* zero BCs so don't set */
114   ierr = DMGlobalToLocalBegin(plex, a_X, INSERT_VALUES, locX);CHKERRQ(ierr);
115   ierr = DMGlobalToLocalEnd  (plex, a_X, INSERT_VALUES, locX);CHKERRQ(ierr);
116   ierr = DMPlexGetHeightStratum(plex, 0, &cStart, &cEnd);CHKERRQ(ierr);
117   ierr = DMGetLocalSection(plex, &section);CHKERRQ(ierr);
118   ierr = DMGetGlobalSection(plex, &globsection);CHKERRQ(ierr);
119   ierr = DMGetDS(plex, &prob);CHKERRQ(ierr);
120   ierr = PetscDSGetTabulation(prob, &Tf);CHKERRQ(ierr); // Bf, &Df
121   ierr = PetscDSGetDimensions(prob, &Nbf);CHKERRQ(ierr); Nb = Nbf[0]; /* number of vertices*S */
122   ierr = PetscSectionGetNumFields(section, &Nf);CHKERRQ(ierr);         if (Nf!=ctx->num_species) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Nf %D != S",Nf);
123   ierr = PetscDSGetComponents(prob, &Ncf);CHKERRQ(ierr); Ncx = Ncf[0]; if (Ncx!=1) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Nc %D != 1",Ncx);
124   for (fieldA=0;fieldA<Nf;fieldA++) {
125     invMass[fieldA] = m_0/ctx->masses[fieldA];
126     Eq_m[fieldA] = ctx->Ez * ctx->t_0 * ctx->charges[fieldA] / (ctx->v_0 * ctx->masses[fieldA]); /* normalize dimensionless */
127     if (dim==2) Eq_m[fieldA] *=  2 * PETSC_PI; /* add the 2pi term that is not in Landau */
128     nu_alpha[fieldA] = PetscSqr(ctx->charges[fieldA]/m_0)*m_0/ctx->masses[fieldA];
129     nu_beta[fieldA] = PetscSqr(ctx->charges[fieldA]/ctx->epsilon0)*ctx->lnLam / (8*PETSC_PI) * ctx->t_0*ctx->n_0/PetscPowReal(ctx->v_0,3);
130   }
131   ierr = PetscDSGetTotalDimension(prob, &totDim);CHKERRQ(ierr);
132   numCells = cEnd - cStart;
133   ierr = PetscFEGetQuadrature(ctx->fe[0], &quad);CHKERRQ(ierr);
134   ierr = PetscQuadratureGetData(quad, NULL, NULL, &Nq, NULL, &quadWeights);CHKERRQ(ierr);
135   if (Nb!=Nq) SETERRQ4(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Nb!=Nq %D %D over integration or simplices? Tf[0]->Nb=%D dim=%D",Nb,Nq,Tf[0]->Nb,dim);
136   if (Nq >LANDAU_MAX_NQ) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Order too high. Nq = %D > LANDAU_MAX_NQ (%D)",Nq,LANDAU_MAX_NQ);
137   if (LANDAU_DIM != dim) SETERRQ2(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "dim %D != LANDAU_DIM %d",dim,LANDAU_DIM);
138   flops = (PetscLogDouble)numCells*(PetscLogDouble)Nq*(PetscLogDouble)(5*dim*dim*Nf*Nf + 165);
139   ierr = MatZeroEntries(JacP);CHKERRQ(ierr);
140   elemMatSize = totDim*totDim;
141 
142   {
143     static int         cc = 0;
144     PetscScalar        uu[LANDAU_MAX_SPECIES],u_x[LANDAU_MAX_SPECIES*LANDAU_DIM];
145     /* collect f data */
146     if (ctx->verbose > 2 || (ctx->verbose > 0 && cc++ == 0)) {
147       PetscInt N,Nloc;
148       ierr = MatGetSize(JacP,&N,NULL);CHKERRQ(ierr);
149       ierr = VecGetSize(locX,&Nloc);CHKERRQ(ierr);
150       ierr = PetscPrintf(PETSC_COMM_WORLD,"[%D]%s: %D IPs, %D cells, totDim=%D, Nb=%D, Nq=%D, elemMatSize=%D, dim=%D, Tab: Nb=%D Nf=%D Np=%D cdim=%D N=%D N+hang=%D\n",
151                          0,"FormLandau",Nq*numCells,numCells, totDim, Nb, Nq, elemMatSize, dim, Tf[0]->Nb, Nf, Tf[0]->Np, Tf[0]->cdim, N, Nloc);CHKERRQ(ierr);
152     }
153     ierr = LandauPointDataCreate(&IPData, dim, Nq*numCells, Nf);CHKERRQ(ierr);
154     ierr = PetscMalloc1(IPData.nip_*dim*dim,&invJ_a);CHKERRQ(ierr);
155     /* cache geometry and x, f and df/dx at IPs */
156     for (ej = 0, invJ = invJ_a ; ej < numCells; ++ej, invJ += Nq*dim*dim) {
157       PetscReal    vj[LANDAU_MAX_NQ*LANDAU_DIM],detJj[LANDAU_MAX_NQ], Jdummy[LANDAU_MAX_NQ*LANDAU_DIM*LANDAU_DIM];
158       PetscScalar *coef = NULL;
159       ierr = DMPlexComputeCellGeometryFEM(plex, cStart+ej, quad, vj, Jdummy, invJ, detJj);CHKERRQ(ierr);
160       ierr = DMPlexVecGetClosure(plex, section, locX, cStart+ej, NULL, &coef);CHKERRQ(ierr);
161       ierr = PetscMemcpy(&IPData.coefs[ej*Nb*Nf],coef,Nb*Nf*sizeof(PetscScalar));CHKERRQ(ierr); /* change if LandauIPReal != PetscScalar */
162       /* create point data for cell i for Landau tensor: x, f(x), grad f(x) */
163       for (qj = 0; qj < Nq; ++qj) {
164         PetscInt         gidx = (ej*Nq + qj);
165         PetscScalar      refSpaceDer[LANDAU_DIM];
166         PetscInt         dOffset = 0, fOffset = 0;
167         IPData.x[gidx] = vj[qj * dim + 0]; /* coordinate */
168         IPData.y[gidx] = vj[qj * dim + 1];
169         if (dim==3) IPData.z[gidx] = vj[qj * dim + 2];
170         IPData.w[gidx] = detJj[qj] * quadWeights[qj];
171         if (dim==2) IPData.w[gidx] *= IPData.x[gidx];  /* cylindrical coordinate, w/o 2pi */
172         /* get u & du (EvaluateFieldJets) */
173         for (f = 0; f < Nf; ++f) {
174           const PetscReal *Bq = &Tf[f]->T[0][qj*Nb];
175           const PetscReal *Dq = &Tf[f]->T[1][qj*Nb*dim];
176           PetscInt         b, e;
177           uu[fOffset] = 0.0;
178           for (d = 0; d < LANDAU_DIM; ++d) refSpaceDer[d] = 0.0;
179           for (b = 0; b < Nb; ++b) {
180             const PetscInt    cidx = b;
181             uu[fOffset] += Bq[cidx]*coef[dOffset+cidx];
182             for (d = 0; d < dim; ++d) refSpaceDer[d] += Dq[cidx*dim+d]*coef[dOffset+cidx];
183           }
184           for (d = 0; d < dim; ++d) {
185             for (e = 0, u_x[fOffset*dim+d] = 0.0; e < dim; ++e) {
186               u_x[fOffset*dim+d] += invJ[qj * dim * dim + e*dim+d]*refSpaceDer[e];
187             }
188           }
189           fOffset += 1;
190           dOffset += Nb;
191         }
192       } /* q */
193       ierr = DMPlexVecRestoreClosure(plex, section, locX, cStart+ej, NULL, &coef);CHKERRQ(ierr);
194     } /* e */
195   }
196   /* pad with zeros */
197   for (fieldA = numCells*Nq; fieldA < IPData.nip_; ++fieldA, invJ += dim*dim) {
198     for (d = 0; d < dim; ++d) {
199       int e;
200       for (e = 0; e < dim; ++e) {
201         invJ[e*dim+d] = 0;
202       }
203     }
204   }
205 
206   ierr = DMRestoreLocalVector(plex, &locX);CHKERRQ(ierr);
207   ierr = PetscLogEventEnd(ctx->events[1],0,0,0,0);CHKERRQ(ierr);
208   /* do it */
209   if (ctx->deviceType == LANDAU_CUDA || ctx->deviceType == LANDAU_KOKKOS) {
210     if (ctx->deviceType == LANDAU_CUDA) {
211 #if defined(PETSC_HAVE_CUDA)
212       ierr = LandauCUDAJacobian(plex,Nq,nu_alpha,nu_beta,invMass,Eq_m,&IPData,invJ_a,ctx->events,JacP);CHKERRQ(ierr);
213 #else
214       SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"-landau_device_type %s not built","cuda");
215 #endif
216     } else if (ctx->deviceType == LANDAU_KOKKOS) {
217 #if defined(PETSC_HAVE_KOKKOS) && !defined(__cplusplus)
218       ierr = LandauKokkosJacobian(plex,Nq,nu_alpha,nu_beta,invMass,Eq_m,&IPData,invJ_a,ctx->subThreadBlockSize,ctx->events,JacP);CHKERRQ(ierr);
219 #else
220       SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"-landau_device_type %s not built","kokkos");
221 #endif
222     }
223   } else { /* CPU version */
224     PetscInt                ei, qi;
225     PetscScalar             *elemMat;
226     PetscReal               *ff, *dudx, *dudy, *dudz;
227     const PetscReal * const BB = Tf[0]->T[0], * const DD = Tf[0]->T[1];
228     ierr = PetscLogEventBegin(ctx->events[8],0,0,0,0);CHKERRQ(ierr);
229     ierr = PetscMalloc5(elemMatSize, &elemMat, IPData.nip_*Nf, &ff,IPData.nip_*Nf, &dudx, IPData.nip_*Nf, &dudy, dim==3 ? IPData.nip_*Nf : 0, &dudz);CHKERRQ(ierr);
230     /* compute f and df */
231     for (ei = cStart, invJ = invJ_a; ei < cEnd; ++ei, invJ += Nq*dim*dim) {
232       LandauIPReal  *coef = &IPData.coefs[ei*Nb*Nf];
233       PetscScalar   u_x[LANDAU_MAX_SPECIES][LANDAU_DIM];
234       /* get f and df */
235       for (qi = 0; qi < Nq; ++qi) {
236         const PetscReal  *Bq = &BB[qi*Nb];
237         const PetscReal  *Dq = &DD[qi*Nb*dim];
238         const PetscInt   gidx = ei*Nq + qi;
239         /* get f & df */
240         for (f = 0; f < Nf; ++f) {
241           PetscInt    b, e;
242           PetscScalar refSpaceDer[LANDAU_DIM];
243           ff[gidx + f*IPData.nip_] = 0.0;
244           for (d = 0; d < LANDAU_DIM; ++d) refSpaceDer[d] = 0.0;
245           for (b = 0; b < Nb; ++b) {
246             const PetscInt    cidx = b;
247             ff[gidx + f*IPData.nip_] += Bq[cidx]*coef[f*Nb+cidx];
248             for (d = 0; d < dim; ++d) refSpaceDer[d] += Dq[cidx*dim+d]*coef[f*Nb+cidx];
249           }
250           for (d = 0; d < dim; ++d) {
251             for (e = 0, u_x[f][d] = 0.0; e < dim; ++e) {
252               u_x[f][d] += invJ[qi * dim * dim + e*dim+d]*refSpaceDer[e];
253             }
254           }
255         }
256         for (f=0;f<Nf;f++) {
257           dudx[gidx + f*IPData.nip_] = PetscRealPart(u_x[f][0]);
258           dudy[gidx + f*IPData.nip_] = PetscRealPart(u_x[f][1]);
259 #if LANDAU_DIM==3
260           dudz[gidx + f*IPData.nip_] = PetscRealPart(u_x[f][2]);
261 #endif
262         }
263       }
264     }
265     ierr = PetscLogEventEnd(ctx->events[8],0,0,0,0);CHKERRQ(ierr);
266     for (ej = cStart, invJ = invJ_a; ej < cEnd; ++ej, invJ += Nq*dim*dim) {
267       ierr = PetscLogEventBegin(ctx->events[3],0,0,0,0);CHKERRQ(ierr);
268       ierr = PetscMemzero(elemMat, totDim *totDim * sizeof(PetscScalar));CHKERRQ(ierr);
269       ierr = PetscLogEventEnd(ctx->events[3],0,0,0,0);CHKERRQ(ierr);
270       ierr = PetscLogEventBegin(ctx->events[4],0,0,0,0);CHKERRQ(ierr);
271       ierr = PetscLogFlops((PetscLogDouble)Nq*flops);CHKERRQ(ierr);
272       for (qj = 0; qj < Nq; ++qj) {
273         PetscReal               g2[LANDAU_MAX_SPECIES][LANDAU_DIM], g3[LANDAU_MAX_SPECIES][LANDAU_DIM][LANDAU_DIM], gg2_temp[LANDAU_DIM], gg3_temp[LANDAU_DIM][LANDAU_DIM];
274         const PetscInt          jpidx = Nq*(ej-cStart) + qj;
275         const PetscReal * const BB = Tf[0]->T[0], * const DD = Tf[0]->T[1], * const invJj = &invJ[qj*dim*dim];
276         PetscReal               gg2[LANDAU_MAX_SPECIES][LANDAU_DIM],gg3[LANDAU_MAX_SPECIES][LANDAU_DIM][LANDAU_DIM];
277         PetscInt                d,d2,dp,d3,ipidx,fieldA;
278         const PetscReal         vj[3] = {IPData.x[jpidx], IPData.y[jpidx], IPData.z ? IPData.z[jpidx] : 0}, wj = IPData.w[jpidx];
279         // create g2 & g3
280         for (d=0;d<dim;d++) { // clear accumulation data D & K
281           gg2_temp[d] = 0;
282           for (d2=0;d2<dim;d2++) gg3_temp[d][d2] = 0;
283         }
284         for (ipidx = 0; ipidx < IPData.nip_; ipidx++) {
285           const PetscReal wi = IPData.w[ipidx], x = IPData.x[ipidx], y = IPData.y[ipidx];
286           PetscReal       temp1[3] = {0, 0, 0}, temp2 = 0;
287 #if LANDAU_DIM==2
288           PetscReal       Ud[2][2], Uk[2][2];
289           LandauTensor2D(vj, x, y, Ud, Uk, (ipidx==jpidx) ? 0. : 1.);
290 #else
291           PetscReal U[3][3], z = IPData.z[ipidx];
292           LandauTensor3D(vj, x, y, z, U, (ipidx==jpidx) ? 0. : 1.);
293 #endif
294           for (fieldA = 0; fieldA < Nf; ++fieldA) {
295             temp1[0] += dudx[ipidx + fieldA*IPData.nip_]*nu_beta[fieldA]*invMass[fieldA];
296             temp1[1] += dudy[ipidx + fieldA*IPData.nip_]*nu_beta[fieldA]*invMass[fieldA];
297 #if LANDAU_DIM==3
298             temp1[2] += dudz[ipidx + fieldA*IPData.nip_]*nu_beta[fieldA]*invMass[fieldA];
299 #endif
300             temp2    += ff[ipidx + fieldA*IPData.nip_]*nu_beta[fieldA];
301           }
302           temp1[0] *= wi;
303           temp1[1] *= wi;
304 #if LANDAU_DIM==3
305           temp1[2] *= wi;
306 #endif
307           temp2    *= wi;
308 #if LANDAU_DIM==2
309           for (d2 = 0; d2 < 2; d2++) {
310             for (d3 = 0; d3 < 2; ++d3) {
311               /* K = U * grad(f): g2=e: i,A */
312               gg2_temp[d2] += Uk[d2][d3]*temp1[d3];
313               /* D = -U * (I \kron (fx)): g3=f: i,j,A */
314               gg3_temp[d2][d3] += Ud[d2][d3]*temp2;
315             }
316           }
317 #else
318           for (d2 = 0; d2 < 3; ++d2) {
319             for (d3 = 0; d3 < 3; ++d3) {
320               /* K = U * grad(f): g2 = e: i,A */
321               gg2_temp[d2] += U[d2][d3]*temp1[d3];
322               /* D = -U * (I \kron (fx)): g3 = f: i,j,A */
323               gg3_temp[d2][d3] += U[d2][d3]*temp2;
324             }
325           }
326 #endif
327         } /* IPs */
328         //if (ej==0) printf("\t:%d.%d) temp gg3=%e %e %e %e\n",ej,qj,gg3_temp[0][0],gg3_temp[1][0],gg3_temp[0][1],gg3_temp[1][1]);
329         // add alpha and put in gg2/3
330         for (fieldA = 0; fieldA < Nf; ++fieldA) {
331           for (d2 = 0; d2 < dim; d2++) {
332             gg2[fieldA][d2] = gg2_temp[d2]*nu_alpha[fieldA];
333             for (d3 = 0; d3 < dim; d3++) {
334               gg3[fieldA][d2][d3] = -gg3_temp[d2][d3]*nu_alpha[fieldA]*invMass[fieldA];
335             }
336           }
337         }
338         /* add electric field term once per IP */
339         for (fieldA = 0; fieldA < Nf; ++fieldA) {
340           gg2[fieldA][dim-1] += Eq_m[fieldA];
341         }
342         //intf("%d %d gg2[1][1]=%g\n",myelem,qj,gg2[1][dim-1]);
343         /* Jacobian transform - g2 */
344         for (fieldA = 0; fieldA < Nf; ++fieldA) {
345           for (d = 0; d < dim; ++d) {
346             g2[fieldA][d] = 0.0;
347             for (d2 = 0; d2 < dim; ++d2) {
348               g2[fieldA][d] += invJj[d*dim+d2]*gg2[fieldA][d2];
349               g3[fieldA][d][d2] = 0.0;
350               for (d3 = 0; d3 < dim; ++d3) {
351                 for (dp = 0; dp < dim; ++dp) {
352                   g3[fieldA][d][d2] += invJj[d*dim + d3]*gg3[fieldA][d3][dp]*invJj[d2*dim + dp];
353                 }
354               }
355               g3[fieldA][d][d2] *= wj;
356             }
357             g2[fieldA][d] *= wj;
358           }
359         }
360         /* FE matrix construction */
361         {
362           PetscInt  fieldA,d,f,d2,g;
363           const PetscReal *BJq = &BB[qj*Nb], *DIq = &DD[qj*Nb*dim];
364           /* assemble - on the diagonal (I,I) */
365           for (fieldA = 0; fieldA < Nf ; fieldA++) {
366             for (f = 0; f < Nb ; f++) {
367               const PetscInt i = fieldA*Nb + f; /* Element matrix row */
368               for (g = 0; g < Nb; ++g) {
369                 const PetscInt j    = fieldA*Nb + g; /* Element matrix column */
370                 const PetscInt fOff = i*totDim + j;
371                 for (d = 0; d < dim; ++d) {
372                   elemMat[fOff] += DIq[f*dim+d]*g2[fieldA][d]*BJq[g];
373                   for (d2 = 0; d2 < dim; ++d2) {
374                     elemMat[fOff] += DIq[f*dim + d]*g3[fieldA][d][d2]*DIq[g*dim + d2];
375                   }
376                 }
377               }
378             }
379           }
380         }
381       } /* qj loop */
382       ierr = PetscLogEventEnd(ctx->events[4],0,0,0,0);CHKERRQ(ierr);
383       /* assemble matrix */
384       ierr = PetscLogEventBegin(ctx->events[6],0,0,0,0);CHKERRQ(ierr);
385       if (!maps) {
386         ierr = DMPlexMatSetClosure(plex, section, globsection, JacP, ej, elemMat, ADD_VALUES);CHKERRQ(ierr);
387       } else {  // GPU like assembly for debugging
388         PetscInt      fieldA,idx,q,f,g,d,nr,nc,rows0[LANDAU_MAX_Q],cols0[LANDAU_MAX_Q],rows[LANDAU_MAX_Q],cols[LANDAU_MAX_Q];
389         PetscScalar   vals[LANDAU_MAX_Q*LANDAU_MAX_Q],row_scale[LANDAU_MAX_Q],col_scale[LANDAU_MAX_Q];
390         /* assemble - from the diagonal (I,I) in this format for DMPlexMatSetClosure */
391         for (fieldA = 0; fieldA < Nf ; fieldA++) {
392           LandauIdx *const Idxs = &maps->gIdx[ej-cStart][fieldA][0];
393           for (f = 0; f < Nb ; f++) {
394             idx = Idxs[f];
395             if (idx >= 0) {
396               nr = 1;
397               rows0[0] = idx;
398               row_scale[0] = 1.;
399               //PetscPrintf(PETSC_COMM_SELF,"Normal I %D\n",idx);
400             } else {
401               idx = -idx - 1;
402               nr = maps->num_face;
403               //PetscPrintf(PETSC_COMM_SELF,"\t C(%D): rows: ",idx);
404               for (q = 0; q < maps->num_face; q++) {
405                 rows0[q]     = maps->c_maps[idx][q].gid;
406                 row_scale[q] = maps->c_maps[idx][q].scale;
407                 //PetscPrintf(PETSC_COMM_SELF," %D ",rows0[q]);
408               }
409               //PetscPrintf(PETSC_COMM_SELF,"\n");
410             }
411             for (g = 0; g < Nb; ++g) {
412               idx = Idxs[g];
413               if (idx >= 0) {
414                 nc = 1;
415                 cols0[0] = idx;
416                 col_scale[0] = 1.;
417                 //PetscPrintf(PETSC_COMM_SELF,"\tNormal J %D\n",idx);
418               } else {
419                 idx = -idx - 1;
420                 nc = maps->num_face;
421                 //PetscPrintf(PETSC_COMM_SELF,"\t C(%D): cols: ",idx);
422                 for (q = 0; q < maps->num_face; q++) {
423                   cols0[q]     = maps->c_maps[idx][q].gid;
424                   col_scale[q] = maps->c_maps[idx][q].scale;
425                   //PetscPrintf(PETSC_COMM_SELF," %D ",cols0[q]);
426                 }
427                 //PetscPrintf(PETSC_COMM_SELF,"\n");
428               }
429               const PetscInt    i = fieldA*Nb + f; /* Element matrix row */
430               const PetscInt    j = fieldA*Nb + g; /* Element matrix column */
431               const PetscScalar Aij = elemMat[i*totDim + j];
432               for (q = 0; q < nr; q++) rows[q] = rows0[q];
433               for (q = 0; q < nc; q++) cols[q] = cols0[q];
434               for (q = 0; q < nr; q++) {
435                 for (d = 0; d < nc; d++) {
436                   vals[q*nc + d] = row_scale[q]*col_scale[d]*Aij;
437                 }
438               }
439               ierr = MatSetValues(JacP,nr,rows,nc,cols,vals,ADD_VALUES);CHKERRQ(ierr);
440             }
441           }
442         }
443       }
444       if (ej==-1) {
445         PetscErrorCode    ierr2;
446         ierr2 = PetscPrintf(PETSC_COMM_SELF,"CPU Element matrix\n");CHKERRQ(ierr2);
447         for (d = 0; d < totDim; ++d){
448           for (f = 0; f < totDim; ++f) {ierr2 = PetscPrintf(PETSC_COMM_SELF," %12.5e",  PetscRealPart(elemMat[d*totDim + f]));CHKERRQ(ierr2);}
449           ierr2 = PetscPrintf(PETSC_COMM_SELF,"\n");CHKERRQ(ierr2);
450         }
451       }
452       ierr = PetscLogEventEnd(ctx->events[6],0,0,0,0);CHKERRQ(ierr);
453     } /* ej cells loop, not cuda */
454     ierr = PetscFree5(elemMat, ff, dudx, dudy, dudz);CHKERRQ(ierr);
455   } /* CPU version */
456   /* assemble matrix or vector */
457   ierr = PetscLogEventBegin(ctx->events[7],0,0,0,0);CHKERRQ(ierr);
458   ierr = MatAssemblyBegin(JacP, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
459   ierr = MatAssemblyEnd(JacP, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
460   ierr = PetscLogEventEnd(ctx->events[7],0,0,0,0);CHKERRQ(ierr);
461   ierr = MatViewFromOptions(JacP,NULL,"-landau_mat_view");CHKERRQ(ierr);
462 #define MAP_BF_SIZE (128*LANDAU_DIM*LANDAU_MAX_Q_FACE*LANDAU_MAX_SPECIES)
463   if (ctx->gpu_assembly && !container) {
464     PetscScalar             elemMatrix[LANDAU_MAX_NQ*LANDAU_MAX_NQ*LANDAU_MAX_SPECIES*LANDAU_MAX_SPECIES], *elMat;
465     pointInterpolationP4est pointMaps[MAP_BF_SIZE][LANDAU_MAX_Q_FACE];
466     PetscInt                q,eidx,fieldA;
467     MatType                 type;
468     ierr = PetscInfo1(JacP, "Make GPU maps %D\n",1);CHKERRQ(ierr);
469     ierr = MatGetType(JacP,&type);CHKERRQ(ierr);
470     ierr = PetscLogEventBegin(ctx->events[2],0,0,0,0);CHKERRQ(ierr);
471     ierr = PetscMalloc(sizeof(P4estVertexMaps), &maps);CHKERRQ(ierr);
472     ierr = PetscContainerCreate(PETSC_COMM_SELF, &container);CHKERRQ(ierr);
473     ierr = PetscContainerSetPointer(container, (void *)maps);CHKERRQ(ierr);
474     ierr = PetscContainerSetUserDestroy(container, LandauGPUDataDestroy);CHKERRQ(ierr);
475     ierr = PetscObjectCompose((PetscObject) JacP, "assembly_maps", (PetscObject) container);CHKERRQ(ierr);
476     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
477     // make maps
478     maps->data = NULL;
479     maps->num_elements = numCells;
480     maps->num_face = (PetscInt)(pow(Nq,1./((double)dim))+.001); // Q
481     maps->num_face = (PetscInt)(pow(maps->num_face,(double)(dim-1))+.001); // Q^2
482     maps->num_reduced = 0;
483     maps->deviceType = ctx->deviceType;
484     // count reduced and get
485     ierr = PetscMalloc(maps->num_elements * sizeof *maps->gIdx, &maps->gIdx);CHKERRQ(ierr);
486     for (fieldA=0;fieldA<Nf;fieldA++) {
487       for (ej = cStart, eidx = 0 ; ej < cEnd; ++ej, ++eidx) {
488         for (q = 0; q < Nb; ++q) {
489           PetscInt    numindices,*indices;
490           PetscScalar *valuesOrig = elMat = elemMatrix;
491           ierr = PetscMemzero(elMat, totDim*totDim*sizeof(PetscScalar));CHKERRQ(ierr);
492           elMat[ (fieldA*Nb + q)*totDim + fieldA*Nb + q] = 1;
493           ierr = DMPlexGetClosureIndices(plex, section, globsection, ej, PETSC_TRUE, &numindices, &indices, NULL, (PetscScalar **) &elMat);CHKERRQ(ierr);
494           for (f = 0 ; f < numindices ; ++f) { // look for a non-zero on the diagonal
495             if (PetscAbsReal(PetscRealPart(elMat[f*numindices + f])) > PETSC_MACHINE_EPSILON) {
496               // found it
497               if (PetscAbs(PetscRealPart(elMat[f*numindices + f] - 1.)) < PETSC_MACHINE_EPSILON) {
498                 maps->gIdx[eidx][fieldA][q] = (LandauIdx)indices[f]; // normal vertex 1.0
499                 //ierr = PetscPrintf(PETSC_COMM_SELF,"\t\t f=%D e=%D q=%D Found normal gid=%D %d\n",fieldA,eidx,q,indices[f],maps->gIdx[fieldA][eidx][q]);CHKERRQ(ierr);
500               } else { //found a constraint
501                 int       jj = 0;
502                 PetscReal sum = 0;
503                 const PetscInt ff = f;
504                 maps->gIdx[eidx][fieldA][q] = -maps->num_reduced - 1; // gid = -(idx+1): idx = -gid - 1
505                 do {  // constraints are continous in Plex - exploit that here
506                   int ii;
507                   //ierr = PetscPrintf(PETSC_COMM_SELF,"\t\t\t\t %D.%D) C:%D) id=%D\n",eidx,q,jj,indices[f]);CHKERRQ(ierr);
508                   for (ii = 0, pointMaps[maps->num_reduced][jj].scale = 0; ii < maps->num_face; ii++) { // DMPlex puts them all together
509                     if (ff + ii < numindices) {
510                       pointMaps[maps->num_reduced][jj].scale += PetscRealPart(elMat[f*numindices + ff + ii]);
511                       //ierr = PetscPrintf(PETSC_COMM_SELF,"\t\t\t\t maps[%d,%d].scale = %g %g\n",maps->num_reduced,jj,pointMaps[maps->num_reduced][jj].scale, elMat[f*numindices + ff + ii]);CHKERRQ(ierr);
512                     }
513                   }
514                   sum += pointMaps[maps->num_reduced][jj].scale;
515                   if (pointMaps[maps->num_reduced][jj].scale == 0) pointMaps[maps->num_reduced][jj].gid = -1; // 3D has Q and Q^2 interps -- all contiguous???
516                   else                                             pointMaps[maps->num_reduced][jj].gid = indices[f];
517                   //ierr = PetscPrintf(PETSC_COMM_SELF,"\t\t\t I[%d] = %g\n",jj,pointMaps[maps->num_reduced][jj].scale);CHKERRQ(ierr);
518                 } while (++jj < maps->num_face && ++f < numindices); // jj is incremented if we hit the end
519                 while (jj++ < maps->num_face) {
520                   pointMaps[maps->num_reduced][jj].scale = 0;
521                   pointMaps[maps->num_reduced][jj].gid = -1;
522                   //ierr = PetscPrintf(PETSC_COMM_SELF,"\t\t\t\t\t maps[%d,%d].scale = %g\n",maps->num_reduced,jj,pointMaps[maps->num_reduced][jj].scale);CHKERRQ(ierr);
523                 }
524                 if (PetscAbs(sum-1.0)>PETSC_MACHINE_EPSILON*2.0) { // debug
525                   int       d,f;
526                   PetscReal tmp = 0;
527                   PetscPrintf(PETSC_COMM_SELF,"\t\t%D.%D.%D) ERROR total I = %22.16e (LANDAU_MAX_Q_FACE=%d, #face=%D)\n",eidx,q,fieldA,tmp,LANDAU_MAX_Q_FACE,maps->num_face);
528                   for (d = 0, tmp = 0; d < numindices; ++d){
529                     if (tmp!=0 && PetscAbs(tmp-1.0)>2*PETSC_MACHINE_EPSILON) ierr = PetscPrintf(PETSC_COMM_WORLD,"%3D) %3D: ",d,indices[d]);CHKERRQ(ierr);
530                     for (f = 0; f < numindices; ++f) {
531                       // ierr = PetscPrintf(PETSC_COMM_SELF," %8.2e",  PetscRealPart(elMat[d*numindices + f]));CHKERRQ(ierr);
532                       tmp += PetscRealPart(elMat[d*numindices + f]);
533                     }
534                     if (tmp!=0) ierr = PetscPrintf(PETSC_COMM_WORLD," | %22.16e\n",tmp);CHKERRQ(ierr);
535                   }
536                 }
537                 maps->num_reduced++;
538                 if (maps->num_reduced>=MAP_BF_SIZE) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "maps->num_reduced %d > %d",maps->num_reduced,MAP_BF_SIZE);
539               }
540               break;
541             }
542           }
543           // debug
544           for (d = 0; d < -numindices; ++d){
545             PetscReal tmp = 0;
546             ierr = PetscPrintf(PETSC_COMM_SELF,"\t%3D) %3D: ",d,indices[d]);CHKERRQ(ierr);
547             for (f = 0 ; f < numindices; ++f) {
548               ierr = PetscPrintf(PETSC_COMM_SELF," %12.5e",  PetscRealPart(elMat[d*numindices + f]));CHKERRQ(ierr);
549               tmp += PetscRealPart(elMat[d*numindices + f]);
550             }
551             ierr = PetscPrintf(PETSC_COMM_SELF," | %g\n",tmp);CHKERRQ(ierr);
552           }
553           // cleanup
554           ierr = DMPlexRestoreClosureIndices(plex, section, globsection, ej, PETSC_TRUE, &numindices, &indices, NULL, (PetscScalar **) &elMat);CHKERRQ(ierr);
555           if (elMat != valuesOrig) {ierr = DMRestoreWorkArray(plex, numindices*numindices, MPIU_SCALAR, &elMat);}
556         }
557       }
558     }
559     // allocate and copy point datamaps->gIdx[eidx][field][q] -- for CPU version of this code, for debugging
560     ierr = PetscMalloc(maps->num_reduced * sizeof *maps->c_maps, &maps->c_maps);CHKERRQ(ierr);
561     for (ej = 0; ej < maps->num_reduced; ++ej) {
562       for (q = 0; q < maps->num_face; ++q) {
563         maps->c_maps[ej][q].scale = pointMaps[ej][q].scale;
564         maps->c_maps[ej][q].gid = pointMaps[ej][q].gid;
565       }
566     }
567 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
568     if (ctx->deviceType == LANDAU_KOKKOS) {
569       ierr = LandauKokkosCreateMatMaps(maps, pointMaps,Nf,Nq);CHKERRQ(ierr); // imples Kokkos does
570     } // else could be CUDA
571 #endif
572 #if defined(PETSC_HAVE_CUDA)
573     if (ctx->deviceType == LANDAU_CUDA){
574       ierr = LandauCUDACreateMatMaps(maps, pointMaps,Nf,Nq);CHKERRQ(ierr);
575     }
576 #endif
577     /*  debug */
578     /* for (fieldA=0;fieldA<Nf;fieldA++) { */
579     /*   PetscPrintf(PETSC_COMM_SELF,"field %D:\n",fieldA); */
580     /*   for (eidx = 0 ; eidx < cEnd-cStart; ++eidx) { */
581     /*     PetscPrintf(PETSC_COMM_SELF,"\t elem %D: ",eidx); */
582     /*     for (q = 0; q < Nq; ++q) { */
583     /*       ierr = PetscPrintf(PETSC_COMM_SELF," %3d ",maps->gIdx[eidx][fieldA][q]);CHKERRQ(ierr); */
584     /*     } */
585     /*     PetscPrintf(PETSC_COMM_SELF,"\n"); */
586     /*   } */
587     /*   ierr = PetscPrintf(PETSC_COMM_SELF,"\n"); */
588     /* } */
589     /* for (eidx = 0 ; eidx < maps->num_reduced; ++eidx) { */
590     /*   ierr = PetscPrintf(PETSC_COMM_SELF,"%D: ",eidx+1); */
591     /*   for (q = 0; q < maps->num_face; ++q) { */
592     /*     ierr = PetscPrintf(PETSC_COMM_SELF," %3D , %g ",maps->c_maps[eidx][q].gid,maps->c_maps[eidx][q].scale);CHKERRQ(ierr); */
593     /*   } */
594     /*   ierr = PetscPrintf(PETSC_COMM_SELF,"\n"); */
595     /* } */
596     ierr = PetscLogEventEnd(ctx->events[2],0,0,0,0);CHKERRQ(ierr);
597   }
598   /* clean up */
599   ierr = PetscFree(invJ_a);CHKERRQ(ierr);
600   ierr = DMDestroy(&plex);CHKERRQ(ierr);
601   ierr = LandauPointDataDestroy(&IPData);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #if defined(LANDAU_ADD_BCS)
606 static void zero_bc(PetscInt dim, PetscInt Nf, PetscInt NfAux,
607                     const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[],
608                     const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[],
609                     PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar uexact[])
610 {
611   uexact[0] = 0;
612 }
613 #endif
614 
615 #define MATVEC2(__a,__x,__p) {int i,j; for (i=0.; i<2; i++) {__p[i] = 0; for (j=0.; j<2; j++) __p[i] += __a[i][j]*__x[j]; }}
616 static void CircleInflate(PetscReal r1, PetscReal r2, PetscReal r0, PetscInt num_sections, PetscReal x, PetscReal y,
617                           PetscReal *outX, PetscReal *outY)
618 {
619   PetscReal rr = PetscSqrtReal(x*x + y*y), outfact, efact;
620   if (rr < r1 + PETSC_SQRT_MACHINE_EPSILON) {
621     *outX = x; *outY = y;
622   } else {
623     const PetscReal xy[2] = {x,y}, sinphi=y/rr, cosphi=x/rr;
624     PetscReal       cth,sth,xyprime[2],Rth[2][2],rotcos,newrr;
625     if (num_sections==2) {
626       rotcos = 0.70710678118654;
627       outfact = 1.5; efact = 2.5;
628       /* rotate normalized vector into [-pi/4,pi/4) */
629       if (sinphi >= 0.) {         /* top cell, -pi/2 */
630         cth = 0.707106781186548; sth = -0.707106781186548;
631       } else {                    /* bottom cell -pi/8 */
632         cth = 0.707106781186548; sth = .707106781186548;
633       }
634     } else if (num_sections==3) {
635       rotcos = 0.86602540378443;
636       outfact = 1.5; efact = 2.5;
637       /* rotate normalized vector into [-pi/6,pi/6) */
638       if (sinphi >= 0.5) {         /* top cell, -pi/3 */
639         cth = 0.5; sth = -0.866025403784439;
640       } else if (sinphi >= -.5) {  /* mid cell 0 */
641         cth = 1.; sth = .0;
642       } else { /* bottom cell +pi/3 */
643         cth = 0.5; sth = 0.866025403784439;
644       }
645     } else if (num_sections==4) {
646       rotcos = 0.9238795325112;
647       outfact = 1.5; efact = 3;
648       /* rotate normalized vector into [-pi/8,pi/8) */
649       if (sinphi >= 0.707106781186548) {         /* top cell, -3pi/8 */
650         cth = 0.38268343236509; sth = -0.923879532511287;
651       } else if (sinphi >= 0.) {                 /* mid top cell -pi/8 */
652         cth = 0.923879532511287; sth = -.38268343236509;
653       } else if (sinphi >= -0.707106781186548) { /* mid bottom cell + pi/8 */
654         cth = 0.923879532511287; sth = 0.38268343236509;
655       } else {                                   /* bottom cell + 3pi/8 */
656         cth = 0.38268343236509; sth = .923879532511287;
657       }
658     } else {
659       cth = 0.; sth = 0.; rotcos = 0; efact = 0;
660     }
661     Rth[0][0] = cth; Rth[0][1] =-sth;
662     Rth[1][0] = sth; Rth[1][1] = cth;
663     MATVEC2(Rth,xy,xyprime);
664     if (num_sections==2) {
665       newrr = xyprime[0]/rotcos;
666     } else {
667       PetscReal newcosphi=xyprime[0]/rr, rin = r1, rout = rr - rin;
668       PetscReal routmax = r0*rotcos/newcosphi - rin, nroutmax = r0 - rin, routfrac = rout/routmax;
669       newrr = rin + routfrac*nroutmax;
670     }
671     *outX = cosphi*newrr; *outY = sinphi*newrr;
672     /* grade */
673     PetscReal fact,tt,rs,re, rr = PetscSqrtReal(PetscSqr(*outX) + PetscSqr(*outY));
674     if (rr > r2) { rs = r2; re = r0; fact = outfact;} /* outer zone */
675     else {         rs = r1; re = r2; fact = efact;} /* electron zone */
676     tt = (rs + PetscPowReal((rr - rs)/(re - rs),fact) * (re-rs)) / rr;
677     *outX *= tt;
678     *outY *= tt;
679   }
680 }
681 
682 static PetscErrorCode GeometryDMLandau(DM base, PetscInt point, PetscInt dim, const PetscReal abc[], PetscReal xyz[], void *a_ctx)
683 {
684   LandauCtx   *ctx = (LandauCtx*)a_ctx;
685   PetscReal   r = abc[0], z = abc[1];
686   if (ctx->inflate) {
687     PetscReal absR, absZ;
688     absR = PetscAbsReal(r);
689     absZ = PetscAbsReal(z);
690     CircleInflate(ctx->i_radius,ctx->e_radius,ctx->radius,ctx->num_sections,absR,absZ,&absR,&absZ);
691     r = (r > 0) ? absR : -absR;
692     z = (z > 0) ? absZ : -absZ;
693   }
694   xyz[0] = r;
695   xyz[1] = z;
696   if (dim==3) xyz[2] = abc[2];
697 
698   PetscFunctionReturn(0);
699 }
700 
701 static PetscErrorCode ErrorIndicator_Simple(PetscInt dim, PetscReal volume, PetscReal x[], PetscInt Nc, const PetscInt Nf[], const PetscScalar u[], const PetscScalar u_x[], PetscReal *error, void *actx)
702 {
703   PetscReal err = 0.0;
704   PetscInt  f = *(PetscInt*)actx, j;
705   PetscFunctionBegin;
706   for (j = 0; j < dim; ++j) {
707     err += PetscSqr(PetscRealPart(u_x[f*dim+j]));
708   }
709   err = PetscRealPart(u[f]); /* just use rho */
710   *error = volume * err; /* * (ctx->axisymmetric ? 2.*PETSC_PI * r : 1); */
711   PetscFunctionReturn(0);
712 }
713 
714 static PetscErrorCode LandauDMCreateVMesh(MPI_Comm comm, const PetscInt dim, const char prefix[], LandauCtx *ctx, DM *dm)
715 {
716   PetscErrorCode ierr;
717   PetscReal      radius = ctx->radius;
718   size_t         len;
719   char           fname[128] = ""; /* we can add a file if we want */
720 
721   PetscFunctionBegin;
722   /* create DM */
723   ierr = PetscStrlen(fname, &len);CHKERRQ(ierr);
724   if (len) {
725     PetscInt dim2;
726     ierr = DMPlexCreateFromFile(comm, fname, ctx->interpolate, dm);CHKERRQ(ierr);
727     ierr = DMGetDimension(*dm, &dim2);CHKERRQ(ierr);
728     if (LANDAU_DIM != dim2) SETERRQ2(comm, PETSC_ERR_PLIB, "dim %D != LANDAU_DIM %d",dim2,LANDAU_DIM);
729   } else {    /* p4est, quads */
730     /* Create plex mesh of Landau domain */
731     if (!ctx->sphere) {
732       PetscInt       cells[] = {2,2,2};
733       PetscReal      lo[] = {-radius,-radius,-radius}, hi[] = {radius,radius,radius};
734       DMBoundaryType periodicity[3] = {DM_BOUNDARY_NONE, dim==2 ? DM_BOUNDARY_NONE : DM_BOUNDARY_NONE, DM_BOUNDARY_NONE};
735       if (dim==2) { lo[0] = 0; cells[0] = 1; }
736       ierr = DMPlexCreateBoxMesh(comm, dim, PETSC_FALSE, cells, lo, hi, periodicity, PETSC_TRUE, dm);CHKERRQ(ierr);
737       ierr = DMLocalizeCoordinates(*dm);CHKERRQ(ierr); /* needed for periodic */
738       if (dim==3) {ierr = PetscObjectSetName((PetscObject) *dm, "cube");CHKERRQ(ierr);}
739       else {ierr = PetscObjectSetName((PetscObject) *dm, "half-plane");CHKERRQ(ierr);}
740     } else if (dim==2) {
741       PetscInt       numCells,cells[16][4],i,j;
742       PetscInt       numVerts;
743       PetscReal      inner_radius1 = ctx->i_radius, inner_radius2 = ctx->e_radius;
744       PetscReal      *flatCoords = NULL;
745       PetscInt       *flatCells = NULL, *pcell;
746       if (ctx->num_sections==2) {
747 #if 1
748         numCells = 5;
749         numVerts = 10;
750         int cells2[][4] = { {0,1,4,3},
751                             {1,2,5,4},
752                             {3,4,7,6},
753                             {4,5,8,7},
754                             {6,7,8,9} };
755         for (i = 0; i < numCells; i++) for (j = 0; j < 4; j++) cells[i][j] = cells2[i][j];
756         ierr = PetscMalloc2(numVerts * 2, &flatCoords, numCells * 4, &flatCells);CHKERRQ(ierr);
757         {
758           PetscReal (*coords)[2] = (PetscReal (*) [2]) flatCoords;
759           for (j = 0; j < numVerts-1; j++) {
760             PetscReal z, r, theta = -PETSC_PI/2 + (j%3) * PETSC_PI/2;
761             PetscReal rad = (j >= 6) ? inner_radius1 : (j >= 3) ? inner_radius2 : ctx->radius;
762             z = rad * PetscSinReal(theta);
763             coords[j][1] = z;
764             r = rad * PetscCosReal(theta);
765             coords[j][0] = r;
766           }
767           coords[numVerts-1][0] = coords[numVerts-1][1] = 0;
768         }
769 #else
770         numCells = 4;
771         numVerts = 8;
772         static int     cells2[][4] = {{0,1,2,3},
773                                      {4,5,1,0},
774                                      {5,6,2,1},
775                                      {6,7,3,2}};
776         for (i = 0; i < numCells; i++) for (j = 0; j < 4; j++) cells[i][j] = cells2[i][j];
777         ierr = loc2(numVerts * 2, &flatCoords, numCells * 4, &flatCells);CHKERRQ(ierr);
778         {
779           PetscReal (*coords)[2] = (PetscReal (*) [2]) flatCoords;
780           PetscInt j;
781           for (j = 0; j < 8; j++) {
782             PetscReal z, r;
783             PetscReal theta = -PETSC_PI/2 + (j%4) * PETSC_PI/3.;
784             PetscReal rad = ctx->radius * ((j < 4) ? 0.5 : 1.0);
785             z = rad * PetscSinReal(theta);
786             coords[j][1] = z;
787             r = rad * PetscCosReal(theta);
788             coords[j][0] = r;
789           }
790         }
791 #endif
792       } else if (ctx->num_sections==3) {
793         numCells = 7;
794         numVerts = 12;
795         int cells2[][4] = { {0,1,5,4},
796                             {1,2,6,5},
797                             {2,3,7,6},
798                             {4,5,9,8},
799                             {5,6,10,9},
800                             {6,7,11,10},
801                             {8,9,10,11} };
802         for (i = 0; i < numCells; i++) for (j = 0; j < 4; j++) cells[i][j] = cells2[i][j];
803         ierr = PetscMalloc2(numVerts * 2, &flatCoords, numCells * 4, &flatCells);CHKERRQ(ierr);
804         {
805           PetscReal (*coords)[2] = (PetscReal (*) [2]) flatCoords;
806           for (j = 0; j < numVerts; j++) {
807             PetscReal z, r, theta = -PETSC_PI/2 + (j%4) * PETSC_PI/3;
808             PetscReal rad = (j >= 8) ? inner_radius1 : (j >= 4) ? inner_radius2 : ctx->radius;
809             z = rad * PetscSinReal(theta);
810             coords[j][1] = z;
811             r = rad * PetscCosReal(theta);
812             coords[j][0] = r;
813           }
814         }
815       } else if (ctx->num_sections==4) {
816         numCells = 10;
817         numVerts = 16;
818         int cells2[][4] = { {0,1,6,5},
819                             {1,2,7,6},
820                             {2,3,8,7},
821                             {3,4,9,8},
822                             {5,6,11,10},
823                             {6,7,12,11},
824                             {7,8,13,12},
825                             {8,9,14,13},
826                             {10,11,12,15},
827                             {12,13,14,15}};
828         for (i = 0; i < numCells; i++) for (j = 0; j < 4; j++) cells[i][j] = cells2[i][j];
829         ierr = PetscMalloc2(numVerts * 2, &flatCoords, numCells * 4, &flatCells);CHKERRQ(ierr);
830         {
831           PetscReal (*coords)[2] = (PetscReal (*) [2]) flatCoords;
832           for (j = 0; j < numVerts-1; j++) {
833             PetscReal z, r, theta = -PETSC_PI/2 + (j%5) * PETSC_PI/4;
834             PetscReal rad = (j >= 10) ? inner_radius1 : (j >= 5) ? inner_radius2 : ctx->radius;
835             z = rad * PetscSinReal(theta);
836             coords[j][1] = z;
837             r = rad * PetscCosReal(theta);
838             coords[j][0] = r;
839           }
840           coords[numVerts-1][0] = coords[numVerts-1][1] = 0;
841         }
842       } else {
843         numCells = 0;
844         numVerts = 0;
845       }
846       for (j = 0, pcell = flatCells; j < numCells; j++, pcell += 4) {
847         pcell[0] = cells[j][0]; pcell[1] = cells[j][1];
848         pcell[2] = cells[j][2]; pcell[3] = cells[j][3];
849       }
850       ierr = DMPlexCreateFromCellListPetsc(comm,2,numCells,numVerts,4,ctx->interpolate,flatCells,2,flatCoords,dm);CHKERRQ(ierr);
851       ierr = PetscFree2(flatCoords,flatCells);CHKERRQ(ierr);
852       ierr = PetscObjectSetName((PetscObject) *dm, "semi-circle");CHKERRQ(ierr);
853     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Velocity space meshes does not support cubed sphere");
854   }
855   ierr = PetscObjectSetOptionsPrefix((PetscObject)*dm,prefix);CHKERRQ(ierr);
856 
857   ierr = DMSetFromOptions(*dm);CHKERRQ(ierr); /* Plex refine */
858 
859   { /* p4est? */
860     char      convType[256];
861     PetscBool flg;
862     ierr = PetscOptionsBegin(PETSC_COMM_WORLD, prefix, "Mesh conversion options", "DMPLEX");CHKERRQ(ierr);
863     ierr = PetscOptionsFList("-dm_landau_type","Convert DMPlex to another format (should not be Plex!)","plexland.c",DMList,DMPLEX,convType,256,&flg);CHKERRQ(ierr);
864     ierr = PetscOptionsEnd();
865     if (flg) {
866       DM dmforest;
867       ierr = DMConvert(*dm,convType,&dmforest);CHKERRQ(ierr);
868       if (dmforest) {
869         PetscBool isForest;
870         if (dmforest->prealloc_only != (*dm)->prealloc_only) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"plex->prealloc_only != dm->prealloc_only");
871         ierr = PetscObjectSetOptionsPrefix((PetscObject)dmforest,prefix);CHKERRQ(ierr);
872         ierr = DMIsForest(dmforest,&isForest);CHKERRQ(ierr);
873         if (isForest) {
874           if (ctx->sphere && ctx->inflate) {
875             ierr = DMForestSetBaseCoordinateMapping(dmforest,GeometryDMLandau,ctx);CHKERRQ(ierr);
876           }
877           if (dmforest->prealloc_only != (*dm)->prealloc_only) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"plex->prealloc_only != dm->prealloc_only");
878           ierr = DMDestroy(dm);CHKERRQ(ierr);
879           *dm = dmforest;
880           ctx->errorIndicator = ErrorIndicator_Simple; /* flag for Forest */
881         } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Converted to non Forest?");
882       } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Convert failed?");
883     }
884   }
885   ierr = PetscObjectSetName((PetscObject) *dm, "Mesh");CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 static PetscErrorCode SetupDS(DM dm, PetscInt dim, LandauCtx *ctx)
890 {
891   PetscErrorCode  ierr;
892   PetscInt        ii;
893   PetscFunctionBegin;
894   for (ii=0;ii<ctx->num_species;ii++) {
895     char     buf[256];
896     if (ii==0) ierr = PetscSNPrintf(buf, 256, "e");
897     else {ierr = PetscSNPrintf(buf, 256, "i%D", ii);CHKERRQ(ierr);}
898     /* Setup Discretization - FEM */
899     ierr = PetscFECreateDefault(PetscObjectComm((PetscObject) dm), dim, 1, PETSC_FALSE, NULL, PETSC_DECIDE, &ctx->fe[ii]);CHKERRQ(ierr);
900     ierr = PetscObjectSetName((PetscObject) ctx->fe[ii], buf);CHKERRQ(ierr);
901     ierr = DMSetField(dm, ii, NULL, (PetscObject) ctx->fe[ii]);CHKERRQ(ierr);
902   }
903   ierr = DMCreateDS(dm);CHKERRQ(ierr);
904   if (1) {
905     PetscInt        ii;
906     PetscSection    section;
907     ierr = DMGetSection(dm, &section);CHKERRQ(ierr);
908     for (ii=0;ii<ctx->num_species;ii++){
909       char buf[256];
910       if (ii==0) ierr = PetscSNPrintf(buf, 256, "se");
911       else ierr = PetscSNPrintf(buf, 256, "si%D", ii);
912       ierr = PetscSectionSetComponentName(section, ii, 0, buf);CHKERRQ(ierr);
913     }
914   }
915   PetscFunctionReturn(0);
916 }
917 
918 /* Define a Maxwellian function for testing out the operator. */
919 
920  /* Using cartesian velocity space coordinates, the particle */
921  /* density, [1/m^3], is defined according to */
922 
923  /* $$ n=\int_{R^3} dv^3 \left(\frac{m}{2\pi T}\right)^{3/2}\exp [- mv^2/(2T)] $$ */
924 
925  /* Using some constant, c, we normalize the velocity vector into a */
926  /* dimensionless variable according to v=c*x. Thus the density, $n$, becomes */
927 
928  /* $$ n=\int_{R^3} dx^3 \left(\frac{mc^2}{2\pi T}\right)^{3/2}\exp [- mc^2/(2T)*x^2] $$ */
929 
930  /* Defining $\theta=2T/mc^2$, we thus find that the probability density */
931  /* for finding the particle within the interval in a box dx^3 around x is */
932 
933  /* f(x;\theta)=\left(\frac{1}{\pi\theta}\right)^{3/2} \exp [ -x^2/\theta ] */
934 
935 typedef struct {
936   LandauCtx   *ctx;
937   PetscReal kT_m;
938   PetscReal n;
939   PetscReal shift;
940 } MaxwellianCtx;
941 
942 static PetscErrorCode maxwellian(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nf_dummy, PetscScalar *u, void *actx)
943 {
944   MaxwellianCtx *mctx = (MaxwellianCtx*)actx;
945   LandauCtx     *ctx = mctx->ctx;
946   PetscInt      i;
947   PetscReal     v2 = 0, theta = 2*mctx->kT_m/(ctx->v_0*ctx->v_0); /* theta = 2kT/mc^2 */
948   PetscFunctionBegin;
949   /* compute the exponents, v^2 */
950   for (i = 0; i < dim; ++i) v2 += x[i]*x[i];
951   /* evaluate the Maxwellian */
952   u[0] = mctx->n*PetscPowReal(PETSC_PI*theta,-1.5)*(PetscExpReal(-v2/theta));
953   if (mctx->shift!=0.) {
954     v2 = 0;
955     for (i = 0; i < dim-1; ++i) v2 += x[i]*x[i];
956     v2 += (x[dim-1]-mctx->shift)*(x[dim-1]-mctx->shift);
957     /* evaluate the shifted Maxwellian */
958     u[0] += mctx->n*PetscPowReal(PETSC_PI*theta,-1.5)*(PetscExpReal(-v2/theta));
959   }
960   PetscFunctionReturn(0);
961 }
962 
963 /*@
964  LandauAddMaxwellians - Add a Maxwellian distribution to a state
965 
966  Collective on X
967 
968  Input Parameters:
969  .   dm - The mesh
970  +   time - Current time
971  -   temps - Temperatures of each species
972  .   ns - Number density of each species
973  +   actx - Landau context
974 
975  Output Parameter:
976  .   X  - The state
977 
978  Level: beginner
979 
980 .keywords: mesh
981 .seealso: LandauCreateVelocitySpace()
982 @*/
983 PetscErrorCode LandauAddMaxwellians(DM dm, Vec X, PetscReal time, PetscReal temps[], PetscReal ns[], void *actx)
984 {
985   LandauCtx      *ctx = (LandauCtx*)actx;
986   PetscErrorCode (*initu[LANDAU_MAX_SPECIES])(PetscInt, PetscReal, const PetscReal [], PetscInt, PetscScalar [], void *);
987   PetscErrorCode ierr,ii;
988   PetscInt       dim;
989   MaxwellianCtx  *mctxs[LANDAU_MAX_SPECIES], data[LANDAU_MAX_SPECIES];
990 
991   PetscFunctionBegin;
992   ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
993   if (!ctx) { ierr = DMGetApplicationContext(dm, &ctx);CHKERRQ(ierr); }
994   for (ii=0;ii<ctx->num_species;ii++) {
995     mctxs[ii] = &data[ii];
996     data[ii].ctx = ctx;
997     data[ii].kT_m = ctx->k*temps[ii]/ctx->masses[ii]; /* kT/m */
998     data[ii].n = ns[ii];
999     initu[ii] = maxwellian;
1000     data[ii].shift = 0;
1001   }
1002   data[0].shift = ctx->electronShift;
1003   /* need to make ADD_ALL_VALUES work - TODO */
1004   ierr = DMProjectFunction(dm, time, initu, (void**)mctxs, INSERT_ALL_VALUES, X);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 /*
1009  LandauSetInitialCondition - Addes Maxwellians with context
1010 
1011 Collective on X
1012 
1013  Input Parameters:
1014  .   dm - The mesh
1015  +   actx - Landau context with T and n
1016 
1017  Output Parameter:
1018  .   X  - The state
1019 
1020  Level: beginner
1021 
1022 .keywords: mesh
1023 .seealso: LandauCreateVelocitySpace(), LandauAddMaxwellians()
1024 */
1025 static PetscErrorCode LandauSetInitialCondition(DM dm, Vec X, void *actx)
1026 {
1027   LandauCtx        *ctx = (LandauCtx*)actx;
1028   PetscErrorCode ierr;
1029   PetscFunctionBegin;
1030   if (!ctx) { ierr = DMGetApplicationContext(dm, &ctx);CHKERRQ(ierr); }
1031   ierr = VecZeroEntries(X);CHKERRQ(ierr);
1032   ierr = LandauAddMaxwellians(dm, X, 0.0, ctx->thermal_temps, ctx->n, ctx);CHKERRQ(ierr);
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 static PetscErrorCode adaptToleranceFEM(PetscFE fem, Vec sol, PetscReal refineTol[], PetscReal coarsenTol[], PetscInt type, LandauCtx *ctx, DM *newDM)
1037 {
1038   DM               dm, plex, adaptedDM = NULL;
1039   PetscDS          prob;
1040   PetscBool        isForest;
1041   PetscQuadrature  quad;
1042   PetscInt         Nq, *Nb, cStart, cEnd, c, dim, qj, k;
1043   DMLabel          adaptLabel = NULL;
1044   PetscErrorCode   ierr;
1045 
1046   PetscFunctionBegin;
1047   ierr = VecGetDM(sol, &dm);CHKERRQ(ierr);
1048   ierr = DMCreateDS(dm);CHKERRQ(ierr);
1049   ierr = DMGetDS(dm, &prob);CHKERRQ(ierr);
1050   ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
1051   ierr = DMIsForest(dm, &isForest);CHKERRQ(ierr);
1052   ierr = DMConvert(dm, DMPLEX, &plex);CHKERRQ(ierr);
1053   ierr = DMPlexGetHeightStratum(plex,0,&cStart,&cEnd);CHKERRQ(ierr);
1054   ierr = DMLabelCreate(PETSC_COMM_SELF,"adapt",&adaptLabel);CHKERRQ(ierr);
1055   ierr = PetscFEGetQuadrature(fem, &quad);CHKERRQ(ierr);
1056   ierr = PetscQuadratureGetData(quad, NULL, NULL, &Nq, NULL, NULL);CHKERRQ(ierr);
1057   if (Nq >LANDAU_MAX_NQ) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"Order too high. Nq = %D > LANDAU_MAX_NQ (%D)",Nq,LANDAU_MAX_NQ);
1058   ierr = PetscDSGetDimensions(prob, &Nb);CHKERRQ(ierr);
1059   if (type==4) {
1060     for (c = cStart; c < cEnd; c++) {
1061       ierr = DMLabelSetValue(adaptLabel, c, DM_ADAPT_REFINE);CHKERRQ(ierr);
1062     }
1063     ierr = PetscInfo1(sol, "Phase:%s: Uniform refinement\n","adaptToleranceFEM");CHKERRQ(ierr);
1064   } else if (type==2) {
1065     PetscInt  rCellIdx[8], eCellIdx[64], iCellIdx[64], eMaxIdx = -1, iMaxIdx = -1, nr = 0, nrmax = (dim==3) ? 8 : 2;
1066     PetscReal minRad = PETSC_INFINITY, r, eMinRad = PETSC_INFINITY, iMinRad = PETSC_INFINITY;
1067     for (c = 0; c < 64; c++) { eCellIdx[c] = iCellIdx[c] = -1; }
1068     for (c = cStart; c < cEnd; c++) {
1069       PetscReal    tt, v0[LANDAU_MAX_NQ*3], detJ[LANDAU_MAX_NQ];
1070       ierr = DMPlexComputeCellGeometryFEM(plex, c, quad, v0, NULL, NULL, detJ);CHKERRQ(ierr);
1071       for (qj = 0; qj < Nq; ++qj) {
1072         tt = PetscSqr(v0[dim*qj+0]) + PetscSqr(v0[dim*qj+1]) + PetscSqr(((dim==3) ? v0[dim*qj+2] : 0));
1073         r = PetscSqrtReal(tt);
1074         if (r < minRad - PETSC_SQRT_MACHINE_EPSILON*10.) {
1075           minRad = r;
1076           nr = 0;
1077           rCellIdx[nr++]= c;
1078           ierr = PetscInfo4(sol, "\t\tPhase: adaptToleranceFEM Found first inner r=%e, cell %D, qp %D/%D\n", r, c, qj+1, Nq);CHKERRQ(ierr);
1079         } else if ((r-minRad) < PETSC_SQRT_MACHINE_EPSILON*100. && nr < nrmax) {
1080           for (k=0;k<nr;k++) if (c == rCellIdx[k]) break;
1081           if (k==nr) {
1082             rCellIdx[nr++]= c;
1083             ierr = PetscInfo5(sol, "\t\t\tPhase: adaptToleranceFEM Found another inner r=%e, cell %D, qp %D/%D, d=%e\n", r, c, qj+1, Nq, r-minRad);CHKERRQ(ierr);
1084           }
1085         }
1086         if (ctx->sphere) {
1087           if ((tt=r-ctx->e_radius) > 0) {
1088             PetscInfo2(sol, "\t\t\t %D cell r=%g\n",c,tt);
1089             if (tt < eMinRad - PETSC_SQRT_MACHINE_EPSILON*100.) {
1090               eMinRad = tt;
1091               eMaxIdx = 0;
1092               eCellIdx[eMaxIdx++] = c;
1093             } else if (eMaxIdx > 0 && (tt-eMinRad) <= PETSC_SQRT_MACHINE_EPSILON && c != eCellIdx[eMaxIdx-1]) {
1094               eCellIdx[eMaxIdx++] = c;
1095             }
1096           }
1097           if ((tt=r-ctx->i_radius) > 0) {
1098             if (tt < iMinRad - 1.e-5) {
1099               iMinRad = tt;
1100               iMaxIdx = 0;
1101               iCellIdx[iMaxIdx++] = c;
1102             } else if (iMaxIdx > 0 && (tt-iMinRad) <= PETSC_SQRT_MACHINE_EPSILON && c != iCellIdx[iMaxIdx-1]) {
1103               iCellIdx[iMaxIdx++] = c;
1104             }
1105           }
1106         }
1107       }
1108     }
1109     for (k=0;k<nr;k++) {
1110       ierr = DMLabelSetValue(adaptLabel, rCellIdx[k], DM_ADAPT_REFINE);CHKERRQ(ierr);
1111     }
1112     if (ctx->sphere) {
1113       for (c = 0; c < eMaxIdx; c++) {
1114         ierr = DMLabelSetValue(adaptLabel, eCellIdx[c], DM_ADAPT_REFINE);CHKERRQ(ierr);
1115         ierr = PetscInfo3(sol, "\t\tPhase:%s: refine sphere e cell %D r=%g\n","adaptToleranceFEM",eCellIdx[c],eMinRad);
1116       }
1117       for (c = 0; c < iMaxIdx; c++) {
1118         ierr = DMLabelSetValue(adaptLabel, iCellIdx[c], DM_ADAPT_REFINE);CHKERRQ(ierr);
1119         ierr = PetscInfo3(sol, "\t\tPhase:%s: refine sphere i cell %D r=%g\n","adaptToleranceFEM",iCellIdx[c],iMinRad);
1120       }
1121     }
1122     ierr = PetscInfo4(sol, "Phase:%s: Adaptive refine origin cells %D,%D r=%g\n","adaptToleranceFEM",rCellIdx[0],rCellIdx[1],minRad);
1123   } else if (type==0 || type==1 || type==3) { /* refine along r=0 axis */
1124     PetscScalar  *coef = NULL;
1125     Vec          coords;
1126     PetscInt     csize,Nv,d,nz;
1127     DM           cdm;
1128     PetscSection cs;
1129     ierr = DMGetCoordinatesLocal(dm, &coords);CHKERRQ(ierr);
1130     ierr = DMGetCoordinateDM(dm, &cdm);CHKERRQ(ierr);
1131     ierr = DMGetLocalSection(cdm, &cs);CHKERRQ(ierr);
1132     for (c = cStart; c < cEnd; c++) {
1133       PetscInt doit = 0, outside = 0;
1134       ierr = DMPlexVecGetClosure(cdm, cs, coords, c, &csize, &coef);CHKERRQ(ierr);
1135       Nv = csize/dim;
1136       for (nz = d = 0; d < Nv; d++) {
1137         PetscReal z = PetscRealPart(coef[d*dim + (dim-1)]), x = PetscSqr(PetscRealPart(coef[d*dim + 0])) + ((dim==3) ? PetscSqr(PetscRealPart(coef[d*dim + 1])) : 0);
1138         x = PetscSqrtReal(x);
1139         if (x < PETSC_MACHINE_EPSILON*10. && PetscAbsReal(z)<PETSC_MACHINE_EPSILON*10.) doit = 1;             /* refine origin */
1140         else if (type==0 && (z < -PETSC_MACHINE_EPSILON*10. || z > ctx->re_radius+PETSC_MACHINE_EPSILON*10.)) outside++;   /* first pass don't refine bottom */
1141         else if (type==1 && (z > ctx->vperp0_radius1 || z < -ctx->vperp0_radius1)) outside++; /* don't refine outside electron refine radius */
1142         else if (type==3 && (z > ctx->vperp0_radius2 || z < -ctx->vperp0_radius2)) outside++; /* don't refine outside ion refine radius */
1143         if (x < PETSC_MACHINE_EPSILON*10.) nz++;
1144       }
1145       ierr = DMPlexVecRestoreClosure(cdm, cs, coords, c, &csize, &coef);CHKERRQ(ierr);
1146       if (doit || (outside<Nv && nz)) {
1147         ierr = DMLabelSetValue(adaptLabel, c, DM_ADAPT_REFINE);CHKERRQ(ierr);
1148       }
1149     }
1150     ierr = PetscInfo1(sol, "Phase:%s: RE refinement\n","adaptToleranceFEM");
1151   }
1152   /* ierr = VecDestroy(&locX);CHKERRQ(ierr); */
1153   ierr = DMDestroy(&plex);CHKERRQ(ierr);
1154   ierr = DMAdaptLabel(dm, adaptLabel, &adaptedDM);CHKERRQ(ierr);
1155   ierr = DMLabelDestroy(&adaptLabel);CHKERRQ(ierr);
1156   *newDM = adaptedDM;
1157   if (adaptedDM) {
1158     if (isForest) {
1159       ierr = DMForestSetAdaptivityForest(adaptedDM,NULL);CHKERRQ(ierr);
1160     }
1161     ierr = DMConvert(adaptedDM, DMPLEX, &plex);CHKERRQ(ierr);
1162     ierr = DMPlexGetHeightStratum(plex,0,&cStart,&cEnd);CHKERRQ(ierr);
1163     ierr = PetscInfo2(sol, "\tPhase: adaptToleranceFEM: %D cells, %d total quadrature points\n",cEnd-cStart,Nq*(cEnd-cStart));CHKERRQ(ierr);
1164     ierr = DMDestroy(&plex);CHKERRQ(ierr);
1165   }
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 static PetscErrorCode adapt(DM *dm, LandauCtx *ctx, Vec *uu)
1170 {
1171   PetscErrorCode  ierr;
1172   PetscInt        type, limits[5] = {ctx->numRERefine,ctx->nZRefine1,ctx->maxRefIts,ctx->nZRefine2,ctx->postAMRRefine};
1173   PetscInt        adaptIter;
1174 
1175   PetscFunctionBegin;
1176   for (type=0;type<5;type++) {
1177     for (adaptIter = 0; adaptIter<limits[type];adaptIter++) {
1178       DM  dmNew = NULL;
1179       ierr = adaptToleranceFEM(ctx->fe[0], *uu, ctx->refineTol, ctx->coarsenTol, type, ctx, &dmNew);CHKERRQ(ierr);
1180       if (!dmNew) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"should not happen");
1181       else {
1182         ierr = DMDestroy(dm);CHKERRQ(ierr);
1183         ierr = VecDestroy(uu);CHKERRQ(ierr);
1184         ierr = DMCreateGlobalVector(dmNew,uu);CHKERRQ(ierr);
1185         ierr = PetscObjectSetName((PetscObject) *uu, "u");CHKERRQ(ierr);
1186         ierr = LandauSetInitialCondition(dmNew, *uu, ctx);CHKERRQ(ierr);
1187         *dm = dmNew;
1188       }
1189     }
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 static PetscErrorCode ProcessOptions(LandauCtx *ctx, const char prefix[])
1195 {
1196   PetscErrorCode    ierr;
1197   PetscBool         flg, sph_flg;
1198   PetscInt          ii,nt,nm,nc;
1199   DM                dummy;
1200 
1201   PetscFunctionBegin;
1202   ierr = DMCreate(PETSC_COMM_WORLD,&dummy);CHKERRQ(ierr);
1203   /* get options - initialize context */
1204   ctx->normJ = 0;
1205   ctx->verbose = 1;
1206   ctx->interpolate = PETSC_TRUE;
1207   ctx->gpu_assembly = PETSC_TRUE;
1208   ctx->sphere = PETSC_FALSE;
1209   ctx->inflate = PETSC_FALSE;
1210   ctx->electronShift = 0;
1211   ctx->errorIndicator = NULL;
1212   ctx->radius = 5.; /* electron thermal radius (velocity) */
1213   ctx->re_radius = 0.;
1214   ctx->vperp0_radius1 = 0;
1215   ctx->vperp0_radius2 = 0;
1216   ctx->e_radius = .1;
1217   ctx->i_radius = .01;
1218   ctx->maxRefIts = 5;
1219   ctx->postAMRRefine = 0;
1220   ctx->nZRefine1 = 0;
1221   ctx->nZRefine2 = 0;
1222   ctx->numRERefine = 0;
1223   ctx->num_sections = 3; /* 2, 3 or 4 */
1224   /* species - [0] electrons, [1] one ion species eg, duetarium, [2] heavy impurity ion, ... */
1225   ctx->charges[0] = -1;  /* electron charge (MKS) */
1226   ctx->masses[0] = 1/1835.5; /* temporary value in proton mass */
1227   ctx->n[0] = 1;
1228   ctx->thermal_temps[0] = 1;
1229   /* constants, etc. */
1230   ctx->epsilon0 = 8.8542e-12; /* permittivity of free space (MKS) F/m */
1231   ctx->k = 1.38064852e-23; /* Boltzmann constant (MKS) J/K */
1232   ctx->lnLam = 10;         /* cross section ratio large - small angle collisions */
1233   ctx->n_0 = 1.e20;        /* typical plasma n, but could set it to 1 */
1234   ctx->Ez = 0;
1235   ctx->v_0 = 1; /* in electron thermal velocity */
1236   ctx->subThreadBlockSize = 1; /* for device and maybe OMP */
1237   ierr = PetscOptionsBegin(PETSC_COMM_WORLD, prefix, "Options for Fokker-Plank-Landau collision operator", "none");CHKERRQ(ierr);
1238   {
1239     char opstring[256];
1240 #if defined(PETSC_HAVE_KOKKOS)
1241     ctx->deviceType = LANDAU_KOKKOS;
1242     ierr = PetscStrcpy(opstring,"kokkos");CHKERRQ(ierr);
1243 #if defined(PETSC_HAVE_CUDA)
1244     ctx->subThreadBlockSize = 8;
1245 #else
1246     ctx->subThreadBlockSize = 0;
1247 #endif
1248 #elif defined(PETSC_HAVE_CUDA)
1249     ctx->deviceType = LANDAU_CUDA;
1250     ierr = PetscStrcpy(opstring,"cuda");CHKERRQ(ierr);
1251 #else
1252     ctx->deviceType = LANDAU_CPU;
1253     ierr = PetscStrcpy(opstring,"cpu");CHKERRQ(ierr);
1254     ctx->subThreadBlockSize = 0;
1255 #endif
1256     ierr = PetscOptionsString("-dm_landau_device_type","Use kernels on 'cpu', 'cuda', or 'kokkos'","plexland.c",opstring,opstring,256,NULL);CHKERRQ(ierr);
1257     ierr = PetscStrcmp("cpu",opstring,&flg);CHKERRQ(ierr);
1258     if (flg) {
1259       ctx->deviceType = LANDAU_CPU;
1260       ctx->subThreadBlockSize = 0;
1261     } else {
1262       ierr = PetscStrcmp("cuda",opstring,&flg);CHKERRQ(ierr);
1263       if (flg) {
1264         ctx->deviceType = LANDAU_CUDA;
1265         ctx->subThreadBlockSize = 0;
1266       } else {
1267         ierr = PetscStrcmp("kokkos",opstring,&flg);CHKERRQ(ierr);
1268         if (flg) ctx->deviceType = LANDAU_KOKKOS;
1269         else SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"-dm_landau_device_type %s",opstring);
1270       }
1271     }
1272   }
1273   ierr = PetscOptionsBool("-dm_landau_gpu_assembly", "Assemble Jacobian on GPU", "plexland.c", ctx->gpu_assembly, &ctx->gpu_assembly, NULL);CHKERRQ(ierr);
1274   ierr = PetscOptionsReal("-dm_landau_electron_shift","Shift in thermal velocity of electrons","none",ctx->electronShift,&ctx->electronShift, NULL);CHKERRQ(ierr);
1275   ierr = PetscOptionsBool("-dm_landau_sphere", "use sphere/semi-circle domain instead of rectangle", "plexland.c", ctx->sphere, &ctx->sphere, &sph_flg);CHKERRQ(ierr);
1276   ierr = PetscOptionsBool("-dm_landau_inflate", "With sphere, inflate for curved edges (no AMR)", "plexland.c", ctx->inflate, &ctx->inflate, NULL);CHKERRQ(ierr);
1277   ierr = PetscOptionsInt("-dm_landau_amr_re_levels", "Number of levels to refine along v_perp=0, z>0", "plexland.c", ctx->numRERefine, &ctx->numRERefine, NULL);CHKERRQ(ierr);
1278   ierr = PetscOptionsInt("-dm_landau_amr_z_refine1",  "Number of levels to refine along v_perp=0", "plexland.c", ctx->nZRefine1, &ctx->nZRefine1, NULL);CHKERRQ(ierr);
1279   ierr = PetscOptionsInt("-dm_landau_amr_z_refine2",  "Number of levels to refine along v_perp=0", "plexland.c", ctx->nZRefine2, &ctx->nZRefine2, NULL);CHKERRQ(ierr);
1280   ierr = PetscOptionsInt("-dm_landau_amr_levels_max", "Number of AMR levels of refinement around origin after r=0 refinements", "plexland.c", ctx->maxRefIts, &ctx->maxRefIts, NULL);CHKERRQ(ierr);
1281   ierr = PetscOptionsInt("-dm_landau_amr_post_refine", "Number of levels to uniformly refine after AMR", "plexland.c", ctx->postAMRRefine, &ctx->postAMRRefine, NULL);CHKERRQ(ierr);
1282   ierr = PetscOptionsInt("-dm_landau_verbose", "", "plexland.c", ctx->verbose, &ctx->verbose, NULL);CHKERRQ(ierr);
1283   ierr = PetscOptionsReal("-dm_landau_re_radius","velocity range to refine on positive (z>0) r=0 axis for runaways","plexland.c",ctx->re_radius,&ctx->re_radius, &flg);CHKERRQ(ierr);
1284   ierr = PetscOptionsReal("-dm_landau_z_radius1","velocity range to refine r=0 axis (for electrons)","plexland.c",ctx->vperp0_radius1,&ctx->vperp0_radius1, &flg);CHKERRQ(ierr);
1285   ierr = PetscOptionsReal("-dm_landau_z_radius2","velocity range to refine r=0 axis (for ions) after origin AMR","plexland.c",ctx->vperp0_radius2,&ctx->vperp0_radius2, &flg);CHKERRQ(ierr);
1286   ierr = PetscOptionsReal("-dm_landau_Ez","Initial parallel electric field in unites of Conner-Hastie criticle field","plexland.c",ctx->Ez,&ctx->Ez, NULL);CHKERRQ(ierr);
1287   ierr = PetscOptionsReal("-dm_landau_n_0","Normalization constant for number density","plexland.c",ctx->n_0,&ctx->n_0, NULL);CHKERRQ(ierr);
1288   ierr = PetscOptionsReal("-dm_landau_ln_lambda","Cross section parameter","plexland.c",ctx->lnLam,&ctx->lnLam, NULL);CHKERRQ(ierr);
1289   ierr = PetscOptionsInt("-dm_landau_num_sections", "Number of tangential section in (2D) grid, 2, 3, of 4", "plexland.c", ctx->num_sections, &ctx->num_sections, NULL);CHKERRQ(ierr);
1290 
1291   /* get num species with tempurature*/
1292   {
1293     PetscReal arr[100];
1294     nt = 100;
1295     ierr = PetscOptionsRealArray("-dm_landau_thermal_temps", "Temperature of each species [e,i_0,i_1,...] in keV", "plexland.c", arr, &nt, &flg);CHKERRQ(ierr);
1296     if (flg && nt > LANDAU_MAX_SPECIES) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"-thermal_temps ,t1,t2,.. number of species %D > MAX %D",nt,LANDAU_MAX_SPECIES);
1297   }
1298   nt = LANDAU_MAX_SPECIES;
1299   for (ii=1;ii<LANDAU_MAX_SPECIES;ii++) {
1300     ctx->thermal_temps[ii] = 1.;
1301     ctx->charges[ii] = 1;
1302     ctx->masses[ii] = 1;
1303     ctx->n[ii] = (ii==1) ? 1 : 0;
1304   }
1305   ierr = PetscOptionsRealArray("-dm_landau_thermal_temps", "Temperature of each species [e,i_0,i_1,...] in keV (must be set to set number of species)", "plexland.c", ctx->thermal_temps, &nt, &flg);CHKERRQ(ierr);
1306   if (flg) {
1307     PetscInfo1(dummy, "num_species set to number of thermal temps provided (%D)\n",nt);
1308     ctx->num_species = nt;
1309   } else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"-dm_landau_thermal_temps ,t1,t2,.. must be provided to set the number of species");
1310   for (ii=0;ii<ctx->num_species;ii++) ctx->thermal_temps[ii] *= 1.1604525e7; /* convert to Kelvin */
1311   nm = LANDAU_MAX_SPECIES-1;
1312   ierr = PetscOptionsRealArray("-dm_landau_ion_masses", "Mass of each species in units of proton mass [i_0=2,i_1=40...]", "plexland.c", &ctx->masses[1], &nm, &flg);CHKERRQ(ierr);
1313   if (flg && nm != ctx->num_species-1) {
1314     SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"num ion masses %D != num species %D",nm,ctx->num_species-1);
1315   }
1316   nm = LANDAU_MAX_SPECIES;
1317   ierr = PetscOptionsRealArray("-dm_landau_n", "Normalized (by -n_0) number density of each species", "plexland.c", ctx->n, &nm, &flg);CHKERRQ(ierr);
1318   if (flg && nm != ctx->num_species) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"wrong num n: %D != num species %D",nm,ctx->num_species);
1319   ctx->n_0 *= ctx->n[0]; /* normalized number density */
1320   for (ii=1;ii<ctx->num_species;ii++) ctx->n[ii] = ctx->n[ii]/ctx->n[0];
1321   ctx->n[0] = 1;
1322   for (ii=0;ii<LANDAU_MAX_SPECIES;ii++) ctx->masses[ii] *= 1.6720e-27; /* scale by proton mass kg */
1323   ctx->masses[0] = 9.10938356e-31; /* electron mass kg (should be about right already) */
1324   ctx->m_0 = ctx->masses[0]; /* arbitrary reference mass, electrons */
1325   ierr = PetscOptionsReal("-dm_landau_v_0","Velocity to normalize with in units of initial electrons thermal velocity (not recommended to change default)","plexland.c",ctx->v_0,&ctx->v_0, NULL);CHKERRQ(ierr);
1326   ctx->v_0 *= PetscSqrtReal(ctx->k*ctx->thermal_temps[0]/(ctx->masses[0])); /* electron mean velocity in 1D (need 3D form in computing T from FE integral) */
1327   nc = LANDAU_MAX_SPECIES-1;
1328   ierr = PetscOptionsRealArray("-dm_landau_ion_charges", "Charge of each species in units of proton charge [i_0=2,i_1=18,...]", "plexland.c", &ctx->charges[1], &nc, &flg);CHKERRQ(ierr);
1329   if (flg && nc != ctx->num_species-1) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"num charges %D != num species %D",nc,ctx->num_species-1);
1330   for (ii=0;ii<LANDAU_MAX_SPECIES;ii++) ctx->charges[ii] *= 1.6022e-19; /* electron/proton charge (MKS) */
1331   ctx->t_0 = 8*PETSC_PI*PetscSqr(ctx->epsilon0*ctx->m_0/PetscSqr(ctx->charges[0]))/ctx->lnLam/ctx->n_0*PetscPowReal(ctx->v_0,3); /* note, this t_0 makes nu[0,0]=1 */
1332   /* geometry */
1333   for (ii=0;ii<ctx->num_species;ii++) ctx->refineTol[ii]  = PETSC_MAX_REAL;
1334   for (ii=0;ii<ctx->num_species;ii++) ctx->coarsenTol[ii] = 0.;
1335   ii = LANDAU_MAX_SPECIES;
1336   ierr = PetscOptionsRealArray("-dm_landau_refine_tol","tolerance for refining cells in AMR","plexland.c",ctx->refineTol, &ii, &flg);CHKERRQ(ierr);
1337   if (flg && ii != ctx->num_species) ierr = PetscInfo2(dummy, "Phase: Warning, #refine_tol %D != num_species %D\n",ii,ctx->num_species);CHKERRQ(ierr);
1338   ii = LANDAU_MAX_SPECIES;
1339   ierr = PetscOptionsRealArray("-dm_landau_coarsen_tol","tolerance for coarsening cells in AMR","plexland.c",ctx->coarsenTol, &ii, &flg);CHKERRQ(ierr);
1340   if (flg && ii != ctx->num_species) ierr = PetscInfo2(dummy, "Phase: Warning, #coarsen_tol %D != num_species %D\n",ii,ctx->num_species);CHKERRQ(ierr);
1341   ierr = PetscOptionsReal("-dm_landau_domain_radius","Phase space size in units of electron thermal velocity","plexland.c",ctx->radius,&ctx->radius, &flg);CHKERRQ(ierr);
1342   if (flg && ctx->radius <= 0) { /* negative is ratio of c */
1343     if (ctx->radius == 0) ctx->radius = 0.75;
1344     else ctx->radius = -ctx->radius;
1345     ctx->radius = ctx->radius*299792458/ctx->v_0;
1346     ierr = PetscInfo1(dummy, "Change domain radius to %e\n",ctx->radius);CHKERRQ(ierr);
1347   }
1348   ierr = PetscOptionsReal("-dm_landau_i_radius","Ion thermal velocity, used for circular meshes","plexland.c",ctx->i_radius,&ctx->i_radius, &flg);CHKERRQ(ierr);
1349   if (flg && !sph_flg) ctx->sphere = PETSC_TRUE; /* you gave me an ion radius but did not set sphere, user error really */
1350   if (!flg) {
1351     ctx->i_radius = 1.5*PetscSqrtReal(8*ctx->k*ctx->thermal_temps[1]/ctx->masses[1]/PETSC_PI)/ctx->v_0; /* normalized radius with thermal velocity of first ion */
1352   }
1353   ierr = PetscOptionsReal("-dm_landau_e_radius","Electron thermal velocity, used for circular meshes","plexland.c",ctx->e_radius,&ctx->e_radius, &flg);CHKERRQ(ierr);
1354   if (flg && !sph_flg) ctx->sphere = PETSC_TRUE; /* you gave me an e radius but did not set sphere, user error really */
1355   if (!flg) {
1356     ctx->e_radius = 1.5*PetscSqrtReal(8*ctx->k*ctx->thermal_temps[0]/ctx->masses[0]/PETSC_PI)/ctx->v_0; /* normalized radius with thermal velocity of electrons */
1357   }
1358   if (ctx->sphere && (ctx->e_radius <= ctx->i_radius || ctx->radius <= ctx->e_radius)) SETERRQ3(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"bad radii: %g < %g < %g",ctx->i_radius,ctx->e_radius,ctx->radius);
1359   ierr = PetscOptionsInt("-dm_landau_sub_thread_block_size", "Number of threads in Kokkos integration point subblock", "plexland.c", ctx->subThreadBlockSize, &ctx->subThreadBlockSize, NULL);CHKERRQ(ierr);
1360   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1361   for (ii=ctx->num_species;ii<LANDAU_MAX_SPECIES;ii++) ctx->masses[ii] = ctx->thermal_temps[ii]  = ctx->charges[ii] = 0;
1362   if (ctx->verbose > 0) {
1363     ierr = PetscPrintf(PETSC_COMM_WORLD, "masses:        e=%10.3e; ions in proton mass units:   %10.3e %10.3e ...\n",ctx->masses[0],ctx->masses[1]/1.6720e-27,ctx->num_species>2 ? ctx->masses[2]/1.6720e-27 : 0);CHKERRQ(ierr);
1364     ierr = PetscPrintf(PETSC_COMM_WORLD, "charges:       e=%10.3e; charges in elementary units: %10.3e %10.3e\n", ctx->charges[0],-ctx->charges[1]/ctx->charges[0],ctx->num_species>2 ? -ctx->charges[2]/ctx->charges[0] : 0);CHKERRQ(ierr);
1365     ierr = PetscPrintf(PETSC_COMM_WORLD, "thermal T (K): e=%10.3e i=%10.3e imp=%10.3e. v_0=%10.3e n_0=%10.3e t_0=%10.3e domain=%10.3e\n",ctx->thermal_temps[0],ctx->thermal_temps[1],ctx->num_species>2 ? ctx->thermal_temps[2] : 0,ctx->v_0,ctx->n_0,ctx->t_0,ctx->radius);CHKERRQ(ierr);
1366   }
1367   ierr = DMDestroy(&dummy);CHKERRQ(ierr);
1368   {
1369     PetscMPIInt    rank;
1370     ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRMPI(ierr);
1371     /* PetscLogStage  setup_stage; */
1372     ierr = PetscLogEventRegister("Landau Operator", DM_CLASSID, &ctx->events[0]);CHKERRQ(ierr); /* 0 */
1373     ierr = PetscLogEventRegister(" Jac-vector", DM_CLASSID, &ctx->events[1]);CHKERRQ(ierr); /* 1 */
1374     ierr = PetscLogEventRegister(" Jac-kern-init", DM_CLASSID, &ctx->events[3]);CHKERRQ(ierr); /* 3 */
1375     ierr = PetscLogEventRegister(" Jac-kernel", DM_CLASSID, &ctx->events[4]);CHKERRQ(ierr); /* 4 */
1376     ierr = PetscLogEventRegister(" Jac-kernel-post", DM_CLASSID, &ctx->events[5]);CHKERRQ(ierr); /* 5 */
1377     ierr = PetscLogEventRegister(" Jac-assemble", DM_CLASSID, &ctx->events[6]);CHKERRQ(ierr); /* 6 */
1378     ierr = PetscLogEventRegister(" Jac-end", DM_CLASSID, &ctx->events[7]);CHKERRQ(ierr); /* 7 */
1379     ierr = PetscLogEventRegister(" Jac-f-df", DM_CLASSID, &ctx->events[8]);CHKERRQ(ierr); /* 8 */
1380     ierr = PetscLogEventRegister(" Jac asmbl setup", DM_CLASSID, &ctx->events[2]);CHKERRQ(ierr); /* 2 */
1381     ierr = PetscLogEventRegister("Landau Jacobian", DM_CLASSID, &ctx->events[9]);CHKERRQ(ierr); /* 9 */
1382     if (rank) { /* turn off output stuff for duplicate runs - do we need to add the prefix to all this? */
1383       ierr = PetscOptionsClearValue(NULL,"-snes_converged_reason");CHKERRQ(ierr);
1384       ierr = PetscOptionsClearValue(NULL,"-ksp_converged_reason");CHKERRQ(ierr);
1385       ierr = PetscOptionsClearValue(NULL,"-snes_monitor");CHKERRQ(ierr);
1386       ierr = PetscOptionsClearValue(NULL,"-ksp_monitor");CHKERRQ(ierr);
1387       ierr = PetscOptionsClearValue(NULL,"-ts_monitor");CHKERRQ(ierr);
1388       ierr = PetscOptionsClearValue(NULL,"-ts_adapt_monitor");CHKERRQ(ierr);
1389       ierr = PetscOptionsClearValue(NULL,"-dm_landau_amr_dm_view");CHKERRQ(ierr);
1390       ierr = PetscOptionsClearValue(NULL,"-dm_landau_amr_vec_view");CHKERRQ(ierr);
1391       ierr = PetscOptionsClearValue(NULL,"-dm_landau_pre_dm_view");CHKERRQ(ierr);
1392       ierr = PetscOptionsClearValue(NULL,"-dm_landau_pre_vec_view");CHKERRQ(ierr);
1393       ierr = PetscOptionsClearValue(NULL,"-info");CHKERRQ(ierr);
1394     }
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 /*@C
1400   LandauCreateVelocitySpace - Create a DMPlex velocity space mesh
1401 
1402   Collective on comm
1403 
1404   Input Parameters:
1405 +   comm  - The MPI communicator
1406 .   dim - velocity space dimension (2 for axisymmetric, 3 for full 3X + 3V solver)
1407 -   prefix - prefix for options
1408 
1409   Output Parameter:
1410 .   dm  - The DM object representing the mesh
1411 +   X - A vector (user destroys)
1412 -   J - Optional matrix (object destroys)
1413 
1414   Level: beginner
1415 
1416 .keywords: mesh
1417 .seealso: DMPlexCreate(), LandauDestroyVelocitySpace()
1418 @*/
1419 PetscErrorCode LandauCreateVelocitySpace(MPI_Comm comm, PetscInt dim, const char prefix[], Vec *X, Mat *J, DM *dm)
1420 {
1421   PetscMPIInt    size;
1422   PetscErrorCode ierr;
1423   LandauCtx      *ctx;
1424   PetscBool      prealloc_only,flg;
1425 
1426   PetscFunctionBegin;
1427   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
1428   if (size!=1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Velocity space meshes should be serial (but should work in parallel)");
1429   if (dim!=2 && dim!=3) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Only 2D and 3D supported");
1430   ctx = (LandauCtx*)malloc(sizeof(LandauCtx));
1431   /* process options */
1432   ierr = ProcessOptions(ctx,prefix);CHKERRQ(ierr);
1433   /* Create Mesh */
1434   ierr = LandauDMCreateVMesh(comm, dim, prefix, ctx, dm);CHKERRQ(ierr);
1435   prealloc_only = (*dm)->prealloc_only;
1436   ierr = DMViewFromOptions(*dm,NULL,"-dm_landau_pre_dm_view");CHKERRQ(ierr);
1437   ierr = DMSetApplicationContext(*dm, ctx);CHKERRQ(ierr);
1438   /* create FEM */
1439   ierr = SetupDS(*dm,dim,ctx);CHKERRQ(ierr);
1440   /* set initial state */
1441   ierr = DMCreateGlobalVector(*dm,X);CHKERRQ(ierr);
1442   ierr = PetscObjectSetName((PetscObject) *X, "u");CHKERRQ(ierr);
1443   /* initial static refinement, no solve */
1444   ierr = LandauSetInitialCondition(*dm, *X, ctx);CHKERRQ(ierr);
1445   ierr = VecViewFromOptions(*X, NULL, "-dm_landau_pre_vec_view");CHKERRQ(ierr);
1446   /* forest refinement */
1447   if (ctx->errorIndicator) {
1448     /* AMR */
1449     ierr = adapt(dm,ctx,X);CHKERRQ(ierr);
1450     if ((*dm)->prealloc_only != prealloc_only) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"(*dm)->prealloc_only != prealloc_only");
1451     ierr = DMViewFromOptions(*dm,NULL,"-dm_landau_amr_dm_view");CHKERRQ(ierr);
1452     ierr = VecViewFromOptions(*X, NULL, "-dm_landau_amr_vec_view");CHKERRQ(ierr);
1453   }
1454   ierr = DMSetApplicationContext(*dm, ctx);CHKERRQ(ierr);
1455   ctx->dmv = *dm;
1456   if (ctx->dmv->prealloc_only != prealloc_only) SETERRQ(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"ctx->dmv->prealloc_only != prealloc_only");
1457   ierr = DMCreateMatrix(ctx->dmv, &ctx->J);CHKERRQ(ierr);
1458   ierr = MatSetOption(ctx->J, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE);CHKERRQ(ierr);
1459   if (J) *J = ctx->J;
1460   /* check for types that we need */
1461 #if defined(PETSC_HAVE_KOKKOS)
1462   if (ctx->deviceType == LANDAU_CPU) {
1463     ierr = PetscObjectTypeCompareAny((PetscObject)ctx->J,&flg,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,MATAIJKOKKOS,"");CHKERRQ(ierr);
1464     if (flg) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"with device=cpu must not use '-dm_mat_type aijkokkos -dm_vec_type kokkos' for GPU assembly and Kokkos");
1465   }
1466 #elif defined(PETSC_HAVE_CUDA)
1467   if (ctx->deviceType == LANDAU_CPU) {
1468     ierr = PetscObjectTypeCompareAny((PetscObject)ctx->J,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr);
1469     if (flg) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"with device=cpu must not use '-dm_mat_type aijcusparse -dm_vec_type cuda' for GPU assembly and Cuda");
1470   }
1471 #endif
1472   if (ctx->gpu_assembly) { /* we need GPU object with GPU assembly */
1473     if (ctx->deviceType == LANDAU_CUDA) {
1474       ierr = PetscObjectTypeCompareAny((PetscObject)ctx->J,&flg,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,MATAIJCUSPARSE,"");CHKERRQ(ierr);
1475       if (!flg) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"must use '-dm_mat_type aijcusparse -dm_vec_type cuda' for GPU assembly and Cuda");
1476     } else if (ctx->deviceType == LANDAU_KOKKOS) {
1477       ierr = PetscObjectTypeCompareAny((PetscObject)ctx->J,&flg,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,MATAIJKOKKOS,"");CHKERRQ(ierr);
1478 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1479       if (!flg) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"must use '-dm_mat_type aijkokkos -dm_vec_type kokkos' for GPU assembly and Kokkos");
1480 #else
1481       if (!flg) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"must configure with '--download-kokkos-kernels=1' for GPU assembly and Kokkos");
1482 #endif
1483     }
1484   }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 /*@
1489   LandauDestroyVelocitySpace - Destroy a DMPlex velocity space mesh
1490 
1491   Collective on dm
1492 
1493   Input/Output Parameters:
1494   .   dm - the dm to destroy
1495 
1496   Level: beginner
1497 
1498 .keywords: mesh
1499 .seealso: LandauCreateVelocitySpace()
1500 @*/
1501 PetscErrorCode LandauDestroyVelocitySpace(DM *dm)
1502 {
1503   PetscErrorCode ierr,ii;
1504   LandauCtx      *ctx;
1505   PetscContainer container = NULL;
1506   PetscFunctionBegin;
1507   ierr = DMGetApplicationContext(*dm, &ctx);CHKERRQ(ierr);
1508   ierr = PetscObjectQuery((PetscObject)ctx->J,"coloring", (PetscObject*)&container);CHKERRQ(ierr);
1509   if (container) {
1510     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
1511   }
1512   ierr = MatDestroy(&ctx->M);CHKERRQ(ierr);
1513   ierr = MatDestroy(&ctx->J);CHKERRQ(ierr);
1514   for (ii=0;ii<ctx->num_species;ii++) {
1515     ierr = PetscFEDestroy(&ctx->fe[ii]);CHKERRQ(ierr);
1516   }
1517   free(ctx);
1518   ierr = DMDestroy(dm);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 /* < v, ru > */
1523 static void f0_s_den(PetscInt dim, PetscInt Nf, PetscInt NfAux,
1524                      const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[],
1525                      const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[],
1526                      PetscReal t, const PetscReal x[],  PetscInt numConstants, const PetscScalar constants[], PetscScalar *f0)
1527 {
1528   PetscInt ii = (PetscInt)PetscRealPart(constants[0]);
1529   f0[0] = u[ii];
1530 }
1531 
1532 /* < v, ru > */
1533 static void f0_s_mom(PetscInt dim, PetscInt Nf, PetscInt NfAux,
1534                     const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[],
1535                     const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[],
1536                     PetscReal t, const PetscReal x[],  PetscInt numConstants, const PetscScalar constants[], PetscScalar *f0)
1537 {
1538   PetscInt ii = (PetscInt)PetscRealPart(constants[0]), jj = (PetscInt)PetscRealPart(constants[1]);
1539   f0[0] = x[jj]*u[ii]; /* x momentum */
1540 }
1541 
1542 static void f0_s_v2(PetscInt dim, PetscInt Nf, PetscInt NfAux,
1543                     const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[],
1544                     const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[],
1545                     PetscReal t, const PetscReal x[],  PetscInt numConstants, const PetscScalar constants[], PetscScalar *f0)
1546 {
1547   PetscInt i, ii = (PetscInt)PetscRealPart(constants[0]);
1548   double tmp1 = 0.;
1549   for (i = 0; i < dim; ++i) tmp1 += x[i]*x[i];
1550   f0[0] = tmp1*u[ii];
1551 }
1552 
1553 /* < v, ru > */
1554 static void f0_s_rden(PetscInt dim, PetscInt Nf, PetscInt NfAux,
1555                       const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[],
1556                       const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[],
1557                       PetscReal t, const PetscReal x[],  PetscInt numConstants, const PetscScalar constants[], PetscScalar *f0)
1558 {
1559   PetscInt ii = (PetscInt)PetscRealPart(constants[0]);
1560   f0[0] = 2.*PETSC_PI*x[0]*u[ii];
1561 }
1562 
1563 /* < v, ru > */
1564 static void f0_s_rmom(PetscInt dim, PetscInt Nf, PetscInt NfAux,
1565                       const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[],
1566                       const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[],
1567                       PetscReal t, const PetscReal x[],  PetscInt numConstants, const PetscScalar constants[], PetscScalar *f0)
1568 {
1569   PetscInt ii = (PetscInt)PetscRealPart(constants[0]);
1570   f0[0] = 2.*PETSC_PI*x[0]*x[1]*u[ii];
1571 }
1572 
1573 static void f0_s_rv2(PetscInt dim, PetscInt Nf, PetscInt NfAux,
1574                      const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[],
1575                      const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[],
1576                      PetscReal t, const PetscReal x[],  PetscInt numConstants, const PetscScalar constants[], PetscScalar *f0)
1577 {
1578   PetscInt ii = (PetscInt)PetscRealPart(constants[0]);
1579   f0[0] =  2.*PETSC_PI*x[0]*(x[0]*x[0] + x[1]*x[1])*u[ii];
1580 }
1581 
1582 /*@
1583   LandauPrintNorms - collects moments and prints them
1584 
1585   Collective on dm
1586 
1587   Input Parameters:
1588 +   X  - the state
1589 -   stepi - current step to print
1590 
1591   Level: beginner
1592 
1593 .keywords: mesh
1594 .seealso: LandauCreateVelocitySpace()
1595 @*/
1596 PetscErrorCode LandauPrintNorms(Vec X, PetscInt stepi)
1597 {
1598   PetscErrorCode ierr;
1599   LandauCtx      *ctx;
1600   PetscDS        prob;
1601   DM             plex,dm;
1602   PetscInt       cStart, cEnd, dim, ii;
1603   PetscScalar    xmomentumtot=0, ymomentumtot=0, zmomentumtot=0, energytot=0, densitytot=0, tt[LANDAU_MAX_SPECIES];
1604   PetscScalar    xmomentum[LANDAU_MAX_SPECIES],  ymomentum[LANDAU_MAX_SPECIES],  zmomentum[LANDAU_MAX_SPECIES], energy[LANDAU_MAX_SPECIES], density[LANDAU_MAX_SPECIES];
1605 
1606   PetscFunctionBegin;
1607   ierr = VecGetDM(X, &dm);CHKERRQ(ierr);
1608   if (!dm) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "no DM");
1609   ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
1610   ierr = DMGetApplicationContext(dm, &ctx);CHKERRQ(ierr);
1611   if (!ctx) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "no context");
1612   ierr = DMConvert(ctx->dmv, DMPLEX, &plex);CHKERRQ(ierr);
1613   ierr = DMCreateDS(plex);CHKERRQ(ierr);
1614   ierr = DMGetDS(plex, &prob);CHKERRQ(ierr);
1615   /* print momentum and energy */
1616   for (ii=0;ii<ctx->num_species;ii++) {
1617     PetscScalar user[2] = { (PetscScalar)ii, (PetscScalar)ctx->charges[ii]};
1618     ierr = PetscDSSetConstants(prob, 2, user);CHKERRQ(ierr);
1619     if (dim==2) { /* 2/3X + 3V (cylindrical coordinates) */
1620       ierr = PetscDSSetObjective(prob, 0, &f0_s_rden);CHKERRQ(ierr);
1621       ierr = DMPlexComputeIntegralFEM(plex,X,tt,ctx);CHKERRQ(ierr);
1622       density[ii] = tt[0]*ctx->n_0*ctx->charges[ii];
1623       ierr = PetscDSSetObjective(prob, 0, &f0_s_rmom);CHKERRQ(ierr);
1624       ierr = DMPlexComputeIntegralFEM(plex,X,tt,ctx);CHKERRQ(ierr);
1625       zmomentum[ii] = tt[0]*ctx->n_0*ctx->v_0*ctx->masses[ii];
1626       ierr = PetscDSSetObjective(prob, 0, &f0_s_rv2);CHKERRQ(ierr);
1627       ierr = DMPlexComputeIntegralFEM(plex,X,tt,ctx);CHKERRQ(ierr);
1628       energy[ii] = tt[0]*0.5*ctx->n_0*ctx->v_0*ctx->v_0*ctx->masses[ii];
1629       zmomentumtot += zmomentum[ii];
1630       energytot  += energy[ii];
1631       densitytot += density[ii];
1632       ierr = PetscPrintf(PETSC_COMM_WORLD, "%3D) species-%D: charge density= %20.13e z-momentum= %20.13e energy= %20.13e",stepi,ii,PetscRealPart(density[ii]),PetscRealPart(zmomentum[ii]),PetscRealPart(energy[ii]));CHKERRQ(ierr);
1633     } else { /* 2/3X + 3V */
1634       ierr = PetscDSSetObjective(prob, 0, &f0_s_den);CHKERRQ(ierr);
1635       ierr = DMPlexComputeIntegralFEM(plex,X,tt,ctx);CHKERRQ(ierr);
1636       density[ii] = tt[0]*ctx->n_0*ctx->charges[ii];
1637       ierr = PetscDSSetObjective(prob, 0, &f0_s_mom);CHKERRQ(ierr);
1638       user[1] = 0;
1639       ierr = DMPlexComputeIntegralFEM(plex,X,tt,ctx);CHKERRQ(ierr);
1640       xmomentum[ii]  = tt[0]*ctx->n_0*ctx->v_0*ctx->masses[ii];
1641       user[1] = 1;
1642       ierr = DMPlexComputeIntegralFEM(plex,X,tt,ctx);CHKERRQ(ierr);
1643       ymomentum[ii] = tt[0]*ctx->n_0*ctx->v_0*ctx->masses[ii];
1644       user[1] = 2;
1645       ierr = DMPlexComputeIntegralFEM(plex,X,tt,ctx);CHKERRQ(ierr);
1646       zmomentum[ii] = tt[0]*ctx->n_0*ctx->v_0*ctx->masses[ii];
1647       ierr = PetscDSSetObjective(prob, 0, &f0_s_v2);CHKERRQ(ierr);
1648       ierr = DMPlexComputeIntegralFEM(plex,X,tt,ctx);CHKERRQ(ierr);
1649       energy[ii]    = 0.5*tt[0]*ctx->n_0*ctx->v_0*ctx->v_0*ctx->masses[ii];
1650       ierr = PetscPrintf(PETSC_COMM_WORLD, "%3D) species %D: density=%20.13e, x-momentum=%20.13e, y-momentum=%20.13e, z-momentum=%20.13e, energy=%21.13e",
1651                          stepi,ii,PetscRealPart(density[ii]),PetscRealPart(xmomentum[ii]),PetscRealPart(ymomentum[ii]),PetscRealPart(zmomentum[ii]),PetscRealPart(energy[ii]));CHKERRQ(ierr);
1652       xmomentumtot += xmomentum[ii];
1653       ymomentumtot += ymomentum[ii];
1654       zmomentumtot += zmomentum[ii];
1655       energytot  += energy[ii];
1656       densitytot += density[ii];
1657     }
1658     if (ctx->num_species>1) PetscPrintf(PETSC_COMM_WORLD, "\n");
1659   }
1660   /* totals */
1661   ierr = DMPlexGetHeightStratum(plex,0,&cStart,&cEnd);CHKERRQ(ierr);
1662   ierr = DMDestroy(&plex);CHKERRQ(ierr);
1663   if (ctx->num_species>1) {
1664     if (dim==2) {
1665       ierr = PetscPrintf(PETSC_COMM_WORLD, "\t%3D) Total: charge density=%21.13e, momentum=%21.13e, energy=%21.13e (m_i[0]/m_e = %g, %D cells)",
1666                   stepi,PetscRealPart(densitytot),PetscRealPart(zmomentumtot),PetscRealPart(energytot),ctx->masses[1]/ctx->masses[0],cEnd-cStart);CHKERRQ(ierr);
1667     } else {
1668       ierr = PetscPrintf(PETSC_COMM_WORLD, "\t%3D) Total: charge density=%21.13e, x-momentum=%21.13e, y-momentum=%21.13e, z-momentum=%21.13e, energy=%21.13e (m_i[0]/m_e = %g, %D cells)",
1669                   stepi,PetscRealPart(densitytot),PetscRealPart(xmomentumtot),PetscRealPart(ymomentumtot),PetscRealPart(zmomentumtot),PetscRealPart(energytot),ctx->masses[1]/ctx->masses[0],cEnd-cStart);CHKERRQ(ierr);
1670     }
1671   } else {
1672     ierr = PetscPrintf(PETSC_COMM_WORLD, " -- %D cells",cEnd-cStart);CHKERRQ(ierr);
1673   }
1674   if (ctx->verbose > 1) {ierr = PetscPrintf(PETSC_COMM_WORLD,", %D sub (vector) threads\n",ctx->subThreadBlockSize);CHKERRQ(ierr);}
1675   else {ierr = PetscPrintf(PETSC_COMM_WORLD,"\n");CHKERRQ(ierr);}
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 static PetscErrorCode destroy_coloring (void *is)
1680 {
1681   ISColoring tmp = (ISColoring)is;
1682   return ISColoringDestroy(&tmp);
1683 }
1684 
1685 /*@
1686   LandauCreateColoring - create a coloring and add to matrix (Landau context used just for 'print' flag, should be in DMPlex)
1687 
1688   Collective on JacP
1689 
1690   Input Parameters:
1691 +   JacP  - matrix to add coloring to
1692 -   plex - The DM
1693 
1694   Output Parameter:
1695 .   container  - Container with coloring
1696 
1697   Level: beginner
1698 
1699 .keywords: mesh
1700 .seealso: LandauCreateVelocitySpace()
1701 @*/
1702 PetscErrorCode LandauCreateColoring(Mat JacP, DM plex, PetscContainer *container)
1703 {
1704   PetscErrorCode  ierr;
1705   PetscInt        dim,cell,i,ej,nc,Nv,totDim,numGCells,cStart,cEnd;
1706   ISColoring      iscoloring = NULL;
1707   Mat             G,Q;
1708   PetscScalar     ones[128];
1709   MatColoring     mc;
1710   IS             *is;
1711   PetscInt        csize,colour,j,k;
1712   const PetscInt *indices;
1713   PetscInt       numComp[1];
1714   PetscInt       numDof[4];
1715   PetscFE        fe;
1716   DM             colordm;
1717   PetscSection   csection, section, globalSection;
1718   PetscDS        prob;
1719   LandauCtx      *ctx;
1720 
1721   PetscFunctionBegin;
1722   ierr = DMGetApplicationContext(plex, &ctx);CHKERRQ(ierr);
1723   ierr = DMGetLocalSection(plex, &section);CHKERRQ(ierr);
1724   ierr = DMGetGlobalSection(plex, &globalSection);CHKERRQ(ierr);
1725   ierr = DMGetDimension(plex, &dim);CHKERRQ(ierr);
1726   ierr = DMGetDS(plex, &prob);CHKERRQ(ierr);
1727   ierr = PetscDSGetTotalDimension(prob, &totDim);CHKERRQ(ierr);
1728   ierr = DMPlexGetHeightStratum(plex,0,&cStart,&cEnd);CHKERRQ(ierr);
1729   numGCells = cEnd - cStart;
1730   /* create cell centered DM */
1731   ierr = DMClone(plex, &colordm);CHKERRQ(ierr);
1732   ierr = PetscFECreateDefault(PetscObjectComm((PetscObject) plex), dim, 1, PETSC_FALSE, "color_", PETSC_DECIDE, &fe);CHKERRQ(ierr);
1733   ierr = PetscObjectSetName((PetscObject) fe, "color");CHKERRQ(ierr);
1734   ierr = DMSetField(colordm, 0, NULL, (PetscObject)fe);CHKERRQ(ierr);
1735   ierr = PetscFEDestroy(&fe);CHKERRQ(ierr);
1736   for (i = 0; i < (dim+1); ++i) numDof[i] = 0;
1737   numDof[dim] = 1;
1738   numComp[0] = 1;
1739   ierr = DMPlexCreateSection(colordm, NULL, numComp, numDof, 0, NULL, NULL, NULL, NULL, &csection);CHKERRQ(ierr);
1740   ierr = PetscSectionSetFieldName(csection, 0, "color");CHKERRQ(ierr);
1741   ierr = DMSetLocalSection(colordm, csection);CHKERRQ(ierr);
1742   ierr = DMViewFromOptions(colordm,NULL,"-color_dm_view");CHKERRQ(ierr);
1743   /* get vertex to element map Q and colroing graph G */
1744   ierr = MatGetSize(JacP,NULL,&Nv);CHKERRQ(ierr);
1745   ierr = MatCreateAIJ(PETSC_COMM_SELF,PETSC_DECIDE,PETSC_DECIDE,numGCells,Nv,totDim,NULL,0,NULL,&Q);CHKERRQ(ierr);
1746   for (i=0;i<128;i++) ones[i] = 1.0;
1747   for (cell = cStart, ej = 0 ; cell < cEnd; ++cell, ++ej) {
1748     PetscInt numindices,*indices;
1749     ierr = DMPlexGetClosureIndices(plex, section, globalSection, cell, PETSC_TRUE, &numindices, &indices, NULL, NULL);CHKERRQ(ierr);
1750     if (numindices>128) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "too many indices. %D > %D",numindices,128);
1751     ierr = MatSetValues(Q,1,&ej,numindices,indices,ones,ADD_VALUES);CHKERRQ(ierr);
1752     ierr = DMPlexRestoreClosureIndices(plex, section, globalSection, cell, PETSC_TRUE, &numindices, &indices, NULL, NULL);CHKERRQ(ierr);
1753   }
1754   ierr = MatAssemblyBegin(Q, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1755   ierr = MatAssemblyEnd(Q, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1756   ierr = MatMatTransposeMult(Q,Q,MAT_INITIAL_MATRIX,4.0,&G);CHKERRQ(ierr);
1757   ierr = PetscObjectSetName((PetscObject) Q, "Q");CHKERRQ(ierr);
1758   ierr = PetscObjectSetName((PetscObject) G, "coloring graph");CHKERRQ(ierr);
1759   ierr = MatViewFromOptions(G,NULL,"-coloring_mat_view");CHKERRQ(ierr);
1760   ierr = MatViewFromOptions(Q,NULL,"-coloring_mat_view");CHKERRQ(ierr);
1761   ierr = MatDestroy(&Q);CHKERRQ(ierr);
1762   /* coloring */
1763   ierr = MatColoringCreate(G,&mc);CHKERRQ(ierr);
1764   ierr = MatColoringSetDistance(mc,1);CHKERRQ(ierr);
1765   ierr = MatColoringSetType(mc,MATCOLORINGJP);CHKERRQ(ierr);
1766   ierr = MatColoringSetFromOptions(mc);CHKERRQ(ierr);
1767   ierr = MatColoringApply(mc,&iscoloring);CHKERRQ(ierr);
1768   ierr = MatColoringDestroy(&mc);CHKERRQ(ierr);
1769   /* view */
1770   ierr = ISColoringViewFromOptions(iscoloring,NULL,"-coloring_is_view");CHKERRQ(ierr);
1771   ierr = ISColoringGetIS(iscoloring,PETSC_USE_POINTER,&nc,&is);CHKERRQ(ierr);
1772   if (ctx && ctx->verbose > 5) {
1773     PetscViewer    viewer;
1774     Vec            color_vec, eidx_vec;
1775     ierr = DMGetGlobalVector(colordm, &color_vec);CHKERRQ(ierr);
1776     ierr = DMGetGlobalVector(colordm, &eidx_vec);CHKERRQ(ierr);
1777     for (colour=0; colour<nc; colour++) {
1778       ierr = ISGetLocalSize(is[colour],&csize);CHKERRQ(ierr);
1779       ierr = ISGetIndices(is[colour],&indices);CHKERRQ(ierr);
1780       for (j=0; j<csize; j++) {
1781         PetscScalar v = (PetscScalar)colour;
1782         k = indices[j];
1783         ierr = VecSetValues(color_vec,1,&k,&v,INSERT_VALUES);
1784         v = (PetscScalar)k;
1785         ierr = VecSetValues(eidx_vec,1,&k,&v,INSERT_VALUES);
1786       }
1787       ierr = ISRestoreIndices(is[colour],&indices);CHKERRQ(ierr);
1788     }
1789     /* view */
1790     ierr = PetscViewerVTKOpen(PETSC_COMM_WORLD, "color.vtu", FILE_MODE_WRITE, &viewer);CHKERRQ(ierr);
1791     ierr = PetscObjectSetName((PetscObject) color_vec, "color");CHKERRQ(ierr);
1792     ierr = VecView(color_vec, viewer);CHKERRQ(ierr);
1793     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1794     ierr = PetscViewerVTKOpen(PETSC_COMM_WORLD, "eidx.vtu", FILE_MODE_WRITE, &viewer);CHKERRQ(ierr);
1795     ierr = PetscObjectSetName((PetscObject) eidx_vec, "element-idx");CHKERRQ(ierr);
1796     ierr = VecView(eidx_vec, viewer);CHKERRQ(ierr);
1797     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1798     ierr = DMRestoreGlobalVector(colordm, &color_vec);CHKERRQ(ierr);
1799     ierr = DMRestoreGlobalVector(colordm, &eidx_vec);CHKERRQ(ierr);
1800   }
1801   ierr = PetscSectionDestroy(&csection);CHKERRQ(ierr);
1802   ierr = DMDestroy(&colordm);CHKERRQ(ierr);
1803   ierr = ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&is);CHKERRQ(ierr);
1804   ierr = MatDestroy(&G);CHKERRQ(ierr);
1805   /* stash coloring */
1806   ierr = PetscContainerCreate(PETSC_COMM_SELF, container);CHKERRQ(ierr);
1807   ierr = PetscContainerSetPointer(*container,(void*)iscoloring);CHKERRQ(ierr);
1808   ierr = PetscContainerSetUserDestroy(*container, destroy_coloring);CHKERRQ(ierr);
1809   ierr = PetscObjectCompose((PetscObject)JacP,"coloring",(PetscObject)*container);CHKERRQ(ierr);
1810   if (ctx && ctx->verbose > 0) {
1811     ierr = PetscPrintf(PETSC_COMM_WORLD, "Made coloring with %D colors\n", nc);CHKERRQ(ierr);
1812   }
1813   PetscFunctionReturn(0);
1814   }
1815 
1816 PetscErrorCode LandauAssembleOpenMP(PetscInt cStart, PetscInt cEnd, PetscInt totDim, DM plex, PetscSection section, PetscSection globalSection, Mat JacP, PetscScalar elemMats[], PetscContainer container)
1817 {
1818   PetscErrorCode  ierr;
1819   IS             *is;
1820   PetscInt        nc,colour,j;
1821   const PetscInt *clr_idxs;
1822   ISColoring      iscoloring;
1823   PetscFunctionBegin;
1824   ierr = PetscContainerGetPointer(container,(void**)&iscoloring);CHKERRQ(ierr);
1825   ierr = ISColoringGetIS(iscoloring,PETSC_USE_POINTER,&nc,&is);CHKERRQ(ierr);
1826   for (colour=0; colour<nc; colour++) {
1827     PetscInt    *idx_arr[1024]; /* need to make dynamic for general use */
1828     PetscScalar *new_el_mats[1024];
1829     PetscInt     idx_size[1024],csize;
1830     ierr = ISGetLocalSize(is[colour],&csize);CHKERRQ(ierr);
1831     if (csize>1024) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "too many elements in color. %D > %D",csize,1024);
1832     ierr = ISGetIndices(is[colour],&clr_idxs);CHKERRQ(ierr);
1833     /* get indices and mats */
1834     for (j=0; j<csize; j++) {
1835       PetscInt    cell = cStart + clr_idxs[j];
1836       PetscInt    numindices,*indices;
1837       PetscScalar *elMat = &elemMats[clr_idxs[j]*totDim*totDim];
1838       PetscScalar *valuesOrig = elMat;
1839       ierr = DMPlexGetClosureIndices(plex, section, globalSection, cell, PETSC_TRUE, &numindices, &indices, NULL, (PetscScalar **) &elMat);CHKERRQ(ierr);
1840       idx_size[j] = numindices;
1841       ierr = PetscMalloc2(numindices,&idx_arr[j],numindices*numindices,&new_el_mats[j]);CHKERRQ(ierr);
1842       ierr = PetscMemcpy(idx_arr[j],indices,numindices*sizeof(PetscInt));CHKERRQ(ierr);
1843       ierr = PetscMemcpy(new_el_mats[j],elMat,numindices*numindices*sizeof(PetscScalar));CHKERRQ(ierr);
1844       ierr = DMPlexRestoreClosureIndices(plex, section, globalSection, cell, PETSC_TRUE, &numindices, &indices, NULL, (PetscScalar **) &elMat);CHKERRQ(ierr);
1845       if (elMat != valuesOrig) {ierr = DMRestoreWorkArray(plex, numindices*numindices, MPIU_SCALAR, &elMat);}
1846     }
1847     /* assemble matrix - pragmas break CI ? */
1848     //#pragma omp parallel default(JacP,idx_size,idx_arr,new_el_mats,colour,clr_idxs)  private(j)
1849     //#pragma omp parallel for private(j)
1850     for (j=0; j<csize; j++) {
1851       PetscInt    numindices = idx_size[j], *indices = idx_arr[j];
1852       PetscScalar *elMat = new_el_mats[j];
1853       MatSetValues(JacP,numindices,indices,numindices,indices,elMat,ADD_VALUES);
1854     }
1855     /* free */
1856     ierr = ISRestoreIndices(is[colour],&clr_idxs);CHKERRQ(ierr);
1857     for (j=0; j<csize; j++) {
1858       ierr = PetscFree2(idx_arr[j],new_el_mats[j]);CHKERRQ(ierr);
1859     }
1860   }
1861   ierr = ISColoringRestoreIS(iscoloring,PETSC_USE_POINTER,&is);CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 /* < v, u > */
1866 static void g0_1(PetscInt dim, PetscInt Nf, PetscInt NfAux,
1867                   const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[],
1868                   const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[],
1869                   PetscReal t, PetscReal u_tShift, const PetscReal x[],  PetscInt numConstants, const PetscScalar constants[], PetscScalar g0[])
1870 {
1871   g0[0] = 1.;
1872 }
1873 
1874 /* < v, u > */
1875 static void g0_r(PetscInt dim, PetscInt Nf, PetscInt NfAux,
1876                   const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[],
1877                   const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[],
1878                   PetscReal t, PetscReal u_tShift, const PetscReal x[],  PetscInt numConstants, const PetscScalar constants[], PetscScalar g0[])
1879 {
1880   g0[0] = 2.*PETSC_PI*x[0] + 1.e-14; // seems to fix funny Q2 bug
1881 }
1882 
1883 /*@
1884   LandauCreateMassMatrix - Create mass matrix for Landau
1885 
1886   Collective on dm
1887 
1888   Input Parameters:
1889 . dm     - the DM object
1890 
1891  Output Parameters:
1892 . Amat - The mass matrix (optional), mass matrix is added to the DM context
1893 
1894   Level: beginner
1895 
1896 .keywords: mesh
1897 .seealso: LandauCreateVelocitySpace()
1898 @*/
1899 PetscErrorCode LandauCreateMassMatrix(DM dm, Mat *Amat)
1900 {
1901   DM             massDM;
1902   PetscDS        prob;
1903   PetscInt       ii,dim,N1=1,N2;
1904   PetscErrorCode ierr;
1905   LandauCtx      *ctx;
1906   Mat            M;
1907 
1908   PetscFunctionBegin;
1909   PetscValidHeaderSpecific(dm,DM_CLASSID,1);
1910   if (Amat) PetscValidPointer(Amat,3);
1911   ierr = DMGetApplicationContext(dm, &ctx);CHKERRQ(ierr);
1912   if (!ctx) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "no context");
1913   ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
1914   ierr = DMClone(dm, &massDM);CHKERRQ(ierr);
1915   ierr = DMCopyFields(dm, massDM);CHKERRQ(ierr);
1916   ierr = DMCreateDS(massDM);CHKERRQ(ierr);
1917   ierr = DMGetDS(massDM, &prob);CHKERRQ(ierr);
1918   for (ii=0;ii<ctx->num_species;ii++) {
1919     if (dim==3) {ierr = PetscDSSetJacobian(prob, ii, ii, g0_1, NULL, NULL, NULL);CHKERRQ(ierr);}
1920     else        {ierr = PetscDSSetJacobian(prob, ii, ii, g0_r, NULL, NULL, NULL);CHKERRQ(ierr);}
1921   }
1922   ierr = DMViewFromOptions(massDM,NULL,"-dm_landau_mass_dm_view");CHKERRQ(ierr);
1923   ierr = DMCreateMatrix(massDM, &M);CHKERRQ(ierr);
1924   ierr = MatSetOption(M, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE);CHKERRQ(ierr);
1925   {
1926     Vec locX;
1927     DM  plex;
1928     ierr = DMConvert(massDM, DMPLEX, &plex);CHKERRQ(ierr);
1929     ierr = DMGetLocalVector(massDM, &locX);CHKERRQ(ierr);
1930     /* Mass matrix is independent of the input, so no need to fill locX */
1931     if (plex->prealloc_only != dm->prealloc_only) SETERRQ2(PetscObjectComm((PetscObject) dm), PETSC_ERR_PLIB, "plex->prealloc_only = massDM->prealloc_only %D, =%D",plex->prealloc_only,massDM->prealloc_only);
1932     ierr = DMPlexSNESComputeJacobianFEM(plex, locX, M, M, ctx);CHKERRQ(ierr);
1933     ierr = DMRestoreLocalVector(massDM, &locX);CHKERRQ(ierr);
1934     ierr = DMDestroy(&plex);CHKERRQ(ierr);
1935   }
1936   ierr = DMDestroy(&massDM);CHKERRQ(ierr);
1937   ierr = MatGetSize(ctx->J, &N1, NULL);CHKERRQ(ierr);
1938   ierr = MatGetSize(M, &N2, NULL);CHKERRQ(ierr);
1939   if (N1 != N2) SETERRQ2(PetscObjectComm((PetscObject) dm), PETSC_ERR_PLIB, "Incorrect matrix sizes: |Jacobian| = %D, |Mass|=%D",N1,N2);
1940   ierr = PetscObjectSetName((PetscObject)M, "mass");CHKERRQ(ierr);
1941   ierr = MatViewFromOptions(M,NULL,"-dm_landau_mass_mat_view");CHKERRQ(ierr);
1942   ctx->M = M; /* this could be a noop, a = a */
1943   if (Amat) *Amat = M;
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 /*@
1948   LandauIFunction - TS residual calculation
1949 
1950   Collective on ts
1951 
1952   Input Parameters:
1953 +   TS  - The time stepping context
1954 .   time_dummy - current time (not used)
1955 -   X - Current state
1956 +   X_t - Time derivative of current state
1957 .   actx - Landau context
1958 
1959   Output Parameter:
1960 .   F  - The residual
1961 
1962   Level: beginner
1963 
1964 .keywords: mesh
1965 .seealso: LandauCreateVelocitySpace(), LandauIJacobian()
1966 @*/
1967 PetscErrorCode LandauIFunction(TS ts, PetscReal time_dummy, Vec X, Vec X_t, Vec F, void *actx)
1968 {
1969   PetscErrorCode ierr;
1970   LandauCtx      *ctx=(LandauCtx*)actx;
1971   PetscReal      unorm;
1972   PetscInt       dim;
1973   DM dm;
1974 
1975   PetscFunctionBegin;
1976   ierr = TSGetDM(ts,&dm);CHKERRQ(ierr);
1977   ierr = DMGetApplicationContext(dm, &ctx);CHKERRQ(ierr);
1978   if (!ctx) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "no context");
1979   ierr = VecNorm(X,NORM_2,&unorm);CHKERRQ(ierr);
1980   ierr = PetscLogEventBegin(ctx->events[0],0,0,0,0);CHKERRQ(ierr);
1981   ierr = DMGetDimension(ctx->dmv, &dim);CHKERRQ(ierr);
1982   if (ctx->normJ != unorm) {
1983     ctx->normJ = unorm;
1984     ierr = PetscInfo1(ts, "Create Landau Jacobian t=%g\n",time_dummy);CHKERRQ(ierr);
1985     ierr = LandauFormJacobian_Internal(X,ctx->J,dim,(void*)ctx);CHKERRQ(ierr);
1986     ctx->aux_bool = PETSC_TRUE; /* debug: set flag that we made a new Jacobian */
1987   } else {
1988     ctx->aux_bool = PETSC_FALSE;
1989     ierr = PetscInfo1(ts, "Skip Landau Jacobian t=%g\n",(double)time_dummy);CHKERRQ(ierr);
1990   }
1991   /* mat vec for op */
1992   ierr = MatMult(ctx->J,X,F);CHKERRQ(ierr);CHKERRQ(ierr); /* C*f */
1993   /* add time term */
1994   if (X_t) {
1995     ierr = MatMultAdd(ctx->M,X_t,F,F);CHKERRQ(ierr);
1996   }
1997   ierr = PetscLogEventEnd(ctx->events[0],0,0,0,0);CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 /*@
2002   LandauIJacobian - TS Jacobian construction
2003 
2004   Collective on ts
2005 
2006   Input Parameters:
2007 +   TS  - The time stepping context
2008 .   time_dummy - current time (not used)
2009 -   X - Current state
2010 +   U_tdummy - Time derivative of current state (not used)
2011 .   shift - shift for du/dt term
2012 -   actx - Landau context
2013 
2014   Output Parameter:
2015 .   Amat  - Jacobian
2016 +   Pmat  - same as Amat
2017 
2018   Level: beginner
2019 
2020 .keywords: mesh
2021 .seealso: LandauCreateVelocitySpace(), LandauIFunction()
2022 @*/
2023 PetscErrorCode LandauIJacobian(TS ts, PetscReal time_dummy, Vec X, Vec U_tdummy, PetscReal shift, Mat Amat, Mat Pmat, void *actx)
2024 {
2025   PetscErrorCode ierr;
2026   LandauCtx      *ctx=(LandauCtx*)actx;
2027   PetscReal      unorm;
2028   PetscInt       dim;
2029   DM dm;
2030 
2031   PetscFunctionBegin;
2032   ierr = TSGetDM(ts,&dm);CHKERRQ(ierr);
2033   ierr = DMGetApplicationContext(dm, &ctx);CHKERRQ(ierr);
2034   if (!ctx) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "no context");
2035   if (Amat!=Pmat || Amat!=ctx->J) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Amat!=Pmat || Amat!=ctx->J");
2036   ierr = DMGetDimension(ctx->dmv, &dim);CHKERRQ(ierr);
2037   /* get collision Jacobian into A */
2038   ierr = PetscLogEventBegin(ctx->events[9],0,0,0,0);CHKERRQ(ierr);
2039   ierr = VecNorm(X,NORM_2,&unorm);CHKERRQ(ierr);
2040   if (ctx->normJ!=unorm) {
2041     ierr = PetscInfo2(ts, "Create Landau Jacobian t=%g, shift=%g\n",(double)time_dummy,(double)shift);CHKERRQ(ierr);
2042     ierr = LandauFormJacobian_Internal(X,ctx->J,dim,(void*)ctx);CHKERRQ(ierr);
2043     ctx->normJ = unorm;
2044     ctx->aux_bool = PETSC_TRUE; /* debug: set flag that we made a new Jacobian */
2045   } else {
2046     ctx->aux_bool = PETSC_FALSE;
2047     ierr = PetscInfo3(ts, "Skip Landau Jacobian t=%g, shift=%g shift*|u|=%20.12e\n",(double)time_dummy,(double)shift,(double)shift*unorm);CHKERRQ(ierr);
2048   }
2049   {
2050     MatInfo info1, info2;
2051     double  nz_1, nz_2;
2052     ierr = MatGetInfo(Pmat,MAT_LOCAL,&info1);CHKERRQ(ierr);
2053     nz_1 = info1.nz_allocated;
2054     ierr = MatGetInfo(ctx->J,MAT_LOCAL,&info2);CHKERRQ(ierr);
2055     nz_2 = info2.nz_allocated;
2056     /* add mass */
2057     if (nz_1 != nz_2) { /* there is a bug wit Q2 elements and this generates an error message instead of diverged solution, need to fix */
2058       ierr = MatAXPY(Pmat,shift,ctx->M,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
2059       SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "nz_1 != nz_2 %g %g",(double)nz_1,(double)nz_2);
2060     } else {
2061       ierr = MatAXPY(Pmat,shift,ctx->M,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
2062     }
2063   }
2064   ierr = PetscLogEventEnd(ctx->events[9],0,0,0,0);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067