xref: /libCEED/examples/deal.II/bps.h (revision 4566048fb6713186ca3ec70b6f0dfa7b673ba3a9)
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2023 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 //  Authors: Peter Munch, Martin Kronbichler
15 //
16 // ---------------------------------------------------------------------
17 
18 // deal.II includes
19 #include <deal.II/dofs/dof_tools.h>
20 
21 #include <deal.II/fe/mapping.h>
22 
23 #include <deal.II/lac/la_parallel_vector.h>
24 
25 #include <deal.II/matrix_free/fe_evaluation.h>
26 #include <deal.II/matrix_free/matrix_free.h>
27 #include <deal.II/matrix_free/tools.h>
28 
29 // libCEED includes
30 #include <ceed.h>
31 #include <ceed/backend.h>
32 
33 // QFunction source
34 #include "bps-qfunctions.h"
35 
36 using namespace dealii;
37 
38 /**
39  * BP types. For more details, see https://ceed.exascaleproject.org/bps/.
40  */
41 enum class BPType : unsigned int
42 {
43   BP1,
44   BP2,
45   BP3,
46   BP4,
47   BP5,
48   BP6
49 };
50 
51 
52 
53 /**
54  * Struct storing relevant information regarding each BP.
55  */
56 struct BPInfo
57 {
58   BPInfo(const BPType type, const int dim, const int fe_degree)
59     : type(type)
60     , dim(dim)
61     , fe_degree(fe_degree)
62   {
63     if (type == BPType::BP1)
64       type_string = "BP1";
65     else if (type == BPType::BP2)
66       type_string = "BP2";
67     else if (type == BPType::BP3)
68       type_string = "BP3";
69     else if (type == BPType::BP4)
70       type_string = "BP4";
71     else if (type == BPType::BP5)
72       type_string = "BP5";
73     else if (type == BPType::BP6)
74       type_string = "BP6";
75 
76     this->n_q_points_1d = (type <= BPType::BP4) ? (fe_degree + 2) : (fe_degree + 1);
77 
78     this->n_components =
79       (type == BPType::BP1 || type == BPType::BP3 || type == BPType::BP5) ? 1 : dim;
80   }
81 
82 
83   BPType       type;
84   std::string  type_string;
85   unsigned int dim;
86   unsigned int fe_degree;
87   unsigned int n_q_points_1d;
88   unsigned int n_components;
89 };
90 
91 
92 
93 /**
94  * Base class of operators.
95  */
96 template <typename Number>
97 class OperatorBase
98 {
99 public:
100   /**
101    * deal.II vector type
102    */
103   using VectorType = LinearAlgebra::distributed::Vector<Number>;
104 
105   /**
106    * Initialize vector.
107    */
108   virtual void
109   reinit() = 0;
110 
111   /**
112    * Perform matrix-vector product
113    */
114   virtual void
115   vmult(VectorType &dst, const VectorType &src) const = 0;
116 
117   /**
118    * Initialize vector.
119    */
120   virtual void
121   initialize_dof_vector(VectorType &vec) const = 0;
122 
123   /**
124    * Compute inverse of diagonal.
125    */
126   virtual void
127   compute_inverse_diagonal(VectorType &diagonal) const = 0;
128 };
129 
130 
131 /**
132  * Operator implementation using libCEED.
133  */
134 template <int dim, typename Number>
135 class OperatorCeed : public OperatorBase<Number>
136 {
137 public:
138   using VectorType = typename OperatorBase<Number>::VectorType;
139 
140   /**
141    * Constructor.
142    */
143   OperatorCeed(const Mapping<dim>              &mapping,
144                const DoFHandler<dim>           &dof_handler,
145                const AffineConstraints<Number> &constraints,
146                const Quadrature<dim>           &quadrature,
147                const BPType                    &bp,
148                const std::string               &resource)
149     : mapping(mapping)
150     , dof_handler(dof_handler)
151     , constraints(constraints)
152     , quadrature(quadrature)
153     , bp(bp)
154     , resource(resource)
155   {
156     reinit();
157   }
158 
159   /**
160    * Destructor.
161    */
162   ~OperatorCeed()
163   {
164     CeedVectorDestroy(&src_ceed);
165     CeedVectorDestroy(&dst_ceed);
166     CeedOperatorDestroy(&op_apply);
167     CeedDestroy(&ceed);
168   }
169 
170   /**
171    * Initialized internal data structures, particularly, libCEED.
172    */
173   void
174   reinit() override
175   {
176     CeedVector           q_data;
177     CeedBasis            sol_basis;
178     CeedElemRestriction  sol_restriction;
179     CeedElemRestriction  q_data_restriction;
180     BuildContext         build_ctx_data;
181     CeedQFunctionContext build_ctx;
182     CeedQFunction        qf_apply;
183 
184     const auto &tria = dof_handler.get_triangulation();
185     const auto &fe   = dof_handler.get_fe();
186 
187     const auto n_components = fe.n_components();
188 
189     if (bp == BPType::BP1 || bp == BPType::BP3 || bp == BPType::BP5)
190       {
191         AssertThrow(n_components == 1, ExcInternalError());
192       }
193     else
194       {
195         AssertThrow(n_components == dim, ExcInternalError());
196       }
197 
198     // 1) create CEED instance -> "MatrixFree"
199     const char *ceed_spec = resource.c_str();
200     CeedInit(ceed_spec, &ceed);
201 
202     // 2) create shape functions -> "ShapeInfo"
203     const unsigned int fe_degree  = fe.tensor_degree();
204     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
205     {
206       FE_Q<1>            fe_1d{FE_Q<1>(fe_degree)};
207       const unsigned int array_size  = (fe_degree + 1) * (n_q_points);
208       double            *q_ref_1d    = new double[n_q_points];
209       double            *q_weight_1d = new double[n_q_points];
210       double            *interp_1d   = new double[array_size];
211       double            *grad_1d     = new double[array_size];
212       for (unsigned int i = 0; i < n_q_points; i++)
213         {
214           // Retrieve quadrature info
215           // Converting from [0, 1] to [-1, 1]
216           Point point    = quadrature.get_tensor_basis()[0].point(i);
217           q_ref_1d[i]    = 2.0 * (point(0) - 0.5);
218           q_weight_1d[i] = 2.0 * quadrature.get_tensor_basis()[0].weight(i);
219 
220           // Retrieve 1D shape function values
221           for (unsigned int j = 0; j < fe_degree + 1; j++)
222             {
223               // Shuffle index of DoF
224               const int k                        = j == 0 ? 0 : ((j % fe_degree) + 1);
225               interp_1d[j + i * (fe_degree + 1)] = fe_1d.shape_value_component(k, point, 0);
226               grad_1d[j + i * (fe_degree + 1)]   = 0.5 * fe_1d.shape_grad_component(k, point, 0)[0];
227             }
228         }
229 
230       CeedBasisCreateTensorH1(ceed,
231                               dim,
232                               n_components,
233                               fe_degree + 1,
234                               n_q_points,
235                               interp_1d,
236                               grad_1d,
237                               q_ref_1d,
238                               q_weight_1d,
239                               &sol_basis);
240     }
241 
242     // 3) create restriction matrix -> DoFInfo
243     unsigned int n_local_active_cells = 0;
244 
245     for (const auto &cell : dof_handler.active_cell_iterators())
246       if (cell->is_locally_owned())
247         n_local_active_cells++;
248 
249     partitioner =
250       std::make_shared<Utilities::MPI::Partitioner>(dof_handler.locally_owned_dofs(),
251                                                     DoFTools::extract_locally_active_dofs(
252                                                       dof_handler),
253                                                     dof_handler.get_communicator());
254 
255     std::vector<CeedInt> indices;
256     indices.reserve(n_local_active_cells * fe.n_dofs_per_cell() / n_components);
257 
258     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
259 
260     std::vector<types::global_dof_index> local_indices(fe.n_dofs_per_cell());
261 
262     for (const auto &cell : dof_handler.active_cell_iterators())
263       if (cell->is_locally_owned())
264         {
265           cell->get_dof_indices(local_indices);
266 
267           for (const auto i : dof_mapping)
268             indices.emplace_back(
269               partitioner->global_to_local(local_indices[fe.component_to_system_index(0, i)]) /
270               n_components);
271         }
272 
273     CeedElemRestrictionCreate(ceed,
274                               n_local_active_cells,
275                               fe.n_dofs_per_cell() / n_components,
276                               n_components,
277                               std::max<unsigned int>(this->extended_local_size() / n_components, 1),
278                               this->extended_local_size(),
279                               CEED_MEM_HOST,
280                               CEED_COPY_VALUES,
281                               indices.data(),
282                               &sol_restriction);
283 
284     // 4) create mapping -> MappingInfo
285     const unsigned int n_components_metric = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
286 
287     this->weights = compute_metric_data(ceed, mapping, tria, quadrature, bp);
288 
289     strides = {{1,
290                 static_cast<int>(quadrature.size()),
291                 static_cast<int>(quadrature.size() * n_components_metric)}};
292     CeedVectorCreate(ceed, weights.size(), &q_data);
293     CeedVectorSetArray(q_data, CEED_MEM_HOST, CEED_USE_POINTER, weights.data());
294     CeedElemRestrictionCreateStrided(ceed,
295                                      n_local_active_cells,
296                                      quadrature.size(),
297                                      n_components_metric,
298                                      weights.size(),
299                                      strides.data(),
300                                      &q_data_restriction);
301 
302     build_ctx_data.dim       = dim;
303     build_ctx_data.space_dim = dim;
304 
305     CeedQFunctionContextCreate(ceed, &build_ctx);
306     CeedQFunctionContextSetData(
307       build_ctx, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(build_ctx_data), &build_ctx_data);
308 
309     // 5) create q operation
310     if (bp == BPType::BP1)
311       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass, f_apply_mass_loc, &qf_apply);
312     else if (bp == BPType::BP2)
313       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass_vec, f_apply_mass_vec_loc, &qf_apply);
314     else if (bp == BPType::BP3 || bp == BPType::BP5)
315       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson, f_apply_poisson_loc, &qf_apply);
316     else if (bp == BPType::BP4 || bp == BPType::BP6)
317       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson_vec, f_apply_poisson_vec_loc, &qf_apply);
318     else
319       AssertThrow(false, ExcInternalError());
320 
321     if (bp <= BPType::BP2)
322       CeedQFunctionAddInput(qf_apply, "u", n_components, CEED_EVAL_INTERP);
323     else
324       CeedQFunctionAddInput(qf_apply, "u", dim * n_components, CEED_EVAL_GRAD);
325 
326     CeedQFunctionAddInput(qf_apply, "qdata", n_components_metric, CEED_EVAL_NONE);
327 
328     if (bp <= BPType::BP2)
329       CeedQFunctionAddOutput(qf_apply, "v", n_components, CEED_EVAL_INTERP);
330     else
331       CeedQFunctionAddOutput(qf_apply, "v", dim * n_components, CEED_EVAL_GRAD);
332 
333     CeedQFunctionSetContext(qf_apply, build_ctx);
334 
335     // 6) put everything together
336     CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply);
337 
338     CeedOperatorSetField(op_apply, "u", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
339     CeedOperatorSetField(op_apply, "qdata", q_data_restriction, CEED_BASIS_NONE, q_data);
340     CeedOperatorSetField(op_apply, "v", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
341 
342     // 7) libCEED vectors
343     CeedElemRestrictionCreateVector(sol_restriction, &src_ceed, NULL);
344     CeedElemRestrictionCreateVector(sol_restriction, &dst_ceed, NULL);
345 
346     // 8) cleanup
347     CeedVectorDestroy(&q_data);
348     CeedElemRestrictionDestroy(&q_data_restriction);
349     CeedElemRestrictionDestroy(&sol_restriction);
350     CeedBasisDestroy(&sol_basis);
351     CeedQFunctionContextDestroy(&build_ctx);
352     CeedQFunctionDestroy(&qf_apply);
353   }
354 
355   /**
356    * Perform matrix-vector product.
357    */
358   void
359   vmult(VectorType &dst, const VectorType &src) const override
360   {
361     // communicate: update ghost values
362     src.update_ghost_values();
363 
364     if (dof_handler.get_fe().n_components() == 1)
365       {
366         // pass memory buffers to libCEED
367         VectorTypeCeed x(src_ceed);
368         VectorTypeCeed y(dst_ceed);
369         x.import_array(src, CEED_MEM_HOST);
370         y.import_array(dst, CEED_MEM_HOST);
371 
372         // apply operator
373         CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE);
374 
375         // pull arrays back to deal.II
376         x.sync_array();
377         y.sync_array();
378       }
379     else // TODO: needed for multiple components
380       {
381         // allocate space for block vectors
382         src_tmp.reinit(this->extended_local_size(), true);
383         dst_tmp.reinit(this->extended_local_size(), true);
384 
385         // copy to block vector
386         copy_to_block_vector(src_tmp, src);
387 
388         // pass memory buffers to libCEED
389         VectorTypeCeed x(src_ceed);
390         VectorTypeCeed y(dst_ceed);
391         x.import_array(src_tmp, CEED_MEM_HOST);
392         y.import_array(dst_tmp, CEED_MEM_HOST);
393 
394         // apply operator
395         CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE);
396 
397         // pull arrays back to deal.II
398         x.sync_array();
399         y.sync_array();
400 
401         // copy from block vector
402         copy_from_block_vector(dst, dst_tmp);
403       }
404 
405     // communicate: compress
406     src.zero_out_ghost_values();
407     dst.compress(VectorOperation::add);
408 
409     // apply constraints: we assume homogeneous DBC
410     constraints.set_zero(dst);
411   }
412 
413   /**
414    * Initialized vector.
415    */
416   void
417   initialize_dof_vector(VectorType &vec) const override
418   {
419     vec.reinit(partitioner);
420   }
421 
422   /**
423    * Compute inverse of diagonal.
424    */
425   void
426   compute_inverse_diagonal(VectorType &diagonal) const override
427   {
428     this->initialize_dof_vector(diagonal);
429 
430     // pass memory buffer to libCEED
431     VectorTypeCeed y(dst_ceed);
432     y.import_array(diagonal, CEED_MEM_HOST);
433 
434     CeedOperatorLinearAssembleDiagonal(op_apply, y(), CEED_REQUEST_IMMEDIATE);
435 
436     // pull array back to deal.II
437     y.sync_array();
438 
439     const unsigned int n_components = dof_handler.get_fe().n_components();
440 
441     if (n_components > 1) // TODO: needed for multiple components
442       {
443         VectorType tmp(diagonal);
444 
445         copy_from_block_vector(tmp, diagonal);
446 
447         std::swap(tmp, diagonal);
448       }
449 
450     diagonal.compress(VectorOperation::add);
451 
452     for (auto &i : diagonal)
453       i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0;
454   }
455 
456 private:
457   /**
458    * Wrapper around a deal.II vector to create a libCEED vector view.
459    */
460   class VectorTypeCeed
461   {
462   public:
463     /**
464      * Constructor.
465      */
466     VectorTypeCeed(const CeedVector &vec_orig)
467     {
468       vec_ceed = NULL;
469       CeedVectorReferenceCopy(vec_orig, &vec_ceed);
470     }
471 
472     /**
473      * Return libCEED vector view.
474      */
475     CeedVector &
476     operator()()
477     {
478       return vec_ceed;
479     }
480 
481     /**
482      * Set deal.II memory in libCEED vector.
483      */
484     void
485     import_array(const VectorType &vec, const CeedMemType space)
486     {
487       mem_space = space;
488       CeedVectorSetArray(vec_ceed, mem_space, CEED_USE_POINTER, vec.get_values());
489     }
490 
491     /**
492      * Sync memory from device to host.
493      */
494     void
495     sync_array()
496     {
497       CeedVectorSyncArray(vec_ceed, mem_space);
498     }
499 
500     /**
501      * Destructor: destroy vector view.
502      */
503     ~VectorTypeCeed()
504     {
505       bool has_array;
506       CeedVectorHasBorrowedArrayOfType(vec_ceed, mem_space, &has_array);
507       if (has_array)
508         {
509           CeedScalar *ptr;
510           CeedVectorTakeArray(vec_ceed, mem_space, &ptr);
511         }
512       CeedVectorDestroy(&vec_ceed);
513     }
514 
515   private:
516     /**
517      * libCEED vector view.
518      */
519     CeedMemType mem_space;
520     CeedVector  vec_ceed;
521   };
522 
523   /**
524    * Copy from block vector.
525    *
526    * @note Only needed for multiple components.
527    */
528   void
529   copy_from_block_vector(VectorType &dst, const VectorType &src) const
530   {
531     const unsigned int scalar_size = this->extended_local_size() / dim;
532 
533     for (unsigned int i = 0; i < scalar_size; ++i)
534       for (unsigned int j = 0; j < dim; ++j)
535         dst.get_values()[j + i * dim] = src.get_values()[j * scalar_size + i];
536   }
537 
538   /**
539    * Copy to block vector.
540    *
541    * @note Only needed for multiple components.
542    */
543   void
544   copy_to_block_vector(VectorType &dst, const VectorType &src) const
545   {
546     const unsigned int scalar_size = this->extended_local_size() / dim;
547 
548     for (unsigned int i = 0; i < scalar_size; ++i)
549       for (unsigned int j = 0; j < dim; ++j)
550         dst.get_values()[j * scalar_size + i] = src.get_values()[j + i * dim];
551   }
552 
553   /**
554    * Number of locally active DoFs.
555    */
556   unsigned int
557   extended_local_size() const
558   {
559     return partitioner->locally_owned_size() + partitioner->n_ghost_indices();
560   }
561 
562   /**
563    * Compute metric data: Jacobian, ...
564    */
565   static std::vector<double>
566   compute_metric_data(const Ceed               &ceed,
567                       const Mapping<dim>       &mapping,
568                       const Triangulation<dim> &tria,
569                       const Quadrature<dim>    &quadrature,
570                       const BPType              bp)
571   {
572     std::vector<double> weights;
573 
574     if (false)
575       {
576         FE_Nothing<dim> dummy_fe;
577         FEValues<dim>   fe_values(mapping, dummy_fe, quadrature, update_JxW_values);
578 
579         for (const auto &cell : tria.active_cell_iterators())
580           if (cell->is_locally_owned())
581             {
582               fe_values.reinit(cell);
583 
584               for (const auto q : fe_values.quadrature_point_indices())
585                 weights.emplace_back(fe_values.JxW(q));
586             }
587 
588         return weights;
589       }
590 
591     CeedBasis            geo_basis;
592     CeedVector           q_data;
593     CeedElemRestriction  q_data_restriction;
594     CeedVector           node_coords;
595     CeedElemRestriction  geo_restriction;
596     CeedQFunctionContext build_ctx;
597     CeedQFunction        qf_build;
598     CeedOperator         op_build;
599 
600     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
601 
602     const unsigned int n_components = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
603 
604     const auto mapping_q = dynamic_cast<const MappingQ<dim> *>(&mapping);
605 
606     AssertThrow(mapping_q, ExcMessage("Wrong mapping!"));
607 
608     const unsigned int fe_degree = mapping_q->get_degree();
609 
610     CeedBasisCreateTensorH1Lagrange(
611       ceed, dim, dim, fe_degree + 1, n_q_points, CEED_GAUSS, &geo_basis);
612 
613     unsigned int n_local_active_cells = 0;
614 
615     for (const auto &cell : tria.active_cell_iterators())
616       if (cell->is_locally_owned())
617         n_local_active_cells++;
618 
619     std::vector<double>  geo_support_points;
620     std::vector<CeedInt> geo_indices;
621 
622     FE_Q<dim> geo_fe(fe_degree);
623 
624     DoFHandler<dim> geo_dof_handler(tria);
625     geo_dof_handler.distribute_dofs(geo_fe);
626 
627     const auto geo_partitioner =
628       std::make_shared<Utilities::MPI::Partitioner>(geo_dof_handler.locally_owned_dofs(),
629                                                     DoFTools::extract_locally_active_dofs(
630                                                       geo_dof_handler),
631                                                     geo_dof_handler.get_communicator());
632 
633     geo_indices.reserve(n_local_active_cells * geo_fe.n_dofs_per_cell());
634 
635     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
636 
637     FEValues<dim> fe_values(mapping,
638                             geo_fe,
639                             geo_fe.get_unit_support_points(),
640                             update_quadrature_points);
641 
642     std::vector<types::global_dof_index> local_indices(geo_fe.n_dofs_per_cell());
643 
644     const unsigned int n_points =
645       geo_partitioner->locally_owned_size() + geo_partitioner->n_ghost_indices();
646 
647     geo_support_points.resize(dim * n_points);
648 
649     for (const auto &cell : geo_dof_handler.active_cell_iterators())
650       if (cell->is_locally_owned())
651         {
652           fe_values.reinit(cell);
653           cell->get_dof_indices(local_indices);
654 
655           for (const auto i : dof_mapping)
656             {
657               const auto index = geo_partitioner->global_to_local(local_indices[i]);
658               geo_indices.emplace_back(index);
659 
660               const auto point = fe_values.quadrature_point(i);
661 
662               for (unsigned int d = 0; d < dim; ++d)
663                 geo_support_points[index + d * n_points] = point[d];
664             }
665         }
666 
667     weights.resize(n_local_active_cells * quadrature.size() * n_components);
668 
669     CeedInt strides[3] = {1,
670                           static_cast<int>(quadrature.size()),
671                           static_cast<int>(quadrature.size() * n_components)};
672 
673     CeedVectorCreate(ceed, weights.size(), &q_data);
674     CeedVectorSetArray(q_data, CEED_MEM_HOST, CEED_USE_POINTER, weights.data());
675     CeedElemRestrictionCreateStrided(ceed,
676                                      n_local_active_cells,
677                                      quadrature.size(),
678                                      n_components,
679                                      weights.size(),
680                                      strides,
681                                      &q_data_restriction);
682 
683     CeedVectorCreate(ceed, geo_support_points.size(), &node_coords);
684     CeedVectorSetArray(node_coords, CEED_MEM_HOST, CEED_USE_POINTER, geo_support_points.data());
685 
686     CeedElemRestrictionCreate(ceed,
687                               n_local_active_cells,
688                               geo_fe.n_dofs_per_cell(),
689                               dim,
690                               std::max<unsigned int>(geo_support_points.size() / dim, 1),
691                               geo_support_points.size(),
692                               CEED_MEM_HOST,
693                               CEED_COPY_VALUES,
694                               geo_indices.data(),
695                               &geo_restriction);
696 
697     BuildContext build_ctx_data;
698     build_ctx_data.dim       = dim;
699     build_ctx_data.space_dim = dim;
700 
701     CeedQFunctionContextCreate(ceed, &build_ctx);
702     CeedQFunctionContextSetData(
703       build_ctx, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(build_ctx_data), &build_ctx_data);
704 
705     // 5) create q operation
706     if (bp <= BPType::BP2)
707       CeedQFunctionCreateInterior(ceed, 1, f_build_mass, f_build_mass_loc, &qf_build);
708     else
709       CeedQFunctionCreateInterior(ceed, 1, f_build_poisson, f_build_poisson_loc, &qf_build);
710 
711     CeedQFunctionAddInput(qf_build, "geo", dim * dim, CEED_EVAL_GRAD);
712     CeedQFunctionAddInput(qf_build, "weights", 1, CEED_EVAL_WEIGHT);
713     CeedQFunctionAddOutput(qf_build, "qdata", n_components, CEED_EVAL_NONE);
714     CeedQFunctionSetContext(qf_build, build_ctx);
715 
716     // 6) put everything together
717     CeedOperatorCreate(ceed, qf_build, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_build);
718     CeedOperatorSetField(op_build, "geo", geo_restriction, geo_basis, CEED_VECTOR_ACTIVE);
719     CeedOperatorSetField(
720       op_build, "weights", CEED_ELEMRESTRICTION_NONE, geo_basis, CEED_VECTOR_NONE);
721     CeedOperatorSetField(
722       op_build, "qdata", q_data_restriction, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE);
723 
724     CeedOperatorApply(op_build, node_coords, q_data, CEED_REQUEST_IMMEDIATE);
725 
726     CeedVectorDestroy(&node_coords);
727     CeedVectorSyncArray(q_data, CEED_MEM_HOST);
728     CeedVectorDestroy(&q_data);
729     CeedElemRestrictionDestroy(&geo_restriction);
730     CeedElemRestrictionDestroy(&q_data_restriction);
731     CeedBasisDestroy(&geo_basis);
732     CeedQFunctionContextDestroy(&build_ctx);
733     CeedQFunctionDestroy(&qf_build);
734     CeedOperatorDestroy(&op_build);
735 
736     return weights;
737   }
738 
739   /**
740    * Mapping object passed to the constructor.
741    */
742   const Mapping<dim> &mapping;
743 
744   /**
745    * DoFHandler object passed to the constructor.
746    */
747   const DoFHandler<dim> &dof_handler;
748 
749   /**
750    * Constraints object passed to the constructor.
751    */
752   const AffineConstraints<Number> &constraints;
753 
754   /**
755    * Quadrature rule object passed to the constructor.
756    */
757   const Quadrature<dim> &quadrature;
758 
759   /**
760    * Selected BP.
761    */
762   const BPType bp;
763 
764   /**
765    * Resource name.
766    */
767   const std::string resource;
768 
769   /**
770    * Partitioner for distributed vectors.
771    */
772   std::shared_ptr<Utilities::MPI::Partitioner> partitioner;
773 
774   /**
775    * libCEED data structures.
776    */
777   Ceed                   ceed;
778   std::vector<double>    weights;
779   std::array<CeedInt, 3> strides;
780   CeedVector             src_ceed;
781   CeedVector             dst_ceed;
782   CeedOperator           op_apply;
783 
784   /**
785    * Temporal (tempral) vectors.
786    *
787    * @note Only needed for multiple components.
788    */
789   mutable VectorType src_tmp;
790   mutable VectorType dst_tmp;
791 };
792 
793 
794 
795 template <int dim, typename Number>
796 class OperatorDealii : public OperatorBase<Number>
797 {
798 public:
799   using VectorType = typename OperatorBase<Number>::VectorType;
800 
801   /**
802    * Constructor.
803    */
804   OperatorDealii(const Mapping<dim>              &mapping,
805                  const DoFHandler<dim>           &dof_handler,
806                  const AffineConstraints<Number> &constraints,
807                  const Quadrature<dim>           &quadrature,
808                  const BPType                    &bp)
809     : mapping(mapping)
810     , dof_handler(dof_handler)
811     , constraints(constraints)
812     , quadrature(quadrature)
813     , bp(bp)
814   {
815     reinit();
816   }
817 
818   /**
819    * Destructor.
820    */
821   ~OperatorDealii() = default;
822 
823   /**
824    * Initialized internal data structures, particularly, MatrixFree.
825    */
826   void
827   reinit() override
828   {
829     // configure MatrixFree
830     typename MatrixFree<dim, Number>::AdditionalData additional_data;
831     additional_data.tasks_parallel_scheme =
832       MatrixFree<dim, Number>::AdditionalData::TasksParallelScheme::none;
833 
834     // create MatrixFree
835     matrix_free.reinit(mapping, dof_handler, constraints, quadrature, additional_data);
836   }
837 
838   /**
839    * Matrix-vector product.
840    */
841   void
842   vmult(VectorType &dst, const VectorType &src) const override
843   {
844     if (dof_handler.get_fe().n_components() == 1)
845       {
846         matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<1>, this, dst, src, true);
847       }
848     else
849       {
850         AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError());
851 
852         matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<dim>, this, dst, src, true);
853       }
854   }
855 
856   /**
857    * Initialize vector.
858    */
859   void
860   initialize_dof_vector(VectorType &vec) const override
861   {
862     matrix_free.initialize_dof_vector(vec);
863   }
864 
865   /**
866    * Compute inverse of diagonal.
867    */
868   void
869   compute_inverse_diagonal(VectorType &diagonal) const override
870   {
871     this->initialize_dof_vector(diagonal);
872 
873     if (dof_handler.get_fe().n_components() == 1)
874       {
875         MatrixFreeTools::compute_diagonal(matrix_free,
876                                           diagonal,
877                                           &OperatorDealii::do_cell_integral_local<1>,
878                                           this);
879       }
880     else
881       {
882         AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError());
883 
884         MatrixFreeTools::compute_diagonal(matrix_free,
885                                           diagonal,
886                                           &OperatorDealii::do_cell_integral_local<dim>,
887                                           this);
888       }
889 
890     for (auto &i : diagonal)
891       i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0;
892   }
893 
894 private:
895   /**
896    * Cell integral without vector access.
897    */
898   template <int n_components>
899   void
900   do_cell_integral_local(FEEvaluation<dim, -1, 0, n_components, Number> &phi) const
901   {
902     if (bp <= BPType::BP2) // mass matrix
903       {
904         phi.evaluate(EvaluationFlags::values);
905         for (const auto q : phi.quadrature_point_indices())
906           phi.submit_value(phi.get_value(q), q);
907         phi.integrate(EvaluationFlags::values);
908       }
909     else // Poisson operator
910       {
911         phi.evaluate(EvaluationFlags::gradients);
912         for (const auto q : phi.quadrature_point_indices())
913           phi.submit_gradient(phi.get_gradient(q), q);
914         phi.integrate(EvaluationFlags::gradients);
915       }
916   }
917 
918   /**
919    * Cell integral on a range of cells.
920    */
921   template <int n_components>
922   void
923   do_cell_integral_range(const MatrixFree<dim, Number>               &matrix_free,
924                          VectorType                                  &dst,
925                          const VectorType                            &src,
926                          const std::pair<unsigned int, unsigned int> &range) const
927   {
928     FEEvaluation<dim, -1, 0, n_components, Number> phi(matrix_free, range);
929 
930     for (unsigned cell = range.first; cell < range.second; ++cell)
931       {
932         phi.reinit(cell);
933         phi.read_dof_values(src);            // read source vector
934         do_cell_integral_local(phi);         // cell integral
935         phi.distribute_local_to_global(dst); // write to destination vector
936       }
937   }
938 
939   /**
940    * Mapping object passed to the constructor.
941    */
942   const Mapping<dim> &mapping;
943 
944   /**
945    * DoFHandler object passed to the constructor.
946    */
947   const DoFHandler<dim> &dof_handler;
948 
949   /**
950    * Constraints object passed to the constructor.
951    */
952   const AffineConstraints<Number> &constraints;
953 
954   /**
955    * Quadrature rule object passed to the constructor.
956    */
957   const Quadrature<dim> &quadrature;
958 
959   /**
960    * Selected BP.
961    */
962   const BPType bp;
963 
964   /**
965    * MatrixFree object.
966    */
967   MatrixFree<dim, Number> matrix_free;
968 };
969