xref: /libCEED/examples/deal.II/bps.h (revision 0dd71613d151b524f4c19d05b5d2bbb37bac7329)
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2023 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 //  Authors: Peter Munch, Martin Kronbichler
15 //
16 // ---------------------------------------------------------------------
17 
18 // deal.II includes
19 #include <deal.II/dofs/dof_tools.h>
20 
21 #include <deal.II/fe/mapping.h>
22 
23 #include <deal.II/lac/la_parallel_vector.h>
24 
25 #include <deal.II/matrix_free/fe_evaluation.h>
26 #include <deal.II/matrix_free/matrix_free.h>
27 #include <deal.II/matrix_free/shape_info.h>
28 #include <deal.II/matrix_free/tools.h>
29 
30 // libCEED includes
31 #include <ceed.h>
32 #include <ceed/backend.h>
33 
34 // QFunction source
35 #include "bps-qfunctions.h"
36 
37 using namespace dealii;
38 
39 /**
40  * BP types. For more details, see https://ceed.exascaleproject.org/bps/.
41  */
42 enum class BPType : unsigned int
43 {
44   BP1,
45   BP2,
46   BP3,
47   BP4,
48   BP5,
49   BP6
50 };
51 
52 
53 
54 /**
55  * Struct storing relevant information regarding each BP.
56  */
57 struct BPInfo
58 {
59   BPInfo(const BPType type, const int dim, const int fe_degree)
60     : type(type)
61     , dim(dim)
62     , fe_degree(fe_degree)
63   {
64     if (type == BPType::BP1)
65       type_string = "BP1";
66     else if (type == BPType::BP2)
67       type_string = "BP2";
68     else if (type == BPType::BP3)
69       type_string = "BP3";
70     else if (type == BPType::BP4)
71       type_string = "BP4";
72     else if (type == BPType::BP5)
73       type_string = "BP5";
74     else if (type == BPType::BP6)
75       type_string = "BP6";
76 
77     this->n_q_points_1d = (type <= BPType::BP4) ? (fe_degree + 2) : (fe_degree + 1);
78 
79     this->n_components =
80       (type == BPType::BP1 || type == BPType::BP3 || type == BPType::BP5) ? 1 : dim;
81   }
82 
83 
84   BPType       type;
85   std::string  type_string;
86   unsigned int dim;
87   unsigned int fe_degree;
88   unsigned int n_q_points_1d;
89   unsigned int n_components;
90 };
91 
92 
93 
94 /**
95  * Base class of operators.
96  */
97 template <typename Number>
98 class OperatorBase
99 {
100 public:
101   /**
102    * deal.II vector type
103    */
104   using VectorType = LinearAlgebra::distributed::Vector<Number>;
105 
106   /**
107    * Initialize vector.
108    */
109   virtual void
110   reinit() = 0;
111 
112   /**
113    * Perform matrix-vector product
114    */
115   virtual void
116   vmult(VectorType &dst, const VectorType &src) const = 0;
117 
118   /**
119    * Initialize vector.
120    */
121   virtual void
122   initialize_dof_vector(VectorType &vec) const = 0;
123 
124   /**
125    * Compute inverse of diagonal.
126    */
127   virtual void
128   compute_inverse_diagonal(VectorType &diagonal) const = 0;
129 };
130 
131 
132 /**
133  * Operator implementation using libCEED.
134  */
135 template <int dim, typename Number>
136 class OperatorCeed : public OperatorBase<Number>
137 {
138 public:
139   using VectorType = typename OperatorBase<Number>::VectorType;
140 
141   /**
142    * Constructor.
143    */
144   OperatorCeed(const Mapping<dim>              &mapping,
145                const DoFHandler<dim>           &dof_handler,
146                const AffineConstraints<Number> &constraints,
147                const Quadrature<dim>           &quadrature,
148                const BPType                    &bp,
149                const std::string               &resource)
150     : mapping(mapping)
151     , dof_handler(dof_handler)
152     , constraints(constraints)
153     , quadrature(quadrature)
154     , bp(bp)
155     , resource(resource)
156   {
157     reinit();
158   }
159 
160   /**
161    * Destructor.
162    */
163   ~OperatorCeed()
164   {
165     CeedVectorDestroy(&src_ceed);
166     CeedVectorDestroy(&dst_ceed);
167     CeedOperatorDestroy(&op_apply);
168     CeedDestroy(&ceed);
169   }
170 
171   /**
172    * Initialized internal data structures, particularly, libCEED.
173    */
174   void
175   reinit() override
176   {
177     CeedVector           q_data;
178     CeedBasis            sol_basis;
179     CeedElemRestriction  sol_restriction;
180     CeedElemRestriction  q_data_restriction;
181     BuildContext         build_ctx_data;
182     CeedQFunctionContext build_ctx;
183     CeedQFunction        qf_apply;
184 
185     const auto &tria = dof_handler.get_triangulation();
186     const auto &fe   = dof_handler.get_fe();
187 
188     const auto n_components = fe.n_components();
189 
190     if (bp == BPType::BP1 || bp == BPType::BP3 || bp == BPType::BP5)
191       {
192         AssertThrow(n_components == 1, ExcInternalError());
193       }
194     else
195       {
196         AssertThrow(n_components == dim, ExcInternalError());
197       }
198 
199     // 1) create CEED instance -> "MatrixFree"
200     const char *ceed_spec = resource.c_str();
201     CeedInit(ceed_spec, &ceed);
202 
203     // 2) create shape functions -> "ShapeInfo"
204     const unsigned int fe_degree  = fe.tensor_degree();
205     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
206     {
207       dealii::internal::MatrixFreeFunctions::ShapeInfo<double> shape_info;
208       shape_info.reinit(quadrature, fe, 0);
209       dealii::internal::MatrixFreeFunctions::UnivariateShapeData<double> shape_data =
210         shape_info.get_shape_data();
211       std::vector<CeedScalar> q_ref_1d;
212       for (const auto q : quadrature.get_tensor_basis()[0].get_points())
213         q_ref_1d.push_back(q(0));
214 
215       CeedBasisCreateTensorH1(ceed,
216                               dim,
217                               n_components,
218                               fe_degree + 1,
219                               n_q_points,
220                               shape_data.shape_values.data(),
221                               shape_data.shape_gradients.data(),
222                               q_ref_1d.data(),
223                               quadrature.get_tensor_basis()[0].get_weights().data(),
224                               &sol_basis);
225     }
226 
227     // 3) create restriction matrix -> DoFInfo
228     unsigned int n_local_active_cells = 0;
229 
230     for (const auto &cell : dof_handler.active_cell_iterators())
231       if (cell->is_locally_owned())
232         n_local_active_cells++;
233 
234     partitioner =
235       std::make_shared<Utilities::MPI::Partitioner>(dof_handler.locally_owned_dofs(),
236                                                     DoFTools::extract_locally_active_dofs(
237                                                       dof_handler),
238                                                     dof_handler.get_communicator());
239 
240     std::vector<CeedInt> indices;
241     indices.reserve(n_local_active_cells * fe.n_dofs_per_cell() / n_components);
242 
243     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
244 
245     std::vector<types::global_dof_index> local_indices(fe.n_dofs_per_cell());
246 
247     for (const auto &cell : dof_handler.active_cell_iterators())
248       if (cell->is_locally_owned())
249         {
250           cell->get_dof_indices(local_indices);
251 
252           for (const auto i : dof_mapping)
253             indices.emplace_back(
254               partitioner->global_to_local(local_indices[fe.component_to_system_index(0, i)]) /
255               n_components);
256         }
257 
258     CeedElemRestrictionCreate(ceed,
259                               n_local_active_cells,
260                               fe.n_dofs_per_cell() / n_components,
261                               n_components,
262                               std::max<unsigned int>(this->extended_local_size() / n_components, 1),
263                               this->extended_local_size(),
264                               CEED_MEM_HOST,
265                               CEED_COPY_VALUES,
266                               indices.data(),
267                               &sol_restriction);
268 
269     // 4) create mapping -> MappingInfo
270     const unsigned int n_components_metric = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
271 
272     this->weights = compute_metric_data(ceed, mapping, tria, quadrature, bp);
273 
274     strides = {{1,
275                 static_cast<int>(quadrature.size()),
276                 static_cast<int>(quadrature.size() * n_components_metric)}};
277     CeedVectorCreate(ceed, weights.size(), &q_data);
278     CeedVectorSetArray(q_data, CEED_MEM_HOST, CEED_USE_POINTER, weights.data());
279     CeedElemRestrictionCreateStrided(ceed,
280                                      n_local_active_cells,
281                                      quadrature.size(),
282                                      n_components_metric,
283                                      weights.size(),
284                                      strides.data(),
285                                      &q_data_restriction);
286 
287     build_ctx_data.dim       = dim;
288     build_ctx_data.space_dim = dim;
289 
290     CeedQFunctionContextCreate(ceed, &build_ctx);
291     CeedQFunctionContextSetData(
292       build_ctx, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(build_ctx_data), &build_ctx_data);
293 
294     // 5) create q operation
295     if (bp == BPType::BP1)
296       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass, f_apply_mass_loc, &qf_apply);
297     else if (bp == BPType::BP2)
298       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass_vec, f_apply_mass_vec_loc, &qf_apply);
299     else if (bp == BPType::BP3 || bp == BPType::BP5)
300       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson, f_apply_poisson_loc, &qf_apply);
301     else if (bp == BPType::BP4 || bp == BPType::BP6)
302       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson_vec, f_apply_poisson_vec_loc, &qf_apply);
303     else
304       AssertThrow(false, ExcInternalError());
305 
306     if (bp <= BPType::BP2)
307       CeedQFunctionAddInput(qf_apply, "u", n_components, CEED_EVAL_INTERP);
308     else
309       CeedQFunctionAddInput(qf_apply, "u", dim * n_components, CEED_EVAL_GRAD);
310 
311     CeedQFunctionAddInput(qf_apply, "qdata", n_components_metric, CEED_EVAL_NONE);
312 
313     if (bp <= BPType::BP2)
314       CeedQFunctionAddOutput(qf_apply, "v", n_components, CEED_EVAL_INTERP);
315     else
316       CeedQFunctionAddOutput(qf_apply, "v", dim * n_components, CEED_EVAL_GRAD);
317 
318     CeedQFunctionSetContext(qf_apply, build_ctx);
319 
320     // 6) put everything together
321     CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply);
322 
323     CeedOperatorSetField(op_apply, "u", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
324     CeedOperatorSetField(op_apply, "qdata", q_data_restriction, CEED_BASIS_NONE, q_data);
325     CeedOperatorSetField(op_apply, "v", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
326 
327     // 7) libCEED vectors
328     CeedElemRestrictionCreateVector(sol_restriction, &src_ceed, NULL);
329     CeedElemRestrictionCreateVector(sol_restriction, &dst_ceed, NULL);
330 
331     // 8) cleanup
332     CeedVectorDestroy(&q_data);
333     CeedElemRestrictionDestroy(&q_data_restriction);
334     CeedElemRestrictionDestroy(&sol_restriction);
335     CeedBasisDestroy(&sol_basis);
336     CeedQFunctionContextDestroy(&build_ctx);
337     CeedQFunctionDestroy(&qf_apply);
338   }
339 
340   /**
341    * Perform matrix-vector product.
342    */
343   void
344   vmult(VectorType &dst, const VectorType &src) const override
345   {
346     // communicate: update ghost values
347     src.update_ghost_values();
348 
349     if (dof_handler.get_fe().n_components() == 1)
350       {
351         // pass memory buffers to libCEED
352         VectorTypeCeed x(src_ceed);
353         VectorTypeCeed y(dst_ceed);
354         x.import_array(src, CEED_MEM_HOST);
355         y.import_array(dst, CEED_MEM_HOST);
356 
357         // apply operator
358         CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE);
359 
360         // pull arrays back to deal.II
361         x.sync_array();
362         y.sync_array();
363       }
364     else // TODO: needed for multiple components
365       {
366         // allocate space for block vectors
367         src_tmp.reinit(this->extended_local_size(), true);
368         dst_tmp.reinit(this->extended_local_size(), true);
369 
370         // copy to block vector
371         copy_to_block_vector(src_tmp, src);
372 
373         // pass memory buffers to libCEED
374         VectorTypeCeed x(src_ceed);
375         VectorTypeCeed y(dst_ceed);
376         x.import_array(src_tmp, CEED_MEM_HOST);
377         y.import_array(dst_tmp, CEED_MEM_HOST);
378 
379         // apply operator
380         CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE);
381 
382         // pull arrays back to deal.II
383         x.sync_array();
384         y.sync_array();
385 
386         // copy from block vector
387         copy_from_block_vector(dst, dst_tmp);
388       }
389 
390     // communicate: compress
391     src.zero_out_ghost_values();
392     dst.compress(VectorOperation::add);
393 
394     // apply constraints: we assume homogeneous DBC
395     constraints.set_zero(dst);
396   }
397 
398   /**
399    * Initialized vector.
400    */
401   void
402   initialize_dof_vector(VectorType &vec) const override
403   {
404     vec.reinit(partitioner);
405   }
406 
407   /**
408    * Compute inverse of diagonal.
409    */
410   void
411   compute_inverse_diagonal(VectorType &diagonal) const override
412   {
413     this->initialize_dof_vector(diagonal);
414 
415     // pass memory buffer to libCEED
416     VectorTypeCeed y(dst_ceed);
417     y.import_array(diagonal, CEED_MEM_HOST);
418 
419     CeedOperatorLinearAssembleDiagonal(op_apply, y(), CEED_REQUEST_IMMEDIATE);
420 
421     // pull array back to deal.II
422     y.sync_array();
423 
424     const unsigned int n_components = dof_handler.get_fe().n_components();
425 
426     if (n_components > 1) // TODO: needed for multiple components
427       {
428         VectorType tmp(diagonal);
429 
430         copy_from_block_vector(tmp, diagonal);
431 
432         std::swap(tmp, diagonal);
433       }
434 
435     diagonal.compress(VectorOperation::add);
436 
437     for (auto &i : diagonal)
438       i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0;
439   }
440 
441 private:
442   /**
443    * Wrapper around a deal.II vector to create a libCEED vector view.
444    */
445   class VectorTypeCeed
446   {
447   public:
448     /**
449      * Constructor.
450      */
451     VectorTypeCeed(const CeedVector &vec_orig)
452     {
453       vec_ceed = NULL;
454       CeedVectorReferenceCopy(vec_orig, &vec_ceed);
455     }
456 
457     /**
458      * Return libCEED vector view.
459      */
460     CeedVector &
461     operator()()
462     {
463       return vec_ceed;
464     }
465 
466     /**
467      * Set deal.II memory in libCEED vector.
468      */
469     void
470     import_array(const VectorType &vec, const CeedMemType space)
471     {
472       mem_space = space;
473       CeedVectorSetArray(vec_ceed, mem_space, CEED_USE_POINTER, vec.get_values());
474     }
475 
476     /**
477      * Sync memory from device to host.
478      */
479     void
480     sync_array()
481     {
482       CeedVectorSyncArray(vec_ceed, mem_space);
483     }
484 
485     /**
486      * Destructor: destroy vector view.
487      */
488     ~VectorTypeCeed()
489     {
490       bool has_array;
491       CeedVectorHasBorrowedArrayOfType(vec_ceed, mem_space, &has_array);
492       if (has_array)
493         {
494           CeedScalar *ptr;
495           CeedVectorTakeArray(vec_ceed, mem_space, &ptr);
496         }
497       CeedVectorDestroy(&vec_ceed);
498     }
499 
500   private:
501     /**
502      * libCEED vector view.
503      */
504     CeedMemType mem_space;
505     CeedVector  vec_ceed;
506   };
507 
508   /**
509    * Copy from block vector.
510    *
511    * @note Only needed for multiple components.
512    */
513   void
514   copy_from_block_vector(VectorType &dst, const VectorType &src) const
515   {
516     const unsigned int scalar_size = this->extended_local_size() / dim;
517 
518     for (unsigned int i = 0; i < scalar_size; ++i)
519       for (unsigned int j = 0; j < dim; ++j)
520         dst.get_values()[j + i * dim] = src.get_values()[j * scalar_size + i];
521   }
522 
523   /**
524    * Copy to block vector.
525    *
526    * @note Only needed for multiple components.
527    */
528   void
529   copy_to_block_vector(VectorType &dst, const VectorType &src) const
530   {
531     const unsigned int scalar_size = this->extended_local_size() / dim;
532 
533     for (unsigned int i = 0; i < scalar_size; ++i)
534       for (unsigned int j = 0; j < dim; ++j)
535         dst.get_values()[j * scalar_size + i] = src.get_values()[j + i * dim];
536   }
537 
538   /**
539    * Number of locally active DoFs.
540    */
541   unsigned int
542   extended_local_size() const
543   {
544     return partitioner->locally_owned_size() + partitioner->n_ghost_indices();
545   }
546 
547   /**
548    * Compute metric data: Jacobian, ...
549    */
550   static std::vector<double>
551   compute_metric_data(const Ceed               &ceed,
552                       const Mapping<dim>       &mapping,
553                       const Triangulation<dim> &tria,
554                       const Quadrature<dim>    &quadrature,
555                       const BPType              bp)
556   {
557     std::vector<double> weights;
558 
559     if (false)
560       {
561         FE_Nothing<dim> dummy_fe;
562         FEValues<dim>   fe_values(mapping, dummy_fe, quadrature, update_JxW_values);
563 
564         for (const auto &cell : tria.active_cell_iterators())
565           if (cell->is_locally_owned())
566             {
567               fe_values.reinit(cell);
568 
569               for (const auto q : fe_values.quadrature_point_indices())
570                 weights.emplace_back(fe_values.JxW(q));
571             }
572 
573         return weights;
574       }
575 
576     CeedBasis            geo_basis;
577     CeedVector           q_data;
578     CeedElemRestriction  q_data_restriction;
579     CeedVector           node_coords;
580     CeedElemRestriction  geo_restriction;
581     CeedQFunctionContext build_ctx;
582     CeedQFunction        qf_build;
583     CeedOperator         op_build;
584 
585     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
586 
587     const unsigned int n_components = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
588 
589     const auto mapping_q = dynamic_cast<const MappingQ<dim> *>(&mapping);
590 
591     AssertThrow(mapping_q, ExcMessage("Wrong mapping!"));
592 
593     const unsigned int fe_degree = mapping_q->get_degree();
594 
595     FE_Q<dim> geo_fe(fe_degree);
596 
597     {
598       dealii::internal::MatrixFreeFunctions::ShapeInfo<double> shape_info;
599       shape_info.reinit(quadrature, geo_fe, 0);
600       dealii::internal::MatrixFreeFunctions::UnivariateShapeData<double> shape_data =
601         shape_info.get_shape_data();
602       std::vector<CeedScalar> q_ref_1d;
603       for (const auto q : quadrature.get_tensor_basis()[0].get_points())
604         q_ref_1d.push_back(q(0));
605 
606       CeedBasisCreateTensorH1(ceed,
607                               dim,
608                               dim,
609                               fe_degree + 1,
610                               n_q_points,
611                               shape_data.shape_values.data(),
612                               shape_data.shape_gradients.data(),
613                               q_ref_1d.data(),
614                               quadrature.get_tensor_basis()[0].get_weights().data(),
615                               &geo_basis);
616     }
617 
618     unsigned int n_local_active_cells = 0;
619 
620     for (const auto &cell : tria.active_cell_iterators())
621       if (cell->is_locally_owned())
622         n_local_active_cells++;
623 
624     std::vector<double>  geo_support_points;
625     std::vector<CeedInt> geo_indices;
626 
627     DoFHandler<dim> geo_dof_handler(tria);
628     geo_dof_handler.distribute_dofs(geo_fe);
629 
630     const auto geo_partitioner =
631       std::make_shared<Utilities::MPI::Partitioner>(geo_dof_handler.locally_owned_dofs(),
632                                                     DoFTools::extract_locally_active_dofs(
633                                                       geo_dof_handler),
634                                                     geo_dof_handler.get_communicator());
635 
636     geo_indices.reserve(n_local_active_cells * geo_fe.n_dofs_per_cell());
637 
638     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
639 
640     FEValues<dim> fe_values(mapping,
641                             geo_fe,
642                             geo_fe.get_unit_support_points(),
643                             update_quadrature_points);
644 
645     std::vector<types::global_dof_index> local_indices(geo_fe.n_dofs_per_cell());
646 
647     const unsigned int n_points =
648       geo_partitioner->locally_owned_size() + geo_partitioner->n_ghost_indices();
649 
650     geo_support_points.resize(dim * n_points);
651 
652     for (const auto &cell : geo_dof_handler.active_cell_iterators())
653       if (cell->is_locally_owned())
654         {
655           fe_values.reinit(cell);
656           cell->get_dof_indices(local_indices);
657 
658           for (const auto i : dof_mapping)
659             {
660               const auto index = geo_partitioner->global_to_local(local_indices[i]);
661               geo_indices.emplace_back(index);
662 
663               const auto point = fe_values.quadrature_point(i);
664 
665               for (unsigned int d = 0; d < dim; ++d)
666                 geo_support_points[index + d * n_points] = point[d];
667             }
668         }
669 
670     weights.resize(n_local_active_cells * quadrature.size() * n_components);
671 
672     CeedInt strides[3] = {1,
673                           static_cast<int>(quadrature.size()),
674                           static_cast<int>(quadrature.size() * n_components)};
675 
676     CeedVectorCreate(ceed, weights.size(), &q_data);
677     CeedVectorSetArray(q_data, CEED_MEM_HOST, CEED_USE_POINTER, weights.data());
678     CeedElemRestrictionCreateStrided(ceed,
679                                      n_local_active_cells,
680                                      quadrature.size(),
681                                      n_components,
682                                      weights.size(),
683                                      strides,
684                                      &q_data_restriction);
685 
686     CeedVectorCreate(ceed, geo_support_points.size(), &node_coords);
687     CeedVectorSetArray(node_coords, CEED_MEM_HOST, CEED_USE_POINTER, geo_support_points.data());
688 
689     CeedElemRestrictionCreate(ceed,
690                               n_local_active_cells,
691                               geo_fe.n_dofs_per_cell(),
692                               dim,
693                               std::max<unsigned int>(geo_support_points.size() / dim, 1),
694                               geo_support_points.size(),
695                               CEED_MEM_HOST,
696                               CEED_COPY_VALUES,
697                               geo_indices.data(),
698                               &geo_restriction);
699 
700     BuildContext build_ctx_data;
701     build_ctx_data.dim       = dim;
702     build_ctx_data.space_dim = dim;
703 
704     CeedQFunctionContextCreate(ceed, &build_ctx);
705     CeedQFunctionContextSetData(
706       build_ctx, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(build_ctx_data), &build_ctx_data);
707 
708     // 5) create q operation
709     if (bp <= BPType::BP2)
710       CeedQFunctionCreateInterior(ceed, 1, f_build_mass, f_build_mass_loc, &qf_build);
711     else
712       CeedQFunctionCreateInterior(ceed, 1, f_build_poisson, f_build_poisson_loc, &qf_build);
713 
714     CeedQFunctionAddInput(qf_build, "geo", dim * dim, CEED_EVAL_GRAD);
715     CeedQFunctionAddInput(qf_build, "weights", 1, CEED_EVAL_WEIGHT);
716     CeedQFunctionAddOutput(qf_build, "qdata", n_components, CEED_EVAL_NONE);
717     CeedQFunctionSetContext(qf_build, build_ctx);
718 
719     // 6) put everything together
720     CeedOperatorCreate(ceed, qf_build, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_build);
721     CeedOperatorSetField(op_build, "geo", geo_restriction, geo_basis, CEED_VECTOR_ACTIVE);
722     CeedOperatorSetField(
723       op_build, "weights", CEED_ELEMRESTRICTION_NONE, geo_basis, CEED_VECTOR_NONE);
724     CeedOperatorSetField(
725       op_build, "qdata", q_data_restriction, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE);
726 
727     CeedOperatorApply(op_build, node_coords, q_data, CEED_REQUEST_IMMEDIATE);
728 
729     CeedVectorDestroy(&node_coords);
730     CeedVectorSyncArray(q_data, CEED_MEM_HOST);
731     CeedVectorDestroy(&q_data);
732     CeedElemRestrictionDestroy(&geo_restriction);
733     CeedElemRestrictionDestroy(&q_data_restriction);
734     CeedBasisDestroy(&geo_basis);
735     CeedQFunctionContextDestroy(&build_ctx);
736     CeedQFunctionDestroy(&qf_build);
737     CeedOperatorDestroy(&op_build);
738 
739     return weights;
740   }
741 
742   /**
743    * Mapping object passed to the constructor.
744    */
745   const Mapping<dim> &mapping;
746 
747   /**
748    * DoFHandler object passed to the constructor.
749    */
750   const DoFHandler<dim> &dof_handler;
751 
752   /**
753    * Constraints object passed to the constructor.
754    */
755   const AffineConstraints<Number> &constraints;
756 
757   /**
758    * Quadrature rule object passed to the constructor.
759    */
760   const Quadrature<dim> &quadrature;
761 
762   /**
763    * Selected BP.
764    */
765   const BPType bp;
766 
767   /**
768    * Resource name.
769    */
770   const std::string resource;
771 
772   /**
773    * Partitioner for distributed vectors.
774    */
775   std::shared_ptr<Utilities::MPI::Partitioner> partitioner;
776 
777   /**
778    * libCEED data structures.
779    */
780   Ceed                   ceed;
781   std::vector<double>    weights;
782   std::array<CeedInt, 3> strides;
783   CeedVector             src_ceed;
784   CeedVector             dst_ceed;
785   CeedOperator           op_apply;
786 
787   /**
788    * Temporal (tempral) vectors.
789    *
790    * @note Only needed for multiple components.
791    */
792   mutable VectorType src_tmp;
793   mutable VectorType dst_tmp;
794 };
795 
796 
797 
798 template <int dim, typename Number>
799 class OperatorDealii : public OperatorBase<Number>
800 {
801 public:
802   using VectorType = typename OperatorBase<Number>::VectorType;
803 
804   /**
805    * Constructor.
806    */
807   OperatorDealii(const Mapping<dim>              &mapping,
808                  const DoFHandler<dim>           &dof_handler,
809                  const AffineConstraints<Number> &constraints,
810                  const Quadrature<dim>           &quadrature,
811                  const BPType                    &bp)
812     : mapping(mapping)
813     , dof_handler(dof_handler)
814     , constraints(constraints)
815     , quadrature(quadrature)
816     , bp(bp)
817   {
818     reinit();
819   }
820 
821   /**
822    * Destructor.
823    */
824   ~OperatorDealii() = default;
825 
826   /**
827    * Initialized internal data structures, particularly, MatrixFree.
828    */
829   void
830   reinit() override
831   {
832     // configure MatrixFree
833     typename MatrixFree<dim, Number>::AdditionalData additional_data;
834     additional_data.tasks_parallel_scheme =
835       MatrixFree<dim, Number>::AdditionalData::TasksParallelScheme::none;
836 
837     // create MatrixFree
838     matrix_free.reinit(mapping, dof_handler, constraints, quadrature, additional_data);
839   }
840 
841   /**
842    * Matrix-vector product.
843    */
844   void
845   vmult(VectorType &dst, const VectorType &src) const override
846   {
847     if (dof_handler.get_fe().n_components() == 1)
848       {
849         matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<1>, this, dst, src, true);
850       }
851     else
852       {
853         AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError());
854 
855         matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<dim>, this, dst, src, true);
856       }
857   }
858 
859   /**
860    * Initialize vector.
861    */
862   void
863   initialize_dof_vector(VectorType &vec) const override
864   {
865     matrix_free.initialize_dof_vector(vec);
866   }
867 
868   /**
869    * Compute inverse of diagonal.
870    */
871   void
872   compute_inverse_diagonal(VectorType &diagonal) const override
873   {
874     this->initialize_dof_vector(diagonal);
875 
876     if (dof_handler.get_fe().n_components() == 1)
877       {
878         MatrixFreeTools::compute_diagonal(matrix_free,
879                                           diagonal,
880                                           &OperatorDealii::do_cell_integral_local<1>,
881                                           this);
882       }
883     else
884       {
885         AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError());
886 
887         MatrixFreeTools::compute_diagonal(matrix_free,
888                                           diagonal,
889                                           &OperatorDealii::do_cell_integral_local<dim>,
890                                           this);
891       }
892 
893     for (auto &i : diagonal)
894       i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0;
895   }
896 
897 private:
898   /**
899    * Cell integral without vector access.
900    */
901   template <int n_components>
902   void
903   do_cell_integral_local(FEEvaluation<dim, -1, 0, n_components, Number> &phi) const
904   {
905     if (bp <= BPType::BP2) // mass matrix
906       {
907         phi.evaluate(EvaluationFlags::values);
908         for (const auto q : phi.quadrature_point_indices())
909           phi.submit_value(phi.get_value(q), q);
910         phi.integrate(EvaluationFlags::values);
911       }
912     else // Poisson operator
913       {
914         phi.evaluate(EvaluationFlags::gradients);
915         for (const auto q : phi.quadrature_point_indices())
916           phi.submit_gradient(phi.get_gradient(q), q);
917         phi.integrate(EvaluationFlags::gradients);
918       }
919   }
920 
921   /**
922    * Cell integral on a range of cells.
923    */
924   template <int n_components>
925   void
926   do_cell_integral_range(const MatrixFree<dim, Number>               &matrix_free,
927                          VectorType                                  &dst,
928                          const VectorType                            &src,
929                          const std::pair<unsigned int, unsigned int> &range) const
930   {
931     FEEvaluation<dim, -1, 0, n_components, Number> phi(matrix_free, range);
932 
933     for (unsigned cell = range.first; cell < range.second; ++cell)
934       {
935         phi.reinit(cell);
936         phi.read_dof_values(src);            // read source vector
937         do_cell_integral_local(phi);         // cell integral
938         phi.distribute_local_to_global(dst); // write to destination vector
939       }
940   }
941 
942   /**
943    * Mapping object passed to the constructor.
944    */
945   const Mapping<dim> &mapping;
946 
947   /**
948    * DoFHandler object passed to the constructor.
949    */
950   const DoFHandler<dim> &dof_handler;
951 
952   /**
953    * Constraints object passed to the constructor.
954    */
955   const AffineConstraints<Number> &constraints;
956 
957   /**
958    * Quadrature rule object passed to the constructor.
959    */
960   const Quadrature<dim> &quadrature;
961 
962   /**
963    * Selected BP.
964    */
965   const BPType bp;
966 
967   /**
968    * MatrixFree object.
969    */
970   MatrixFree<dim, Number> matrix_free;
971 };
972