xref: /libCEED/examples/deal.II/bps.h (revision d6c19ee8504c74d8f30ec67127f069a58291b3ac)
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2023 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 //  Authors: Peter Munch, Martin Kronbichler
15 //
16 // ---------------------------------------------------------------------
17 
18 // deal.II includes
19 #include <deal.II/dofs/dof_tools.h>
20 
21 #include <deal.II/fe/mapping.h>
22 
23 #include <deal.II/lac/la_parallel_vector.h>
24 
25 #include <deal.II/matrix_free/fe_evaluation.h>
26 #include <deal.II/matrix_free/matrix_free.h>
27 #include <deal.II/matrix_free/shape_info.h>
28 #include <deal.II/matrix_free/tools.h>
29 
30 // libCEED includes
31 #include <ceed.h>
32 #include <ceed/backend.h>
33 
34 // QFunction source
35 #include "bps-qfunctions.h"
36 
37 using namespace dealii;
38 
39 /**
40  * BP types. For more details, see https://ceed.exascaleproject.org/bps/.
41  */
42 enum class BPType : unsigned int
43 {
44   BP1,
45   BP2,
46   BP3,
47   BP4,
48   BP5,
49   BP6
50 };
51 
52 
53 
54 /**
55  * Struct storing relevant information regarding each BP.
56  */
57 struct BPInfo
58 {
59   BPInfo(const BPType type, const int dim, const int fe_degree)
60     : type(type)
61     , dim(dim)
62     , fe_degree(fe_degree)
63   {
64     if (type == BPType::BP1)
65       type_string = "BP1";
66     else if (type == BPType::BP2)
67       type_string = "BP2";
68     else if (type == BPType::BP3)
69       type_string = "BP3";
70     else if (type == BPType::BP4)
71       type_string = "BP4";
72     else if (type == BPType::BP5)
73       type_string = "BP5";
74     else if (type == BPType::BP6)
75       type_string = "BP6";
76 
77     this->n_q_points_1d = (type <= BPType::BP4) ? (fe_degree + 2) : (fe_degree + 1);
78 
79     this->n_components =
80       (type == BPType::BP1 || type == BPType::BP3 || type == BPType::BP5) ? 1 : dim;
81   }
82 
83 
84   BPType       type;
85   std::string  type_string;
86   unsigned int dim;
87   unsigned int fe_degree;
88   unsigned int n_q_points_1d;
89   unsigned int n_components;
90 };
91 
92 
93 
94 /**
95  * Base class of operators.
96  */
97 template <typename Number>
98 class OperatorBase
99 {
100 public:
101   /**
102    * deal.II vector type
103    */
104   using VectorType = LinearAlgebra::distributed::Vector<Number>;
105 
106   /**
107    * Initialize vector.
108    */
109   virtual void
110   reinit() = 0;
111 
112   /**
113    * Perform matrix-vector product
114    */
115   virtual void
116   vmult(VectorType &dst, const VectorType &src) const = 0;
117 
118   /**
119    * Initialize vector.
120    */
121   virtual void
122   initialize_dof_vector(VectorType &vec) const = 0;
123 
124   /**
125    * Compute inverse of diagonal.
126    */
127   virtual void
128   compute_inverse_diagonal(VectorType &diagonal) const = 0;
129 };
130 
131 
132 /**
133  * Operator implementation using libCEED.
134  */
135 template <int dim, typename Number>
136 class OperatorCeed : public OperatorBase<Number>
137 {
138 public:
139   using VectorType = typename OperatorBase<Number>::VectorType;
140 
141   /**
142    * Constructor.
143    */
144   OperatorCeed(const Mapping<dim>              &mapping,
145                const DoFHandler<dim>           &dof_handler,
146                const AffineConstraints<Number> &constraints,
147                const Quadrature<dim>           &quadrature,
148                const BPType                    &bp,
149                const std::string               &resource)
150     : mapping(mapping)
151     , dof_handler(dof_handler)
152     , constraints(constraints)
153     , quadrature(quadrature)
154     , bp(bp)
155     , resource(resource)
156   {
157     reinit();
158   }
159 
160   /**
161    * Destructor.
162    */
163   ~OperatorCeed()
164   {
165     CeedVectorDestroy(&src_ceed);
166     CeedVectorDestroy(&dst_ceed);
167     CeedOperatorDestroy(&op_apply);
168     CeedDestroy(&ceed);
169   }
170 
171   /**
172    * Initialized internal data structures, particularly, libCEED.
173    */
174   void
175   reinit() override
176   {
177     CeedVector           q_data;
178     CeedBasis            sol_basis;
179     CeedElemRestriction  sol_restriction;
180     CeedElemRestriction  q_data_restriction;
181     BuildContext         build_ctx_data;
182     CeedQFunctionContext build_ctx;
183     CeedQFunction        qf_apply;
184 
185     const auto &tria = dof_handler.get_triangulation();
186     const auto &fe   = dof_handler.get_fe();
187 
188     const auto n_components = fe.n_components();
189 
190     if (bp == BPType::BP1 || bp == BPType::BP3 || bp == BPType::BP5)
191       {
192         AssertThrow(n_components == 1, ExcInternalError());
193       }
194     else
195       {
196         AssertThrow(n_components == dim, ExcInternalError());
197       }
198 
199     // 1) create CEED instance -> "MatrixFree"
200     const char *ceed_spec = resource.c_str();
201     CeedInit(ceed_spec, &ceed);
202 
203     // 2) create shape functions -> "ShapeInfo"
204     const unsigned int fe_degree  = fe.tensor_degree();
205     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
206     {
207       const dealii::internal::MatrixFreeFunctions::ShapeInfo<double> shape_info(quadrature, fe, 0);
208       const auto             &shape_data = shape_info.get_shape_data();
209       std::vector<CeedScalar> q_ref_1d;
210       for (const auto q : shape_data.quadrature.get_points())
211         q_ref_1d.push_back(q(0));
212 
213       CeedBasisCreateTensorH1(ceed,
214                               dim,
215                               n_components,
216                               fe_degree + 1,
217                               n_q_points,
218                               shape_data.shape_values.data(),
219                               shape_data.shape_gradients.data(),
220                               q_ref_1d.data(),
221                               quadrature.get_tensor_basis()[0].get_weights().data(),
222                               &sol_basis);
223     }
224 
225     // 3) create restriction matrix -> DoFInfo
226     unsigned int n_local_active_cells = 0;
227 
228     for (const auto &cell : dof_handler.active_cell_iterators())
229       if (cell->is_locally_owned())
230         n_local_active_cells++;
231 
232     partitioner =
233       std::make_shared<Utilities::MPI::Partitioner>(dof_handler.locally_owned_dofs(),
234                                                     DoFTools::extract_locally_active_dofs(
235                                                       dof_handler),
236                                                     dof_handler.get_communicator());
237 
238     std::vector<CeedInt> indices;
239     indices.reserve(n_local_active_cells * fe.n_dofs_per_cell() / n_components);
240 
241     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
242 
243     std::vector<types::global_dof_index> local_indices(fe.n_dofs_per_cell());
244 
245     for (const auto &cell : dof_handler.active_cell_iterators())
246       if (cell->is_locally_owned())
247         {
248           cell->get_dof_indices(local_indices);
249 
250           for (const auto i : dof_mapping)
251             indices.emplace_back(
252               partitioner->global_to_local(local_indices[fe.component_to_system_index(0, i)]) /
253               n_components);
254         }
255 
256     CeedElemRestrictionCreate(ceed,
257                               n_local_active_cells,
258                               fe.n_dofs_per_cell() / n_components,
259                               n_components,
260                               std::max<unsigned int>(this->extended_local_size() / n_components, 1),
261                               this->extended_local_size(),
262                               CEED_MEM_HOST,
263                               CEED_COPY_VALUES,
264                               indices.data(),
265                               &sol_restriction);
266 
267     // 4) create mapping -> MappingInfo
268     const unsigned int n_components_metric = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
269 
270     this->weights = compute_metric_data(ceed, mapping, tria, quadrature, bp);
271 
272     strides = {{1,
273                 static_cast<int>(quadrature.size()),
274                 static_cast<int>(quadrature.size() * n_components_metric)}};
275     CeedVectorCreate(ceed, weights.size(), &q_data);
276     CeedVectorSetArray(q_data, CEED_MEM_HOST, CEED_USE_POINTER, weights.data());
277     CeedElemRestrictionCreateStrided(ceed,
278                                      n_local_active_cells,
279                                      quadrature.size(),
280                                      n_components_metric,
281                                      weights.size(),
282                                      strides.data(),
283                                      &q_data_restriction);
284 
285     build_ctx_data.dim       = dim;
286     build_ctx_data.space_dim = dim;
287 
288     CeedQFunctionContextCreate(ceed, &build_ctx);
289     CeedQFunctionContextSetData(
290       build_ctx, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(build_ctx_data), &build_ctx_data);
291 
292     // 5) create q operation
293     if (bp == BPType::BP1)
294       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass, f_apply_mass_loc, &qf_apply);
295     else if (bp == BPType::BP2)
296       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass_vec, f_apply_mass_vec_loc, &qf_apply);
297     else if (bp == BPType::BP3 || bp == BPType::BP5)
298       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson, f_apply_poisson_loc, &qf_apply);
299     else if (bp == BPType::BP4 || bp == BPType::BP6)
300       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson_vec, f_apply_poisson_vec_loc, &qf_apply);
301     else
302       AssertThrow(false, ExcInternalError());
303 
304     if (bp <= BPType::BP2)
305       CeedQFunctionAddInput(qf_apply, "u", n_components, CEED_EVAL_INTERP);
306     else
307       CeedQFunctionAddInput(qf_apply, "u", dim * n_components, CEED_EVAL_GRAD);
308 
309     CeedQFunctionAddInput(qf_apply, "qdata", n_components_metric, CEED_EVAL_NONE);
310 
311     if (bp <= BPType::BP2)
312       CeedQFunctionAddOutput(qf_apply, "v", n_components, CEED_EVAL_INTERP);
313     else
314       CeedQFunctionAddOutput(qf_apply, "v", dim * n_components, CEED_EVAL_GRAD);
315 
316     CeedQFunctionSetContext(qf_apply, build_ctx);
317 
318     // 6) put everything together
319     CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply);
320 
321     CeedOperatorSetField(op_apply, "u", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
322     CeedOperatorSetField(op_apply, "qdata", q_data_restriction, CEED_BASIS_NONE, q_data);
323     CeedOperatorSetField(op_apply, "v", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
324 
325     // 7) libCEED vectors
326     CeedElemRestrictionCreateVector(sol_restriction, &src_ceed, NULL);
327     CeedElemRestrictionCreateVector(sol_restriction, &dst_ceed, NULL);
328 
329     // 8) cleanup
330     CeedVectorDestroy(&q_data);
331     CeedElemRestrictionDestroy(&q_data_restriction);
332     CeedElemRestrictionDestroy(&sol_restriction);
333     CeedBasisDestroy(&sol_basis);
334     CeedQFunctionContextDestroy(&build_ctx);
335     CeedQFunctionDestroy(&qf_apply);
336   }
337 
338   /**
339    * Perform matrix-vector product.
340    */
341   void
342   vmult(VectorType &dst, const VectorType &src) const override
343   {
344     // communicate: update ghost values
345     src.update_ghost_values();
346 
347     if (dof_handler.get_fe().n_components() == 1)
348       {
349         // pass memory buffers to libCEED
350         VectorTypeCeed x(src_ceed);
351         VectorTypeCeed y(dst_ceed);
352         x.import_array(src, CEED_MEM_HOST);
353         y.import_array(dst, CEED_MEM_HOST);
354 
355         // apply operator
356         CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE);
357 
358         // pull arrays back to deal.II
359         x.sync_array();
360         y.sync_array();
361       }
362     else // TODO: needed for multiple components
363       {
364         // allocate space for block vectors
365         src_tmp.reinit(this->extended_local_size(), true);
366         dst_tmp.reinit(this->extended_local_size(), true);
367 
368         // copy to block vector
369         copy_to_block_vector(src_tmp, src);
370 
371         // pass memory buffers to libCEED
372         VectorTypeCeed x(src_ceed);
373         VectorTypeCeed y(dst_ceed);
374         x.import_array(src_tmp, CEED_MEM_HOST);
375         y.import_array(dst_tmp, CEED_MEM_HOST);
376 
377         // apply operator
378         CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE);
379 
380         // pull arrays back to deal.II
381         x.sync_array();
382         y.sync_array();
383 
384         // copy from block vector
385         copy_from_block_vector(dst, dst_tmp);
386       }
387 
388     // communicate: compress
389     src.zero_out_ghost_values();
390     dst.compress(VectorOperation::add);
391 
392     // apply constraints: we assume homogeneous DBC
393     constraints.set_zero(dst);
394   }
395 
396   /**
397    * Initialized vector.
398    */
399   void
400   initialize_dof_vector(VectorType &vec) const override
401   {
402     vec.reinit(partitioner);
403   }
404 
405   /**
406    * Compute inverse of diagonal.
407    */
408   void
409   compute_inverse_diagonal(VectorType &diagonal) const override
410   {
411     this->initialize_dof_vector(diagonal);
412 
413     // pass memory buffer to libCEED
414     VectorTypeCeed y(dst_ceed);
415     y.import_array(diagonal, CEED_MEM_HOST);
416 
417     CeedOperatorLinearAssembleDiagonal(op_apply, y(), CEED_REQUEST_IMMEDIATE);
418 
419     // pull array back to deal.II
420     y.sync_array();
421 
422     const unsigned int n_components = dof_handler.get_fe().n_components();
423 
424     if (n_components > 1) // TODO: needed for multiple components
425       {
426         VectorType tmp(diagonal);
427 
428         copy_from_block_vector(tmp, diagonal);
429 
430         std::swap(tmp, diagonal);
431       }
432 
433     diagonal.compress(VectorOperation::add);
434 
435     for (auto &i : diagonal)
436       i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0;
437   }
438 
439 private:
440   /**
441    * Wrapper around a deal.II vector to create a libCEED vector view.
442    */
443   class VectorTypeCeed
444   {
445   public:
446     /**
447      * Constructor.
448      */
449     VectorTypeCeed(const CeedVector &vec_orig)
450     {
451       vec_ceed = NULL;
452       CeedVectorReferenceCopy(vec_orig, &vec_ceed);
453     }
454 
455     /**
456      * Return libCEED vector view.
457      */
458     CeedVector &
459     operator()()
460     {
461       return vec_ceed;
462     }
463 
464     /**
465      * Set deal.II memory in libCEED vector.
466      */
467     void
468     import_array(const VectorType &vec, const CeedMemType space)
469     {
470       mem_space = space;
471       CeedVectorSetArray(vec_ceed, mem_space, CEED_USE_POINTER, vec.get_values());
472     }
473 
474     /**
475      * Sync memory from device to host.
476      */
477     void
478     sync_array()
479     {
480       CeedVectorSyncArray(vec_ceed, mem_space);
481     }
482 
483     /**
484      * Destructor: destroy vector view.
485      */
486     ~VectorTypeCeed()
487     {
488       bool has_array;
489       CeedVectorHasBorrowedArrayOfType(vec_ceed, mem_space, &has_array);
490       if (has_array)
491         {
492           CeedScalar *ptr;
493           CeedVectorTakeArray(vec_ceed, mem_space, &ptr);
494         }
495       CeedVectorDestroy(&vec_ceed);
496     }
497 
498   private:
499     /**
500      * libCEED vector view.
501      */
502     CeedMemType mem_space;
503     CeedVector  vec_ceed;
504   };
505 
506   /**
507    * Copy from block vector.
508    *
509    * @note Only needed for multiple components.
510    */
511   void
512   copy_from_block_vector(VectorType &dst, const VectorType &src) const
513   {
514     const unsigned int scalar_size = this->extended_local_size() / dim;
515 
516     for (unsigned int i = 0; i < scalar_size; ++i)
517       for (unsigned int j = 0; j < dim; ++j)
518         dst.get_values()[j + i * dim] = src.get_values()[j * scalar_size + i];
519   }
520 
521   /**
522    * Copy to block vector.
523    *
524    * @note Only needed for multiple components.
525    */
526   void
527   copy_to_block_vector(VectorType &dst, const VectorType &src) const
528   {
529     const unsigned int scalar_size = this->extended_local_size() / dim;
530 
531     for (unsigned int i = 0; i < scalar_size; ++i)
532       for (unsigned int j = 0; j < dim; ++j)
533         dst.get_values()[j * scalar_size + i] = src.get_values()[j + i * dim];
534   }
535 
536   /**
537    * Number of locally active DoFs.
538    */
539   unsigned int
540   extended_local_size() const
541   {
542     return partitioner->locally_owned_size() + partitioner->n_ghost_indices();
543   }
544 
545   /**
546    * Compute metric data: Jacobian, ...
547    */
548   static std::vector<double>
549   compute_metric_data(const Ceed               &ceed,
550                       const Mapping<dim>       &mapping,
551                       const Triangulation<dim> &tria,
552                       const Quadrature<dim>    &quadrature,
553                       const BPType              bp)
554   {
555     std::vector<double> weights;
556 
557     if (false)
558       {
559         FE_Nothing<dim> dummy_fe;
560         FEValues<dim>   fe_values(mapping, dummy_fe, quadrature, update_JxW_values);
561 
562         for (const auto &cell : tria.active_cell_iterators())
563           if (cell->is_locally_owned())
564             {
565               fe_values.reinit(cell);
566 
567               for (const auto q : fe_values.quadrature_point_indices())
568                 weights.emplace_back(fe_values.JxW(q));
569             }
570 
571         return weights;
572       }
573 
574     CeedBasis            geo_basis;
575     CeedVector           q_data;
576     CeedElemRestriction  q_data_restriction;
577     CeedVector           node_coords;
578     CeedElemRestriction  geo_restriction;
579     CeedQFunctionContext build_ctx;
580     CeedQFunction        qf_build;
581     CeedOperator         op_build;
582 
583     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
584 
585     const unsigned int n_components = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
586 
587     const auto mapping_q = dynamic_cast<const MappingQ<dim> *>(&mapping);
588 
589     AssertThrow(mapping_q, ExcMessage("Wrong mapping!"));
590 
591     const unsigned int fe_degree = mapping_q->get_degree();
592 
593     FE_Q<dim> geo_fe(fe_degree);
594 
595     {
596       const dealii::internal::MatrixFreeFunctions::ShapeInfo<double> shape_info(quadrature,
597                                                                                 geo_fe,
598                                                                                 0);
599       const auto             &shape_data = shape_info.get_shape_data();
600       std::vector<CeedScalar> q_ref_1d;
601       for (const auto q : shape_data.quadrature.get_points())
602         q_ref_1d.push_back(q(0));
603 
604       CeedBasisCreateTensorH1(ceed,
605                               dim,
606                               dim,
607                               fe_degree + 1,
608                               n_q_points,
609                               shape_data.shape_values.data(),
610                               shape_data.shape_gradients.data(),
611                               q_ref_1d.data(),
612                               quadrature.get_tensor_basis()[0].get_weights().data(),
613                               &geo_basis);
614     }
615 
616     unsigned int n_local_active_cells = 0;
617 
618     for (const auto &cell : tria.active_cell_iterators())
619       if (cell->is_locally_owned())
620         n_local_active_cells++;
621 
622     std::vector<double>  geo_support_points;
623     std::vector<CeedInt> geo_indices;
624 
625     DoFHandler<dim> geo_dof_handler(tria);
626     geo_dof_handler.distribute_dofs(geo_fe);
627 
628     const auto geo_partitioner =
629       std::make_shared<Utilities::MPI::Partitioner>(geo_dof_handler.locally_owned_dofs(),
630                                                     DoFTools::extract_locally_active_dofs(
631                                                       geo_dof_handler),
632                                                     geo_dof_handler.get_communicator());
633 
634     geo_indices.reserve(n_local_active_cells * geo_fe.n_dofs_per_cell());
635 
636     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
637 
638     FEValues<dim> fe_values(mapping,
639                             geo_fe,
640                             geo_fe.get_unit_support_points(),
641                             update_quadrature_points);
642 
643     std::vector<types::global_dof_index> local_indices(geo_fe.n_dofs_per_cell());
644 
645     const unsigned int n_points =
646       geo_partitioner->locally_owned_size() + geo_partitioner->n_ghost_indices();
647 
648     geo_support_points.resize(dim * n_points);
649 
650     for (const auto &cell : geo_dof_handler.active_cell_iterators())
651       if (cell->is_locally_owned())
652         {
653           fe_values.reinit(cell);
654           cell->get_dof_indices(local_indices);
655 
656           for (const auto i : dof_mapping)
657             {
658               const auto index = geo_partitioner->global_to_local(local_indices[i]);
659               geo_indices.emplace_back(index);
660 
661               const auto point = fe_values.quadrature_point(i);
662 
663               for (unsigned int d = 0; d < dim; ++d)
664                 geo_support_points[index + d * n_points] = point[d];
665             }
666         }
667 
668     weights.resize(n_local_active_cells * quadrature.size() * n_components);
669 
670     CeedInt strides[3] = {1,
671                           static_cast<int>(quadrature.size()),
672                           static_cast<int>(quadrature.size() * n_components)};
673 
674     CeedVectorCreate(ceed, weights.size(), &q_data);
675     CeedVectorSetArray(q_data, CEED_MEM_HOST, CEED_USE_POINTER, weights.data());
676     CeedElemRestrictionCreateStrided(ceed,
677                                      n_local_active_cells,
678                                      quadrature.size(),
679                                      n_components,
680                                      weights.size(),
681                                      strides,
682                                      &q_data_restriction);
683 
684     CeedVectorCreate(ceed, geo_support_points.size(), &node_coords);
685     CeedVectorSetArray(node_coords, CEED_MEM_HOST, CEED_USE_POINTER, geo_support_points.data());
686 
687     CeedElemRestrictionCreate(ceed,
688                               n_local_active_cells,
689                               geo_fe.n_dofs_per_cell(),
690                               dim,
691                               std::max<unsigned int>(geo_support_points.size() / dim, 1),
692                               geo_support_points.size(),
693                               CEED_MEM_HOST,
694                               CEED_COPY_VALUES,
695                               geo_indices.data(),
696                               &geo_restriction);
697 
698     BuildContext build_ctx_data;
699     build_ctx_data.dim       = dim;
700     build_ctx_data.space_dim = dim;
701 
702     CeedQFunctionContextCreate(ceed, &build_ctx);
703     CeedQFunctionContextSetData(
704       build_ctx, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(build_ctx_data), &build_ctx_data);
705 
706     // 5) create q operation
707     if (bp <= BPType::BP2)
708       CeedQFunctionCreateInterior(ceed, 1, f_build_mass, f_build_mass_loc, &qf_build);
709     else
710       CeedQFunctionCreateInterior(ceed, 1, f_build_poisson, f_build_poisson_loc, &qf_build);
711 
712     CeedQFunctionAddInput(qf_build, "geo", dim * dim, CEED_EVAL_GRAD);
713     CeedQFunctionAddInput(qf_build, "weights", 1, CEED_EVAL_WEIGHT);
714     CeedQFunctionAddOutput(qf_build, "qdata", n_components, CEED_EVAL_NONE);
715     CeedQFunctionSetContext(qf_build, build_ctx);
716 
717     // 6) put everything together
718     CeedOperatorCreate(ceed, qf_build, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_build);
719     CeedOperatorSetField(op_build, "geo", geo_restriction, geo_basis, CEED_VECTOR_ACTIVE);
720     CeedOperatorSetField(
721       op_build, "weights", CEED_ELEMRESTRICTION_NONE, geo_basis, CEED_VECTOR_NONE);
722     CeedOperatorSetField(
723       op_build, "qdata", q_data_restriction, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE);
724 
725     CeedOperatorApply(op_build, node_coords, q_data, CEED_REQUEST_IMMEDIATE);
726 
727     CeedVectorDestroy(&node_coords);
728     CeedVectorSyncArray(q_data, CEED_MEM_HOST);
729     CeedVectorDestroy(&q_data);
730     CeedElemRestrictionDestroy(&geo_restriction);
731     CeedElemRestrictionDestroy(&q_data_restriction);
732     CeedBasisDestroy(&geo_basis);
733     CeedQFunctionContextDestroy(&build_ctx);
734     CeedQFunctionDestroy(&qf_build);
735     CeedOperatorDestroy(&op_build);
736 
737     return weights;
738   }
739 
740   /**
741    * Mapping object passed to the constructor.
742    */
743   const Mapping<dim> &mapping;
744 
745   /**
746    * DoFHandler object passed to the constructor.
747    */
748   const DoFHandler<dim> &dof_handler;
749 
750   /**
751    * Constraints object passed to the constructor.
752    */
753   const AffineConstraints<Number> &constraints;
754 
755   /**
756    * Quadrature rule object passed to the constructor.
757    */
758   const Quadrature<dim> &quadrature;
759 
760   /**
761    * Selected BP.
762    */
763   const BPType bp;
764 
765   /**
766    * Resource name.
767    */
768   const std::string resource;
769 
770   /**
771    * Partitioner for distributed vectors.
772    */
773   std::shared_ptr<Utilities::MPI::Partitioner> partitioner;
774 
775   /**
776    * libCEED data structures.
777    */
778   Ceed                   ceed;
779   std::vector<double>    weights;
780   std::array<CeedInt, 3> strides;
781   CeedVector             src_ceed;
782   CeedVector             dst_ceed;
783   CeedOperator           op_apply;
784 
785   /**
786    * Temporal (tempral) vectors.
787    *
788    * @note Only needed for multiple components.
789    */
790   mutable VectorType src_tmp;
791   mutable VectorType dst_tmp;
792 };
793 
794 
795 
796 template <int dim, typename Number>
797 class OperatorDealii : public OperatorBase<Number>
798 {
799 public:
800   using VectorType = typename OperatorBase<Number>::VectorType;
801 
802   /**
803    * Constructor.
804    */
805   OperatorDealii(const Mapping<dim>              &mapping,
806                  const DoFHandler<dim>           &dof_handler,
807                  const AffineConstraints<Number> &constraints,
808                  const Quadrature<dim>           &quadrature,
809                  const BPType                    &bp)
810     : mapping(mapping)
811     , dof_handler(dof_handler)
812     , constraints(constraints)
813     , quadrature(quadrature)
814     , bp(bp)
815   {
816     reinit();
817   }
818 
819   /**
820    * Destructor.
821    */
822   ~OperatorDealii() = default;
823 
824   /**
825    * Initialized internal data structures, particularly, MatrixFree.
826    */
827   void
828   reinit() override
829   {
830     // configure MatrixFree
831     typename MatrixFree<dim, Number>::AdditionalData additional_data;
832     additional_data.tasks_parallel_scheme =
833       MatrixFree<dim, Number>::AdditionalData::TasksParallelScheme::none;
834 
835     // create MatrixFree
836     matrix_free.reinit(mapping, dof_handler, constraints, quadrature, additional_data);
837   }
838 
839   /**
840    * Matrix-vector product.
841    */
842   void
843   vmult(VectorType &dst, const VectorType &src) const override
844   {
845     if (dof_handler.get_fe().n_components() == 1)
846       {
847         matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<1>, this, dst, src, true);
848       }
849     else
850       {
851         AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError());
852 
853         matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<dim>, this, dst, src, true);
854       }
855   }
856 
857   /**
858    * Initialize vector.
859    */
860   void
861   initialize_dof_vector(VectorType &vec) const override
862   {
863     matrix_free.initialize_dof_vector(vec);
864   }
865 
866   /**
867    * Compute inverse of diagonal.
868    */
869   void
870   compute_inverse_diagonal(VectorType &diagonal) const override
871   {
872     this->initialize_dof_vector(diagonal);
873 
874     if (dof_handler.get_fe().n_components() == 1)
875       {
876         MatrixFreeTools::compute_diagonal(matrix_free,
877                                           diagonal,
878                                           &OperatorDealii::do_cell_integral_local<1>,
879                                           this);
880       }
881     else
882       {
883         AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError());
884 
885         MatrixFreeTools::compute_diagonal(matrix_free,
886                                           diagonal,
887                                           &OperatorDealii::do_cell_integral_local<dim>,
888                                           this);
889       }
890 
891     for (auto &i : diagonal)
892       i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0;
893   }
894 
895 private:
896   /**
897    * Cell integral without vector access.
898    */
899   template <int n_components>
900   void
901   do_cell_integral_local(FEEvaluation<dim, -1, 0, n_components, Number> &phi) const
902   {
903     if (bp <= BPType::BP2) // mass matrix
904       {
905         phi.evaluate(EvaluationFlags::values);
906         for (const auto q : phi.quadrature_point_indices())
907           phi.submit_value(phi.get_value(q), q);
908         phi.integrate(EvaluationFlags::values);
909       }
910     else // Poisson operator
911       {
912         phi.evaluate(EvaluationFlags::gradients);
913         for (const auto q : phi.quadrature_point_indices())
914           phi.submit_gradient(phi.get_gradient(q), q);
915         phi.integrate(EvaluationFlags::gradients);
916       }
917   }
918 
919   /**
920    * Cell integral on a range of cells.
921    */
922   template <int n_components>
923   void
924   do_cell_integral_range(const MatrixFree<dim, Number>               &matrix_free,
925                          VectorType                                  &dst,
926                          const VectorType                            &src,
927                          const std::pair<unsigned int, unsigned int> &range) const
928   {
929     FEEvaluation<dim, -1, 0, n_components, Number> phi(matrix_free, range);
930 
931     for (unsigned cell = range.first; cell < range.second; ++cell)
932       {
933         phi.reinit(cell);
934         phi.read_dof_values(src);            // read source vector
935         do_cell_integral_local(phi);         // cell integral
936         phi.distribute_local_to_global(dst); // write to destination vector
937       }
938   }
939 
940   /**
941    * Mapping object passed to the constructor.
942    */
943   const Mapping<dim> &mapping;
944 
945   /**
946    * DoFHandler object passed to the constructor.
947    */
948   const DoFHandler<dim> &dof_handler;
949 
950   /**
951    * Constraints object passed to the constructor.
952    */
953   const AffineConstraints<Number> &constraints;
954 
955   /**
956    * Quadrature rule object passed to the constructor.
957    */
958   const Quadrature<dim> &quadrature;
959 
960   /**
961    * Selected BP.
962    */
963   const BPType bp;
964 
965   /**
966    * MatrixFree object.
967    */
968   MatrixFree<dim, Number> matrix_free;
969 };
970