xref: /libCEED/examples/deal.II/bps.h (revision 571e8cf012cd9ba55521730d89ff29ce614d2bcd)
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2023 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 //  Authors: Peter Munch, Martin Kronbichler
15 //
16 // ---------------------------------------------------------------------
17 
18 // deal.II includes
19 #include <deal.II/dofs/dof_tools.h>
20 
21 #include <deal.II/fe/mapping.h>
22 
23 #include <deal.II/lac/la_parallel_vector.h>
24 
25 #include <deal.II/matrix_free/fe_evaluation.h>
26 #include <deal.II/matrix_free/matrix_free.h>
27 #include <deal.II/matrix_free/shape_info.h>
28 #include <deal.II/matrix_free/tools.h>
29 
30 // libCEED includes
31 #include <ceed.h>
32 #include <ceed/backend.h>
33 
34 // QFunction source
35 #include "bps-qfunctions.h"
36 
37 using namespace dealii;
38 
39 /**
40  * BP types. For more details, see https://ceed.exascaleproject.org/bps/.
41  */
42 enum class BPType : unsigned int
43 {
44   BP1,
45   BP2,
46   BP3,
47   BP4,
48   BP5,
49   BP6
50 };
51 
52 
53 
54 /**
55  * Struct storing relevant information regarding each BP.
56  */
57 struct BPInfo
58 {
59   BPInfo(const BPType type, const int dim, const int fe_degree)
60     : type(type)
61     , dim(dim)
62     , fe_degree(fe_degree)
63   {
64     if (type == BPType::BP1)
65       type_string = "BP1";
66     else if (type == BPType::BP2)
67       type_string = "BP2";
68     else if (type == BPType::BP3)
69       type_string = "BP3";
70     else if (type == BPType::BP4)
71       type_string = "BP4";
72     else if (type == BPType::BP5)
73       type_string = "BP5";
74     else if (type == BPType::BP6)
75       type_string = "BP6";
76 
77     this->n_q_points_1d = (type <= BPType::BP4) ? (fe_degree + 2) : (fe_degree + 1);
78 
79     this->n_components =
80       (type == BPType::BP1 || type == BPType::BP3 || type == BPType::BP5) ? 1 : dim;
81   }
82 
83 
84   BPType       type;
85   std::string  type_string;
86   unsigned int dim;
87   unsigned int fe_degree;
88   unsigned int n_q_points_1d;
89   unsigned int n_components;
90 };
91 
92 
93 
94 /**
95  * Base class of operators.
96  */
97 template <typename Number>
98 class OperatorBase
99 {
100 public:
101   /**
102    * deal.II vector type
103    */
104   using VectorType = LinearAlgebra::distributed::Vector<Number>;
105 
106   /**
107    * Initialize vector.
108    */
109   virtual void
110   reinit() = 0;
111 
112   /**
113    * Perform matrix-vector product
114    */
115   virtual void
116   vmult(VectorType &dst, const VectorType &src) const = 0;
117 
118   /**
119    * Initialize vector.
120    */
121   virtual void
122   initialize_dof_vector(VectorType &vec) const = 0;
123 
124   /**
125    * Compute inverse of diagonal.
126    */
127   virtual void
128   compute_inverse_diagonal(VectorType &diagonal) const = 0;
129 };
130 
131 
132 /**
133  * Operator implementation using libCEED.
134  */
135 template <int dim, typename Number>
136 class OperatorCeed : public OperatorBase<Number>
137 {
138 public:
139   using VectorType = typename OperatorBase<Number>::VectorType;
140 
141   /**
142    * Constructor.
143    */
144   OperatorCeed(const Mapping<dim>              &mapping,
145                const DoFHandler<dim>           &dof_handler,
146                const AffineConstraints<Number> &constraints,
147                const Quadrature<dim>           &quadrature,
148                const BPType                    &bp,
149                const std::string               &resource)
150     : mapping(mapping)
151     , dof_handler(dof_handler)
152     , constraints(constraints)
153     , quadrature(quadrature)
154     , bp(bp)
155     , resource(resource)
156   {
157     reinit();
158   }
159 
160   /**
161    * Destructor.
162    */
163   ~OperatorCeed()
164   {
165     CeedVectorDestroy(&src_ceed);
166     CeedVectorDestroy(&dst_ceed);
167     CeedOperatorDestroy(&op_apply);
168     CeedDestroy(&ceed);
169   }
170 
171   /**
172    * Initialized internal data structures, particularly, libCEED.
173    */
174   void
175   reinit() override
176   {
177     CeedVector           q_data;
178     CeedBasis            sol_basis;
179     CeedElemRestriction  sol_restriction;
180     CeedElemRestriction  q_data_restriction;
181     BuildContext         build_ctx_data;
182     CeedQFunctionContext build_ctx;
183     CeedQFunction        qf_apply;
184 
185     const auto &tria = dof_handler.get_triangulation();
186     const auto &fe   = dof_handler.get_fe();
187 
188     const auto n_components = fe.n_components();
189 
190     if (bp == BPType::BP1 || bp == BPType::BP3 || bp == BPType::BP5)
191       {
192         AssertThrow(n_components == 1, ExcInternalError());
193       }
194     else
195       {
196         AssertThrow(n_components == dim, ExcInternalError());
197       }
198 
199     // 1) create CEED instance -> "MatrixFree"
200     const char *ceed_spec = resource.c_str();
201     CeedInit(ceed_spec, &ceed);
202 
203     // 2) create shape functions -> "ShapeInfo"
204     const unsigned int fe_degree  = fe.tensor_degree();
205     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
206     {
207       dealii::internal::MatrixFreeFunctions::ShapeInfo<double> shape_info;
208       shape_info.reinit(quadrature, fe, 0);
209       dealii::internal::MatrixFreeFunctions::UnivariateShapeData<double> shape_data =
210         shape_info.get_shape_data();
211 
212       std::vector<CeedScalar> q_ref_1d(n_q_points);
213       std::vector<CeedScalar> q_weight_1d(n_q_points);
214       const unsigned int      array_size = (fe_degree + 1) * (n_q_points);
215       std::vector<CeedScalar> interp_1d(array_size);
216       std::vector<CeedScalar> grad_1d(array_size);
217       for (unsigned int i = 0; i < n_q_points; i++)
218         {
219           // Retrieve quadrature info
220           // Convert reference element from [0, 1] to [-1, 1]
221           q_ref_1d[i]    = 2.0 * (quadrature.get_tensor_basis()[0].point(i)(0) - 0.5);
222           q_weight_1d[i] = 2.0 * quadrature.get_tensor_basis()[0].weight(i);
223 
224           // Retrieve 1D shape function values
225           for (unsigned int j = 0; j < fe_degree + 1; j++)
226             {
227               interp_1d[j + i * (fe_degree + 1)] = shape_data.shape_values[j + i * (fe_degree + 1)];
228               // Scale derivatives by 1/2 for new reference element length
229               grad_1d[j + i * (fe_degree + 1)] =
230                 0.5 * shape_data.shape_gradients[j + i * (fe_degree + 1)];
231             }
232         }
233 
234       CeedBasisCreateTensorH1(ceed,
235                               dim,
236                               n_components,
237                               fe_degree + 1,
238                               n_q_points,
239                               interp_1d.data(),
240                               grad_1d.data(),
241                               q_ref_1d.data(),
242                               q_weight_1d.data(),
243                               &sol_basis);
244     }
245 
246     // 3) create restriction matrix -> DoFInfo
247     unsigned int n_local_active_cells = 0;
248 
249     for (const auto &cell : dof_handler.active_cell_iterators())
250       if (cell->is_locally_owned())
251         n_local_active_cells++;
252 
253     partitioner =
254       std::make_shared<Utilities::MPI::Partitioner>(dof_handler.locally_owned_dofs(),
255                                                     DoFTools::extract_locally_active_dofs(
256                                                       dof_handler),
257                                                     dof_handler.get_communicator());
258 
259     std::vector<CeedInt> indices;
260     indices.reserve(n_local_active_cells * fe.n_dofs_per_cell() / n_components);
261 
262     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
263 
264     std::vector<types::global_dof_index> local_indices(fe.n_dofs_per_cell());
265 
266     for (const auto &cell : dof_handler.active_cell_iterators())
267       if (cell->is_locally_owned())
268         {
269           cell->get_dof_indices(local_indices);
270 
271           for (const auto i : dof_mapping)
272             indices.emplace_back(
273               partitioner->global_to_local(local_indices[fe.component_to_system_index(0, i)]) /
274               n_components);
275         }
276 
277     CeedElemRestrictionCreate(ceed,
278                               n_local_active_cells,
279                               fe.n_dofs_per_cell() / n_components,
280                               n_components,
281                               std::max<unsigned int>(this->extended_local_size() / n_components, 1),
282                               this->extended_local_size(),
283                               CEED_MEM_HOST,
284                               CEED_COPY_VALUES,
285                               indices.data(),
286                               &sol_restriction);
287 
288     // 4) create mapping -> MappingInfo
289     const unsigned int n_components_metric = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
290 
291     this->weights = compute_metric_data(ceed, mapping, tria, quadrature, bp);
292 
293     strides = {{1,
294                 static_cast<int>(quadrature.size()),
295                 static_cast<int>(quadrature.size() * n_components_metric)}};
296     CeedVectorCreate(ceed, weights.size(), &q_data);
297     CeedVectorSetArray(q_data, CEED_MEM_HOST, CEED_USE_POINTER, weights.data());
298     CeedElemRestrictionCreateStrided(ceed,
299                                      n_local_active_cells,
300                                      quadrature.size(),
301                                      n_components_metric,
302                                      weights.size(),
303                                      strides.data(),
304                                      &q_data_restriction);
305 
306     build_ctx_data.dim       = dim;
307     build_ctx_data.space_dim = dim;
308 
309     CeedQFunctionContextCreate(ceed, &build_ctx);
310     CeedQFunctionContextSetData(
311       build_ctx, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(build_ctx_data), &build_ctx_data);
312 
313     // 5) create q operation
314     if (bp == BPType::BP1)
315       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass, f_apply_mass_loc, &qf_apply);
316     else if (bp == BPType::BP2)
317       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass_vec, f_apply_mass_vec_loc, &qf_apply);
318     else if (bp == BPType::BP3 || bp == BPType::BP5)
319       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson, f_apply_poisson_loc, &qf_apply);
320     else if (bp == BPType::BP4 || bp == BPType::BP6)
321       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson_vec, f_apply_poisson_vec_loc, &qf_apply);
322     else
323       AssertThrow(false, ExcInternalError());
324 
325     if (bp <= BPType::BP2)
326       CeedQFunctionAddInput(qf_apply, "u", n_components, CEED_EVAL_INTERP);
327     else
328       CeedQFunctionAddInput(qf_apply, "u", dim * n_components, CEED_EVAL_GRAD);
329 
330     CeedQFunctionAddInput(qf_apply, "qdata", n_components_metric, CEED_EVAL_NONE);
331 
332     if (bp <= BPType::BP2)
333       CeedQFunctionAddOutput(qf_apply, "v", n_components, CEED_EVAL_INTERP);
334     else
335       CeedQFunctionAddOutput(qf_apply, "v", dim * n_components, CEED_EVAL_GRAD);
336 
337     CeedQFunctionSetContext(qf_apply, build_ctx);
338 
339     // 6) put everything together
340     CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply);
341 
342     CeedOperatorSetField(op_apply, "u", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
343     CeedOperatorSetField(op_apply, "qdata", q_data_restriction, CEED_BASIS_NONE, q_data);
344     CeedOperatorSetField(op_apply, "v", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
345 
346     // 7) libCEED vectors
347     CeedElemRestrictionCreateVector(sol_restriction, &src_ceed, NULL);
348     CeedElemRestrictionCreateVector(sol_restriction, &dst_ceed, NULL);
349 
350     // 8) cleanup
351     CeedVectorDestroy(&q_data);
352     CeedElemRestrictionDestroy(&q_data_restriction);
353     CeedElemRestrictionDestroy(&sol_restriction);
354     CeedBasisDestroy(&sol_basis);
355     CeedQFunctionContextDestroy(&build_ctx);
356     CeedQFunctionDestroy(&qf_apply);
357   }
358 
359   /**
360    * Perform matrix-vector product.
361    */
362   void
363   vmult(VectorType &dst, const VectorType &src) const override
364   {
365     // communicate: update ghost values
366     src.update_ghost_values();
367 
368     if (dof_handler.get_fe().n_components() == 1)
369       {
370         // pass memory buffers to libCEED
371         VectorTypeCeed x(src_ceed);
372         VectorTypeCeed y(dst_ceed);
373         x.import_array(src, CEED_MEM_HOST);
374         y.import_array(dst, CEED_MEM_HOST);
375 
376         // apply operator
377         CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE);
378 
379         // pull arrays back to deal.II
380         x.sync_array();
381         y.sync_array();
382       }
383     else // TODO: needed for multiple components
384       {
385         // allocate space for block vectors
386         src_tmp.reinit(this->extended_local_size(), true);
387         dst_tmp.reinit(this->extended_local_size(), true);
388 
389         // copy to block vector
390         copy_to_block_vector(src_tmp, src);
391 
392         // pass memory buffers to libCEED
393         VectorTypeCeed x(src_ceed);
394         VectorTypeCeed y(dst_ceed);
395         x.import_array(src_tmp, CEED_MEM_HOST);
396         y.import_array(dst_tmp, CEED_MEM_HOST);
397 
398         // apply operator
399         CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE);
400 
401         // pull arrays back to deal.II
402         x.sync_array();
403         y.sync_array();
404 
405         // copy from block vector
406         copy_from_block_vector(dst, dst_tmp);
407       }
408 
409     // communicate: compress
410     src.zero_out_ghost_values();
411     dst.compress(VectorOperation::add);
412 
413     // apply constraints: we assume homogeneous DBC
414     constraints.set_zero(dst);
415   }
416 
417   /**
418    * Initialized vector.
419    */
420   void
421   initialize_dof_vector(VectorType &vec) const override
422   {
423     vec.reinit(partitioner);
424   }
425 
426   /**
427    * Compute inverse of diagonal.
428    */
429   void
430   compute_inverse_diagonal(VectorType &diagonal) const override
431   {
432     this->initialize_dof_vector(diagonal);
433 
434     // pass memory buffer to libCEED
435     VectorTypeCeed y(dst_ceed);
436     y.import_array(diagonal, CEED_MEM_HOST);
437 
438     CeedOperatorLinearAssembleDiagonal(op_apply, y(), CEED_REQUEST_IMMEDIATE);
439 
440     // pull array back to deal.II
441     y.sync_array();
442 
443     const unsigned int n_components = dof_handler.get_fe().n_components();
444 
445     if (n_components > 1) // TODO: needed for multiple components
446       {
447         VectorType tmp(diagonal);
448 
449         copy_from_block_vector(tmp, diagonal);
450 
451         std::swap(tmp, diagonal);
452       }
453 
454     diagonal.compress(VectorOperation::add);
455 
456     for (auto &i : diagonal)
457       i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0;
458   }
459 
460 private:
461   /**
462    * Wrapper around a deal.II vector to create a libCEED vector view.
463    */
464   class VectorTypeCeed
465   {
466   public:
467     /**
468      * Constructor.
469      */
470     VectorTypeCeed(const CeedVector &vec_orig)
471     {
472       vec_ceed = NULL;
473       CeedVectorReferenceCopy(vec_orig, &vec_ceed);
474     }
475 
476     /**
477      * Return libCEED vector view.
478      */
479     CeedVector &
480     operator()()
481     {
482       return vec_ceed;
483     }
484 
485     /**
486      * Set deal.II memory in libCEED vector.
487      */
488     void
489     import_array(const VectorType &vec, const CeedMemType space)
490     {
491       mem_space = space;
492       CeedVectorSetArray(vec_ceed, mem_space, CEED_USE_POINTER, vec.get_values());
493     }
494 
495     /**
496      * Sync memory from device to host.
497      */
498     void
499     sync_array()
500     {
501       CeedVectorSyncArray(vec_ceed, mem_space);
502     }
503 
504     /**
505      * Destructor: destroy vector view.
506      */
507     ~VectorTypeCeed()
508     {
509       bool has_array;
510       CeedVectorHasBorrowedArrayOfType(vec_ceed, mem_space, &has_array);
511       if (has_array)
512         {
513           CeedScalar *ptr;
514           CeedVectorTakeArray(vec_ceed, mem_space, &ptr);
515         }
516       CeedVectorDestroy(&vec_ceed);
517     }
518 
519   private:
520     /**
521      * libCEED vector view.
522      */
523     CeedMemType mem_space;
524     CeedVector  vec_ceed;
525   };
526 
527   /**
528    * Copy from block vector.
529    *
530    * @note Only needed for multiple components.
531    */
532   void
533   copy_from_block_vector(VectorType &dst, const VectorType &src) const
534   {
535     const unsigned int scalar_size = this->extended_local_size() / dim;
536 
537     for (unsigned int i = 0; i < scalar_size; ++i)
538       for (unsigned int j = 0; j < dim; ++j)
539         dst.get_values()[j + i * dim] = src.get_values()[j * scalar_size + i];
540   }
541 
542   /**
543    * Copy to block vector.
544    *
545    * @note Only needed for multiple components.
546    */
547   void
548   copy_to_block_vector(VectorType &dst, const VectorType &src) const
549   {
550     const unsigned int scalar_size = this->extended_local_size() / dim;
551 
552     for (unsigned int i = 0; i < scalar_size; ++i)
553       for (unsigned int j = 0; j < dim; ++j)
554         dst.get_values()[j * scalar_size + i] = src.get_values()[j + i * dim];
555   }
556 
557   /**
558    * Number of locally active DoFs.
559    */
560   unsigned int
561   extended_local_size() const
562   {
563     return partitioner->locally_owned_size() + partitioner->n_ghost_indices();
564   }
565 
566   /**
567    * Compute metric data: Jacobian, ...
568    */
569   static std::vector<double>
570   compute_metric_data(const Ceed               &ceed,
571                       const Mapping<dim>       &mapping,
572                       const Triangulation<dim> &tria,
573                       const Quadrature<dim>    &quadrature,
574                       const BPType              bp)
575   {
576     std::vector<double> weights;
577 
578     if (false)
579       {
580         FE_Nothing<dim> dummy_fe;
581         FEValues<dim>   fe_values(mapping, dummy_fe, quadrature, update_JxW_values);
582 
583         for (const auto &cell : tria.active_cell_iterators())
584           if (cell->is_locally_owned())
585             {
586               fe_values.reinit(cell);
587 
588               for (const auto q : fe_values.quadrature_point_indices())
589                 weights.emplace_back(fe_values.JxW(q));
590             }
591 
592         return weights;
593       }
594 
595     CeedBasis            geo_basis;
596     CeedVector           q_data;
597     CeedElemRestriction  q_data_restriction;
598     CeedVector           node_coords;
599     CeedElemRestriction  geo_restriction;
600     CeedQFunctionContext build_ctx;
601     CeedQFunction        qf_build;
602     CeedOperator         op_build;
603 
604     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
605 
606     const unsigned int n_components = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
607 
608     const auto mapping_q = dynamic_cast<const MappingQ<dim> *>(&mapping);
609 
610     AssertThrow(mapping_q, ExcMessage("Wrong mapping!"));
611 
612     const unsigned int fe_degree = mapping_q->get_degree();
613 
614     CeedBasisCreateTensorH1Lagrange(
615       ceed, dim, dim, fe_degree + 1, n_q_points, CEED_GAUSS, &geo_basis);
616 
617     unsigned int n_local_active_cells = 0;
618 
619     for (const auto &cell : tria.active_cell_iterators())
620       if (cell->is_locally_owned())
621         n_local_active_cells++;
622 
623     std::vector<double>  geo_support_points;
624     std::vector<CeedInt> geo_indices;
625 
626     FE_Q<dim> geo_fe(fe_degree);
627 
628     DoFHandler<dim> geo_dof_handler(tria);
629     geo_dof_handler.distribute_dofs(geo_fe);
630 
631     const auto geo_partitioner =
632       std::make_shared<Utilities::MPI::Partitioner>(geo_dof_handler.locally_owned_dofs(),
633                                                     DoFTools::extract_locally_active_dofs(
634                                                       geo_dof_handler),
635                                                     geo_dof_handler.get_communicator());
636 
637     geo_indices.reserve(n_local_active_cells * geo_fe.n_dofs_per_cell());
638 
639     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
640 
641     FEValues<dim> fe_values(mapping,
642                             geo_fe,
643                             geo_fe.get_unit_support_points(),
644                             update_quadrature_points);
645 
646     std::vector<types::global_dof_index> local_indices(geo_fe.n_dofs_per_cell());
647 
648     const unsigned int n_points =
649       geo_partitioner->locally_owned_size() + geo_partitioner->n_ghost_indices();
650 
651     geo_support_points.resize(dim * n_points);
652 
653     for (const auto &cell : geo_dof_handler.active_cell_iterators())
654       if (cell->is_locally_owned())
655         {
656           fe_values.reinit(cell);
657           cell->get_dof_indices(local_indices);
658 
659           for (const auto i : dof_mapping)
660             {
661               const auto index = geo_partitioner->global_to_local(local_indices[i]);
662               geo_indices.emplace_back(index);
663 
664               const auto point = fe_values.quadrature_point(i);
665 
666               for (unsigned int d = 0; d < dim; ++d)
667                 geo_support_points[index + d * n_points] = point[d];
668             }
669         }
670 
671     weights.resize(n_local_active_cells * quadrature.size() * n_components);
672 
673     CeedInt strides[3] = {1,
674                           static_cast<int>(quadrature.size()),
675                           static_cast<int>(quadrature.size() * n_components)};
676 
677     CeedVectorCreate(ceed, weights.size(), &q_data);
678     CeedVectorSetArray(q_data, CEED_MEM_HOST, CEED_USE_POINTER, weights.data());
679     CeedElemRestrictionCreateStrided(ceed,
680                                      n_local_active_cells,
681                                      quadrature.size(),
682                                      n_components,
683                                      weights.size(),
684                                      strides,
685                                      &q_data_restriction);
686 
687     CeedVectorCreate(ceed, geo_support_points.size(), &node_coords);
688     CeedVectorSetArray(node_coords, CEED_MEM_HOST, CEED_USE_POINTER, geo_support_points.data());
689 
690     CeedElemRestrictionCreate(ceed,
691                               n_local_active_cells,
692                               geo_fe.n_dofs_per_cell(),
693                               dim,
694                               std::max<unsigned int>(geo_support_points.size() / dim, 1),
695                               geo_support_points.size(),
696                               CEED_MEM_HOST,
697                               CEED_COPY_VALUES,
698                               geo_indices.data(),
699                               &geo_restriction);
700 
701     BuildContext build_ctx_data;
702     build_ctx_data.dim       = dim;
703     build_ctx_data.space_dim = dim;
704 
705     CeedQFunctionContextCreate(ceed, &build_ctx);
706     CeedQFunctionContextSetData(
707       build_ctx, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(build_ctx_data), &build_ctx_data);
708 
709     // 5) create q operation
710     if (bp <= BPType::BP2)
711       CeedQFunctionCreateInterior(ceed, 1, f_build_mass, f_build_mass_loc, &qf_build);
712     else
713       CeedQFunctionCreateInterior(ceed, 1, f_build_poisson, f_build_poisson_loc, &qf_build);
714 
715     CeedQFunctionAddInput(qf_build, "geo", dim * dim, CEED_EVAL_GRAD);
716     CeedQFunctionAddInput(qf_build, "weights", 1, CEED_EVAL_WEIGHT);
717     CeedQFunctionAddOutput(qf_build, "qdata", n_components, CEED_EVAL_NONE);
718     CeedQFunctionSetContext(qf_build, build_ctx);
719 
720     // 6) put everything together
721     CeedOperatorCreate(ceed, qf_build, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_build);
722     CeedOperatorSetField(op_build, "geo", geo_restriction, geo_basis, CEED_VECTOR_ACTIVE);
723     CeedOperatorSetField(
724       op_build, "weights", CEED_ELEMRESTRICTION_NONE, geo_basis, CEED_VECTOR_NONE);
725     CeedOperatorSetField(
726       op_build, "qdata", q_data_restriction, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE);
727 
728     CeedOperatorApply(op_build, node_coords, q_data, CEED_REQUEST_IMMEDIATE);
729 
730     CeedVectorDestroy(&node_coords);
731     CeedVectorSyncArray(q_data, CEED_MEM_HOST);
732     CeedVectorDestroy(&q_data);
733     CeedElemRestrictionDestroy(&geo_restriction);
734     CeedElemRestrictionDestroy(&q_data_restriction);
735     CeedBasisDestroy(&geo_basis);
736     CeedQFunctionContextDestroy(&build_ctx);
737     CeedQFunctionDestroy(&qf_build);
738     CeedOperatorDestroy(&op_build);
739 
740     return weights;
741   }
742 
743   /**
744    * Mapping object passed to the constructor.
745    */
746   const Mapping<dim> &mapping;
747 
748   /**
749    * DoFHandler object passed to the constructor.
750    */
751   const DoFHandler<dim> &dof_handler;
752 
753   /**
754    * Constraints object passed to the constructor.
755    */
756   const AffineConstraints<Number> &constraints;
757 
758   /**
759    * Quadrature rule object passed to the constructor.
760    */
761   const Quadrature<dim> &quadrature;
762 
763   /**
764    * Selected BP.
765    */
766   const BPType bp;
767 
768   /**
769    * Resource name.
770    */
771   const std::string resource;
772 
773   /**
774    * Partitioner for distributed vectors.
775    */
776   std::shared_ptr<Utilities::MPI::Partitioner> partitioner;
777 
778   /**
779    * libCEED data structures.
780    */
781   Ceed                   ceed;
782   std::vector<double>    weights;
783   std::array<CeedInt, 3> strides;
784   CeedVector             src_ceed;
785   CeedVector             dst_ceed;
786   CeedOperator           op_apply;
787 
788   /**
789    * Temporal (tempral) vectors.
790    *
791    * @note Only needed for multiple components.
792    */
793   mutable VectorType src_tmp;
794   mutable VectorType dst_tmp;
795 };
796 
797 
798 
799 template <int dim, typename Number>
800 class OperatorDealii : public OperatorBase<Number>
801 {
802 public:
803   using VectorType = typename OperatorBase<Number>::VectorType;
804 
805   /**
806    * Constructor.
807    */
808   OperatorDealii(const Mapping<dim>              &mapping,
809                  const DoFHandler<dim>           &dof_handler,
810                  const AffineConstraints<Number> &constraints,
811                  const Quadrature<dim>           &quadrature,
812                  const BPType                    &bp)
813     : mapping(mapping)
814     , dof_handler(dof_handler)
815     , constraints(constraints)
816     , quadrature(quadrature)
817     , bp(bp)
818   {
819     reinit();
820   }
821 
822   /**
823    * Destructor.
824    */
825   ~OperatorDealii() = default;
826 
827   /**
828    * Initialized internal data structures, particularly, MatrixFree.
829    */
830   void
831   reinit() override
832   {
833     // configure MatrixFree
834     typename MatrixFree<dim, Number>::AdditionalData additional_data;
835     additional_data.tasks_parallel_scheme =
836       MatrixFree<dim, Number>::AdditionalData::TasksParallelScheme::none;
837 
838     // create MatrixFree
839     matrix_free.reinit(mapping, dof_handler, constraints, quadrature, additional_data);
840   }
841 
842   /**
843    * Matrix-vector product.
844    */
845   void
846   vmult(VectorType &dst, const VectorType &src) const override
847   {
848     if (dof_handler.get_fe().n_components() == 1)
849       {
850         matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<1>, this, dst, src, true);
851       }
852     else
853       {
854         AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError());
855 
856         matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<dim>, this, dst, src, true);
857       }
858   }
859 
860   /**
861    * Initialize vector.
862    */
863   void
864   initialize_dof_vector(VectorType &vec) const override
865   {
866     matrix_free.initialize_dof_vector(vec);
867   }
868 
869   /**
870    * Compute inverse of diagonal.
871    */
872   void
873   compute_inverse_diagonal(VectorType &diagonal) const override
874   {
875     this->initialize_dof_vector(diagonal);
876 
877     if (dof_handler.get_fe().n_components() == 1)
878       {
879         MatrixFreeTools::compute_diagonal(matrix_free,
880                                           diagonal,
881                                           &OperatorDealii::do_cell_integral_local<1>,
882                                           this);
883       }
884     else
885       {
886         AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError());
887 
888         MatrixFreeTools::compute_diagonal(matrix_free,
889                                           diagonal,
890                                           &OperatorDealii::do_cell_integral_local<dim>,
891                                           this);
892       }
893 
894     for (auto &i : diagonal)
895       i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0;
896   }
897 
898 private:
899   /**
900    * Cell integral without vector access.
901    */
902   template <int n_components>
903   void
904   do_cell_integral_local(FEEvaluation<dim, -1, 0, n_components, Number> &phi) const
905   {
906     if (bp <= BPType::BP2) // mass matrix
907       {
908         phi.evaluate(EvaluationFlags::values);
909         for (const auto q : phi.quadrature_point_indices())
910           phi.submit_value(phi.get_value(q), q);
911         phi.integrate(EvaluationFlags::values);
912       }
913     else // Poisson operator
914       {
915         phi.evaluate(EvaluationFlags::gradients);
916         for (const auto q : phi.quadrature_point_indices())
917           phi.submit_gradient(phi.get_gradient(q), q);
918         phi.integrate(EvaluationFlags::gradients);
919       }
920   }
921 
922   /**
923    * Cell integral on a range of cells.
924    */
925   template <int n_components>
926   void
927   do_cell_integral_range(const MatrixFree<dim, Number>               &matrix_free,
928                          VectorType                                  &dst,
929                          const VectorType                            &src,
930                          const std::pair<unsigned int, unsigned int> &range) const
931   {
932     FEEvaluation<dim, -1, 0, n_components, Number> phi(matrix_free, range);
933 
934     for (unsigned cell = range.first; cell < range.second; ++cell)
935       {
936         phi.reinit(cell);
937         phi.read_dof_values(src);            // read source vector
938         do_cell_integral_local(phi);         // cell integral
939         phi.distribute_local_to_global(dst); // write to destination vector
940       }
941   }
942 
943   /**
944    * Mapping object passed to the constructor.
945    */
946   const Mapping<dim> &mapping;
947 
948   /**
949    * DoFHandler object passed to the constructor.
950    */
951   const DoFHandler<dim> &dof_handler;
952 
953   /**
954    * Constraints object passed to the constructor.
955    */
956   const AffineConstraints<Number> &constraints;
957 
958   /**
959    * Quadrature rule object passed to the constructor.
960    */
961   const Quadrature<dim> &quadrature;
962 
963   /**
964    * Selected BP.
965    */
966   const BPType bp;
967 
968   /**
969    * MatrixFree object.
970    */
971   MatrixFree<dim, Number> matrix_free;
972 };
973