xref: /libCEED/examples/deal.II/bps-ceed.h (revision 6261a418bdb3efd56d67795785948f0f7b436c3e)
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2023 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 //  Authors: Peter Munch, Martin Kronbichler
15 //
16 // ---------------------------------------------------------------------
17 
18 #pragma once
19 #ifndef bps_ceed_h
20 #  define bps_ceed_h
21 
22 // deal.II includes
23 #  include <deal.II/dofs/dof_tools.h>
24 
25 #  include <deal.II/fe/mapping.h>
26 
27 #  include <deal.II/lac/la_parallel_vector.h>
28 
29 #  include <deal.II/matrix_free/fe_evaluation.h>
30 #  include <deal.II/matrix_free/matrix_free.h>
31 #  include <deal.II/matrix_free/shape_info.h>
32 #  include <deal.II/matrix_free/tools.h>
33 
34 // local includes
35 #  include "bps.h"
36 
37 // libCEED includes
38 #  include <ceed.h>
39 #  include <ceed/backend.h>
40 
41 // QFunction source
42 #  include "bps-qfunctions.h"
43 
44 using namespace dealii;
45 
46 
47 /**
48  * Operator implementation using libCEED.
49  */
50 template <int dim, typename Number, typename MemorySpace = MemorySpace::Host>
51 class OperatorCeed : public OperatorBase<Number, MemorySpace>
52 {
53 public:
54   using VectorType = typename OperatorBase<Number, MemorySpace>::VectorType;
55 
56   /**
57    * Constructor.
58    */
59   OperatorCeed(const Mapping<dim>              &mapping,
60                const DoFHandler<dim>           &dof_handler,
61                const AffineConstraints<Number> &constraints,
62                const Quadrature<dim>           &quadrature,
63                const BPType                    &bp,
64                const std::string               &resource)
65     : mapping(mapping)
66     , dof_handler(dof_handler)
67     , constraints(constraints)
68     , quadrature(quadrature)
69     , bp(bp)
70     , resource(resource)
71   {
72     reinit();
73   }
74 
75   /**
76    * Destructor.
77    */
78   ~OperatorCeed()
79   {
80     CeedVectorDestroy(&src_ceed);
81     CeedVectorDestroy(&dst_ceed);
82     CeedOperatorDestroy(&op_apply);
83     CeedDestroy(&ceed);
84   }
85 
86   /**
87    * Initialized internal data structures, particularly, libCEED.
88    */
89   void
90   reinit() override
91   {
92     CeedVector           metric_data;
93     CeedBasis            sol_basis;
94     CeedElemRestriction  sol_restriction;
95     CeedElemRestriction  metric_data_restriction;
96     BuildContext         build_ctx_data;
97     CeedQFunctionContext build_ctx;
98     CeedQFunction        qf_apply;
99 
100     const auto &tria = dof_handler.get_triangulation();
101     const auto &fe   = dof_handler.get_fe();
102 
103     const auto n_components = fe.n_components();
104 
105     if (bp == BPType::BP1 || bp == BPType::BP3 || bp == BPType::BP5)
106       {
107         AssertThrow(n_components == 1, ExcInternalError());
108       }
109     else
110       {
111         AssertThrow(n_components == dim, ExcInternalError());
112       }
113 
114     // 1) create CEED instance -> "MatrixFree"
115     const char *ceed_spec = resource.c_str();
116     CeedInit(ceed_spec, &ceed);
117 
118     // 2) create shape functions -> "ShapeInfo"
119     const unsigned int fe_degree  = fe.tensor_degree();
120     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
121     {
122       const dealii::internal::MatrixFreeFunctions::ShapeInfo<double> shape_info(quadrature, fe, 0);
123       const auto             &shape_data = shape_info.get_shape_data();
124       std::vector<CeedScalar> q_ref_1d;
125       for (const auto q : shape_data.quadrature.get_points())
126         q_ref_1d.push_back(q(0));
127 
128       // transpose bases for compatibility with restriction
129       std::vector<CeedScalar> interp_1d(shape_data.shape_values.size());
130       std::vector<CeedScalar> grad_1d(shape_data.shape_gradients.size());
131       for (unsigned int i = 0; i < n_q_points; ++i)
132         for (unsigned int j = 0; j < fe_degree + 1; ++j)
133           {
134             interp_1d[j + i * (fe_degree + 1)] = shape_data.shape_values[j * n_q_points + i];
135             grad_1d[j + i * (fe_degree + 1)]   = shape_data.shape_gradients[j * n_q_points + i];
136           }
137 
138       CeedBasisCreateTensorH1(ceed,
139                               dim,
140                               n_components,
141                               fe_degree + 1,
142                               n_q_points,
143                               interp_1d.data(),
144                               grad_1d.data(),
145                               q_ref_1d.data(),
146                               quadrature.get_tensor_basis()[0].get_weights().data(),
147                               &sol_basis);
148     }
149 
150     // 3) create restriction matrix -> DoFInfo
151     unsigned int n_local_active_cells = 0;
152 
153     for (const auto &cell : dof_handler.active_cell_iterators())
154       if (cell->is_locally_owned())
155         n_local_active_cells++;
156 
157     partitioner =
158       std::make_shared<Utilities::MPI::Partitioner>(dof_handler.locally_owned_dofs(),
159                                                     DoFTools::extract_locally_active_dofs(
160                                                       dof_handler),
161                                                     dof_handler.get_communicator());
162 
163     std::vector<CeedInt> indices;
164     indices.reserve(n_local_active_cells * fe.n_dofs_per_cell() / n_components);
165 
166     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
167 
168     std::vector<types::global_dof_index> local_indices(fe.n_dofs_per_cell());
169 
170     for (const auto &cell : dof_handler.active_cell_iterators())
171       if (cell->is_locally_owned())
172         {
173           cell->get_dof_indices(local_indices);
174 
175           for (const auto i : dof_mapping)
176             indices.emplace_back(
177               partitioner->global_to_local(local_indices[fe.component_to_system_index(0, i)]));
178         }
179 
180     CeedElemRestrictionCreate(ceed,
181                               n_local_active_cells,
182                               fe.n_dofs_per_cell() / n_components,
183                               n_components,
184                               1,
185                               this->extended_local_size(),
186                               CEED_MEM_HOST,
187                               CEED_COPY_VALUES,
188                               indices.data(),
189                               &sol_restriction);
190 
191     // 4) create mapping -> MappingInfo
192     const unsigned int n_components_metric = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
193 
194     metric_data_raw = compute_metric_data(ceed, mapping, tria, quadrature, bp);
195 
196     strides = {{1,
197                 static_cast<int>(quadrature.size()),
198                 static_cast<int>(quadrature.size() * n_components_metric)}};
199     CeedVectorCreate(ceed, metric_data_raw.size(), &metric_data);
200     CeedVectorSetArray(metric_data, CEED_MEM_HOST, CEED_USE_POINTER, metric_data_raw.data());
201     CeedElemRestrictionCreateStrided(ceed,
202                                      n_local_active_cells,
203                                      quadrature.size(),
204                                      n_components_metric,
205                                      metric_data_raw.size(),
206                                      strides.data(),
207                                      &metric_data_restriction);
208 
209     build_ctx_data.dim       = dim;
210     build_ctx_data.space_dim = dim;
211 
212     CeedQFunctionContextCreate(ceed, &build_ctx);
213     CeedQFunctionContextSetData(
214       build_ctx, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(build_ctx_data), &build_ctx_data);
215 
216     // 5) create q operation
217     if (bp == BPType::BP1)
218       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass, f_apply_mass_loc, &qf_apply);
219     else if (bp == BPType::BP2)
220       CeedQFunctionCreateInterior(ceed, 1, f_apply_mass_vec, f_apply_mass_vec_loc, &qf_apply);
221     else if (bp == BPType::BP3 || bp == BPType::BP5)
222       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson, f_apply_poisson_loc, &qf_apply);
223     else if (bp == BPType::BP4 || bp == BPType::BP6)
224       CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson_vec, f_apply_poisson_vec_loc, &qf_apply);
225     else
226       AssertThrow(false, ExcInternalError());
227 
228     if (bp <= BPType::BP2)
229       CeedQFunctionAddInput(qf_apply, "u", n_components, CEED_EVAL_INTERP);
230     else
231       CeedQFunctionAddInput(qf_apply, "u", dim * n_components, CEED_EVAL_GRAD);
232 
233     CeedQFunctionAddInput(qf_apply, "metric data", n_components_metric, CEED_EVAL_NONE);
234 
235     if (bp <= BPType::BP2)
236       CeedQFunctionAddOutput(qf_apply, "v", n_components, CEED_EVAL_INTERP);
237     else
238       CeedQFunctionAddOutput(qf_apply, "v", dim * n_components, CEED_EVAL_GRAD);
239 
240     CeedQFunctionSetContext(qf_apply, build_ctx);
241 
242     // 6) put everything together
243     CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply);
244 
245     CeedOperatorSetField(op_apply, "u", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
246     CeedOperatorSetField(
247       op_apply, "metric data", metric_data_restriction, CEED_BASIS_NONE, metric_data);
248     CeedOperatorSetField(op_apply, "v", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE);
249 
250     // 7) libCEED vectors
251     CeedElemRestrictionCreateVector(sol_restriction, &src_ceed, NULL);
252     CeedElemRestrictionCreateVector(sol_restriction, &dst_ceed, NULL);
253 
254     // 8) cleanup
255     CeedVectorDestroy(&metric_data);
256     CeedElemRestrictionDestroy(&metric_data_restriction);
257     CeedElemRestrictionDestroy(&sol_restriction);
258     CeedBasisDestroy(&sol_basis);
259     CeedQFunctionContextDestroy(&build_ctx);
260     CeedQFunctionDestroy(&qf_apply);
261   }
262 
263   /**
264    * Perform matrix-vector product.
265    */
266   void
267   vmult(VectorType &dst, const VectorType &src) const override
268   {
269     // communicate: update ghost values
270     src.update_ghost_values();
271 
272     // pass memory buffers to libCEED
273     VectorTypeCeed x(src_ceed);
274     VectorTypeCeed y(dst_ceed);
275     x.import_array(src, CEED_MEM_HOST);
276     y.import_array(dst, CEED_MEM_HOST);
277 
278     // apply operator
279     CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE);
280 
281     // pull arrays back to deal.II
282     x.take_array();
283     y.take_array();
284 
285     // communicate: compress
286     src.zero_out_ghost_values();
287     dst.compress(VectorOperation::add);
288 
289     // apply constraints: we assume homogeneous DBC
290     constraints.set_zero(dst);
291   }
292 
293   /**
294    * Initialized vector.
295    */
296   void
297   initialize_dof_vector(VectorType &vec) const override
298   {
299     vec.reinit(partitioner);
300   }
301 
302   /**
303    * Compute inverse of diagonal.
304    */
305   void
306   compute_inverse_diagonal(VectorType &diagonal) const override
307   {
308     this->initialize_dof_vector(diagonal);
309 
310     // pass memory buffer to libCEED
311     VectorTypeCeed y(dst_ceed);
312     y.import_array(diagonal, CEED_MEM_HOST);
313 
314     CeedOperatorLinearAssembleDiagonal(op_apply, y(), CEED_REQUEST_IMMEDIATE);
315 
316     // pull array back to deal.II
317     y.take_array();
318 
319     diagonal.compress(VectorOperation::add);
320 
321     // apply constraints: we assume homogeneous DBC
322     constraints.set_zero(diagonal);
323 
324     for (auto &i : diagonal)
325       i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0;
326   }
327 
328 private:
329   /**
330    * Wrapper around a deal.II vector to create a libCEED vector view.
331    */
332   class VectorTypeCeed
333   {
334   public:
335     /**
336      * Constructor.
337      */
338     VectorTypeCeed(const CeedVector &vec_orig)
339     {
340       vec_ceed = NULL;
341       CeedVectorReferenceCopy(vec_orig, &vec_ceed);
342     }
343 
344     /**
345      * Return libCEED vector view.
346      */
347     CeedVector &
348     operator()()
349     {
350       return vec_ceed;
351     }
352 
353     /**
354      * Set deal.II memory in libCEED vector.
355      */
356     void
357     import_array(const VectorType &vec, const CeedMemType space)
358     {
359       mem_space = space;
360       CeedVectorSetArray(vec_ceed, mem_space, CEED_USE_POINTER, vec.get_values());
361     }
362 
363     /**
364      * Sync memory from device to host.
365      */
366     void
367     sync_array()
368     {
369       CeedVectorSyncArray(vec_ceed, mem_space);
370     }
371 
372     /**
373      * Take previously set deal.II array from libCEED vector
374      */
375     void
376     take_array()
377     {
378       CeedScalar *ptr;
379       CeedVectorTakeArray(vec_ceed, mem_space, &ptr);
380     }
381 
382     /**
383      * Destructor: destroy vector view.
384      */
385     ~VectorTypeCeed()
386     {
387       bool has_array;
388       CeedVectorHasBorrowedArrayOfType(vec_ceed, mem_space, &has_array);
389       if (has_array)
390         {
391           CeedScalar *ptr;
392           CeedVectorTakeArray(vec_ceed, mem_space, &ptr);
393         }
394       CeedVectorDestroy(&vec_ceed);
395     }
396 
397   private:
398     /**
399      * libCEED vector view.
400      */
401     CeedMemType mem_space;
402     CeedVector  vec_ceed;
403   };
404 
405   /**
406    * Number of locally active DoFs.
407    */
408   unsigned int
409   extended_local_size() const
410   {
411     return partitioner->locally_owned_size() + partitioner->n_ghost_indices();
412   }
413 
414   /**
415    * Compute metric data: Jacobian, ...
416    */
417   static std::vector<double>
418   compute_metric_data(const Ceed               &ceed,
419                       const Mapping<dim>       &mapping,
420                       const Triangulation<dim> &tria,
421                       const Quadrature<dim>    &quadrature,
422                       const BPType              bp)
423   {
424     std::vector<double> metric_data_raw;
425 
426     CeedBasis            geo_basis;
427     CeedVector           metric_data;
428     CeedElemRestriction  metric_data_restriction;
429     CeedVector           node_coords;
430     CeedElemRestriction  geo_restriction;
431     CeedQFunctionContext build_ctx;
432     CeedQFunction        qf_build;
433     CeedOperator         op_build;
434 
435     const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size();
436 
437     const unsigned int n_components_metric = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2);
438 
439     const auto mapping_q = dynamic_cast<const MappingQ<dim> *>(&mapping);
440 
441     AssertThrow(mapping_q, ExcMessage("Wrong mapping!"));
442 
443     const unsigned int fe_degree = mapping_q->get_degree();
444 
445     FE_Q<dim> geo_fe(fe_degree);
446 
447     {
448       const dealii::internal::MatrixFreeFunctions::ShapeInfo<double> shape_info(quadrature,
449                                                                                 geo_fe,
450                                                                                 0);
451       const auto             &shape_data = shape_info.get_shape_data();
452       std::vector<CeedScalar> q_ref_1d;
453       for (const auto q : shape_data.quadrature.get_points())
454         q_ref_1d.push_back(q(0));
455 
456       // transpose bases for compatibility with restriction
457       std::vector<CeedScalar> interp_1d(shape_data.shape_values.size());
458       std::vector<CeedScalar> grad_1d(shape_data.shape_gradients.size());
459       for (unsigned int i = 0; i < n_q_points; ++i)
460         for (unsigned int j = 0; j < fe_degree + 1; ++j)
461           {
462             interp_1d[j + i * (fe_degree + 1)] = shape_data.shape_values[j * n_q_points + i];
463             grad_1d[j + i * (fe_degree + 1)]   = shape_data.shape_gradients[j * n_q_points + i];
464           }
465 
466       CeedBasisCreateTensorH1(ceed,
467                               dim,
468                               dim,
469                               fe_degree + 1,
470                               n_q_points,
471                               interp_1d.data(),
472                               grad_1d.data(),
473                               q_ref_1d.data(),
474                               quadrature.get_tensor_basis()[0].get_weights().data(),
475                               &geo_basis);
476     }
477 
478     unsigned int n_local_active_cells = 0;
479 
480     for (const auto &cell : tria.active_cell_iterators())
481       if (cell->is_locally_owned())
482         n_local_active_cells++;
483 
484     std::vector<double>  geo_support_points;
485     std::vector<CeedInt> geo_indices;
486 
487     DoFHandler<dim> geo_dof_handler(tria);
488     geo_dof_handler.distribute_dofs(geo_fe);
489 
490     const auto geo_partitioner =
491       std::make_shared<Utilities::MPI::Partitioner>(geo_dof_handler.locally_owned_dofs(),
492                                                     DoFTools::extract_locally_active_dofs(
493                                                       geo_dof_handler),
494                                                     geo_dof_handler.get_communicator());
495 
496     geo_indices.reserve(n_local_active_cells * geo_fe.n_dofs_per_cell());
497 
498     const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree);
499 
500     FEValues<dim> fe_values(mapping,
501                             geo_fe,
502                             geo_fe.get_unit_support_points(),
503                             update_quadrature_points);
504 
505     std::vector<types::global_dof_index> local_indices(geo_fe.n_dofs_per_cell());
506 
507     const unsigned int n_points =
508       geo_partitioner->locally_owned_size() + geo_partitioner->n_ghost_indices();
509 
510     geo_support_points.resize(dim * n_points);
511 
512     for (const auto &cell : geo_dof_handler.active_cell_iterators())
513       if (cell->is_locally_owned())
514         {
515           fe_values.reinit(cell);
516           cell->get_dof_indices(local_indices);
517 
518           for (const auto i : dof_mapping)
519             {
520               const auto index = geo_partitioner->global_to_local(local_indices[i]);
521               geo_indices.emplace_back(index * dim);
522 
523               const auto point = fe_values.quadrature_point(i);
524 
525               for (unsigned int d = 0; d < dim; ++d)
526                 geo_support_points[index * dim + d] = point[d];
527             }
528         }
529 
530     metric_data_raw.resize(n_local_active_cells * quadrature.size() * n_components_metric);
531 
532     CeedInt strides[3] = {1,
533                           static_cast<int>(quadrature.size()),
534                           static_cast<int>(quadrature.size() * n_components_metric)};
535 
536     CeedVectorCreate(ceed, metric_data_raw.size(), &metric_data);
537     CeedVectorSetArray(metric_data, CEED_MEM_HOST, CEED_USE_POINTER, metric_data_raw.data());
538     CeedElemRestrictionCreateStrided(ceed,
539                                      n_local_active_cells,
540                                      quadrature.size(),
541                                      n_components_metric,
542                                      metric_data_raw.size(),
543                                      strides,
544                                      &metric_data_restriction);
545 
546     CeedVectorCreate(ceed, geo_support_points.size(), &node_coords);
547     CeedVectorSetArray(node_coords, CEED_MEM_HOST, CEED_USE_POINTER, geo_support_points.data());
548 
549     CeedElemRestrictionCreate(ceed,
550                               n_local_active_cells,
551                               geo_fe.n_dofs_per_cell(),
552                               dim,
553                               1,
554                               geo_support_points.size(),
555                               CEED_MEM_HOST,
556                               CEED_COPY_VALUES,
557                               geo_indices.data(),
558                               &geo_restriction);
559 
560     BuildContext build_ctx_data;
561     build_ctx_data.dim       = dim;
562     build_ctx_data.space_dim = dim;
563 
564     CeedQFunctionContextCreate(ceed, &build_ctx);
565     CeedQFunctionContextSetData(
566       build_ctx, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(build_ctx_data), &build_ctx_data);
567 
568     // 5) create q operation
569     if (bp <= BPType::BP2)
570       CeedQFunctionCreateInterior(ceed, 1, f_build_mass, f_build_mass_loc, &qf_build);
571     else
572       CeedQFunctionCreateInterior(ceed, 1, f_build_poisson, f_build_poisson_loc, &qf_build);
573 
574     CeedQFunctionAddInput(qf_build, "geo", dim * dim, CEED_EVAL_GRAD);
575     CeedQFunctionAddInput(qf_build, "weight", 1, CEED_EVAL_WEIGHT);
576     CeedQFunctionAddOutput(qf_build, "metric data", n_components_metric, CEED_EVAL_NONE);
577     CeedQFunctionSetContext(qf_build, build_ctx);
578 
579     // 6) put everything together
580     CeedOperatorCreate(ceed, qf_build, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_build);
581     CeedOperatorSetField(op_build, "geo", geo_restriction, geo_basis, CEED_VECTOR_ACTIVE);
582     CeedOperatorSetField(
583       op_build, "weight", CEED_ELEMRESTRICTION_NONE, geo_basis, CEED_VECTOR_NONE);
584     CeedOperatorSetField(
585       op_build, "metric data", metric_data_restriction, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE);
586 
587     CeedOperatorApply(op_build, node_coords, metric_data, CEED_REQUEST_IMMEDIATE);
588 
589     CeedVectorDestroy(&node_coords);
590     CeedVectorSyncArray(metric_data, CEED_MEM_HOST);
591     CeedVectorDestroy(&metric_data);
592     CeedElemRestrictionDestroy(&geo_restriction);
593     CeedElemRestrictionDestroy(&metric_data_restriction);
594     CeedBasisDestroy(&geo_basis);
595     CeedQFunctionContextDestroy(&build_ctx);
596     CeedQFunctionDestroy(&qf_build);
597     CeedOperatorDestroy(&op_build);
598 
599     return metric_data_raw;
600   }
601 
602   /**
603    * Mapping object passed to the constructor.
604    */
605   const Mapping<dim> &mapping;
606 
607   /**
608    * DoFHandler object passed to the constructor.
609    */
610   const DoFHandler<dim> &dof_handler;
611 
612   /**
613    * Constraints object passed to the constructor.
614    */
615   const AffineConstraints<Number> &constraints;
616 
617   /**
618    * Quadrature rule object passed to the constructor.
619    */
620   const Quadrature<dim> &quadrature;
621 
622   /**
623    * Selected BP.
624    */
625   const BPType bp;
626 
627   /**
628    * Resource name.
629    */
630   const std::string resource;
631 
632   /**
633    * Partitioner for distributed vectors.
634    */
635   std::shared_ptr<Utilities::MPI::Partitioner> partitioner;
636 
637   /**
638    * libCEED data structures.
639    */
640   Ceed                   ceed;
641   std::vector<double>    metric_data_raw;
642   std::array<CeedInt, 3> strides;
643   CeedVector             src_ceed;
644   CeedVector             dst_ceed;
645   CeedOperator           op_apply;
646 };
647 
648 #endif
649