1 // --------------------------------------------------------------------- 2 // 3 // Copyright (C) 2023 by the deal.II authors 4 // 5 // This file is part of the deal.II library. 6 // 7 // The deal.II library is free software; you can use it, redistribute 8 // it, and/or modify it under the terms of the GNU Lesser General 9 // Public License as published by the Free Software Foundation; either 10 // version 2.1 of the License, or (at your option) any later version. 11 // The full text of the license can be found in the file LICENSE.md at 12 // the top level directory of deal.II. 13 // 14 // Authors: Peter Munch, Martin Kronbichler 15 // 16 // --------------------------------------------------------------------- 17 18 #pragma once 19 #ifndef bps_ceed_h 20 # define bps_ceed_h 21 22 // deal.II includes 23 # include <deal.II/dofs/dof_tools.h> 24 25 # include <deal.II/fe/mapping.h> 26 27 # include <deal.II/lac/la_parallel_vector.h> 28 29 # include <deal.II/matrix_free/fe_evaluation.h> 30 # include <deal.II/matrix_free/matrix_free.h> 31 # include <deal.II/matrix_free/shape_info.h> 32 # include <deal.II/matrix_free/tools.h> 33 34 // local includes 35 # include "bps.h" 36 37 // libCEED includes 38 # include <ceed.h> 39 # include <ceed/backend.h> 40 41 // QFunction source 42 # include "bps-qfunctions.h" 43 44 using namespace dealii; 45 46 47 /** 48 * Operator implementation using libCEED. 49 */ 50 template <int dim, typename Number, typename MemorySpace = MemorySpace::Host> 51 class OperatorCeed : public OperatorBase<Number, MemorySpace> 52 { 53 public: 54 using VectorType = typename OperatorBase<Number, MemorySpace>::VectorType; 55 56 /** 57 * Constructor. 58 */ OperatorCeed(const Mapping<dim> & mapping,const DoFHandler<dim> & dof_handler,const AffineConstraints<Number> & constraints,const Quadrature<dim> & quadrature,const BPType & bp,const std::string & resource)59 OperatorCeed(const Mapping<dim> &mapping, 60 const DoFHandler<dim> &dof_handler, 61 const AffineConstraints<Number> &constraints, 62 const Quadrature<dim> &quadrature, 63 const BPType &bp, 64 const std::string &resource) 65 : mapping(mapping) 66 , dof_handler(dof_handler) 67 , constraints(constraints) 68 , quadrature(quadrature) 69 , bp(bp) 70 , resource(resource) 71 { 72 reinit(); 73 } 74 75 /** 76 * Destructor. 77 */ ~OperatorCeed()78 ~OperatorCeed() 79 { 80 CeedVectorDestroy(&src_ceed); 81 CeedVectorDestroy(&dst_ceed); 82 CeedOperatorDestroy(&op_apply); 83 CeedDestroy(&ceed); 84 } 85 86 /** 87 * Initialized internal data structures, particularly, libCEED. 88 */ 89 void reinit()90 reinit() override 91 { 92 CeedVector metric_data; 93 CeedBasis sol_basis; 94 CeedElemRestriction sol_restriction; 95 CeedElemRestriction metric_data_restriction; 96 BuildContext build_ctx_data; 97 CeedQFunctionContext build_ctx; 98 CeedQFunction qf_apply; 99 100 const auto &tria = dof_handler.get_triangulation(); 101 const auto &fe = dof_handler.get_fe(); 102 103 const auto n_components = fe.n_components(); 104 105 if (bp == BPType::BP1 || bp == BPType::BP3 || bp == BPType::BP5) 106 { 107 AssertThrow(n_components == 1, ExcInternalError()); 108 } 109 else 110 { 111 AssertThrow(n_components == dim, ExcInternalError()); 112 } 113 114 // 1) create CEED instance -> "MatrixFree" 115 const char *ceed_spec = resource.c_str(); 116 CeedInit(ceed_spec, &ceed); 117 118 // 2) create shape functions -> "ShapeInfo" 119 const unsigned int fe_degree = fe.tensor_degree(); 120 const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size(); 121 { 122 const dealii::internal::MatrixFreeFunctions::ShapeInfo<double> shape_info(quadrature, fe, 0); 123 const auto &shape_data = shape_info.get_shape_data(); 124 std::vector<CeedScalar> q_ref_1d; 125 for (const auto q : shape_data.quadrature.get_points()) 126 q_ref_1d.push_back(q(0)); 127 128 // transpose bases for compatibility with restriction 129 std::vector<CeedScalar> interp_1d(shape_data.shape_values.size()); 130 std::vector<CeedScalar> grad_1d(shape_data.shape_gradients.size()); 131 for (unsigned int i = 0; i < n_q_points; ++i) 132 for (unsigned int j = 0; j < fe_degree + 1; ++j) 133 { 134 interp_1d[j + i * (fe_degree + 1)] = shape_data.shape_values[j * n_q_points + i]; 135 grad_1d[j + i * (fe_degree + 1)] = shape_data.shape_gradients[j * n_q_points + i]; 136 } 137 138 CeedBasisCreateTensorH1(ceed, 139 dim, 140 n_components, 141 fe_degree + 1, 142 n_q_points, 143 interp_1d.data(), 144 grad_1d.data(), 145 q_ref_1d.data(), 146 quadrature.get_tensor_basis()[0].get_weights().data(), 147 &sol_basis); 148 } 149 150 // 3) create restriction matrix -> DoFInfo 151 unsigned int n_local_active_cells = 0; 152 153 for (const auto &cell : dof_handler.active_cell_iterators()) 154 if (cell->is_locally_owned()) 155 n_local_active_cells++; 156 157 partitioner = 158 std::make_shared<Utilities::MPI::Partitioner>(dof_handler.locally_owned_dofs(), 159 DoFTools::extract_locally_active_dofs( 160 dof_handler), 161 dof_handler.get_communicator()); 162 163 std::vector<CeedInt> indices; 164 indices.reserve(n_local_active_cells * fe.n_dofs_per_cell() / n_components); 165 166 const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree); 167 168 std::vector<types::global_dof_index> local_indices(fe.n_dofs_per_cell()); 169 170 for (const auto &cell : dof_handler.active_cell_iterators()) 171 if (cell->is_locally_owned()) 172 { 173 cell->get_dof_indices(local_indices); 174 175 for (const auto i : dof_mapping) 176 indices.emplace_back( 177 partitioner->global_to_local(local_indices[fe.component_to_system_index(0, i)])); 178 } 179 180 CeedElemRestrictionCreate(ceed, 181 n_local_active_cells, 182 fe.n_dofs_per_cell() / n_components, 183 n_components, 184 1, 185 this->extended_local_size(), 186 CEED_MEM_HOST, 187 CEED_COPY_VALUES, 188 indices.data(), 189 &sol_restriction); 190 191 // 4) create mapping -> MappingInfo 192 const unsigned int n_components_metric = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2); 193 194 metric_data_raw = compute_metric_data(ceed, mapping, tria, quadrature, bp); 195 196 strides = {{1, 197 static_cast<int>(quadrature.size()), 198 static_cast<int>(quadrature.size() * n_components_metric)}}; 199 CeedVectorCreate(ceed, metric_data_raw.size(), &metric_data); 200 CeedVectorSetArray(metric_data, CEED_MEM_HOST, CEED_USE_POINTER, metric_data_raw.data()); 201 CeedElemRestrictionCreateStrided(ceed, 202 n_local_active_cells, 203 quadrature.size(), 204 n_components_metric, 205 metric_data_raw.size(), 206 strides.data(), 207 &metric_data_restriction); 208 209 build_ctx_data.dim = dim; 210 build_ctx_data.space_dim = dim; 211 212 CeedQFunctionContextCreate(ceed, &build_ctx); 213 CeedQFunctionContextSetData( 214 build_ctx, CEED_MEM_HOST, CEED_COPY_VALUES, sizeof(build_ctx_data), &build_ctx_data); 215 216 // 5) create q operation 217 if (bp == BPType::BP1) 218 CeedQFunctionCreateInterior(ceed, 1, f_apply_mass, f_apply_mass_loc, &qf_apply); 219 else if (bp == BPType::BP2) 220 CeedQFunctionCreateInterior(ceed, 1, f_apply_mass_vec, f_apply_mass_vec_loc, &qf_apply); 221 else if (bp == BPType::BP3 || bp == BPType::BP5) 222 CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson, f_apply_poisson_loc, &qf_apply); 223 else if (bp == BPType::BP4 || bp == BPType::BP6) 224 CeedQFunctionCreateInterior(ceed, 1, f_apply_poisson_vec, f_apply_poisson_vec_loc, &qf_apply); 225 else 226 AssertThrow(false, ExcInternalError()); 227 228 if (bp <= BPType::BP2) 229 CeedQFunctionAddInput(qf_apply, "u", n_components, CEED_EVAL_INTERP); 230 else 231 CeedQFunctionAddInput(qf_apply, "u", dim * n_components, CEED_EVAL_GRAD); 232 233 CeedQFunctionAddInput(qf_apply, "metric data", n_components_metric, CEED_EVAL_NONE); 234 235 if (bp <= BPType::BP2) 236 CeedQFunctionAddOutput(qf_apply, "v", n_components, CEED_EVAL_INTERP); 237 else 238 CeedQFunctionAddOutput(qf_apply, "v", dim * n_components, CEED_EVAL_GRAD); 239 240 CeedQFunctionSetContext(qf_apply, build_ctx); 241 242 // 6) put everything together 243 CeedOperatorCreate(ceed, qf_apply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_apply); 244 245 CeedOperatorSetField(op_apply, "u", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE); 246 CeedOperatorSetField( 247 op_apply, "metric data", metric_data_restriction, CEED_BASIS_NONE, metric_data); 248 CeedOperatorSetField(op_apply, "v", sol_restriction, sol_basis, CEED_VECTOR_ACTIVE); 249 250 // 7) libCEED vectors 251 CeedElemRestrictionCreateVector(sol_restriction, &src_ceed, NULL); 252 CeedElemRestrictionCreateVector(sol_restriction, &dst_ceed, NULL); 253 254 // 8) cleanup 255 CeedVectorDestroy(&metric_data); 256 CeedElemRestrictionDestroy(&metric_data_restriction); 257 CeedElemRestrictionDestroy(&sol_restriction); 258 CeedBasisDestroy(&sol_basis); 259 CeedQFunctionContextDestroy(&build_ctx); 260 CeedQFunctionDestroy(&qf_apply); 261 } 262 263 /** 264 * Perform matrix-vector product. 265 */ 266 void vmult(VectorType & dst,const VectorType & src)267 vmult(VectorType &dst, const VectorType &src) const override 268 { 269 // communicate: update ghost values 270 src.update_ghost_values(); 271 272 // pass memory buffers to libCEED 273 VectorTypeCeed x(src_ceed); 274 VectorTypeCeed y(dst_ceed); 275 x.import_array(src, CEED_MEM_HOST); 276 y.import_array(dst, CEED_MEM_HOST); 277 278 // apply operator 279 CeedOperatorApply(op_apply, x(), y(), CEED_REQUEST_IMMEDIATE); 280 281 // pull arrays back to deal.II 282 x.take_array(); 283 y.take_array(); 284 285 // communicate: compress 286 src.zero_out_ghost_values(); 287 dst.compress(VectorOperation::add); 288 289 // apply constraints: we assume homogeneous DBC 290 constraints.set_zero(dst); 291 } 292 293 /** 294 * Initialized vector. 295 */ 296 void initialize_dof_vector(VectorType & vec)297 initialize_dof_vector(VectorType &vec) const override 298 { 299 vec.reinit(partitioner); 300 } 301 302 /** 303 * Compute inverse of diagonal. 304 */ 305 void compute_inverse_diagonal(VectorType & diagonal)306 compute_inverse_diagonal(VectorType &diagonal) const override 307 { 308 this->initialize_dof_vector(diagonal); 309 310 // pass memory buffer to libCEED 311 VectorTypeCeed y(dst_ceed); 312 y.import_array(diagonal, CEED_MEM_HOST); 313 314 CeedOperatorLinearAssembleDiagonal(op_apply, y(), CEED_REQUEST_IMMEDIATE); 315 316 // pull array back to deal.II 317 y.take_array(); 318 319 diagonal.compress(VectorOperation::add); 320 321 // apply constraints: we assume homogeneous DBC 322 constraints.set_zero(diagonal); 323 324 for (auto &i : diagonal) 325 i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0; 326 } 327 328 private: 329 /** 330 * Wrapper around a deal.II vector to create a libCEED vector view. 331 */ 332 class VectorTypeCeed 333 { 334 public: 335 /** 336 * Constructor. 337 */ VectorTypeCeed(const CeedVector & vec_orig)338 VectorTypeCeed(const CeedVector &vec_orig) 339 { 340 vec_ceed = NULL; 341 CeedVectorReferenceCopy(vec_orig, &vec_ceed); 342 } 343 344 /** 345 * Return libCEED vector view. 346 */ 347 CeedVector & operator()348 operator()() 349 { 350 return vec_ceed; 351 } 352 353 /** 354 * Set deal.II memory in libCEED vector. 355 */ 356 void import_array(const VectorType & vec,const CeedMemType space)357 import_array(const VectorType &vec, const CeedMemType space) 358 { 359 mem_space = space; 360 CeedVectorSetArray(vec_ceed, mem_space, CEED_USE_POINTER, vec.get_values()); 361 } 362 363 /** 364 * Sync memory from device to host. 365 */ 366 void sync_array()367 sync_array() 368 { 369 CeedVectorSyncArray(vec_ceed, mem_space); 370 } 371 372 /** 373 * Take previously set deal.II array from libCEED vector 374 */ 375 void take_array()376 take_array() 377 { 378 CeedScalar *ptr; 379 CeedVectorTakeArray(vec_ceed, mem_space, &ptr); 380 } 381 382 /** 383 * Destructor: destroy vector view. 384 */ ~VectorTypeCeed()385 ~VectorTypeCeed() 386 { 387 bool has_array; 388 CeedVectorHasBorrowedArrayOfType(vec_ceed, mem_space, &has_array); 389 if (has_array) 390 { 391 CeedScalar *ptr; 392 CeedVectorTakeArray(vec_ceed, mem_space, &ptr); 393 } 394 CeedVectorDestroy(&vec_ceed); 395 } 396 397 private: 398 /** 399 * libCEED vector view. 400 */ 401 CeedMemType mem_space; 402 CeedVector vec_ceed; 403 }; 404 405 /** 406 * Number of locally active DoFs. 407 */ 408 unsigned int extended_local_size()409 extended_local_size() const 410 { 411 return partitioner->locally_owned_size() + partitioner->n_ghost_indices(); 412 } 413 414 /** 415 * Compute metric data: Jacobian, ... 416 */ 417 static std::vector<double> compute_metric_data(const Ceed & ceed,const Mapping<dim> & mapping,const Triangulation<dim> & tria,const Quadrature<dim> & quadrature,const BPType bp)418 compute_metric_data(const Ceed &ceed, 419 const Mapping<dim> &mapping, 420 const Triangulation<dim> &tria, 421 const Quadrature<dim> &quadrature, 422 const BPType bp) 423 { 424 std::vector<double> metric_data_raw; 425 426 CeedBasis geo_basis; 427 CeedVector metric_data; 428 CeedElemRestriction metric_data_restriction; 429 CeedVector node_coords; 430 CeedElemRestriction geo_restriction; 431 CeedQFunctionContext build_ctx; 432 CeedQFunction qf_build; 433 CeedOperator op_build; 434 435 const unsigned int n_q_points = quadrature.get_tensor_basis()[0].size(); 436 437 const unsigned int n_components_metric = (bp <= BPType::BP2) ? 1 : (dim * (dim + 1) / 2); 438 439 const auto mapping_q = dynamic_cast<const MappingQ<dim> *>(&mapping); 440 441 AssertThrow(mapping_q, ExcMessage("Wrong mapping!")); 442 443 const unsigned int fe_degree = mapping_q->get_degree(); 444 445 FE_Q<dim> geo_fe(fe_degree); 446 447 { 448 const dealii::internal::MatrixFreeFunctions::ShapeInfo<double> shape_info(quadrature, 449 geo_fe, 450 0); 451 const auto &shape_data = shape_info.get_shape_data(); 452 std::vector<CeedScalar> q_ref_1d; 453 for (const auto q : shape_data.quadrature.get_points()) 454 q_ref_1d.push_back(q(0)); 455 456 // transpose bases for compatibility with restriction 457 std::vector<CeedScalar> interp_1d(shape_data.shape_values.size()); 458 std::vector<CeedScalar> grad_1d(shape_data.shape_gradients.size()); 459 for (unsigned int i = 0; i < n_q_points; ++i) 460 for (unsigned int j = 0; j < fe_degree + 1; ++j) 461 { 462 interp_1d[j + i * (fe_degree + 1)] = shape_data.shape_values[j * n_q_points + i]; 463 grad_1d[j + i * (fe_degree + 1)] = shape_data.shape_gradients[j * n_q_points + i]; 464 } 465 466 CeedBasisCreateTensorH1(ceed, 467 dim, 468 dim, 469 fe_degree + 1, 470 n_q_points, 471 interp_1d.data(), 472 grad_1d.data(), 473 q_ref_1d.data(), 474 quadrature.get_tensor_basis()[0].get_weights().data(), 475 &geo_basis); 476 } 477 478 unsigned int n_local_active_cells = 0; 479 480 for (const auto &cell : tria.active_cell_iterators()) 481 if (cell->is_locally_owned()) 482 n_local_active_cells++; 483 484 std::vector<double> geo_support_points; 485 std::vector<CeedInt> geo_indices; 486 487 DoFHandler<dim> geo_dof_handler(tria); 488 geo_dof_handler.distribute_dofs(geo_fe); 489 490 const auto geo_partitioner = 491 std::make_shared<Utilities::MPI::Partitioner>(geo_dof_handler.locally_owned_dofs(), 492 DoFTools::extract_locally_active_dofs( 493 geo_dof_handler), 494 geo_dof_handler.get_communicator()); 495 496 geo_indices.reserve(n_local_active_cells * geo_fe.n_dofs_per_cell()); 497 498 const auto dof_mapping = FETools::lexicographic_to_hierarchic_numbering<dim>(fe_degree); 499 500 FEValues<dim> fe_values(mapping, 501 geo_fe, 502 geo_fe.get_unit_support_points(), 503 update_quadrature_points); 504 505 std::vector<types::global_dof_index> local_indices(geo_fe.n_dofs_per_cell()); 506 507 const unsigned int n_points = 508 geo_partitioner->locally_owned_size() + geo_partitioner->n_ghost_indices(); 509 510 geo_support_points.resize(dim * n_points); 511 512 for (const auto &cell : geo_dof_handler.active_cell_iterators()) 513 if (cell->is_locally_owned()) 514 { 515 fe_values.reinit(cell); 516 cell->get_dof_indices(local_indices); 517 518 for (const auto i : dof_mapping) 519 { 520 const auto index = geo_partitioner->global_to_local(local_indices[i]); 521 geo_indices.emplace_back(index * dim); 522 523 const auto point = fe_values.quadrature_point(i); 524 525 for (unsigned int d = 0; d < dim; ++d) 526 geo_support_points[index * dim + d] = point[d]; 527 } 528 } 529 530 metric_data_raw.resize(n_local_active_cells * quadrature.size() * n_components_metric); 531 532 CeedInt strides[3] = {1, 533 static_cast<int>(quadrature.size()), 534 static_cast<int>(quadrature.size() * n_components_metric)}; 535 536 CeedVectorCreate(ceed, metric_data_raw.size(), &metric_data); 537 CeedVectorSetArray(metric_data, CEED_MEM_HOST, CEED_USE_POINTER, metric_data_raw.data()); 538 CeedElemRestrictionCreateStrided(ceed, 539 n_local_active_cells, 540 quadrature.size(), 541 n_components_metric, 542 metric_data_raw.size(), 543 strides, 544 &metric_data_restriction); 545 546 CeedVectorCreate(ceed, geo_support_points.size(), &node_coords); 547 CeedVectorSetArray(node_coords, CEED_MEM_HOST, CEED_USE_POINTER, geo_support_points.data()); 548 549 CeedElemRestrictionCreate(ceed, 550 n_local_active_cells, 551 geo_fe.n_dofs_per_cell(), 552 dim, 553 1, 554 geo_support_points.size(), 555 CEED_MEM_HOST, 556 CEED_COPY_VALUES, 557 geo_indices.data(), 558 &geo_restriction); 559 560 BuildContext build_ctx_data; 561 build_ctx_data.dim = dim; 562 build_ctx_data.space_dim = dim; 563 564 CeedQFunctionContextCreate(ceed, &build_ctx); 565 CeedQFunctionContextSetData( 566 build_ctx, CEED_MEM_HOST, CEED_USE_POINTER, sizeof(build_ctx_data), &build_ctx_data); 567 568 // 5) create q operation 569 if (bp <= BPType::BP2) 570 CeedQFunctionCreateInterior(ceed, 1, f_build_mass, f_build_mass_loc, &qf_build); 571 else 572 CeedQFunctionCreateInterior(ceed, 1, f_build_poisson, f_build_poisson_loc, &qf_build); 573 574 CeedQFunctionAddInput(qf_build, "geo", dim * dim, CEED_EVAL_GRAD); 575 CeedQFunctionAddInput(qf_build, "weight", 1, CEED_EVAL_WEIGHT); 576 CeedQFunctionAddOutput(qf_build, "metric data", n_components_metric, CEED_EVAL_NONE); 577 CeedQFunctionSetContext(qf_build, build_ctx); 578 579 // 6) put everything together 580 CeedOperatorCreate(ceed, qf_build, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, &op_build); 581 CeedOperatorSetField(op_build, "geo", geo_restriction, geo_basis, CEED_VECTOR_ACTIVE); 582 CeedOperatorSetField( 583 op_build, "weight", CEED_ELEMRESTRICTION_NONE, geo_basis, CEED_VECTOR_NONE); 584 CeedOperatorSetField( 585 op_build, "metric data", metric_data_restriction, CEED_BASIS_NONE, CEED_VECTOR_ACTIVE); 586 587 CeedOperatorApply(op_build, node_coords, metric_data, CEED_REQUEST_IMMEDIATE); 588 589 CeedVectorDestroy(&node_coords); 590 CeedVectorSyncArray(metric_data, CEED_MEM_HOST); 591 CeedVectorDestroy(&metric_data); 592 CeedElemRestrictionDestroy(&geo_restriction); 593 CeedElemRestrictionDestroy(&metric_data_restriction); 594 CeedBasisDestroy(&geo_basis); 595 CeedQFunctionContextDestroy(&build_ctx); 596 CeedQFunctionDestroy(&qf_build); 597 CeedOperatorDestroy(&op_build); 598 599 return metric_data_raw; 600 } 601 602 /** 603 * Mapping object passed to the constructor. 604 */ 605 const Mapping<dim> &mapping; 606 607 /** 608 * DoFHandler object passed to the constructor. 609 */ 610 const DoFHandler<dim> &dof_handler; 611 612 /** 613 * Constraints object passed to the constructor. 614 */ 615 const AffineConstraints<Number> &constraints; 616 617 /** 618 * Quadrature rule object passed to the constructor. 619 */ 620 const Quadrature<dim> &quadrature; 621 622 /** 623 * Selected BP. 624 */ 625 const BPType bp; 626 627 /** 628 * Resource name. 629 */ 630 const std::string resource; 631 632 /** 633 * Partitioner for distributed vectors. 634 */ 635 std::shared_ptr<Utilities::MPI::Partitioner> partitioner; 636 637 /** 638 * libCEED data structures. 639 */ 640 Ceed ceed; 641 std::vector<double> metric_data_raw; 642 std::array<CeedInt, 3> strides; 643 CeedVector src_ceed; 644 CeedVector dst_ceed; 645 CeedOperator op_apply; 646 }; 647 648 #endif 649